[petsc-dev] PetscFindInt
Jed Brown
jedbrown at mcs.anl.gov
Sat Sep 22 15:31:10 CDT 2012
On Sat, Sep 22, 2012 at 2:54 PM, Matthew Knepley <knepley at gmail.com> wrote:
> Changed to mid = ((unsigned int)low + (unsigned int)high)) >> 1;, which
> is in the comment above.
>
Well, you wrote
PetscInt imid = ((unsigned PetscInt) imin + (unsigned PetscInt) imax) >>
1;
which doesn't compile because the unsigned keyword does not apply to
typedefs.
>
>
>> Your implementation also does unnecessary arithmetic, but due to possible
>> overflow, the compiler may not be able to remove it. Why not use the tight
>> structure I suggested?
>>
>
> http://en.wikipedia.org/wiki/Binary_search
The paragon of tight code... I prefer to avoid mixing C and Fortran
conventions,
PetscErrorCode PetscFindInt(PetscInt key,PetscInt n,const PetscInt
ii[],PetscInt *loc)
{
PetscInt lo = 0,hi = n;
/* guards */
while (hi - lo > 1) {
PetscInt mid = lo + (hi - lo) / 2;
if (key < ii[mid]) hi = mid;
else lo = mid;
}
*loc = key == ii[lo] ? lo : -1;
Under gcc -O3 code is
0000000000000000 <PetscFindInt_Jed> xor eax,eax
0000000000000002 <PetscFindInt_Jed+0x2> jmp 000000000000001d
<PetscFindInt_Jed+0x1d>
0000000000000004 <PetscFindInt_Jed+0x4> nop DWORD PTR [rax+0x0]
0000000000000008 <PetscFindInt_Jed+0x8> sar r8d,1
000000000000000b <PetscFindInt_Jed+0xb> add r8d,eax
000000000000000e <PetscFindInt_Jed+0xe> movsxd r9,r8d
0000000000000011 <PetscFindInt_Jed+0x11> cmp DWORD PTR [rdx+r9*4],edi
0000000000000015 <PetscFindInt_Jed+0x15> cmovle eax,r8d
0000000000000019 <PetscFindInt_Jed+0x19> cmovg esi,r8d
000000000000001d <PetscFindInt_Jed+0x1d> mov r8d,esi
0000000000000020 <PetscFindInt_Jed+0x20> sub r8d,eax
0000000000000023 <PetscFindInt_Jed+0x23> cmp r8d,0x1
0000000000000027 <PetscFindInt_Jed+0x27> jg 0000000000000008
<PetscFindInt_Jed+0x8>
0000000000000029 <PetscFindInt_Jed+0x29> movsxd rsi,eax
000000000000002c <PetscFindInt_Jed+0x2c> cmp DWORD PTR [rdx+rsi*4],edi
000000000000002f <PetscFindInt_Jed+0x2f> mov edx,0xffffffff
0000000000000034 <PetscFindInt_Jed+0x34> cmovne eax,edx
0000000000000037 <PetscFindInt_Jed+0x37> mov DWORD PTR [rcx],eax
0000000000000039 <PetscFindInt_Jed+0x39> xor eax,eax
000000000000003b <PetscFindInt_Jed+0x3b> ret
000000000000003c <PetscFindInt_Jed+0x3c> nop DWORD PTR [rax+0x0]
When I strip guards out of your version, I get the following which is twice
as big and has two conditional jumps per inner loop (at least the
conditional forward jump is predicted as not taken). We can benchmark, but
I'll be shocked if my version is not faster.
0000000000000040 <PetscFindInt_Matt> sub esi,0x1
0000000000000043 <PetscFindInt_Matt+0x3> xor eax,eax
0000000000000045 <PetscFindInt_Matt+0x5> jmp 0000000000000054
<PetscFindInt_Matt+0x14>
0000000000000047 <PetscFindInt_Matt+0x7> nop WORD PTR [rax+rax*1+0x0]
0000000000000050 <PetscFindInt_Matt+0x10> lea eax,[r10+0x1]
0000000000000054 <PetscFindInt_Matt+0x14> cmp esi,eax
0000000000000056 <PetscFindInt_Matt+0x16> jle 000000000000008d
<PetscFindInt_Matt+0x4d>
0000000000000058 <PetscFindInt_Matt+0x18> lea r8d,[rax+rsi*1]
000000000000005c <PetscFindInt_Matt+0x1c> shr r8d,1
000000000000005f <PetscFindInt_Matt+0x1f> mov r9d,r8d
0000000000000062 <PetscFindInt_Matt+0x22> mov r10d,r8d
0000000000000065 <PetscFindInt_Matt+0x25> cmp edi,DWORD PTR [rdx+r9*4]
0000000000000069 <PetscFindInt_Matt+0x29> jg 0000000000000050
<PetscFindInt_Matt+0x10>
000000000000006b <PetscFindInt_Matt+0x2b> cmp eax,r8d
000000000000006e <PetscFindInt_Matt+0x2e> jge 000000000000008a
<PetscFindInt_Matt+0x4a>
0000000000000070 <PetscFindInt_Matt+0x30> lea r9d,[r8+rax*1]
0000000000000074 <PetscFindInt_Matt+0x34> shr r9d,1
0000000000000077 <PetscFindInt_Matt+0x37> mov esi,r9d
000000000000007a <PetscFindInt_Matt+0x3a> mov r10d,r9d
000000000000007d <PetscFindInt_Matt+0x3d> cmp DWORD PTR [rdx+rsi*4],edi
0000000000000080 <PetscFindInt_Matt+0x40> jl 00000000000000a0
<PetscFindInt_Matt+0x60>
0000000000000082 <PetscFindInt_Matt+0x42> mov r8d,r9d
0000000000000085 <PetscFindInt_Matt+0x45> cmp eax,r8d
0000000000000088 <PetscFindInt_Matt+0x48> jl 0000000000000070
<PetscFindInt_Matt+0x30>
000000000000008a <PetscFindInt_Matt+0x4a> mov esi,r8d
000000000000008d <PetscFindInt_Matt+0x4d> cmp eax,esi
000000000000008f <PetscFindInt_Matt+0x4f> je 00000000000000a8
<PetscFindInt_Matt+0x68>
0000000000000091 <PetscFindInt_Matt+0x51> mov eax,0xffffffff
0000000000000096 <PetscFindInt_Matt+0x56> mov DWORD PTR [rcx],eax
0000000000000098 <PetscFindInt_Matt+0x58> xor eax,eax
000000000000009a <PetscFindInt_Matt+0x5a> ret
000000000000009b <PetscFindInt_Matt+0x5b> nop DWORD PTR [rax+rax*1+0x0]
00000000000000a0 <PetscFindInt_Matt+0x60> mov esi,r8d
00000000000000a3 <PetscFindInt_Matt+0x63> jmp 0000000000000050
<PetscFindInt_Matt+0x10>
00000000000000a5 <PetscFindInt_Matt+0x65> nop DWORD PTR [rax]
00000000000000a8 <PetscFindInt_Matt+0x68> movsxd rsi,eax
00000000000000ab <PetscFindInt_Matt+0x6b> cmp DWORD PTR [rdx+rsi*4],edi
00000000000000ae <PetscFindInt_Matt+0x6e> mov edx,0xffffffff
00000000000000b3 <PetscFindInt_Matt+0x73> cmovne eax,edx
00000000000000b6 <PetscFindInt_Matt+0x76> mov DWORD PTR [rcx],eax
00000000000000b8 <PetscFindInt_Matt+0x78> xor eax,eax
00000000000000ba <PetscFindInt_Matt+0x7a> ret
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/petsc-dev/attachments/20120922/f296b0af/attachment.html>
More information about the petsc-dev
mailing list