[petsc-dev] PetscFindInt

Jed Brown jedbrown at mcs.anl.gov
Sat Sep 22 15:31:10 CDT 2012


On Sat, Sep 22, 2012 at 2:54 PM, Matthew Knepley <knepley at gmail.com> wrote:

> Changed to mid = ((unsigned int)low + (unsigned int)high)) >> 1;, which
> is in the comment above.
>

Well, you wrote

  PetscInt imid = ((unsigned PetscInt) imin + (unsigned PetscInt) imax) >>
1;

which doesn't compile because the unsigned keyword does not apply to
typedefs.


>
>
>> Your implementation also does unnecessary arithmetic, but due to possible
>> overflow, the compiler may not be able to remove it. Why not use the tight
>> structure I suggested?
>>
>
> http://en.wikipedia.org/wiki/Binary_search


The paragon of tight code... I prefer to avoid mixing C and Fortran
conventions,

PetscErrorCode PetscFindInt(PetscInt key,PetscInt n,const PetscInt
ii[],PetscInt *loc)
{
  PetscInt lo = 0,hi = n;
  /* guards */
  while (hi - lo > 1) {
    PetscInt mid = lo + (hi - lo) / 2;
    if (key < ii[mid]) hi = mid;
    else               lo = mid;
  }
  *loc = key == ii[lo] ? lo : -1;


Under gcc -O3 code is
0000000000000000 <PetscFindInt_Jed> xor    eax,eax
0000000000000002 <PetscFindInt_Jed+0x2> jmp    000000000000001d
<PetscFindInt_Jed+0x1d>
0000000000000004 <PetscFindInt_Jed+0x4> nop    DWORD PTR [rax+0x0]
0000000000000008 <PetscFindInt_Jed+0x8> sar    r8d,1
000000000000000b <PetscFindInt_Jed+0xb> add    r8d,eax
000000000000000e <PetscFindInt_Jed+0xe> movsxd r9,r8d
0000000000000011 <PetscFindInt_Jed+0x11> cmp    DWORD PTR [rdx+r9*4],edi
0000000000000015 <PetscFindInt_Jed+0x15> cmovle eax,r8d
0000000000000019 <PetscFindInt_Jed+0x19> cmovg  esi,r8d
000000000000001d <PetscFindInt_Jed+0x1d> mov    r8d,esi
0000000000000020 <PetscFindInt_Jed+0x20> sub    r8d,eax
0000000000000023 <PetscFindInt_Jed+0x23> cmp    r8d,0x1
0000000000000027 <PetscFindInt_Jed+0x27> jg     0000000000000008
<PetscFindInt_Jed+0x8>
0000000000000029 <PetscFindInt_Jed+0x29> movsxd rsi,eax
000000000000002c <PetscFindInt_Jed+0x2c> cmp    DWORD PTR [rdx+rsi*4],edi
000000000000002f <PetscFindInt_Jed+0x2f> mov    edx,0xffffffff
0000000000000034 <PetscFindInt_Jed+0x34> cmovne eax,edx
0000000000000037 <PetscFindInt_Jed+0x37> mov    DWORD PTR [rcx],eax
0000000000000039 <PetscFindInt_Jed+0x39> xor    eax,eax
000000000000003b <PetscFindInt_Jed+0x3b> ret
000000000000003c <PetscFindInt_Jed+0x3c> nop    DWORD PTR [rax+0x0]

When I strip guards out of your version, I get the following which is twice
as big and has two conditional jumps per inner loop (at least the
conditional forward jump is predicted as not taken). We can benchmark, but
I'll be shocked if my version is not faster.

0000000000000040 <PetscFindInt_Matt> sub    esi,0x1
0000000000000043 <PetscFindInt_Matt+0x3> xor    eax,eax
0000000000000045 <PetscFindInt_Matt+0x5> jmp    0000000000000054
<PetscFindInt_Matt+0x14>
0000000000000047 <PetscFindInt_Matt+0x7> nop    WORD PTR [rax+rax*1+0x0]
0000000000000050 <PetscFindInt_Matt+0x10> lea    eax,[r10+0x1]
0000000000000054 <PetscFindInt_Matt+0x14> cmp    esi,eax
0000000000000056 <PetscFindInt_Matt+0x16> jle    000000000000008d
<PetscFindInt_Matt+0x4d>
0000000000000058 <PetscFindInt_Matt+0x18> lea    r8d,[rax+rsi*1]
000000000000005c <PetscFindInt_Matt+0x1c> shr    r8d,1
000000000000005f <PetscFindInt_Matt+0x1f> mov    r9d,r8d
0000000000000062 <PetscFindInt_Matt+0x22> mov    r10d,r8d
0000000000000065 <PetscFindInt_Matt+0x25> cmp    edi,DWORD PTR [rdx+r9*4]
0000000000000069 <PetscFindInt_Matt+0x29> jg     0000000000000050
<PetscFindInt_Matt+0x10>
000000000000006b <PetscFindInt_Matt+0x2b> cmp    eax,r8d
000000000000006e <PetscFindInt_Matt+0x2e> jge    000000000000008a
<PetscFindInt_Matt+0x4a>
0000000000000070 <PetscFindInt_Matt+0x30> lea    r9d,[r8+rax*1]
0000000000000074 <PetscFindInt_Matt+0x34> shr    r9d,1
0000000000000077 <PetscFindInt_Matt+0x37> mov    esi,r9d
000000000000007a <PetscFindInt_Matt+0x3a> mov    r10d,r9d
000000000000007d <PetscFindInt_Matt+0x3d> cmp    DWORD PTR [rdx+rsi*4],edi
0000000000000080 <PetscFindInt_Matt+0x40> jl     00000000000000a0
<PetscFindInt_Matt+0x60>
0000000000000082 <PetscFindInt_Matt+0x42> mov    r8d,r9d
0000000000000085 <PetscFindInt_Matt+0x45> cmp    eax,r8d
0000000000000088 <PetscFindInt_Matt+0x48> jl     0000000000000070
<PetscFindInt_Matt+0x30>
000000000000008a <PetscFindInt_Matt+0x4a> mov    esi,r8d
000000000000008d <PetscFindInt_Matt+0x4d> cmp    eax,esi
000000000000008f <PetscFindInt_Matt+0x4f> je     00000000000000a8
<PetscFindInt_Matt+0x68>
0000000000000091 <PetscFindInt_Matt+0x51> mov    eax,0xffffffff
0000000000000096 <PetscFindInt_Matt+0x56> mov    DWORD PTR [rcx],eax
0000000000000098 <PetscFindInt_Matt+0x58> xor    eax,eax
000000000000009a <PetscFindInt_Matt+0x5a> ret
000000000000009b <PetscFindInt_Matt+0x5b> nop    DWORD PTR [rax+rax*1+0x0]
00000000000000a0 <PetscFindInt_Matt+0x60> mov    esi,r8d
00000000000000a3 <PetscFindInt_Matt+0x63> jmp    0000000000000050
<PetscFindInt_Matt+0x10>
00000000000000a5 <PetscFindInt_Matt+0x65> nop    DWORD PTR [rax]
00000000000000a8 <PetscFindInt_Matt+0x68> movsxd rsi,eax
00000000000000ab <PetscFindInt_Matt+0x6b> cmp    DWORD PTR [rdx+rsi*4],edi
00000000000000ae <PetscFindInt_Matt+0x6e> mov    edx,0xffffffff
00000000000000b3 <PetscFindInt_Matt+0x73> cmovne eax,edx
00000000000000b6 <PetscFindInt_Matt+0x76> mov    DWORD PTR [rcx],eax
00000000000000b8 <PetscFindInt_Matt+0x78> xor    eax,eax
00000000000000ba <PetscFindInt_Matt+0x7a> ret
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mcs.anl.gov/pipermail/petsc-dev/attachments/20120922/f296b0af/attachment.html>


More information about the petsc-dev mailing list