Why do compilers test the least significant bit in an address?












1















Consider this function in https://github.com/coolwanglu/PDFium.js/blob/master/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp:



FX_BOOL CPDF_StreamContentParser::OnOperator(FX_LPCSTR op)
{
int i = 0;
FX_DWORD opid = 0;
while (i < 4 && op[i]) {
opid = (opid << 8) + op[i];
i ++;
}
while (i < 4) {
opid <<= 8;
i ++;
};
int low = 0, high = sizeof g_OpCodes / sizeof(struct _OpCode) - 1;
while (low <= high) {
int middle = (low + high) / 2;
int compare = opid - g_OpCodes[middle].m_OpId;
if (compare == 0) {
(this->*g_OpCodes[middle].m_OpHandler)();
return TRUE;
} else if (compare < 0) {
high = middle - 1;
} else {
low = middle + 1;
}
}
return m_CompatCount != 0;
}


This function is used in FoxitReader 2.4 and it is compiled as:



Dump of assembler code for function _ZN24CPDF_StreamContentParser10OnOperatorEPKc:
0x0000000000bc71fe <+0>: xor edx,edx
0x0000000000bc7200 <+2>: xor eax,eax
0x0000000000bc7202 <+4>: movsx r8d,BYTE PTR [rsi+rdx*1]
0x0000000000bc7207 <+9>: mov ecx,edx
0x0000000000bc7209 <+11>: test r8b,r8b
0x0000000000bc720c <+14>: je 0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
0x0000000000bc720e <+16>: shl eax,0x8
0x0000000000bc7211 <+19>: inc rdx
0x0000000000bc7214 <+22>: add eax,r8d
0x0000000000bc7217 <+25>: cmp rdx,0x4
0x0000000000bc721b <+29>: jne 0xbc7202 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+4>
0x0000000000bc721d <+31>: mov ecx,0x4
0x0000000000bc7222 <+36>: cmp ecx,0x4
0x0000000000bc7225 <+39>: je 0xbc722e <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+48>
0x0000000000bc7227 <+41>: shl eax,0x8
0x0000000000bc722a <+44>: inc ecx
0x0000000000bc722c <+46>: jmp 0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
0x0000000000bc722e <+48>: lea r10,[rip+0x180f43b] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc7235 <+55>: mov cl,0x48
0x0000000000bc7237 <+57>: xor esi,esi
0x0000000000bc7239 <+59>: lea edx,[rsi+rcx*1]
0x0000000000bc723c <+62>: sar edx,1
0x0000000000bc723e <+64>: movsxd r9,edx
0x0000000000bc7241 <+67>: imul r8,r9,0x18
0x0000000000bc7245 <+71>: add r8,r10
0x0000000000bc7248 <+74>: cmp eax,DWORD PTR [r8]
0x0000000000bc724b <+77>: jne 0xbc727c <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+126>
0x0000000000bc724d <+79>: push rcx
0x0000000000bc724e <+80>: mov rax,QWORD PTR [r8+0x8]
0x0000000000bc7252 <+84>: test al,0x1
0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000bc7263 <+101>: imul r9,r9,0x18
0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
0x0000000000bc7273 <+117>: call rax
0x0000000000bc7275 <+119>: mov eax,0x1
0x0000000000bc727a <+124>: pop rdx
0x0000000000bc727b <+125>: ret
0x0000000000bc727c <+126>: jns 0xbc7283 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+133>
0x0000000000bc727e <+128>: lea ecx,[rdx-0x1]
0x0000000000bc7281 <+131>: jmp 0xbc7286 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+136>
0x0000000000bc7283 <+133>: lea esi,[rdx+0x1]
0x0000000000bc7286 <+136>: cmp esi,ecx
0x0000000000bc7288 <+138>: jle 0xbc7239 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+59>
0x0000000000bc728a <+140>: xor eax,eax
0x0000000000bc728c <+142>: cmp DWORD PTR [rdi+0x454],0x0
0x0000000000bc7293 <+149>: setne al
0x0000000000bc7296 <+152>: ret
End of assembler dump.


Please note instructions +84 through +96, which correspond to (this->*g_OpCodes[middle].m_OpHandler)();. The register rax holds the pointer to the handler function, but instead of calling it directly the compiler adds those instructions and tests the least significant bit. Why?



I found other instances of this in other functions. Like in the function void CFX_Renderer::render(const Scanline& sl) in
https://github.com/priority5/qt/blob/2a6b2348ced4cb42e8e3c320a6e5aa3606c0d5a1/qtwebengine/src/3rdparty/chromium/third_party/pdfium/core/fxge/agg/fx_agg_driver.cpp :



 (this->*composite_span)(..args..);


which corresponds to:



   0x0000000000c40a6c <+480>:   test   dl,0x1
0x0000000000c40a6f <+483>: mov rax,rdx
0x0000000000c40a72 <+486>: je 0xc40a7d <_ZN12CFX_Renderer6renderIN5fxagg10scanline_uIhEEEEvRKT_+497>
0x0000000000c40a74 <+488>: mov rax,QWORD PTR [rbx+rdi*1]
0x0000000000c40a78 <+492>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000c40a7d <+497>: mov edx,DWORD PTR [r15+0x4]









share|improve this question

























  • Most things, like functions and (most) variables, are aligned to a boundary larger than a single byte, so the last bit of their address will never be anything but zero.

    – Jesper Juhl
    Nov 21 '18 at 22:31








  • 1





    Obvious question - which compiler?

    – Neil Butterworth
    Nov 21 '18 at 22:32











  • @NeilButterworth I don't know... I downloaded the compiled binary from foxitsoftware.com/downloads

    – Mansour
    Nov 21 '18 at 22:52











  • @JesperJuhl I see your point, but I still don't see why the check is there in the first place.

    – Mansour
    Nov 21 '18 at 22:59











  • Multiple inheritance shenanigans, most likely. Matters a lot for member function pointers that might reference an object of a class that uses virtual inheritance. Googling "v-table thunks" for the compiler you use could well pay off.

    – Hans Passant
    Nov 21 '18 at 23:31
















1















Consider this function in https://github.com/coolwanglu/PDFium.js/blob/master/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp:



FX_BOOL CPDF_StreamContentParser::OnOperator(FX_LPCSTR op)
{
int i = 0;
FX_DWORD opid = 0;
while (i < 4 && op[i]) {
opid = (opid << 8) + op[i];
i ++;
}
while (i < 4) {
opid <<= 8;
i ++;
};
int low = 0, high = sizeof g_OpCodes / sizeof(struct _OpCode) - 1;
while (low <= high) {
int middle = (low + high) / 2;
int compare = opid - g_OpCodes[middle].m_OpId;
if (compare == 0) {
(this->*g_OpCodes[middle].m_OpHandler)();
return TRUE;
} else if (compare < 0) {
high = middle - 1;
} else {
low = middle + 1;
}
}
return m_CompatCount != 0;
}


This function is used in FoxitReader 2.4 and it is compiled as:



Dump of assembler code for function _ZN24CPDF_StreamContentParser10OnOperatorEPKc:
0x0000000000bc71fe <+0>: xor edx,edx
0x0000000000bc7200 <+2>: xor eax,eax
0x0000000000bc7202 <+4>: movsx r8d,BYTE PTR [rsi+rdx*1]
0x0000000000bc7207 <+9>: mov ecx,edx
0x0000000000bc7209 <+11>: test r8b,r8b
0x0000000000bc720c <+14>: je 0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
0x0000000000bc720e <+16>: shl eax,0x8
0x0000000000bc7211 <+19>: inc rdx
0x0000000000bc7214 <+22>: add eax,r8d
0x0000000000bc7217 <+25>: cmp rdx,0x4
0x0000000000bc721b <+29>: jne 0xbc7202 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+4>
0x0000000000bc721d <+31>: mov ecx,0x4
0x0000000000bc7222 <+36>: cmp ecx,0x4
0x0000000000bc7225 <+39>: je 0xbc722e <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+48>
0x0000000000bc7227 <+41>: shl eax,0x8
0x0000000000bc722a <+44>: inc ecx
0x0000000000bc722c <+46>: jmp 0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
0x0000000000bc722e <+48>: lea r10,[rip+0x180f43b] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc7235 <+55>: mov cl,0x48
0x0000000000bc7237 <+57>: xor esi,esi
0x0000000000bc7239 <+59>: lea edx,[rsi+rcx*1]
0x0000000000bc723c <+62>: sar edx,1
0x0000000000bc723e <+64>: movsxd r9,edx
0x0000000000bc7241 <+67>: imul r8,r9,0x18
0x0000000000bc7245 <+71>: add r8,r10
0x0000000000bc7248 <+74>: cmp eax,DWORD PTR [r8]
0x0000000000bc724b <+77>: jne 0xbc727c <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+126>
0x0000000000bc724d <+79>: push rcx
0x0000000000bc724e <+80>: mov rax,QWORD PTR [r8+0x8]
0x0000000000bc7252 <+84>: test al,0x1
0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000bc7263 <+101>: imul r9,r9,0x18
0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
0x0000000000bc7273 <+117>: call rax
0x0000000000bc7275 <+119>: mov eax,0x1
0x0000000000bc727a <+124>: pop rdx
0x0000000000bc727b <+125>: ret
0x0000000000bc727c <+126>: jns 0xbc7283 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+133>
0x0000000000bc727e <+128>: lea ecx,[rdx-0x1]
0x0000000000bc7281 <+131>: jmp 0xbc7286 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+136>
0x0000000000bc7283 <+133>: lea esi,[rdx+0x1]
0x0000000000bc7286 <+136>: cmp esi,ecx
0x0000000000bc7288 <+138>: jle 0xbc7239 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+59>
0x0000000000bc728a <+140>: xor eax,eax
0x0000000000bc728c <+142>: cmp DWORD PTR [rdi+0x454],0x0
0x0000000000bc7293 <+149>: setne al
0x0000000000bc7296 <+152>: ret
End of assembler dump.


Please note instructions +84 through +96, which correspond to (this->*g_OpCodes[middle].m_OpHandler)();. The register rax holds the pointer to the handler function, but instead of calling it directly the compiler adds those instructions and tests the least significant bit. Why?



I found other instances of this in other functions. Like in the function void CFX_Renderer::render(const Scanline& sl) in
https://github.com/priority5/qt/blob/2a6b2348ced4cb42e8e3c320a6e5aa3606c0d5a1/qtwebengine/src/3rdparty/chromium/third_party/pdfium/core/fxge/agg/fx_agg_driver.cpp :



 (this->*composite_span)(..args..);


which corresponds to:



   0x0000000000c40a6c <+480>:   test   dl,0x1
0x0000000000c40a6f <+483>: mov rax,rdx
0x0000000000c40a72 <+486>: je 0xc40a7d <_ZN12CFX_Renderer6renderIN5fxagg10scanline_uIhEEEEvRKT_+497>
0x0000000000c40a74 <+488>: mov rax,QWORD PTR [rbx+rdi*1]
0x0000000000c40a78 <+492>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000c40a7d <+497>: mov edx,DWORD PTR [r15+0x4]









share|improve this question

























  • Most things, like functions and (most) variables, are aligned to a boundary larger than a single byte, so the last bit of their address will never be anything but zero.

    – Jesper Juhl
    Nov 21 '18 at 22:31








  • 1





    Obvious question - which compiler?

    – Neil Butterworth
    Nov 21 '18 at 22:32











  • @NeilButterworth I don't know... I downloaded the compiled binary from foxitsoftware.com/downloads

    – Mansour
    Nov 21 '18 at 22:52











  • @JesperJuhl I see your point, but I still don't see why the check is there in the first place.

    – Mansour
    Nov 21 '18 at 22:59











  • Multiple inheritance shenanigans, most likely. Matters a lot for member function pointers that might reference an object of a class that uses virtual inheritance. Googling "v-table thunks" for the compiler you use could well pay off.

    – Hans Passant
    Nov 21 '18 at 23:31














1












1








1


1






Consider this function in https://github.com/coolwanglu/PDFium.js/blob/master/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp:



FX_BOOL CPDF_StreamContentParser::OnOperator(FX_LPCSTR op)
{
int i = 0;
FX_DWORD opid = 0;
while (i < 4 && op[i]) {
opid = (opid << 8) + op[i];
i ++;
}
while (i < 4) {
opid <<= 8;
i ++;
};
int low = 0, high = sizeof g_OpCodes / sizeof(struct _OpCode) - 1;
while (low <= high) {
int middle = (low + high) / 2;
int compare = opid - g_OpCodes[middle].m_OpId;
if (compare == 0) {
(this->*g_OpCodes[middle].m_OpHandler)();
return TRUE;
} else if (compare < 0) {
high = middle - 1;
} else {
low = middle + 1;
}
}
return m_CompatCount != 0;
}


This function is used in FoxitReader 2.4 and it is compiled as:



Dump of assembler code for function _ZN24CPDF_StreamContentParser10OnOperatorEPKc:
0x0000000000bc71fe <+0>: xor edx,edx
0x0000000000bc7200 <+2>: xor eax,eax
0x0000000000bc7202 <+4>: movsx r8d,BYTE PTR [rsi+rdx*1]
0x0000000000bc7207 <+9>: mov ecx,edx
0x0000000000bc7209 <+11>: test r8b,r8b
0x0000000000bc720c <+14>: je 0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
0x0000000000bc720e <+16>: shl eax,0x8
0x0000000000bc7211 <+19>: inc rdx
0x0000000000bc7214 <+22>: add eax,r8d
0x0000000000bc7217 <+25>: cmp rdx,0x4
0x0000000000bc721b <+29>: jne 0xbc7202 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+4>
0x0000000000bc721d <+31>: mov ecx,0x4
0x0000000000bc7222 <+36>: cmp ecx,0x4
0x0000000000bc7225 <+39>: je 0xbc722e <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+48>
0x0000000000bc7227 <+41>: shl eax,0x8
0x0000000000bc722a <+44>: inc ecx
0x0000000000bc722c <+46>: jmp 0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
0x0000000000bc722e <+48>: lea r10,[rip+0x180f43b] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc7235 <+55>: mov cl,0x48
0x0000000000bc7237 <+57>: xor esi,esi
0x0000000000bc7239 <+59>: lea edx,[rsi+rcx*1]
0x0000000000bc723c <+62>: sar edx,1
0x0000000000bc723e <+64>: movsxd r9,edx
0x0000000000bc7241 <+67>: imul r8,r9,0x18
0x0000000000bc7245 <+71>: add r8,r10
0x0000000000bc7248 <+74>: cmp eax,DWORD PTR [r8]
0x0000000000bc724b <+77>: jne 0xbc727c <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+126>
0x0000000000bc724d <+79>: push rcx
0x0000000000bc724e <+80>: mov rax,QWORD PTR [r8+0x8]
0x0000000000bc7252 <+84>: test al,0x1
0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000bc7263 <+101>: imul r9,r9,0x18
0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
0x0000000000bc7273 <+117>: call rax
0x0000000000bc7275 <+119>: mov eax,0x1
0x0000000000bc727a <+124>: pop rdx
0x0000000000bc727b <+125>: ret
0x0000000000bc727c <+126>: jns 0xbc7283 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+133>
0x0000000000bc727e <+128>: lea ecx,[rdx-0x1]
0x0000000000bc7281 <+131>: jmp 0xbc7286 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+136>
0x0000000000bc7283 <+133>: lea esi,[rdx+0x1]
0x0000000000bc7286 <+136>: cmp esi,ecx
0x0000000000bc7288 <+138>: jle 0xbc7239 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+59>
0x0000000000bc728a <+140>: xor eax,eax
0x0000000000bc728c <+142>: cmp DWORD PTR [rdi+0x454],0x0
0x0000000000bc7293 <+149>: setne al
0x0000000000bc7296 <+152>: ret
End of assembler dump.


Please note instructions +84 through +96, which correspond to (this->*g_OpCodes[middle].m_OpHandler)();. The register rax holds the pointer to the handler function, but instead of calling it directly the compiler adds those instructions and tests the least significant bit. Why?



I found other instances of this in other functions. Like in the function void CFX_Renderer::render(const Scanline& sl) in
https://github.com/priority5/qt/blob/2a6b2348ced4cb42e8e3c320a6e5aa3606c0d5a1/qtwebengine/src/3rdparty/chromium/third_party/pdfium/core/fxge/agg/fx_agg_driver.cpp :



 (this->*composite_span)(..args..);


which corresponds to:



   0x0000000000c40a6c <+480>:   test   dl,0x1
0x0000000000c40a6f <+483>: mov rax,rdx
0x0000000000c40a72 <+486>: je 0xc40a7d <_ZN12CFX_Renderer6renderIN5fxagg10scanline_uIhEEEEvRKT_+497>
0x0000000000c40a74 <+488>: mov rax,QWORD PTR [rbx+rdi*1]
0x0000000000c40a78 <+492>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000c40a7d <+497>: mov edx,DWORD PTR [r15+0x4]









share|improve this question
















Consider this function in https://github.com/coolwanglu/PDFium.js/blob/master/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp:



FX_BOOL CPDF_StreamContentParser::OnOperator(FX_LPCSTR op)
{
int i = 0;
FX_DWORD opid = 0;
while (i < 4 && op[i]) {
opid = (opid << 8) + op[i];
i ++;
}
while (i < 4) {
opid <<= 8;
i ++;
};
int low = 0, high = sizeof g_OpCodes / sizeof(struct _OpCode) - 1;
while (low <= high) {
int middle = (low + high) / 2;
int compare = opid - g_OpCodes[middle].m_OpId;
if (compare == 0) {
(this->*g_OpCodes[middle].m_OpHandler)();
return TRUE;
} else if (compare < 0) {
high = middle - 1;
} else {
low = middle + 1;
}
}
return m_CompatCount != 0;
}


This function is used in FoxitReader 2.4 and it is compiled as:



Dump of assembler code for function _ZN24CPDF_StreamContentParser10OnOperatorEPKc:
0x0000000000bc71fe <+0>: xor edx,edx
0x0000000000bc7200 <+2>: xor eax,eax
0x0000000000bc7202 <+4>: movsx r8d,BYTE PTR [rsi+rdx*1]
0x0000000000bc7207 <+9>: mov ecx,edx
0x0000000000bc7209 <+11>: test r8b,r8b
0x0000000000bc720c <+14>: je 0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
0x0000000000bc720e <+16>: shl eax,0x8
0x0000000000bc7211 <+19>: inc rdx
0x0000000000bc7214 <+22>: add eax,r8d
0x0000000000bc7217 <+25>: cmp rdx,0x4
0x0000000000bc721b <+29>: jne 0xbc7202 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+4>
0x0000000000bc721d <+31>: mov ecx,0x4
0x0000000000bc7222 <+36>: cmp ecx,0x4
0x0000000000bc7225 <+39>: je 0xbc722e <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+48>
0x0000000000bc7227 <+41>: shl eax,0x8
0x0000000000bc722a <+44>: inc ecx
0x0000000000bc722c <+46>: jmp 0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
0x0000000000bc722e <+48>: lea r10,[rip+0x180f43b] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc7235 <+55>: mov cl,0x48
0x0000000000bc7237 <+57>: xor esi,esi
0x0000000000bc7239 <+59>: lea edx,[rsi+rcx*1]
0x0000000000bc723c <+62>: sar edx,1
0x0000000000bc723e <+64>: movsxd r9,edx
0x0000000000bc7241 <+67>: imul r8,r9,0x18
0x0000000000bc7245 <+71>: add r8,r10
0x0000000000bc7248 <+74>: cmp eax,DWORD PTR [r8]
0x0000000000bc724b <+77>: jne 0xbc727c <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+126>
0x0000000000bc724d <+79>: push rcx
0x0000000000bc724e <+80>: mov rax,QWORD PTR [r8+0x8]
0x0000000000bc7252 <+84>: test al,0x1
0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000bc7263 <+101>: imul r9,r9,0x18
0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
0x0000000000bc7273 <+117>: call rax
0x0000000000bc7275 <+119>: mov eax,0x1
0x0000000000bc727a <+124>: pop rdx
0x0000000000bc727b <+125>: ret
0x0000000000bc727c <+126>: jns 0xbc7283 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+133>
0x0000000000bc727e <+128>: lea ecx,[rdx-0x1]
0x0000000000bc7281 <+131>: jmp 0xbc7286 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+136>
0x0000000000bc7283 <+133>: lea esi,[rdx+0x1]
0x0000000000bc7286 <+136>: cmp esi,ecx
0x0000000000bc7288 <+138>: jle 0xbc7239 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+59>
0x0000000000bc728a <+140>: xor eax,eax
0x0000000000bc728c <+142>: cmp DWORD PTR [rdi+0x454],0x0
0x0000000000bc7293 <+149>: setne al
0x0000000000bc7296 <+152>: ret
End of assembler dump.


Please note instructions +84 through +96, which correspond to (this->*g_OpCodes[middle].m_OpHandler)();. The register rax holds the pointer to the handler function, but instead of calling it directly the compiler adds those instructions and tests the least significant bit. Why?



I found other instances of this in other functions. Like in the function void CFX_Renderer::render(const Scanline& sl) in
https://github.com/priority5/qt/blob/2a6b2348ced4cb42e8e3c320a6e5aa3606c0d5a1/qtwebengine/src/3rdparty/chromium/third_party/pdfium/core/fxge/agg/fx_agg_driver.cpp :



 (this->*composite_span)(..args..);


which corresponds to:



   0x0000000000c40a6c <+480>:   test   dl,0x1
0x0000000000c40a6f <+483>: mov rax,rdx
0x0000000000c40a72 <+486>: je 0xc40a7d <_ZN12CFX_Renderer6renderIN5fxagg10scanline_uIhEEEEvRKT_+497>
0x0000000000c40a74 <+488>: mov rax,QWORD PTR [rbx+rdi*1]
0x0000000000c40a78 <+492>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000c40a7d <+497>: mov edx,DWORD PTR [r15+0x4]






c++






share|improve this question















share|improve this question













share|improve this question




share|improve this question








edited Nov 21 '18 at 22:32









Neil Butterworth

27.1k54680




27.1k54680










asked Nov 21 '18 at 22:23









MansourMansour

113




113













  • Most things, like functions and (most) variables, are aligned to a boundary larger than a single byte, so the last bit of their address will never be anything but zero.

    – Jesper Juhl
    Nov 21 '18 at 22:31








  • 1





    Obvious question - which compiler?

    – Neil Butterworth
    Nov 21 '18 at 22:32











  • @NeilButterworth I don't know... I downloaded the compiled binary from foxitsoftware.com/downloads

    – Mansour
    Nov 21 '18 at 22:52











  • @JesperJuhl I see your point, but I still don't see why the check is there in the first place.

    – Mansour
    Nov 21 '18 at 22:59











  • Multiple inheritance shenanigans, most likely. Matters a lot for member function pointers that might reference an object of a class that uses virtual inheritance. Googling "v-table thunks" for the compiler you use could well pay off.

    – Hans Passant
    Nov 21 '18 at 23:31



















  • Most things, like functions and (most) variables, are aligned to a boundary larger than a single byte, so the last bit of their address will never be anything but zero.

    – Jesper Juhl
    Nov 21 '18 at 22:31








  • 1





    Obvious question - which compiler?

    – Neil Butterworth
    Nov 21 '18 at 22:32











  • @NeilButterworth I don't know... I downloaded the compiled binary from foxitsoftware.com/downloads

    – Mansour
    Nov 21 '18 at 22:52











  • @JesperJuhl I see your point, but I still don't see why the check is there in the first place.

    – Mansour
    Nov 21 '18 at 22:59











  • Multiple inheritance shenanigans, most likely. Matters a lot for member function pointers that might reference an object of a class that uses virtual inheritance. Googling "v-table thunks" for the compiler you use could well pay off.

    – Hans Passant
    Nov 21 '18 at 23:31

















Most things, like functions and (most) variables, are aligned to a boundary larger than a single byte, so the last bit of their address will never be anything but zero.

– Jesper Juhl
Nov 21 '18 at 22:31







Most things, like functions and (most) variables, are aligned to a boundary larger than a single byte, so the last bit of their address will never be anything but zero.

– Jesper Juhl
Nov 21 '18 at 22:31






1




1





Obvious question - which compiler?

– Neil Butterworth
Nov 21 '18 at 22:32





Obvious question - which compiler?

– Neil Butterworth
Nov 21 '18 at 22:32













@NeilButterworth I don't know... I downloaded the compiled binary from foxitsoftware.com/downloads

– Mansour
Nov 21 '18 at 22:52





@NeilButterworth I don't know... I downloaded the compiled binary from foxitsoftware.com/downloads

– Mansour
Nov 21 '18 at 22:52













@JesperJuhl I see your point, but I still don't see why the check is there in the first place.

– Mansour
Nov 21 '18 at 22:59





@JesperJuhl I see your point, but I still don't see why the check is there in the first place.

– Mansour
Nov 21 '18 at 22:59













Multiple inheritance shenanigans, most likely. Matters a lot for member function pointers that might reference an object of a class that uses virtual inheritance. Googling "v-table thunks" for the compiler you use could well pay off.

– Hans Passant
Nov 21 '18 at 23:31





Multiple inheritance shenanigans, most likely. Matters a lot for member function pointers that might reference an object of a class that uses virtual inheritance. Googling "v-table thunks" for the compiler you use could well pay off.

– Hans Passant
Nov 21 '18 at 23:31












1 Answer
1






active

oldest

votes


















3














I am guessing that the compiler uses odd addresses to indicate funky function pointers, perhaps indicating indirection through the class vtable, or some other mapping, perhaps for dynamic loaded code, where r8 is the class pointer?



My best guess is this is a pointer to member function, and that could either be a simple member function, or a virtual function. If it is simple it is the raw address and if it is virtual it is the funky pointer value.



If the "pointer" is even, then it is used directly, if it is odd it plings through r8 (probably the object instance) and rdi, and I have no idea what is in rdi, before applying rax as an offset with the 1 subtracted. It will depend heavily on the calling conventions of the platform and whatever tricks the compiler is playing.



0x0000000000bc724e <+80>:    mov    rax,QWORD PTR [r8+0x8]
0x0000000000bc7252 <+84>: test al,0x1
0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000bc7263 <+101>: imul r9,r9,0x18
0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
0x0000000000bc7273 <+117>: call rax





share|improve this answer























    Your Answer






    StackExchange.ifUsing("editor", function () {
    StackExchange.using("externalEditor", function () {
    StackExchange.using("snippets", function () {
    StackExchange.snippets.init();
    });
    });
    }, "code-snippets");

    StackExchange.ready(function() {
    var channelOptions = {
    tags: "".split(" "),
    id: "1"
    };
    initTagRenderer("".split(" "), "".split(" "), channelOptions);

    StackExchange.using("externalEditor", function() {
    // Have to fire editor after snippets, if snippets enabled
    if (StackExchange.settings.snippets.snippetsEnabled) {
    StackExchange.using("snippets", function() {
    createEditor();
    });
    }
    else {
    createEditor();
    }
    });

    function createEditor() {
    StackExchange.prepareEditor({
    heartbeatType: 'answer',
    autoActivateHeartbeat: false,
    convertImagesToLinks: true,
    noModals: true,
    showLowRepImageUploadWarning: true,
    reputationToPostImages: 10,
    bindNavPrevention: true,
    postfix: "",
    imageUploader: {
    brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
    contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
    allowUrls: true
    },
    onDemand: true,
    discardSelector: ".discard-answer"
    ,immediatelyShowMarkdownHelp:true
    });


    }
    });














    draft saved

    draft discarded


















    StackExchange.ready(
    function () {
    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53421279%2fwhy-compilers-test-the-least-significant-bit-in-an-address%23new-answer', 'question_page');
    }
    );

    Post as a guest















    Required, but never shown

























    1 Answer
    1






    active

    oldest

    votes








    1 Answer
    1






    active

    oldest

    votes









    active

    oldest

    votes






    active

    oldest

    votes









    3














    I am guessing that the compiler uses odd addresses to indicate funky function pointers, perhaps indicating indirection through the class vtable, or some other mapping, perhaps for dynamic loaded code, where r8 is the class pointer?



    My best guess is this is a pointer to member function, and that could either be a simple member function, or a virtual function. If it is simple it is the raw address and if it is virtual it is the funky pointer value.



    If the "pointer" is even, then it is used directly, if it is odd it plings through r8 (probably the object instance) and rdi, and I have no idea what is in rdi, before applying rax as an offset with the 1 subtracted. It will depend heavily on the calling conventions of the platform and whatever tricks the compiler is playing.



    0x0000000000bc724e <+80>:    mov    rax,QWORD PTR [r8+0x8]
    0x0000000000bc7252 <+84>: test al,0x1
    0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
    0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
    0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
    0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
    0x0000000000bc7263 <+101>: imul r9,r9,0x18
    0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
    0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
    0x0000000000bc7273 <+117>: call rax





    share|improve this answer




























      3














      I am guessing that the compiler uses odd addresses to indicate funky function pointers, perhaps indicating indirection through the class vtable, or some other mapping, perhaps for dynamic loaded code, where r8 is the class pointer?



      My best guess is this is a pointer to member function, and that could either be a simple member function, or a virtual function. If it is simple it is the raw address and if it is virtual it is the funky pointer value.



      If the "pointer" is even, then it is used directly, if it is odd it plings through r8 (probably the object instance) and rdi, and I have no idea what is in rdi, before applying rax as an offset with the 1 subtracted. It will depend heavily on the calling conventions of the platform and whatever tricks the compiler is playing.



      0x0000000000bc724e <+80>:    mov    rax,QWORD PTR [r8+0x8]
      0x0000000000bc7252 <+84>: test al,0x1
      0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
      0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
      0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
      0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
      0x0000000000bc7263 <+101>: imul r9,r9,0x18
      0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
      0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
      0x0000000000bc7273 <+117>: call rax





      share|improve this answer


























        3












        3








        3







        I am guessing that the compiler uses odd addresses to indicate funky function pointers, perhaps indicating indirection through the class vtable, or some other mapping, perhaps for dynamic loaded code, where r8 is the class pointer?



        My best guess is this is a pointer to member function, and that could either be a simple member function, or a virtual function. If it is simple it is the raw address and if it is virtual it is the funky pointer value.



        If the "pointer" is even, then it is used directly, if it is odd it plings through r8 (probably the object instance) and rdi, and I have no idea what is in rdi, before applying rax as an offset with the 1 subtracted. It will depend heavily on the calling conventions of the platform and whatever tricks the compiler is playing.



        0x0000000000bc724e <+80>:    mov    rax,QWORD PTR [r8+0x8]
        0x0000000000bc7252 <+84>: test al,0x1
        0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
        0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
        0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
        0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
        0x0000000000bc7263 <+101>: imul r9,r9,0x18
        0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
        0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
        0x0000000000bc7273 <+117>: call rax





        share|improve this answer













        I am guessing that the compiler uses odd addresses to indicate funky function pointers, perhaps indicating indirection through the class vtable, or some other mapping, perhaps for dynamic loaded code, where r8 is the class pointer?



        My best guess is this is a pointer to member function, and that could either be a simple member function, or a virtual function. If it is simple it is the raw address and if it is virtual it is the funky pointer value.



        If the "pointer" is even, then it is used directly, if it is odd it plings through r8 (probably the object instance) and rdi, and I have no idea what is in rdi, before applying rax as an offset with the 1 subtracted. It will depend heavily on the calling conventions of the platform and whatever tricks the compiler is playing.



        0x0000000000bc724e <+80>:    mov    rax,QWORD PTR [r8+0x8]
        0x0000000000bc7252 <+84>: test al,0x1
        0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
        0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
        0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
        0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
        0x0000000000bc7263 <+101>: imul r9,r9,0x18
        0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
        0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
        0x0000000000bc7273 <+117>: call rax






        share|improve this answer












        share|improve this answer



        share|improve this answer










        answered Nov 21 '18 at 23:20









        Gem TaylorGem Taylor

        1,943217




        1,943217






























            draft saved

            draft discarded




















































            Thanks for contributing an answer to Stack Overflow!


            • Please be sure to answer the question. Provide details and share your research!

            But avoid



            • Asking for help, clarification, or responding to other answers.

            • Making statements based on opinion; back them up with references or personal experience.


            To learn more, see our tips on writing great answers.




            draft saved


            draft discarded














            StackExchange.ready(
            function () {
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53421279%2fwhy-compilers-test-the-least-significant-bit-in-an-address%23new-answer', 'question_page');
            }
            );

            Post as a guest















            Required, but never shown





















































            Required, but never shown














            Required, but never shown












            Required, but never shown







            Required, but never shown

































            Required, but never shown














            Required, but never shown












            Required, but never shown







            Required, but never shown







            Popular posts from this blog

            Create new schema in PostgreSQL using DBeaver

            Deepest pit of an array with Javascript: test on Codility

            Fotorealismo