Why do compilers test the least significant bit of an address?
Consider this function in https://github.com/coolwanglu/PDFium.js/blob/master/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp:
// Dispatches a content-stream operator: packs the operator name into opid,
// binary-searches g_OpCodes for that id and, on a match, invokes the matching
// member-function handler on this object.
// Returns TRUE when a handler was found and called; otherwise returns whether
// m_CompatCount is non-zero.
FX_BOOL CPDF_StreamContentParser::OnOperator(FX_LPCSTR op)
{
int i = 0;
FX_DWORD opid = 0;
// Pack up to four characters of op into opid, one byte per character, with
// the first character ending up in the most significant position. Stops early
// at the terminating NUL.
while (i < 4 && op[i]) {
opid = (opid << 8) + op[i];
i ++;
}
// Names shorter than four characters are left-aligned: shift in a zero byte
// for each remaining position.
while (i < 4) {
opid <<= 8;
i ++;
};
// Binary search over g_OpCodes. Presumably the table is sorted by m_OpId in
// ascending order (its definition is not shown here) -- TODO confirm.
int low = 0, high = sizeof g_OpCodes / sizeof(struct _OpCode) - 1;
while (low <= high) {
int middle = (low + high) / 2;
// NOTE(review): compare holds the difference of the two ids as an int.
// Assuming both ids are unsigned 32-bit values, its sign only reflects their
// order when they differ by less than 2^31 -- confirm the table's id range.
int compare = opid - g_OpCodes[middle].m_OpId;
if (compare == 0) {
// Call through the pointer to member function stored in the table entry.
(this->*g_OpCodes[middle].m_OpHandler)();
return TRUE;
} else if (compare < 0) {
high = middle - 1;
} else {
low = middle + 1;
}
}
// No table entry matched the operator id.
return m_CompatCount != 0;
}
This function is used in FoxitReader 2.4 and it is compiled as:
Dump of assembler code for function _ZN24CPDF_StreamContentParser10OnOperatorEPKc:
0x0000000000bc71fe <+0>: xor edx,edx
0x0000000000bc7200 <+2>: xor eax,eax
0x0000000000bc7202 <+4>: movsx r8d,BYTE PTR [rsi+rdx*1]
0x0000000000bc7207 <+9>: mov ecx,edx
0x0000000000bc7209 <+11>: test r8b,r8b
0x0000000000bc720c <+14>: je 0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
0x0000000000bc720e <+16>: shl eax,0x8
0x0000000000bc7211 <+19>: inc rdx
0x0000000000bc7214 <+22>: add eax,r8d
0x0000000000bc7217 <+25>: cmp rdx,0x4
0x0000000000bc721b <+29>: jne 0xbc7202 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+4>
0x0000000000bc721d <+31>: mov ecx,0x4
0x0000000000bc7222 <+36>: cmp ecx,0x4
0x0000000000bc7225 <+39>: je 0xbc722e <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+48>
0x0000000000bc7227 <+41>: shl eax,0x8
0x0000000000bc722a <+44>: inc ecx
0x0000000000bc722c <+46>: jmp 0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
0x0000000000bc722e <+48>: lea r10,[rip+0x180f43b] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc7235 <+55>: mov cl,0x48
0x0000000000bc7237 <+57>: xor esi,esi
0x0000000000bc7239 <+59>: lea edx,[rsi+rcx*1]
0x0000000000bc723c <+62>: sar edx,1
0x0000000000bc723e <+64>: movsxd r9,edx
0x0000000000bc7241 <+67>: imul r8,r9,0x18
0x0000000000bc7245 <+71>: add r8,r10
0x0000000000bc7248 <+74>: cmp eax,DWORD PTR [r8]
0x0000000000bc724b <+77>: jne 0xbc727c <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+126>
0x0000000000bc724d <+79>: push rcx
0x0000000000bc724e <+80>: mov rax,QWORD PTR [r8+0x8]
0x0000000000bc7252 <+84>: test al,0x1
0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000bc7263 <+101>: imul r9,r9,0x18
0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
0x0000000000bc7273 <+117>: call rax
0x0000000000bc7275 <+119>: mov eax,0x1
0x0000000000bc727a <+124>: pop rdx
0x0000000000bc727b <+125>: ret
0x0000000000bc727c <+126>: jns 0xbc7283 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+133>
0x0000000000bc727e <+128>: lea ecx,[rdx-0x1]
0x0000000000bc7281 <+131>: jmp 0xbc7286 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+136>
0x0000000000bc7283 <+133>: lea esi,[rdx+0x1]
0x0000000000bc7286 <+136>: cmp esi,ecx
0x0000000000bc7288 <+138>: jle 0xbc7239 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+59>
0x0000000000bc728a <+140>: xor eax,eax
0x0000000000bc728c <+142>: cmp DWORD PTR [rdi+0x454],0x0
0x0000000000bc7293 <+149>: setne al
0x0000000000bc7296 <+152>: ret
End of assembler dump.
Please note instructions +84 through +96, which correspond to (this->*g_OpCodes[middle].m_OpHandler)();
The register rax holds the pointer to the handler function, but instead of calling it directly, the compiler adds those instructions and tests the least significant bit. Why?
I found other instances of this in other functions. Like in the function void CFX_Renderer::render(const Scanline& sl) in
https://github.com/priority5/qt/blob/2a6b2348ced4cb42e8e3c320a6e5aa3606c0d5a1/qtwebengine/src/3rdparty/chromium/third_party/pdfium/core/fxge/agg/fx_agg_driver.cpp :
(this->*composite_span)(..args..);
which corresponds to:
0x0000000000c40a6c <+480>: test dl,0x1
0x0000000000c40a6f <+483>: mov rax,rdx
0x0000000000c40a72 <+486>: je 0xc40a7d <_ZN12CFX_Renderer6renderIN5fxagg10scanline_uIhEEEEvRKT_+497>
0x0000000000c40a74 <+488>: mov rax,QWORD PTR [rbx+rdi*1]
0x0000000000c40a78 <+492>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000c40a7d <+497>: mov edx,DWORD PTR [r15+0x4]
c++
|
show 1 more comment
Consider this function in https://github.com/coolwanglu/PDFium.js/blob/master/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp:
// Dispatches a content-stream operator: packs the operator name into opid,
// binary-searches g_OpCodes for that id and, on a match, invokes the matching
// member-function handler on this object.
// Returns TRUE when a handler was found and called; otherwise returns whether
// m_CompatCount is non-zero.
FX_BOOL CPDF_StreamContentParser::OnOperator(FX_LPCSTR op)
{
int i = 0;
FX_DWORD opid = 0;
// Pack up to four characters of op into opid, one byte per character, with
// the first character ending up in the most significant position. Stops early
// at the terminating NUL.
while (i < 4 && op[i]) {
opid = (opid << 8) + op[i];
i ++;
}
// Names shorter than four characters are left-aligned: shift in a zero byte
// for each remaining position.
while (i < 4) {
opid <<= 8;
i ++;
};
// Binary search over g_OpCodes. Presumably the table is sorted by m_OpId in
// ascending order (its definition is not shown here) -- TODO confirm.
int low = 0, high = sizeof g_OpCodes / sizeof(struct _OpCode) - 1;
while (low <= high) {
int middle = (low + high) / 2;
// NOTE(review): compare holds the difference of the two ids as an int.
// Assuming both ids are unsigned 32-bit values, its sign only reflects their
// order when they differ by less than 2^31 -- confirm the table's id range.
int compare = opid - g_OpCodes[middle].m_OpId;
if (compare == 0) {
// Call through the pointer to member function stored in the table entry.
(this->*g_OpCodes[middle].m_OpHandler)();
return TRUE;
} else if (compare < 0) {
high = middle - 1;
} else {
low = middle + 1;
}
}
// No table entry matched the operator id.
return m_CompatCount != 0;
}
This function is used in FoxitReader 2.4 and it is compiled as:
Dump of assembler code for function _ZN24CPDF_StreamContentParser10OnOperatorEPKc:
0x0000000000bc71fe <+0>: xor edx,edx
0x0000000000bc7200 <+2>: xor eax,eax
0x0000000000bc7202 <+4>: movsx r8d,BYTE PTR [rsi+rdx*1]
0x0000000000bc7207 <+9>: mov ecx,edx
0x0000000000bc7209 <+11>: test r8b,r8b
0x0000000000bc720c <+14>: je 0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
0x0000000000bc720e <+16>: shl eax,0x8
0x0000000000bc7211 <+19>: inc rdx
0x0000000000bc7214 <+22>: add eax,r8d
0x0000000000bc7217 <+25>: cmp rdx,0x4
0x0000000000bc721b <+29>: jne 0xbc7202 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+4>
0x0000000000bc721d <+31>: mov ecx,0x4
0x0000000000bc7222 <+36>: cmp ecx,0x4
0x0000000000bc7225 <+39>: je 0xbc722e <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+48>
0x0000000000bc7227 <+41>: shl eax,0x8
0x0000000000bc722a <+44>: inc ecx
0x0000000000bc722c <+46>: jmp 0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
0x0000000000bc722e <+48>: lea r10,[rip+0x180f43b] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc7235 <+55>: mov cl,0x48
0x0000000000bc7237 <+57>: xor esi,esi
0x0000000000bc7239 <+59>: lea edx,[rsi+rcx*1]
0x0000000000bc723c <+62>: sar edx,1
0x0000000000bc723e <+64>: movsxd r9,edx
0x0000000000bc7241 <+67>: imul r8,r9,0x18
0x0000000000bc7245 <+71>: add r8,r10
0x0000000000bc7248 <+74>: cmp eax,DWORD PTR [r8]
0x0000000000bc724b <+77>: jne 0xbc727c <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+126>
0x0000000000bc724d <+79>: push rcx
0x0000000000bc724e <+80>: mov rax,QWORD PTR [r8+0x8]
0x0000000000bc7252 <+84>: test al,0x1
0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000bc7263 <+101>: imul r9,r9,0x18
0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
0x0000000000bc7273 <+117>: call rax
0x0000000000bc7275 <+119>: mov eax,0x1
0x0000000000bc727a <+124>: pop rdx
0x0000000000bc727b <+125>: ret
0x0000000000bc727c <+126>: jns 0xbc7283 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+133>
0x0000000000bc727e <+128>: lea ecx,[rdx-0x1]
0x0000000000bc7281 <+131>: jmp 0xbc7286 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+136>
0x0000000000bc7283 <+133>: lea esi,[rdx+0x1]
0x0000000000bc7286 <+136>: cmp esi,ecx
0x0000000000bc7288 <+138>: jle 0xbc7239 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+59>
0x0000000000bc728a <+140>: xor eax,eax
0x0000000000bc728c <+142>: cmp DWORD PTR [rdi+0x454],0x0
0x0000000000bc7293 <+149>: setne al
0x0000000000bc7296 <+152>: ret
End of assembler dump.
Please note instructions +84 through +96, which correspond to (this->*g_OpCodes[middle].m_OpHandler)();
The register rax holds the pointer to the handler function, but instead of calling it directly, the compiler adds those instructions and tests the least significant bit. Why?
I found other instances of this in other functions. Like in the function void CFX_Renderer::render(const Scanline& sl) in
https://github.com/priority5/qt/blob/2a6b2348ced4cb42e8e3c320a6e5aa3606c0d5a1/qtwebengine/src/3rdparty/chromium/third_party/pdfium/core/fxge/agg/fx_agg_driver.cpp :
(this->*composite_span)(..args..);
which corresponds to:
0x0000000000c40a6c <+480>: test dl,0x1
0x0000000000c40a6f <+483>: mov rax,rdx
0x0000000000c40a72 <+486>: je 0xc40a7d <_ZN12CFX_Renderer6renderIN5fxagg10scanline_uIhEEEEvRKT_+497>
0x0000000000c40a74 <+488>: mov rax,QWORD PTR [rbx+rdi*1]
0x0000000000c40a78 <+492>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000c40a7d <+497>: mov edx,DWORD PTR [r15+0x4]
c++
Most things, like functions and (most) variables, are aligned to some size larger than a single byte, so the last bit will never be anything but zero.
– Jesper Juhl
Nov 21 '18 at 22:31
1
Obvious question - which compiler?
– Neil Butterworth
Nov 21 '18 at 22:32
@NeilButterworth I dont know .. I downloaded the compiled binary from foxitsoftware.com/downloads
– Mansour
Nov 21 '18 at 22:52
@JesperJuhl I see your point, but still I dont see why the check in first place ..
– Mansour
Nov 21 '18 at 22:59
Multiple inheritance shenanigans, most likely. Matters a lot for member function pointers that might reference an object of a class that uses virtual inheritance. Googling "v-table thunks" for the compiler you use could well pay off.
– Hans Passant
Nov 21 '18 at 23:31
|
show 1 more comment
Consider this function in https://github.com/coolwanglu/PDFium.js/blob/master/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp:
// Dispatches a content-stream operator: packs the operator name into opid,
// binary-searches g_OpCodes for that id and, on a match, invokes the matching
// member-function handler on this object.
// Returns TRUE when a handler was found and called; otherwise returns whether
// m_CompatCount is non-zero.
FX_BOOL CPDF_StreamContentParser::OnOperator(FX_LPCSTR op)
{
int i = 0;
FX_DWORD opid = 0;
// Pack up to four characters of op into opid, one byte per character, with
// the first character ending up in the most significant position. Stops early
// at the terminating NUL.
while (i < 4 && op[i]) {
opid = (opid << 8) + op[i];
i ++;
}
// Names shorter than four characters are left-aligned: shift in a zero byte
// for each remaining position.
while (i < 4) {
opid <<= 8;
i ++;
};
// Binary search over g_OpCodes. Presumably the table is sorted by m_OpId in
// ascending order (its definition is not shown here) -- TODO confirm.
int low = 0, high = sizeof g_OpCodes / sizeof(struct _OpCode) - 1;
while (low <= high) {
int middle = (low + high) / 2;
// NOTE(review): compare holds the difference of the two ids as an int.
// Assuming both ids are unsigned 32-bit values, its sign only reflects their
// order when they differ by less than 2^31 -- confirm the table's id range.
int compare = opid - g_OpCodes[middle].m_OpId;
if (compare == 0) {
// Call through the pointer to member function stored in the table entry.
(this->*g_OpCodes[middle].m_OpHandler)();
return TRUE;
} else if (compare < 0) {
high = middle - 1;
} else {
low = middle + 1;
}
}
// No table entry matched the operator id.
return m_CompatCount != 0;
}
This function is used in FoxitReader 2.4 and it is compiled as:
Dump of assembler code for function _ZN24CPDF_StreamContentParser10OnOperatorEPKc:
0x0000000000bc71fe <+0>: xor edx,edx
0x0000000000bc7200 <+2>: xor eax,eax
0x0000000000bc7202 <+4>: movsx r8d,BYTE PTR [rsi+rdx*1]
0x0000000000bc7207 <+9>: mov ecx,edx
0x0000000000bc7209 <+11>: test r8b,r8b
0x0000000000bc720c <+14>: je 0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
0x0000000000bc720e <+16>: shl eax,0x8
0x0000000000bc7211 <+19>: inc rdx
0x0000000000bc7214 <+22>: add eax,r8d
0x0000000000bc7217 <+25>: cmp rdx,0x4
0x0000000000bc721b <+29>: jne 0xbc7202 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+4>
0x0000000000bc721d <+31>: mov ecx,0x4
0x0000000000bc7222 <+36>: cmp ecx,0x4
0x0000000000bc7225 <+39>: je 0xbc722e <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+48>
0x0000000000bc7227 <+41>: shl eax,0x8
0x0000000000bc722a <+44>: inc ecx
0x0000000000bc722c <+46>: jmp 0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
0x0000000000bc722e <+48>: lea r10,[rip+0x180f43b] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc7235 <+55>: mov cl,0x48
0x0000000000bc7237 <+57>: xor esi,esi
0x0000000000bc7239 <+59>: lea edx,[rsi+rcx*1]
0x0000000000bc723c <+62>: sar edx,1
0x0000000000bc723e <+64>: movsxd r9,edx
0x0000000000bc7241 <+67>: imul r8,r9,0x18
0x0000000000bc7245 <+71>: add r8,r10
0x0000000000bc7248 <+74>: cmp eax,DWORD PTR [r8]
0x0000000000bc724b <+77>: jne 0xbc727c <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+126>
0x0000000000bc724d <+79>: push rcx
0x0000000000bc724e <+80>: mov rax,QWORD PTR [r8+0x8]
0x0000000000bc7252 <+84>: test al,0x1
0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000bc7263 <+101>: imul r9,r9,0x18
0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
0x0000000000bc7273 <+117>: call rax
0x0000000000bc7275 <+119>: mov eax,0x1
0x0000000000bc727a <+124>: pop rdx
0x0000000000bc727b <+125>: ret
0x0000000000bc727c <+126>: jns 0xbc7283 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+133>
0x0000000000bc727e <+128>: lea ecx,[rdx-0x1]
0x0000000000bc7281 <+131>: jmp 0xbc7286 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+136>
0x0000000000bc7283 <+133>: lea esi,[rdx+0x1]
0x0000000000bc7286 <+136>: cmp esi,ecx
0x0000000000bc7288 <+138>: jle 0xbc7239 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+59>
0x0000000000bc728a <+140>: xor eax,eax
0x0000000000bc728c <+142>: cmp DWORD PTR [rdi+0x454],0x0
0x0000000000bc7293 <+149>: setne al
0x0000000000bc7296 <+152>: ret
End of assembler dump.
Please note instructions +84 through +96, which correspond to (this->*g_OpCodes[middle].m_OpHandler)();
The register rax holds the pointer to the handler function, but instead of calling it directly, the compiler adds those instructions and tests the least significant bit. Why?
I found other instances of this in other functions. Like in the function void CFX_Renderer::render(const Scanline& sl) in
https://github.com/priority5/qt/blob/2a6b2348ced4cb42e8e3c320a6e5aa3606c0d5a1/qtwebengine/src/3rdparty/chromium/third_party/pdfium/core/fxge/agg/fx_agg_driver.cpp :
(this->*composite_span)(..args..);
which corresponds to:
0x0000000000c40a6c <+480>: test dl,0x1
0x0000000000c40a6f <+483>: mov rax,rdx
0x0000000000c40a72 <+486>: je 0xc40a7d <_ZN12CFX_Renderer6renderIN5fxagg10scanline_uIhEEEEvRKT_+497>
0x0000000000c40a74 <+488>: mov rax,QWORD PTR [rbx+rdi*1]
0x0000000000c40a78 <+492>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000c40a7d <+497>: mov edx,DWORD PTR [r15+0x4]
c++
Consider this function in https://github.com/coolwanglu/PDFium.js/blob/master/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp:
// Dispatches a content-stream operator: packs the operator name into opid,
// binary-searches g_OpCodes for that id and, on a match, invokes the matching
// member-function handler on this object.
// Returns TRUE when a handler was found and called; otherwise returns whether
// m_CompatCount is non-zero.
FX_BOOL CPDF_StreamContentParser::OnOperator(FX_LPCSTR op)
{
int i = 0;
FX_DWORD opid = 0;
// Pack up to four characters of op into opid, one byte per character, with
// the first character ending up in the most significant position. Stops early
// at the terminating NUL.
while (i < 4 && op[i]) {
opid = (opid << 8) + op[i];
i ++;
}
// Names shorter than four characters are left-aligned: shift in a zero byte
// for each remaining position.
while (i < 4) {
opid <<= 8;
i ++;
};
// Binary search over g_OpCodes. Presumably the table is sorted by m_OpId in
// ascending order (its definition is not shown here) -- TODO confirm.
int low = 0, high = sizeof g_OpCodes / sizeof(struct _OpCode) - 1;
while (low <= high) {
int middle = (low + high) / 2;
// NOTE(review): compare holds the difference of the two ids as an int.
// Assuming both ids are unsigned 32-bit values, its sign only reflects their
// order when they differ by less than 2^31 -- confirm the table's id range.
int compare = opid - g_OpCodes[middle].m_OpId;
if (compare == 0) {
// Call through the pointer to member function stored in the table entry.
(this->*g_OpCodes[middle].m_OpHandler)();
return TRUE;
} else if (compare < 0) {
high = middle - 1;
} else {
low = middle + 1;
}
}
// No table entry matched the operator id.
return m_CompatCount != 0;
}
This function is used in FoxitReader 2.4 and it is compiled as:
Dump of assembler code for function _ZN24CPDF_StreamContentParser10OnOperatorEPKc:
0x0000000000bc71fe <+0>: xor edx,edx
0x0000000000bc7200 <+2>: xor eax,eax
0x0000000000bc7202 <+4>: movsx r8d,BYTE PTR [rsi+rdx*1]
0x0000000000bc7207 <+9>: mov ecx,edx
0x0000000000bc7209 <+11>: test r8b,r8b
0x0000000000bc720c <+14>: je 0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
0x0000000000bc720e <+16>: shl eax,0x8
0x0000000000bc7211 <+19>: inc rdx
0x0000000000bc7214 <+22>: add eax,r8d
0x0000000000bc7217 <+25>: cmp rdx,0x4
0x0000000000bc721b <+29>: jne 0xbc7202 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+4>
0x0000000000bc721d <+31>: mov ecx,0x4
0x0000000000bc7222 <+36>: cmp ecx,0x4
0x0000000000bc7225 <+39>: je 0xbc722e <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+48>
0x0000000000bc7227 <+41>: shl eax,0x8
0x0000000000bc722a <+44>: inc ecx
0x0000000000bc722c <+46>: jmp 0xbc7222 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+36>
0x0000000000bc722e <+48>: lea r10,[rip+0x180f43b] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc7235 <+55>: mov cl,0x48
0x0000000000bc7237 <+57>: xor esi,esi
0x0000000000bc7239 <+59>: lea edx,[rsi+rcx*1]
0x0000000000bc723c <+62>: sar edx,1
0x0000000000bc723e <+64>: movsxd r9,edx
0x0000000000bc7241 <+67>: imul r8,r9,0x18
0x0000000000bc7245 <+71>: add r8,r10
0x0000000000bc7248 <+74>: cmp eax,DWORD PTR [r8]
0x0000000000bc724b <+77>: jne 0xbc727c <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+126>
0x0000000000bc724d <+79>: push rcx
0x0000000000bc724e <+80>: mov rax,QWORD PTR [r8+0x8]
0x0000000000bc7252 <+84>: test al,0x1
0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000bc7263 <+101>: imul r9,r9,0x18
0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
0x0000000000bc7273 <+117>: call rax
0x0000000000bc7275 <+119>: mov eax,0x1
0x0000000000bc727a <+124>: pop rdx
0x0000000000bc727b <+125>: ret
0x0000000000bc727c <+126>: jns 0xbc7283 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+133>
0x0000000000bc727e <+128>: lea ecx,[rdx-0x1]
0x0000000000bc7281 <+131>: jmp 0xbc7286 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+136>
0x0000000000bc7283 <+133>: lea esi,[rdx+0x1]
0x0000000000bc7286 <+136>: cmp esi,ecx
0x0000000000bc7288 <+138>: jle 0xbc7239 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+59>
0x0000000000bc728a <+140>: xor eax,eax
0x0000000000bc728c <+142>: cmp DWORD PTR [rdi+0x454],0x0
0x0000000000bc7293 <+149>: setne al
0x0000000000bc7296 <+152>: ret
End of assembler dump.
Please note instructions +84 through +96, which correspond to (this->*g_OpCodes[middle].m_OpHandler)();
The register rax holds the pointer to the handler function, but instead of calling it directly, the compiler adds those instructions and tests the least significant bit. Why?
I found other instances of this in other functions. Like in the function void CFX_Renderer::render(const Scanline& sl) in
https://github.com/priority5/qt/blob/2a6b2348ced4cb42e8e3c320a6e5aa3606c0d5a1/qtwebengine/src/3rdparty/chromium/third_party/pdfium/core/fxge/agg/fx_agg_driver.cpp :
(this->*composite_span)(..args..);
which corresponds to:
0x0000000000c40a6c <+480>: test dl,0x1
0x0000000000c40a6f <+483>: mov rax,rdx
0x0000000000c40a72 <+486>: je 0xc40a7d <_ZN12CFX_Renderer6renderIN5fxagg10scanline_uIhEEEEvRKT_+497>
0x0000000000c40a74 <+488>: mov rax,QWORD PTR [rbx+rdi*1]
0x0000000000c40a78 <+492>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000c40a7d <+497>: mov edx,DWORD PTR [r15+0x4]
c++
c++
edited Nov 21 '18 at 22:32
Neil Butterworth
27.1k54680
27.1k54680
asked Nov 21 '18 at 22:23
MansourMansour
113
113
Most things, like functions and (most) variables, are aligned to some size larger than a single byte, so the last bit will never be anything but zero.
– Jesper Juhl
Nov 21 '18 at 22:31
1
Obvious question - which compiler?
– Neil Butterworth
Nov 21 '18 at 22:32
@NeilButterworth I dont know .. I downloaded the compiled binary from foxitsoftware.com/downloads
– Mansour
Nov 21 '18 at 22:52
@JesperJuhl I see your point, but still I dont see why the check in first place ..
– Mansour
Nov 21 '18 at 22:59
Multiple inheritance shenanigans, most likely. Matters a lot for member function pointers that might reference an object of a class that uses virtual inheritance. Googling "v-table thunks" for the compiler you use could well pay off.
– Hans Passant
Nov 21 '18 at 23:31
|
show 1 more comment
Most things, like functions and (most) variables, are aligned to some size larger than a single byte, so the last bit will never be anything but zero.
– Jesper Juhl
Nov 21 '18 at 22:31
1
Obvious question - which compiler?
– Neil Butterworth
Nov 21 '18 at 22:32
@NeilButterworth I dont know .. I downloaded the compiled binary from foxitsoftware.com/downloads
– Mansour
Nov 21 '18 at 22:52
@JesperJuhl I see your point, but still I dont see why the check in first place ..
– Mansour
Nov 21 '18 at 22:59
Multiple inheritance shenanigans, most likely. Matters a lot for member function pointers that might reference an object of a class that uses virtual inheritance. Googling "v-table thunks" for the compiler you use could well pay off.
– Hans Passant
Nov 21 '18 at 23:31
Most things, like functions and (most) variables, are aligned to some size larger than a single byte, so the last bit will never be anything but zero.
– Jesper Juhl
Nov 21 '18 at 22:31
Most things, like functions and (most) variables, are aligned to some size larger than a single byte, so the last bit will never be anything but zero.
– Jesper Juhl
Nov 21 '18 at 22:31
1
1
Obvious question - which compiler?
– Neil Butterworth
Nov 21 '18 at 22:32
Obvious question - which compiler?
– Neil Butterworth
Nov 21 '18 at 22:32
@NeilButterworth I dont know .. I downloaded the compiled binary from foxitsoftware.com/downloads
– Mansour
Nov 21 '18 at 22:52
@NeilButterworth I dont know .. I downloaded the compiled binary from foxitsoftware.com/downloads
– Mansour
Nov 21 '18 at 22:52
@JesperJuhl I see your point, but still I dont see why the check in first place ..
– Mansour
Nov 21 '18 at 22:59
@JesperJuhl I see your point, but still I dont see why the check in first place ..
– Mansour
Nov 21 '18 at 22:59
Multiple inheritance shenanigans, most likely. Matters a lot for member function pointers that might reference an object of a class that uses virtual inheritance. Googling "v-table thunks" for the compiler you use could well pay off.
– Hans Passant
Nov 21 '18 at 23:31
Multiple inheritance shenanigans, most likely. Matters a lot for member function pointers that might reference an object of a class that uses virtual inheritance. Googling "v-table thunks" for the compiler you use could well pay off.
– Hans Passant
Nov 21 '18 at 23:31
|
show 1 more comment
1 Answer
1
active
oldest
votes
I am guessing that the compiler uses odd addresses to indicate funky function pointers, perhaps indicating indirection through the class vtable or some other mapping, perhaps for dynamically loaded code.
My best guess is that this is a pointer to member function, which could refer to either a simple (non-virtual) member function or a virtual function. If it is simple, the value is the raw address; if it is virtual, it is the funky pointer value.
If the "pointer" is even, it is used directly as the function address. If it is odd, the code loads the word stored next to the pointer in the table entry ([r8+0x10]; r8 holds the address of the g_OpCodes entry), adds it to rdi (the first-argument register in the System V x86-64 calling convention, which holds this for a member function), loads a pointer from that location (presumably the vtable pointer), and then reads the real function address from that pointer plus rax with the 1 subtracted. This matches the pointer-to-member-function representation of the Itanium C++ ABI, which the mangled symbol names suggest is in use; the details depend on the platform's ABI.
0x0000000000bc724e <+80>: mov rax,QWORD PTR [r8+0x8]
0x0000000000bc7252 <+84>: test al,0x1
0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000bc7263 <+101>: imul r9,r9,0x18
0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
0x0000000000bc7273 <+117>: call rax
add a comment |
Your Answer
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53421279%2fwhy-compilers-test-the-least-significant-bit-in-an-address%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
I am guessing that the compiler uses odd addresses to indicate funky function pointers, perhaps indicating indirection through the class vtable or some other mapping, perhaps for dynamically loaded code.
My best guess is that this is a pointer to member function, which could refer to either a simple (non-virtual) member function or a virtual function. If it is simple, the value is the raw address; if it is virtual, it is the funky pointer value.
If the "pointer" is even, it is used directly as the function address. If it is odd, the code loads the word stored next to the pointer in the table entry ([r8+0x10]; r8 holds the address of the g_OpCodes entry), adds it to rdi (the first-argument register in the System V x86-64 calling convention, which holds this for a member function), loads a pointer from that location (presumably the vtable pointer), and then reads the real function address from that pointer plus rax with the 1 subtracted. This matches the pointer-to-member-function representation of the Itanium C++ ABI, which the mangled symbol names suggest is in use; the details depend on the platform's ABI.
0x0000000000bc724e <+80>: mov rax,QWORD PTR [r8+0x8]
0x0000000000bc7252 <+84>: test al,0x1
0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000bc7263 <+101>: imul r9,r9,0x18
0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
0x0000000000bc7273 <+117>: call rax
add a comment |
I am guessing that the compiler uses odd addresses to indicate funky function pointers, perhaps indicating indirection through the class vtable or some other mapping, perhaps for dynamically loaded code.
My best guess is that this is a pointer to member function, which could refer to either a simple (non-virtual) member function or a virtual function. If it is simple, the value is the raw address; if it is virtual, it is the funky pointer value.
If the "pointer" is even, it is used directly as the function address. If it is odd, the code loads the word stored next to the pointer in the table entry ([r8+0x10]; r8 holds the address of the g_OpCodes entry), adds it to rdi (the first-argument register in the System V x86-64 calling convention, which holds this for a member function), loads a pointer from that location (presumably the vtable pointer), and then reads the real function address from that pointer plus rax with the 1 subtracted. This matches the pointer-to-member-function representation of the Itanium C++ ABI, which the mangled symbol names suggest is in use; the details depend on the platform's ABI.
0x0000000000bc724e <+80>: mov rax,QWORD PTR [r8+0x8]
0x0000000000bc7252 <+84>: test al,0x1
0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000bc7263 <+101>: imul r9,r9,0x18
0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
0x0000000000bc7273 <+117>: call rax
add a comment |
I am guessing that the compiler uses odd addresses to indicate funky function pointers, perhaps indicating indirection through the class vtable or some other mapping, perhaps for dynamically loaded code.
My best guess is that this is a pointer to member function, which could refer to either a simple (non-virtual) member function or a virtual function. If it is simple, the value is the raw address; if it is virtual, it is the funky pointer value.
If the "pointer" is even, it is used directly as the function address. If it is odd, the code loads the word stored next to the pointer in the table entry ([r8+0x10]; r8 holds the address of the g_OpCodes entry), adds it to rdi (the first-argument register in the System V x86-64 calling convention, which holds this for a member function), loads a pointer from that location (presumably the vtable pointer), and then reads the real function address from that pointer plus rax with the 1 subtracted. This matches the pointer-to-member-function representation of the Itanium C++ ABI, which the mangled symbol names suggest is in use; the details depend on the platform's ABI.
0x0000000000bc724e <+80>: mov rax,QWORD PTR [r8+0x8]
0x0000000000bc7252 <+84>: test al,0x1
0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000bc7263 <+101>: imul r9,r9,0x18
0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
0x0000000000bc7273 <+117>: call rax
I am guessing that the compiler uses odd addresses to indicate funky function pointers, perhaps indicating indirection through the class vtable or some other mapping, perhaps for dynamically loaded code.
My best guess is that this is a pointer to member function, which could refer to either a simple (non-virtual) member function or a virtual function. If it is simple, the value is the raw address; if it is virtual, it is the funky pointer value.
If the "pointer" is even, it is used directly as the function address. If it is odd, the code loads the word stored next to the pointer in the table entry ([r8+0x10]; r8 holds the address of the g_OpCodes entry), adds it to rdi (the first-argument register in the System V x86-64 calling convention, which holds this for a member function), loads a pointer from that location (presumably the vtable pointer), and then reads the real function address from that pointer plus rax with the 1 subtracted. This matches the pointer-to-member-function representation of the Itanium C++ ABI, which the mangled symbol names suggest is in use; the details depend on the platform's ABI.
0x0000000000bc724e <+80>: mov rax,QWORD PTR [r8+0x8]
0x0000000000bc7252 <+84>: test al,0x1
0x0000000000bc7254 <+86>: je 0xbc7263 <_ZN24CPDF_StreamContentParser10OnOperatorEPKc+101>
0x0000000000bc7256 <+88>: mov rdx,QWORD PTR [r8+0x10]
0x0000000000bc725a <+92>: mov rdx,QWORD PTR [rdi+rdx*1]
0x0000000000bc725e <+96>: mov rax,QWORD PTR [rdx+rax*1-0x1]
0x0000000000bc7263 <+101>: imul r9,r9,0x18
0x0000000000bc7267 <+105>: lea rdx,[rip+0x180f402] # 0x23d6670 <_ZL9g_OpCodes>
0x0000000000bc726e <+112>: add rdi,QWORD PTR [rdx+r9*1+0x10]
0x0000000000bc7273 <+117>: call rax
answered Nov 21 '18 at 23:20
Gem TaylorGem Taylor
1,943217
1,943217
add a comment |
add a comment |
Thanks for contributing an answer to Stack Overflow!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53421279%2fwhy-compilers-test-the-least-significant-bit-in-an-address%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Most things, like functions and (most) variables, are aligned to some size larger than a single byte, so the last bit will never be anything but zero.
– Jesper Juhl
Nov 21 '18 at 22:31
1
Obvious question - which compiler?
– Neil Butterworth
Nov 21 '18 at 22:32
@NeilButterworth I dont know .. I downloaded the compiled binary from foxitsoftware.com/downloads
– Mansour
Nov 21 '18 at 22:52
@JesperJuhl I see your point, but still I dont see why the check in first place ..
– Mansour
Nov 21 '18 at 22:59
Multiple inheritance shenanigans, most likely. Matters a lot for member function pointers that might reference an object of a class that uses virtual inheritance. Googling "v-table thunks" for the compiler you use could well pay off.
– Hans Passant
Nov 21 '18 at 23:31