Different gcc assembly when using designated initializers
I was checking some gcc generated assembly for ARM and noticed that I get strange results if I use designated initializers:
E.g. if I have this code:
struct test
{
int x;
int y;
};
__attribute__((noinline))
struct test get_struct_1(void)
{
struct test x;
x.x = 123456780;
x.y = 123456781;
return x;
}
__attribute__((noinline))
struct test get_struct_2(void)
{
return (struct test){ .x = 123456780, .y = 123456781 };
}
I get the following output with gcc -O2 -std=c11 for ARM (ARM GCC 6.3.0):
get_struct_1:
ldr r1, .L2
ldr r2, .L2+4
stm r0, {r1, r2}
bx lr
.L2:
.word 123456780
.word 123456781
get_struct_2: // <--- what is happening here
mov r3, r0
ldr r2, .L5
ldm r2, {r0, r1}
stm r3, {r0, r1}
mov r0, r3
bx lr
.L5:
.word .LANCHOR0
I can see the constants for the first function, but I don't understand how get_struct_2
works.
If I compile for x86, both functions just load the same single 64-bit value in a single instruction.
get_struct_1:
movabs rax, 530242836987890956
ret
get_struct_2:
movabs rax, 530242836987890956
ret
Am I provoking some undefined behavior, or is this .LANCHOR0
somehow related to these constants?
c gcc assembly arm
add a comment |
I was checking some gcc generated assembly for ARM and noticed that I get strange results if I use designated initializers:
E.g. if I have this code:
struct test
{
int x;
int y;
};
__attribute__((noinline))
struct test get_struct_1(void)
{
struct test x;
x.x = 123456780;
x.y = 123456781;
return x;
}
__attribute__((noinline))
struct test get_struct_2(void)
{
return (struct test){ .x = 123456780, .y = 123456781 };
}
I get the following output with gcc -O2 -std=c11 for ARM (ARM GCC 6.3.0):
get_struct_1:
ldr r1, .L2
ldr r2, .L2+4
stm r0, {r1, r2}
bx lr
.L2:
.word 123456780
.word 123456781
get_struct_2: // <--- what is happening here
mov r3, r0
ldr r2, .L5
ldm r2, {r0, r1}
stm r3, {r0, r1}
mov r0, r3
bx lr
.L5:
.word .LANCHOR0
I can see the constants for the first function, but I don't understand how get_struct_2
works.
If I compile for x86, both functions just load the same single 64-bit value in a single instruction.
get_struct_1:
movabs rax, 530242836987890956
ret
get_struct_2:
movabs rax, 530242836987890956
ret
Am I provoking some undefined behavior, or is this .LANCHOR0
somehow related to these constants?
c gcc assembly arm
You did not show .LANCHOR0
but I assume it has your two constants in it.
– Jester
Nov 26 '18 at 12:28
@Jester: that's right, if I turn off "unused labels filtering", I can find those values at .LANCHOR0.
That makes sense, even if I don't understand why the assembly is different.
– Lou
Nov 26 '18 at 12:31
add a comment |
I was checking some gcc generated assembly for ARM and noticed that I get strange results if I use designated initializers:
E.g. if I have this code:
struct test
{
int x;
int y;
};
__attribute__((noinline))
struct test get_struct_1(void)
{
struct test x;
x.x = 123456780;
x.y = 123456781;
return x;
}
__attribute__((noinline))
struct test get_struct_2(void)
{
return (struct test){ .x = 123456780, .y = 123456781 };
}
I get the following output with gcc -O2 -std=c11 for ARM (ARM GCC 6.3.0):
get_struct_1:
ldr r1, .L2
ldr r2, .L2+4
stm r0, {r1, r2}
bx lr
.L2:
.word 123456780
.word 123456781
get_struct_2: // <--- what is happening here
mov r3, r0
ldr r2, .L5
ldm r2, {r0, r1}
stm r3, {r0, r1}
mov r0, r3
bx lr
.L5:
.word .LANCHOR0
I can see the constants for the first function, but I don't understand how get_struct_2
works.
If I compile for x86, both functions just load the same single 64-bit value in a single instruction.
get_struct_1:
movabs rax, 530242836987890956
ret
get_struct_2:
movabs rax, 530242836987890956
ret
Am I provoking some undefined behavior, or is this .LANCHOR0
somehow related to these constants?
c gcc assembly arm
I was checking some gcc generated assembly for ARM and noticed that I get strange results if I use designated initializers:
E.g. if I have this code:
struct test
{
int x;
int y;
};
__attribute__((noinline))
struct test get_struct_1(void)
{
struct test x;
x.x = 123456780;
x.y = 123456781;
return x;
}
__attribute__((noinline))
struct test get_struct_2(void)
{
return (struct test){ .x = 123456780, .y = 123456781 };
}
I get the following output with gcc -O2 -std=c11 for ARM (ARM GCC 6.3.0):
get_struct_1:
ldr r1, .L2
ldr r2, .L2+4
stm r0, {r1, r2}
bx lr
.L2:
.word 123456780
.word 123456781
get_struct_2: // <--- what is happening here
mov r3, r0
ldr r2, .L5
ldm r2, {r0, r1}
stm r3, {r0, r1}
mov r0, r3
bx lr
.L5:
.word .LANCHOR0
I can see the constants for the first function, but I don't understand how get_struct_2
works.
If I compile for x86, both functions just load the same single 64-bit value in a single instruction.
get_struct_1:
movabs rax, 530242836987890956
ret
get_struct_2:
movabs rax, 530242836987890956
ret
Am I provoking some undefined behavior, or is this .LANCHOR0
somehow related to these constants?
c gcc assembly arm
c gcc assembly arm
asked Nov 26 '18 at 12:22
LouLou
1,5851549
1,5851549
You did not show .LANCHOR0
but I assume it has your two constants in it.
– Jester
Nov 26 '18 at 12:28
@Jester: that's right, if I turn off "unused labels filtering", I can find those values at .LANCHOR0.
That makes sense, even if I don't understand why the assembly is different.
– Lou
Nov 26 '18 at 12:31
add a comment |
You did not show .LANCHOR0
but I assume it has your two constants in it.
– Jester
Nov 26 '18 at 12:28
@Jester: that's right, if I turn off "unused labels filtering", I can find those values at .LANCHOR0.
That makes sense, even if I don't understand why the assembly is different.
– Lou
Nov 26 '18 at 12:31
You did not show .LANCHOR0 but I assume it has your two constants in it. – Jester
Nov 26 '18 at 12:28
You did not show .LANCHOR0 but I assume it has your two constants in it. – Jester
Nov 26 '18 at 12:28
@Jester: that's right, if I turn off "unused labels filtering", I can find those values at .LANCHOR0. That makes sense, even if I don't understand why the assembly is different. – Lou
Nov 26 '18 at 12:31
@Jester: that's right, if I turn off "unused labels filtering", I can find those values at .LANCHOR0. That makes sense, even if I don't understand why the assembly is different. – Lou
Nov 26 '18 at 12:31
add a comment |
1 Answer
1
active
oldest
votes
Looks like gcc shoots itself in the foot with an extra level of indirection after merging the loads of the constants into an ldm.
No idea why, but pretty obviously a missed optimization bug.
x86-64 is easy to optimize for; the entire 8-byte constant can go in one immediate. But ARM often uses PC-relative loads for constants that are too big for one immediate.
Not to mention copying r0 into r3 and back. I wonder if optimization was really enabled for that code as well as the first one. EDIT: yeah it's on ... strange.
– Jester
Nov 26 '18 at 12:31
Also, if I just call these functions from main()
and add their results, gcc will calculate the compile time constant and return it, so it optimizes it correctly in that case. Perhaps I can report this somewhere to be fixed in future versions. Thanks!
– Lou
Nov 26 '18 at 12:37
@Lou: yes, report at gcc.gnu.org/bugzilla and use the "missed-optimization" keyword. It's no surprise that constant-propagation still works, but yeah it's good to know the scope of this bug is limited.
– Peter Cordes
Nov 26 '18 at 12:49
add a comment |
Your Answer
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53481048%2fdifferent-gcc-assembly-when-using-designated-initializers%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
Looks like gcc shoots itself in the foot with an extra level of indirection after merging the loads of the constants into an ldm.
No idea why, but pretty obviously a missed optimization bug.
x86-64 is easy to optimize for; the entire 8-byte constant can go in one immediate. But ARM often uses PC-relative loads for constants that are too big for one immediate.
Not to mention copying r0 into r3 and back. I wonder if optimization was really enabled for that code as well as the first one. EDIT: yeah it's on ... strange.
– Jester
Nov 26 '18 at 12:31
Also, if I just call these functions from main()
and add their results, gcc will calculate the compile time constant and return it, so it optimizes it correctly in that case. Perhaps I can report this somewhere to be fixed in future versions. Thanks!
– Lou
Nov 26 '18 at 12:37
@Lou: yes, report at gcc.gnu.org/bugzilla and use the "missed-optimization" keyword. It's no surprise that constant-propagation still works, but yeah it's good to know the scope of this bug is limited.
– Peter Cordes
Nov 26 '18 at 12:49
add a comment |
Looks like gcc shoots itself in the foot with an extra level of indirection after merging the loads of the constants into an ldm.
No idea why, but pretty obviously a missed optimization bug.
x86-64 is easy to optimize for; the entire 8-byte constant can go in one immediate. But ARM often uses PC-relative loads for constants that are too big for one immediate.
Not to mention copying r0 into r3 and back. I wonder if optimization was really enabled for that code as well as the first one. EDIT: yeah it's on ... strange.
– Jester
Nov 26 '18 at 12:31
Also, if I just call these functions from main()
and add their results, gcc will calculate the compile time constant and return it, so it optimizes it correctly in that case. Perhaps I can report this somewhere to be fixed in future versions. Thanks!
– Lou
Nov 26 '18 at 12:37
@Lou: yes, report at gcc.gnu.org/bugzilla and use the "missed-optimization" keyword. It's no surprise that constant-propagation still works, but yeah it's good to know the scope of this bug is limited.
– Peter Cordes
Nov 26 '18 at 12:49
add a comment |
Looks like gcc shoots itself in the foot with an extra level of indirection after merging the loads of the constants into an ldm.
No idea why, but pretty obviously a missed optimization bug.
x86-64 is easy to optimize for; the entire 8-byte constant can go in one immediate. But ARM often uses PC-relative loads for constants that are too big for one immediate.
Looks like gcc shoots itself in the foot with an extra level of indirection after merging the loads of the constants into an ldm.
No idea why, but pretty obviously a missed optimization bug.
x86-64 is easy to optimize for; the entire 8-byte constant can go in one immediate. But ARM often uses PC-relative loads for constants that are too big for one immediate.
answered Nov 26 '18 at 12:26
Peter CordesPeter Cordes
133k18203340
133k18203340
Not to mention copying r0 into r3 and back. I wonder if optimization was really enabled for that code as well as the first one. EDIT: yeah it's on ... strange.
– Jester
Nov 26 '18 at 12:31
Also, if I just call these functions from main()
and add their results, gcc will calculate the compile time constant and return it, so it optimizes it correctly in that case. Perhaps I can report this somewhere to be fixed in future versions. Thanks!
– Lou
Nov 26 '18 at 12:37
@Lou: yes, report at gcc.gnu.org/bugzilla and use the "missed-optimization" keyword. It's no surprise that constant-propagation still works, but yeah it's good to know the scope of this bug is limited.
– Peter Cordes
Nov 26 '18 at 12:49
add a comment |
Not to mention copying r0 into r3 and back. I wonder if optimization was really enabled for that code as well as the first one. EDIT: yeah it's on ... strange.
– Jester
Nov 26 '18 at 12:31
Also, if I just call these functions from main()
and add their results, gcc will calculate the compile time constant and return it, so it optimizes it correctly in that case. Perhaps I can report this somewhere to be fixed in future versions. Thanks!
– Lou
Nov 26 '18 at 12:37
@Lou: yes, report at gcc.gnu.org/bugzilla and use the "missed-optimization" keyword. It's no surprise that constant-propagation still works, but yeah it's good to know the scope of this bug is limited.
– Peter Cordes
Nov 26 '18 at 12:49
Not to mention copying r0 into r3 and back. I wonder if optimization was really enabled for that code as well as the first one. EDIT: yeah it's on ... strange.
– Jester
Nov 26 '18 at 12:31
Not to mention copying r0 into r3 and back. I wonder if optimization was really enabled for that code as well as the first one. EDIT: yeah it's on ... strange.
– Jester
Nov 26 '18 at 12:31
Also, if I just call these functions from main() and add their results, gcc will calculate the compile time constant and return it, so it optimizes it correctly in that case. Perhaps I can report this somewhere to be fixed in future versions. Thanks! – Lou
Nov 26 '18 at 12:37
Also, if I just call these functions from main() and add their results, gcc will calculate the compile time constant and return it, so it optimizes it correctly in that case. Perhaps I can report this somewhere to be fixed in future versions. Thanks! – Lou
Nov 26 '18 at 12:37
@Lou: yes, report at gcc.gnu.org/bugzilla and use the "missed-optimization" keyword. It's no surprise that constant-propagation still works, but yeah it's good to know the scope of this bug is limited.
– Peter Cordes
Nov 26 '18 at 12:49
@Lou: yes, report at gcc.gnu.org/bugzilla and use the "missed-optimization" keyword. It's no surprise that constant-propagation still works, but yeah it's good to know the scope of this bug is limited.
– Peter Cordes
Nov 26 '18 at 12:49
add a comment |
Thanks for contributing an answer to Stack Overflow!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53481048%2fdifferent-gcc-assembly-when-using-designated-initializers%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
You did not show .LANCHOR0 but I assume it has your two constants in it. – Jester
Nov 26 '18 at 12:28
@Jester: that's right, if I turn off "unused labels filtering", I can find those values at .LANCHOR0. That makes sense, even if I don't understand why the assembly is different. – Lou
Nov 26 '18 at 12:31