Load/store splitting behavior is not consistent
Version and Platform (required):
- Binary Ninja Version: 4.1.5474-dev, f6ba0af7
- OS: macos
- OS Version: 14.5
- CPU Architecture: arm64
Internal binary major dine favor.
_DjiAircraftInfo_GetBaseInfo...
IDA does this
*&type = *&AIRCRAFT_INFO.aircraftType;
*&info->aircraftSeries = *&AIRCRAFT_INFO.aircraftSeries;
*&info->aircraftType = *&type;
info->mountPosition = AIRCRAFT_INFO.mountPosition;
whereas BN outputs this HLIL
000471b0 int64_t x0_9
000471b0 x0_9.d = AIRCRAFT_INFO.aircraftSeries
000471b0 x0_9:4.d = *(&AIRCRAFT_INFO + 4)
000471b0 int64_t x1_3 = AIRCRAFT_INFO.aircraftType.q
000471b4 info->aircraftSeries = x0_9.d
000471b4 info->mountPositionType = x0_9:4.d
000471b4 info->aircraftType = x1_3.d
000471b4 info->djiAdapterType = x1_3:4.d
000471bc info->mountPosition = AIRCRAFT_INFO.mountPosition
I thought BN needs help with the stack so I went to look at it. Unfortunately, the stack layout window doesn't let me copy the contents so I'm attaching a pic:
Where are x0_9 and x1_3 in the stack? How can I split them into 32-bit ints?
I thought that these could be at offset -0x50 but creating a variable there doesn't affect the use of x0_9 and x1_3.
Why do I have both var_10 and var_10_1 at offset -0x10 in the stack layout, at the same offset as err_code?
It would be very helpful to carry the variable names into the stack. For example, BN knows that struct T_DjiAircraftInfoBaseInfo* is at -0x38 but calls it var_38'. When try to rename it to infoBN doesn't detect it's an alias and wants to call itinfo_1`.
I can get highlights of err_code in the HLIL when I click on err_code in the stack layout but it doesn't happen with var_38. Why?
IDA stack
-0000000000000050 ; Frame size: 50; Saved regs: 0; Purge: 0
-0000000000000050 ;
-0000000000000050
-0000000000000050 var_50 DCQ ?
-0000000000000048 DCB ? ; undefined
-0000000000000047 DCB ? ; undefined
-0000000000000046 DCB ? ; undefined
-0000000000000045 DCB ? ; undefined
-0000000000000044 DCB ? ; undefined
-0000000000000043 DCB ? ; undefined
-0000000000000042 DCB ? ; undefined
-0000000000000041 DCB ? ; undefined
-0000000000000040 DCB ? ; undefined
-000000000000003F DCB ? ; undefined
-000000000000003E DCB ? ; undefined
-000000000000003D DCB ? ; undefined
-000000000000003C DCB ? ; undefined
-000000000000003B DCB ? ; undefined
-000000000000003A DCB ? ; undefined
-0000000000000039 DCB ? ; undefined
-0000000000000038 var_38 DCQ ?
-0000000000000030 DCB ? ; undefined
-000000000000002F DCB ? ; undefined
-000000000000002E DCB ? ; undefined
-000000000000002D DCB ? ; undefined
-000000000000002C DCB ? ; undefined
-000000000000002B DCB ? ; undefined
-000000000000002A DCB ? ; undefined
-0000000000000029 DCB ? ; undefined
-0000000000000028 err_desc DCQ ?
-0000000000000020 var_20 DCQ ?
-0000000000000018 var_18 DCQ ?
-0000000000000010 err DCQ ?
-0000000000000008 osal DCQ ?
+0000000000000000
+0000000000000000 ; end of stack variables
Full IDA Pro output
int64_t __fastcall DjiAircraftInfo_GetBaseInfo(T_DjiAircraftInfoBaseInfo *info)
{
E_DjiAircraftType type; // w1
ErrDesc err_desc; // [xsp+28h] [xbp+28h] BYREF
int64_t err; // [xsp+40h] [xbp+40h]
T_DjiOsalHandler *osal; // [xsp+48h] [xbp+48h]
osal = DjiPlatform_GetOsalHandler();
if ( info )
{
err = osal->MutexLock(AIRCRAFT_INFO_MUTEX);
if ( err )
{
DjiLogger_Output("infor", 0, "[%s:%d) Lock mutex error.", "_DjiAircraftInfo_GetBaseInfo", 365LL);
return err;
}
else
{
*&type = *&AIRCRAFT_INFO.aircraftType;
*&info->aircraftSeries = *&AIRCRAFT_INFO.aircraftSeries;
*&info->aircraftType = *&type;
info->mountPosition = AIRCRAFT_INFO.mountPosition;
err = osal->MutexUnlock(AIRCRAFT_INFO_MUTEX);
if ( err )
{
DjiLogger_Output("infor", 0, "[%s:%d) Unlock mutex error.", "_DjiAircraftInfo_GetBaseInfo", 373LL);
return err;
}
else
{
return 0LL;
}
}
}
else
{
err = 227LL;
if ( !DjiError_IsSuccess_localalias_0(227LL) )
{
memset(&err_desc, 0, sizeof(err_desc));
DjiError_GetErrorMsgElements(err, &err_desc);
DjiLogger_Output(
"infor",
0,
"[%s:%d) %s%s%s",
"_DjiAircraftInfo_GetBaseInfo",
359LL,
err_desc.description,
err_desc.suggestion,
err_desc.recovery);
}
return err;
}
}
Full BN output
00047094 int64_t _DjiAircraftInfo_GetBaseInfo(struct T_DjiAircraftInfoBaseInfo* info)
000470a0 struct T_DjiOsalHandler* osal = DjiPlatform_GetOsalHandler()
000470b0 uint64_t result
00047094
000470b0 if (info != 0)
00047154 uint64_t err = osal->MutexLock(mutex: AIRCRAFT_INFO_MUTEX)
00047154
00047164 if (err != 0)
00047190 DjiLogger_Output(tag: "infor", level: 0, fmt: "[%s:%d) Lock mutex error.", "_DjiAircraftInfo_GetBaseInfo", 0x16d)
00047194 result = err
00047164 else
000471b0 int64_t x0_9
000471b0 x0_9.d = AIRCRAFT_INFO.aircraftSeries
000471b0 x0_9:4.d = *(&AIRCRAFT_INFO + 4)
000471b0 int64_t x1_3 = AIRCRAFT_INFO.aircraftType.q
000471b4 info->aircraftSeries = x0_9.d
000471b4 info->mountPositionType = x0_9:4.d
000471b4 info->aircraftType = x1_3.d
000471b4 info->djiAdapterType = x1_3:4.d
000471bc info->mountPosition = AIRCRAFT_INFO.mountPosition
000471d4 uint64_t result_1 = osal->MutexUnlock(mutex: AIRCRAFT_INFO_MUTEX)
000471b0
000471e4 if (result_1 == 0)
0004721c result = 0
000471e4 else
00047210 DjiLogger_Output(tag: "infor", level: 0, fmt: "[%s:%d) Unlock mutex error.", "_DjiAircraftInfo_GetBaseInfo", 0x175)
00047214 result = result_1
000470b0 else
000470b8 int64_t err_code = 0xe3
000470b8
000470d4 if (zx.d(DjiError_IsSuccess(0xe3) ^ 1) != 0)
000470dc struct ErrDesc desc
000470dc __builtin_memset(s: &desc, c: 0, n: 0x18)
000470f0 DjiError_GetErrorMsgElements(err_code, err_desc: &desc)
00047134 DjiLogger_Output(tag: "infor", level: 0, fmt: "[%s:%d) %s%s%s", "_DjiAircraftInfo_GetBaseInfo", 0x167, desc.description, desc.suggestion, desc.recovery)
000470b8
00047138 result = err_code
00047094
00047224 return result
What is the size of the enum E_DjiAircraftType in IDA? Is it 4 bytes or 8 bytes wide? Your enum in BN is typed to uint32_t and the access is 8 bytes wide.
It's 4 bytes, same as BN
FFFFFFFF ; enum E_DjiAircraftType, copyof_59, width 4 bytes
FFFFFFFF DJI_AIRCRAFT_TYPE_UNKNOWN EQU 0
FFFFFFFF DJI_AIRCRAFT_TYPE_M200_V2 EQU 0x2C
FFFFFFFF DJI_AIRCRAFT_TYPE_M210_V2 EQU 0x2D
FFFFFFFF DJI_AIRCRAFT_TYPE_M210RTK_V2 EQU 0x2E
FFFFFFFF DJI_AIRCRAFT_TYPE_M300_RTK EQU 0x3C
FFFFFFFF DJI_AIRCRAFT_TYPE_M30 EQU 0x43
FFFFFFFF DJI_AIRCRAFT_TYPE_M30T EQU 0x44
FFFFFFFF DJI_AIRCRAFT_TYPE_M3E EQU 0x4D
FFFFFFFF DJI_AIRCRAFT_TYPE_FC30 EQU 0x4E
FFFFFFFF DJI_AIRCRAFT_TYPE_M3T EQU 0x4F
FFFFFFFF DJI_AIRCRAFT_TYPE_M350_RTK EQU 0x59
FFFFFFFF DJI_AIRCRAFT_TYPE_M3D EQU 0x5B
FFFFFFFF DJI_AIRCRAFT_TYPE_M3TD EQU 0x5D
So, firstly, this has nothing to do with the stack. The variables x0_10 and x1_3 are actually in registers. You can confirm this by hovering over the variable:
It also shows this at the top of the function if you turn on Hamburger Icon -> Show Variable Types -> At Top of Function:
Instead, it has everything to do with our split load/store simplification. These are both equivalent and correct:
// IDA Pro
*&type = *&AIRCRAFT_INFO.aircraftType;
*&info->aircraftSeries = *&AIRCRAFT_INFO.aircraftSeries;
*&info->aircraftType = *&type;
info->mountPosition = AIRCRAFT_INFO.mountPosition;
// Binary Ninja
x0_10.d = s_aircraftBaseInfo.aircraftSeries // This read has been split into two, 4-byte reads
x0_10:4.d = *(&s_aircraftBaseInfo + 4)
int64_t x1_3 = s_aircraftBaseInfo.aircraftType.q
arg1->aircraftSeries = x0_10.d
arg1->mountPositionType = x0_10:4.d
arg1->aircraftType = x1_3.d
arg1->djiAdapterType = x1_3:4.d
arg1->mountPosition = s_aircraftBaseInfo.mountPosition
Our output is definitely less clear at the top (because we split up the reads for reasons I don't quite understand), but I think it's more clear at the bottom (because we show each individual write to a structure field).
If you agree, I think it's reasonable for us to investigate why the x0_10 assignment is being split in half. I can see that being confusing. But, these are 8-byte reads, so I expect both Binary Ninja and IDA to use 8-byte types here.
Co-authored by @fuzyll
If you agree, I think it's reasonable for us to investigate why the x0_10 assignment is being split in half. I can see that being confusing.
I definitely agree!
But, these are 8-byte reads, so I expect both Binary Ninja and IDA to use 8-byte types here.
I can split the expressions with IDA so that this
*&type = *&AIRCRAFT_INFO.aircraftType;
*&info->aircraftSeries = *&AIRCRAFT_INFO.aircraftSeries;
*&info->aircraftType = *&type;
info->mountPosition = AIRCRAFT_INFO.mountPosition;
becomes this
mount_pos_type = AIRCRAFT_INFO.mountPositionType;
type = AIRCRAFT_INFO.aircraftType;
sdk_adapter_type = AIRCRAFT_INFO.djiAdapterType;
info->aircraftSeries = AIRCRAFT_INFO.aircraftSeries;
info->mountPositionType = mount_pos_type;
info->aircraftType = type;
info->djiAdapterType = sdk_adapter_type;
info->mountPosition = AIRCRAFT_INFO.mountPosition;
Yes, you are correct. In order of issue priority:
- The behavior of the load/store splitting is consistent, i.e. if a split happens to
x0_3thenx1_3should be split, and vice versa. - The behavior of the load/store splitting chooses to split in this case.
- We should be able to see in the assignments to
infothat the partial accesses (onx0_3andx1_3) are the only use and move them into that expression.
After more discussion 3. is a little bit more tricky.
When we go to fold we check to make sure we are not folding across memory versions. However in the case of this series of instructions (ldp, stp) it is safe to do so. We have two paths forward:
- Fold across memory version boundaries on same instruction.
- Lift instructions as atomic and only increment memory version once.
What is a memory version?
What is a memory version?
SSA for memory :) https://www.airs.com/dnovillo/Papers/mem-ssa.pdf
Note the @ mem#7
000471b0 (HLIL_ASSIGN (HLIL_STRUCT_FIELD (HLIL_VAR_SSA x0_12#1).d) = (HLIL_DEREF_FIELD_SSA s_aircraftBaseInfo.aircraftSeries @ mem#7))
000471b0 (HLIL_ASSIGN (HLIL_STRUCT_FIELD (HLIL_VAR_SSA x0_12#2):4.d) = (HLIL_DEREF_FIELD_SSA s_aircraftBaseInfo.mountPositionType @ mem#7))
000471b4 (HLIL_ASSIGN_MEM_SSA (HLIL_DEREF_FIELD_SSA (HLIL_VAR_SSA arg1#0)->aircraftSeries @ mem#7) @ mem#9 = (HLIL_STRUCT_FIELD (HLIL_VAR_SSA x0_12#2).d) @ mem#7)
000471b4 (HLIL_ASSIGN_MEM_SSA (HLIL_DEREF_FIELD_SSA (HLIL_VAR_SSA arg1#0)->mountPositionType @ mem#9) @ mem#10 = (HLIL_STRUCT_FIELD (HLIL_VAR_SSA x0_12#2):4.d) @ mem#9)