binaryninja-api icon indicating copy to clipboard operation
binaryninja-api copied to clipboard

Indicate and allow control over when pure function elimination occurs

Open joelreymont opened this issue 1 year ago • 8 comments

Version and Platform (required):

  • Binary Ninja Version: 4.1.5474-dev, f6ba0af7
  • OS: macos
  • OS Version: 14.5
  • CPU Architecture: arm64

Internal binary major dine favor.

IDA

__int64 __fastcall DjiCore_SetFirmwareVersion(int a1)
{
  DjiDataBuriedPoint_ApiHitRecord("DjiCore_SetFirmwareVersion", 221LL);
  FIRMWARE_VERSION = a1;
  HAVE_FIRMWARE_VER = 1;
  return 0LL;
}

BN HLIL is missing the call to DjiDataBuriedPoint_ApiHitRecord

00049084  T_DjiReturnCode DjiCore_SetFirmwareVersion(struct T_DjiFirmwareVersion version)

0004908c      uint32_t ver
0004908c      ver.b = version.majorVersion
0004908c      ver:1.b = version.minorVersion
0004908c      ver:2.b = version.modifyVersion
0004908c      ver:3.b = version.debugVersion
00049094      int64_t x0
00049094      x0.b = 0xa8
00049094      x0:1.b = 0x81
00049094      x0:2.b = 0x12
00049094      x0:3.b = 0
000490ac      FIRMWARE_VERSION = ver
000490bc      HAVE_FIRMWARE_VERSION = 1
000490c0      int64_t x0_1
000490c0      x0_1.b = 0
000490c0      x0_1:1.b = 0
000490c0      x0_1:2.b = 0
000490c0      x0_1:3.b = 0
000490c8      return 0

even though it's present in the disassembly

00049084  T_DjiReturnCode DjiCore_SetFirmwareVersion(struct T_DjiFirmwareVersion version)

00049084  fd7bbea9   stp     x29, x30, [sp, #-0x20]! {__saved_x29} {__saved_x30}
00049088  fd030091   mov     x29, sp {__saved_x29}
0004908c  a01b00b9   str     w0, [x29, #0x18 {ver} {ver+0x1} {ver+0x2} {ver+0x3}]
00049090  e00600f0   adrp    x0, 0x128000
00049094  00a00691   add     x0, x0, #0x1a8  {__FUNCTION__.7263, "DjiCore_SetFirmwareVersion"}
00049098  a11b8052   mov     w1, #0xdd
0004909c  b5780094   bl      DjiDataBuriedPoint_ApiHitRecord
000490a0  a11b40b9   ldr     w1, [x29, #0x18 {ver}]
000490a4  000a00d0   adrp    x0, 0x18b000
000490a8  00200691   add     x0, x0, #0x188  {FIRMWARE_VERSION}
000490ac  010000b9   str     w1, [x0]  {FIRMWARE_VERSION}
000490b0  000a00d0   adrp    x0, 0x18b000
000490b4  00c00691   add     x0, x0, #0x1b0  {HAVE_FIRMWARE_VERSION}
000490b8  21008052   mov     w1, #0x1
000490bc  01000039   strb    w1, [x0]  {0x1}  {HAVE_FIRMWARE_VERSION}
000490c0  000080d2   mov     x0, #0
000490c4  fd7bc2a8   ldp     x29, x30, [sp], #0x20 {__saved_x29} {__saved_x30}
000490c8  c0035fd6   ret     

and even MLIL

00049084  T_DjiReturnCode DjiCore_SetFirmwareVersion(struct T_DjiFirmwareVersion version)

   0 @ 0004908c  ver:0.b = version.majorVersion
   1 @ 0004908c  ver:1.b = version.minorVersion
   2 @ 0004908c  ver:2.b = version.modifyVersion
   3 @ 0004908c  ver:3.b = version.debugVersion
   4 @ 00049094  x0:0.b = 0xa8
   5 @ 00049094  x0:1.b = 0x81
   6 @ 00049094  x0:2.b = 0x12
   7 @ 00049094  x0:3.b = 0
   8 @ 00049098  x1 = 0xdd
   9 @ 0004909c  DjiDataBuriedPoint_ApiHitRecord()
  10 @ 000490a0  x1_1 = ver
  11 @ 000490ac  [&FIRMWARE_VERSION].d = x1_1
  12 @ 000490bc  [&HAVE_FIRMWARE_VERSION].b = 1
  13 @ 000490c0  x0_1:0.b = 0
  14 @ 000490c0  x0_1:1.b = 0
  15 @ 000490c0  x0_1:2.b = 0
  16 @ 000490c0  x0_1:3.b = 0
  17 @ 000490c8  return 0

joelreymont avatar Jun 12 '24 11:06 joelreymont

Also, see the elided call to DjiDataBuriedPoint_ApiHitRecord here

0009dc3c  int64_t DjiPowerManagement_Init()
0009dc58      DjiDataBuriedPoint_ModuleUsageRecord(0xb)
0009dc74      int64_t result
0009dc3c      
0009dc74      if (DjiPowerManagementParamConfig_Get(config: &POWER_MGR_PARAM_CONFIG) != 0)
0009dca0          DjiLogger_Output(tag: &data_147618, level: 0, fmt: "[%s:%d) Can't get param config", "DjiPowerManagement_Init", 0x56)
0009dca4          result = 0xe3
0009dc74      else if (data_292718 != 0)
0009dce8          DjiLogger_Output(tag: &data_147618, level: 0, fmt: "[%s:%d) Can't support this funct…", "DjiPowerManagement_Init", 0x5b)
0009dcec          result = 0xe0
0009dcbc      else
0009dcfc          void* const handlers = &s_powerManagementCmdList
0009dd04          int16_t var_10_1 = 5
0009dd1c          int64_t x0_6 = DjiCommand_RegRecvCmdHandler(broker: DjiAccessAdapter_GetCmdHandle(), &handlers)
0009dcfc          
0009dd2c          if (x0_6 == 0)
0009dd68              result = 0
0009dd2c          else
0009dd5c              DjiLogger_Output(tag: &data_147618, level: 0, fmt: "[%s:%d) reg power management com…", "DjiPowerManagement_Init", 0x65, x0_6)
0009dd60              result = 0xec
0009dc3c      
0009dd70      return result

compare to IDA

__int64 DjiPowerManagement_Init()
{
  MessageBroker *CmdHandle; // x0
  MessageHandlers v2; // [xsp+18h] [xbp+18h] BYREF
  __int64 v3; // [xsp+28h] [xbp+28h]

  DjiDataBuriedPoint_ApiHitRecord("DjiPowerManagement_Init", 80LL);
  DjiDataBuriedPoint_ModuleUsageRecord(11LL);
  v3 = DjiPowerManagementParamConfig_Get(&s_powerManagementParamConfig);
  if ( v3 )
  {
    DjiLogger_Output("pmu", 0, "[%s:%d) Can't get param config", "DjiPowerManagement_Init", 86LL);
    return 227LL;
  }
  else if ( dword_292718 )
  {
    DjiLogger_Output("pmu", 0, "[%s:%d) Can't support this function", "DjiPowerManagement_Init", 91LL);
    return 224LL;
  }
  else
  {
    v2.handlers = &s_powerManagementCmdList;
    LOWORD(v2.n_handlers) = 5;
    CmdHandle = DjiAccessAdapter_GetCmdHandle();
    v3 = DjiCommand_RegRecvCmdHandler(CmdHandle, &v2);
    if ( v3 )
    {
      DjiLogger_Output(
        "pmu",
        0,
        "[%s:%d) reg power management command handler error: 0x%08llX.",
        "DjiPowerManagement_Init",
        101LL,
        v3);
      return 236LL;
    }
    else
    {
      return 0LL;
    }
  }
}

joelreymont avatar Jun 12 '24 13:06 joelreymont

The function DjiDataBuriedPoint_ApiHitRecord is marked as pure and as such it is subject to removal if the return is not used.

If you want to show it, mark the function as not pure using the Edit Function Properties dialog (docs).

Did you retype the function DjiDataBuriedPoint_ApiHitRecord in IDA or did it already pickup the two params?

emesare avatar Jun 13 '24 14:06 emesare

If you want to show it, mark the function as not pure

There's a bit of a UX issue here. I need to know the function is called to ensure it's visible in HLIL I wouldn't have known the function was called if I hadn't checked IDA. I guess I can always cross-check HLIL with MLIL but should I have to?

IDA automatically picked up the 2 arguments. This is the function itself

void DjiDataBuriedPoint_ApiHitRecord()
{
  ;
}

joelreymont avatar Jun 13 '24 15:06 joelreymont

I think the path forward would be to add a tag to the call site to inform the user the function has been removed.

I will probably have to get back to you on the arguments missing.

emesare avatar Jun 13 '24 15:06 emesare

This is the expected behavior -- as emesare said, the function DjiDataBuriedPoint_ApiHitRecord is pure and removed in HLIL

xusheng6 avatar Jun 17 '24 06:06 xusheng6

I have a few questions:

First, is the pure analysis correct? If so, is the decompilation incorrect functionally as a result of it being optimized away? We also don't show NOP instructions and it's to improve understanding, this would be in the same category.

If marking the function as pure is not correct, then we have a bug.

Next, I agree that a tag would be helpful here regardless. If we don't already have an issue tracking that maybe that would be best to use this and we should reopen it?

Just trying to understand the impact. If this is just "Ida shows a function and BN doesn't" but it's not a correctness issue (pure functions whose output is ignored are functional NOPs) then that is different versus the pure analysis being wrong.

psifertex avatar Jun 17 '24 13:06 psifertex

Imagine that I didn't have DWARF symbols and function names (happens regularly).

DjiCore_SetFirmwareVersion would look like this and the string argument would be the only indication of its purpose.

Elide the call to DjiDataBuriedPoint_ApiHitRecord in the name of (dubious) efficiency and you have complicated my job significantly!

__int64 __fastcall sp_49084(int a1)
{
 sp_67370("DjiCore_SetFirmwareVersion", 221LL);
  ...
  return 0LL;
}

Yes, the function is pure.

text:0000000000067370 ; void DjiDataBuriedPoint_ApiHitRecord()
.text:0000000000067370                 EXPORT DjiDataBuriedPoint_ApiHitRecord
.text:0000000000067370 DjiDataBuriedPoint_ApiHitRecord         ; CODE XREF: DjiAircraftInfo_GetBaseInfo+18↑p
.text:0000000000067370                                         ; DjiAircraftInfo_GetMobileAppInfo+18↑p ...
.text:0000000000067370
.text:0000000000067370 var_1A          = -0x1A
.text:0000000000067370 var_18          = -0x18
.text:0000000000067370 var_8           = -8
.text:0000000000067370 var_4           = -4
.text:0000000000067370
.text:0000000000067370                 SUB             SP, SP, #0x20
.text:0000000000067374                 STR             X0, [SP,#0x20+var_18]
.text:0000000000067378                 STRH            W1, [SP,#0x20+var_1A]
.text:000000000006737C                 STR             WZR, [SP,#0x20+var_4]
.text:0000000000067380                 STR             WZR, [SP,#0x20+var_8]
.text:0000000000067384                 NOP
.text:0000000000067388                 ADD             SP, SP, #0x20 ; ' '
.text:000000000006738C                 RET
.text:000000000006738C ; End of function DjiDataBuriedPoint_ApiHitRecord

joelreymont avatar Jun 17 '24 14:06 joelreymont

I am reopening this as "Indicate when pure function elimination occurs", specifically for edge cases like this where the usage of said function is important for function/program understanding.

The question is how do we let users know? A tag would be pushed to the next visible instruction in the listing, doing what we do for dead code elimination might be a regression for cases which depend on the complete elimination of said pure function.

emesare avatar Jun 17 '24 15:06 emesare

This was never referenced but in the event that we wanted to observe nothing from a function call (in the case of your function being patched at runtime) we also would want to be able to annotate the function as opaque.

https://github.com/Vector35/binaryninja-api/issues/2923

However in the auto-analysis we really should just not annotate the function as pure, or annotate it as such and not auto eliminate calls. The auto elimination happened as the functions purity was used to key for call removal, which needs to occur completely separate.

While we can say that a return value is not used, we do benefit from inlining the pure function (which will inherently remove the call), in the event that we stopped removing call-sites where the return value is not used we would still have the case where the return value is inlined in cases where it can be resolved.

Assuming we kept the same behavior for inlining pure function calls we still use the "purity" to remove the call.

emesare avatar Sep 11 '24 19:09 emesare

Changed the title of the issue as we do allow you to control this behavior in multiple different ways, by toggling the pure flag in the function attributes dialog and by changing the analysis setting to prevent pure function inlining altogether

plafosse avatar May 27 '25 14:05 plafosse