rr icon indicating copy to clipboard operation
rr copied to clipboard

replaying backwards into an executable mmaped region causes rr to crash

Open ArbitRandomUser opened this issue 1 year ago • 1 comments

I have the following NASM program, which copies part of itself into a region of memory and executes it from there.

replaying it backwards with rr crashes when it enters the mmaped region.

nasm code test.nasm :

; syscall numbers, mapping size and exit status used by this program
%define SYS_MMAP 9
%define SPACE_SIZE (4096*4096) ; 16 MiB codespace (4096*4096 = 16777216 bytes)
%define EXITSUCCESS 0
%define SYS_EXIT 60

;from mman-linux.h
; PROT_* : protection bits; this file ORs READ, WRITE and EXEC together
; for the mmap call, the remaining ones are unused here
%define PROT_READ	0x1		
%define PROT_WRITE	0x2		
%define PROT_EXEC	0x4		
%define PROT_NONE	0x0		
%define PROT_GROWSDOWN	0x01000000	
%define PROT_GROWSUP	0x02000000	

; MAP_* : mapping flags; this file uses MAP_SHARED | MAP_ANONYMOUS only
%define MAP_SHARED	0x01		
%define MAP_PRIVATE	0x02		
%define MAP_SHARED_VALIDATE	0x03	
%define MAP_TYPE	0x0f		

%define MAP_FIXED	0x10		
%define MAP_FILE	0
%define MAP_ANONYMOUS	0x20		
%define MAP_GROWSDOWN 0x00100
%define MAP_HUGE_SHIFT	26
%define MAP_HUGE_MASK	0x3f

section .data
; 16-byte zero-initialised scratch buffer: somefunc stores a register value
; in its first 8 bytes and then writes all 16 bytes to stdout
printer: times 16 db 0


section .text
global _start
; byte distance from somefunc to _start, i.e. the size of somefunc's code
delta equ _start-somefunc

codespace_create:
  ; Map SPACE_SIZE bytes of anonymous, shared memory that is readable,
  ; writable and executable, letting the kernel pick the address.
  ; In:    nothing
  ; Out:   rax = raw mmap result: the mapping's address on success, or a
  ;        negative errno value in [-4095, -1] on failure (not checked here)
  ; Clobb: rdi, rsi, rdx, r10, r8, r9, flags, plus rcx and r11 (destroyed
  ;        by the syscall instruction)
  xor edi,edi                                 ; addr = 0: no placement hint
  mov rsi,SPACE_SIZE                          ; length
  mov rdx,PROT_WRITE | PROT_READ | PROT_EXEC  ; prot
  mov r10,MAP_ANONYMOUS | MAP_SHARED          ; flags (4th syscall argument lives in r10, not rcx)
  mov r8,-1                                   ; fd = -1, as recommended for MAP_ANONYMOUS
  xor r9d,r9d                                 ; offset = 0; a stale unaligned value here makes mmap fail with EINVAL
  mov rax,SYS_MMAP
  syscall
  ret

codeloader:
  ; Copy rbx bytes from the address in rcx to the address in rax, one qword
  ; (8 bytes) at a time.
  ; In:    rcx = source address
  ;        rbx = byte count (treated as unsigned)
  ;        rax = destination address
  ; Out:   rax, rbx and rcx are left unchanged
  ; Clobb: r10 (running byte offset), r11 (qword in flight), flags
  ; The count is effectively rounded up to the next multiple of 8, so up to
  ; 7 bytes beyond each range may be read/written. A count of 0 copies nothing.
  xor r10d,r10d            ; offset = 0
.copy_qword:
  cmp r10,rbx
  jae .done                ; unsigned compare: stop once offset >= count
  mov r11,qword [rcx+r10]
  mov qword [rax+r10],r11  ; dest[offset] = src[offset]
  add r10,8
  jmp .copy_qword
.done:
  ret

somefunc:
  ; Store the address this routine is currently executing from into the first
  ; 8 bytes of `printer`, then write all 16 bytes of `printer` to stdout as
  ; raw bytes (not text).
  ; Out:   rax = result of the write syscall
  ; Clobb: r13, rsi, rdx, rdi, plus rcx and r11 (destroyed by the syscall)
  ;
  ; The lea is rip-relative, so a relocated copy of this code reports the
  ; copy's own address. `printer` is addressed absolutely on purpose so the
  ; copy still finds the buffer no matter where it runs from.
  lea r13,[rel somefunc]   ; r13 = runtime address of this routine's first instruction
  mov rsi,printer          ; buf (absolute address of the scratch buffer)
  mov qword [rsi],r13      ; printer[0..7] = that address
  mov edx,16               ; count: the whole buffer
  mov edi,1                ; fd 1 = stdout
  mov eax,1                ; syscall number 1 (write)
  syscall
  ret

_start:
  ; Program entry: map an executable region, copy somefunc into it, run the
  ; copy, then exit. Exits with status 1 if the region cannot be mapped.
  call codespace_create    ; rax = mmap result
  cmp rax,-4095
  jae .mmap_failed         ; unsigned rax in [-4095, -1] is -errno: there is no region to use
  mov rcx,somefunc         ; source: first byte of somefunc
  mov rbx,delta            ; length: bytes from somefunc up to _start
  call codeloader          ; copy somefunc's code into the mapped region
  ; codeloader leaves rax untouched, so it still holds the mapped address
  call rax                 ; execute the copy of somefunc inside the mapped region
  mov rax,SYS_EXIT
  mov rdi,EXITSUCCESS
  syscall
.mmap_failed:
  mov rax,SYS_EXIT
  mov rdi,1                ; non-zero exit status reports the failure
  syscall

compile and link

nasm -felf64 -g test.nasm -o test.o
ld  test.o -o test.exe

Record the program with rr, replay it to the end, then step backwards one instruction at a time (reverse-stepi, `rsi`) until execution reaches the code in the mmapped region.

rr crashes with a message like

[FATAL /usr/src/debug/rr-git/rr/src/ReplayTimeline.cc:458:replay_step_to_mark() errno: EIO] 
 (task 148763 (rec:148656) at time 18)
 -> Assertion `before.key <= mark.ptr->proto.key' failed to hold. Current mark {time:18,ticks:6,st:0,regs_ip:0x7dc209de402a} is already after target {time:17,ticks:6,st:1,regs_ip:0x7dc209de4028}
Tail of trace dump:
=== Start rr backtrace:
rr(_ZN2rr13dump_rr_stackEv+0x5e)[0x5d3bd8352a0e]
rr(_ZN2rr15emergency_debugEPNS_4TaskE+0x1a7)[0x5d3bd822e787]
rr(+0xdab0c)[0x5d3bd8234b0c]
rr(+0xdc8b7)[0x5d3bd82368b7]
rr(_ZN2rr14ReplayTimeline19replay_step_to_markERKNS0_4MarkERNS0_24ReplayStepToMarkStrategyE+0x6be)[0x5d3bd82e168e]
rr(_ZN2rr14ReplayTimeline12seek_to_markERKNS0_4MarkE+0xda)[0x5d3bd82e1cca]
rr(_ZN2rr14ReplayTimeline18reverse_singlestepERKNS0_4MarkERKNS_10TaskishUidINS_4TaskEEElRKSt8functionIFbPNS_10ReplayTaskERKNS_11BreakStatusEEERKS9_IFbvEE+0x2b8a)[0x5d3bd82e97da]
rr(_ZN2rr14ReplayTimeline18reverse_singlestepERKNS_10TaskishUidINS_4TaskEEElRKSt8functionIFbPNS_10ReplayTaskERKNS_11BreakStatusEEERKS6_IFbvEE+0x69)[0x5d3bd82ece49]
rr(_ZN2rr9GdbServer14debug_one_stepERNS_10GdbRequestE+0x64f)[0x5d3bd821248f]
rr(_ZN2rr9GdbServer12serve_replayESt10shared_ptrINS_13ReplaySessionEERKNS0_6TargetEPVbRKNS0_15ConnectionFlagsE+0xaec)[0x5d3bd8218a6c]
rr(_ZN2rr13ReplayCommand3runERSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS7_EE+0x1892)[0x5d3bd82bbee2]
rr(main+0x168)[0x5d3bd81a8588]
/usr/lib/libc.so.6(+0x25c88)[0x76d1ef639c88]
/usr/lib/libc.so.6(__libc_start_main+0x8c)[0x76d1ef639d4c]
rr(_start+0x25)[0x5d3bd81a9c95]
=== End rr backtrace

ArbitRandomUser avatar Jun 10 '24 17:06 ArbitRandomUser

This doesn't work because we implement the rsi by setting a breakpoint in the code region and replaying forward to the breakpoint. But the breakpoint is overwritten by the code copy, so that doesn't work.

We can avoid it by using a hardware breakpoint, but that would steal a hardware breakpoint from users, which would be worse in general. This bug hardly ever comes up.

FWIW this isn't a problem in Pernosco which takes a totally different approach to debugging replay.

Maybe we could add an option to always use a hardware breakpoint for replay, and suggest using it if we crash in this way. This would be pretty easy on top of PR #3731.

rocallahan avatar Jun 12 '24 12:06 rocallahan