asm-cli-rust icon indicating copy to clipboard operation
asm-cli-rust copied to clipboard

Panic on empty string

Open nyw0102 opened this issue 9 months ago • 0 comments

platform: Linux version: Latest

Overview: main() aborts because of a reachable assertion triggered from the MatchAndEmitATTInstruction() function.

Error message:

```
asm-cli-rust: /home/nyw0102/.cargo/git/checkouts/keystone6a7a70f3378d0b72/1856935/bindings/rust/keystone-sys/keystone/llvm/include/llvm/ADT/StringRef.h:210: char llvm_ks::StringRef::operator[](size_t) const: Assertion `Index < Length && "Invalid index!"' failed.
Aborted
```


**Description**

After running “asm-cli-rust” with the flag “--syntax att” and giving “” (an empty string) as input in the REPL, a pointer to the input string is passed to the ks_asm() function in “rust/keystone-sys/keystone/llvm/keystone/ks.cpp”.

```
  1. int ks_asm(ks_engine *ks,
  2.         const char *assembly,
  3.         uint64_t address,
  4.         unsigned char **insn, size_t *insn_size,
  5.         size_t *stat_count)
  6. {
  7.     MCCodeEmitter *CE;
  8.     MCStreamer *Streamer;
  9.     unsigned char *encoding;
 10.     SmallString<1024> Msg;
 11.     raw_svector_ostream OS(Msg);
 12.  
 13.     if (ks->arch == KS_ARCH_EVM) {
 14.         // handle EVM differently
 15.         unsigned short opcode = EVM_opcode(assembly);
 16.         if (opcode == (unsigned short)-1) {
 17.             // invalid instruction
 18.             return -1;
 19.         }
 20.  
 21.         *insn_size = 1;
 22.         *stat_count = 1;
 23.         encoding = (unsigned char *)malloc(*insn_size);
 24.         encoding[0] = opcode;
 25.         *insn = encoding;
 26.         return 0;
 27.     }
 28.  
 29.     *insn = NULL;
 30.     *insn_size = 0;
 31.  
 32.     MCContext Ctx(ks->MAI, ks->MRI, &ks->MOFI, &ks->SrcMgr, true, address);
 33.     ks->MOFI.InitMCObjectFileInfo(Triple(ks->TripleName), Ctx);
 34.     CE = ks->TheTarget->createMCCodeEmitter(*ks->MCII, *ks->MRI, Ctx);
 35.     if (!CE) {
 36.         // memory insufficient
 37.         return KS_ERR_NOMEM;
 38.     }
 39.     Streamer = ks->TheTarget->createMCObjectStreamer(
 40.             Triple(ks->TripleName), Ctx, *ks->MAB, OS, CE, *ks->STI, ks->MCOptions.MCRelaxAll,
 41.             /*DWARFMustBeAtTheEnd*/ false);
 42.             
 43.     if (!Streamer) {
 44.         // memory insufficient
 45.         delete CE;
 46.         return KS_ERR_NOMEM;
 47.     }
 48.  
 49.     // Tell SrcMgr about this buffer, which is what the parser will pick up.
 50.     ErrorOr<std::unique_ptr<MemoryBuffer>> BufferPtr = MemoryBuffer::getMemBuffer(assembly);
 51.     if (BufferPtr.getError()) {
 52.         delete Streamer;
 53.         delete CE;
 54.         return KS_ERR_NOMEM;
 55.     }
 56.  
 57.     ks->SrcMgr.clearBuffers();
 58.     ks->SrcMgr.AddNewSourceBuffer(std::move(*BufferPtr), SMLoc());
 59.  
 60.     Streamer->setSymResolver((void *)(ks->sym_resolver));
 61.  
 62.     MCAsmParser *Parser = createMCAsmParser(ks->SrcMgr, Ctx, *Streamer, *ks->MAI);
 63.     if (!Parser) {
 64.         delete Streamer;
 65.         delete CE;
 66.         // memory insufficient
 67.         return KS_ERR_NOMEM;
 68.     }
 69.     MCTargetAsmParser *TAP = ks->TheTarget->createMCAsmParser(*ks->STI, *Parser, *ks->MCII, ks->MCOptions);
 70.     if (!TAP) { 
 71.         // memory insufficient
 72.         delete Parser;
 73.         delete Streamer;
 74.         delete CE;
 75.         return KS_ERR_NOMEM;
 76.     }
 77.     TAP->KsSyntax = ks->syntax;
 78.  
 79.     Parser->setTargetParser(*TAP);
 80.  
 81.     // TODO: optimize this to avoid setting up NASM every time we call ks_asm()
 82.     if (ks->arch == KS_ARCH_X86 && ks->syntax == KS_OPT_SYNTAX_NASM) {
 83.         Parser->initializeDirectiveKindMap(KS_OPT_SYNTAX_NASM);
 84.         ks->MAI->setCommentString(";");
 85.     }
 86.  
 87.     *stat_count = Parser->Run(false, address);
 88.  
 89.     // PPC counts empty statement
 90.     if (ks->arch == KS_ARCH_PPC)
 91.         *stat_count = *stat_count / 2;
 92.  
 93.     ks->errnum = Parser->KsError;
 94.  
 95.     delete TAP;
 96.     delete Parser;
 97.     delete CE;
 98.     delete Streamer;
 99.  
100.     if (ks->errnum >= KS_ERR_ASM)
101.         return -1;
102.     else {
103.         *insn_size = Msg.size();
104.         encoding = (unsigned char *)malloc(*insn_size);
105.         if (!encoding) {
106.             return KS_ERR_NOMEM;
107.         }
108.         memcpy(encoding, Msg.data(), *insn_size);
109.         *insn = encoding;
110.         return 0;
111.     }
112. }
113.  
```

In this function, SrcMgr, which is later passed to the Parser, receives the crashing input through the “getMemBuffer” function. The Parser is then initialized by the following constructor.

 2.                      const MCAsmInfo &MAI)
 3.     : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
 4.       PlatformParser(nullptr), CurBuffer(SM.getMainFileID()),
 5.       MacrosEnabledFlag(true), HadError(false), CppHashLineNumber(0),
 6.       AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false),
 7.       NasmDefaultRel(false) {
 8.   // Save the old handler.
 9.   SavedDiagHandler = SrcMgr.getDiagHandler();
10.   SavedDiagContext = SrcMgr.getDiagContext();
11.   // Set our own handler which calls the saved handler.
12.   SrcMgr.setDiagHandler(DiagHandler, this);
13.   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
14.   .
15.   .
16.   .

In this constructor, the Parser hands the input buffer to ‘Lexer’. After this initialization of ‘Parser’, the function “ks_asm” calls “Parser::Run”.

2.  while (Lexer.isNot(AsmToken::Eof)) {
3.     ParseStatementInfo Info;
4.     if (!parseStatement(Info, nullptr, Address)) {
5.       count++;
6.       continue;
7.     }

This function calls “parseStatement”, which in turn calls “X86AsmParser::MatchAndEmitATTInstruction”, passing it the parameter ‘Operands’.

 2.                                               OperandVector &Operands,
 3.                                               MCStreamer &Out,
 4.                                               uint64_t &ErrorInfo,
 5.                                               bool MatchingInlineAsm, unsigned int &ErrorCode, uint64_t &Address)
 6. {
 7.   assert(!Operands.empty() && "Unexpect empty operand list!");
 8.   X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
 9.   assert(Op.isToken() && "Leading operand should always be a mnemonic!");
10.   //ArrayRef<SMRange> EmptyRanges = None;
11.  
12.   // First, handle aliases that expand to multiple instructions.
13.   MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
14.  
15.   bool WasOriginallyInvalidOperand = false;
16.   MCInst Inst;
17.  
18.   // First, try a direct match.
19.   switch (MatchInstructionImpl(Operands, Inst,
20.                                ErrorInfo, MatchingInlineAsm,
21.                                isParsingIntelSyntax())) {
22.   default: llvm_unreachable("Unexpected match result!");
23.   case Match_Success:
24.     // Some instructions need post-processing to, for example, tweak which
25.     // encoding is selected. Loop on it while changes happen so the
26.     // individual transformations can chain off each other.
27.     if (!MatchingInlineAsm)
28.       while (processInstruction(Inst, Operands))
29.         ;
30.  
31.     Inst.setLoc(IDLoc);
32.     if (!MatchingInlineAsm) {
33.       EmitInstruction(Inst, Operands, Out, ErrorCode);
34.       if (ErrorCode)
35.           return true;
36.     }
37.     Opcode = Inst.getOpcode();
38.     return false;
39.   case Match_MissingFeature:
40.     return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
41.   case Match_InvalidOperand:
42.     WasOriginallyInvalidOperand = true;
43.     break;
44.   case Match_MnemonicFail:
45.     break;
46.   }
47.  
48.   // FIXME: Ideally, we would only attempt suffix matches for things which are
49.   // valid prefixes, and we could just infer the right unambiguous
50.   // type. However, that requires substantially more matcher support than the
51.   // following hack.
52.  
53.   // Change the operand to point to a temporary token.
54.   StringRef Base = Op.getToken();
55.   SmallString<16> Tmp;
56.   Tmp += Base;
57.   Tmp += ' ';
58.   Op.setTokenValue(Tmp);
59.  
60.   // If this instruction starts with an 'f', then it is a floating point stack
61.   // instruction.  These come in up to three forms for 32-bit, 64-bit, and
62.   // 80-bit floating point, which use the suffixes s,l,t respectively.
63.   //
64.   // Otherwise, we assume that this may be an integer instruction, which comes
65.   // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
66.   const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";

In this function, Operands is non-empty, so it passes assert(!Operands.empty() && "Unexpect empty operand list!");. However, the token of its first operand is empty, so the value “Base” is initialized as “”. In the last line, the function reads index 0 of Base. In this operation, the overloaded operator “[]” fails its assertion.

 2.  
 3. StringRef(const char *Str)
 4.       : Data(Str) {
 5.         //assert(Str && "StringRef cannot be built from a NULL argument");
 6.         if (!Str)
 7.             Length = 0;
 8.         else 
 9.             Length = ::strlen(Str); // invoking strlen(NULL) is undefined behavior
10.       }
11.  
12. char operator[](size_t Index) const {
13.       assert(Index < Length && "Invalid index!");
14.       return Data[Index];
15.     }
16.  

In this file, the value “Length” is initialized to 0 because the string is empty. So the overloaded operator “[]” triggers the assertion, because Index (0) is not less than Length (0).

How to Reproduce

  1. Run “asm-cli-rust” with the flag “--syntax att”
  2. Give input “” (empty string)

nyw0102 avatar Apr 09 '25 11:04 nyw0102