json-tutorial icon indicating copy to clipboard operation
json-tutorial copied to clipboard

tutorial07 中关于将数据类型字符串 转 json文本字符串的 问题

Open smalluncle opened this issue 3 years ago • 0 comments

问题:TEST_ROUNDTRIP(""\uD834\uDD1E"") 不通过。 D:\code\CPP_projects\json\json-tutorial-master\tutorial07_answer\test.c:414: expect: "\uD834\uDD1E" actual: "饾劄" 394/395 (99.75%) passed

1、在json解析器中,如这样的字符串""\uD834\uDD1E""会被解析为这种格式"\xF0\x9D\x84\x9E" 的字符串; 2、在json生成器中,将传入这种格式"\xF0\x9D\x84\x9E"的字符串去生成json字符串文本,但代码中遇到某个字节ch大于0x20时直接PUTC(c,ch)进去,这是不是有问题呢?是不是应该考虑转义成unicode。

/* Below is the unoptimized code from the tutorial answer. */

/*
 * Emit the `len` bytes starting at `s` as a double-quoted JSON string literal,
 * using the PUTC/PUTS macros on context `c`.
 *
 * - '"' and '\\' are written with a leading backslash.
 * - \b, \f, \n, \r and \t are written as their two-character escapes.
 * - Any other byte below 0x20 is written as a six-character \u00XX escape.
 * - Every remaining byte (including bytes >= 0x80) is written unchanged.
 *
 * `s` must not be NULL (checked with assert).
 */
static void lept_stringify_string(lept_context* c, const char* s, size_t len) {
    const char* cursor;
    const char* stop;

    assert(s != NULL);
    stop = s + len;

    PUTC(c, '"');
    for (cursor = s; cursor != stop; cursor++) {
        const unsigned char byte = (unsigned char)*cursor;

        if (byte == '\"') {
            PUTS(c, "\\\"", 2);
        } else if (byte == '\\') {
            PUTS(c, "\\\\", 2);
        } else if (byte == '\b') {
            PUTS(c, "\\b", 2);
        } else if (byte == '\f') {
            PUTS(c, "\\f", 2);
        } else if (byte == '\n') {
            PUTS(c, "\\n", 2);
        } else if (byte == '\r') {
            PUTS(c, "\\r", 2);
        } else if (byte == '\t') {
            PUTS(c, "\\t", 2);
        } else if (byte < 0x20) {
            /* Remaining control characters have no short escape. */
            char escaped[7];
            sprintf(escaped, "\\u%04X", byte);
            PUTS(c, escaped, 6);
        } else {
            PUTC(c, *cursor);
        }
    }
    PUTC(c, '"');
}

/* Below is my own version. It passes the test for the problem described
 * above, but I am not sure whether my understanding is correct. */

/*
 * Render the low `len` hexadecimal digits of `code` as uppercase ASCII into
 * str[0 .. len-1], most significant digit first, and store a terminating NUL
 * at str[len]. The caller must supply a buffer of at least len + 1 bytes.
 */
static void hex2str(unsigned short code, char* str, int len) {
    static const char digits[] = "0123456789ABCDEF";

    str[len] = '\0';
    for (int i = len - 1; i >= 0; i--, code >>= 4) {
        str[i] = digits[code & 0xf];
    }
}

/* Emit one UTF-16 code unit as a six-character \uXXXX escape. */
static void lept_stringify_u_escape(lept_context* c, unsigned short unit) {
    char hex[5]; /* 4 digits + NUL written by hex2str */

    hex2str(unit, hex, 4);
    PUTS(c, "\\u", 2);
    PUTS(c, hex, 4);
}

/*
 * Decode the multi-byte UTF-8 sequence that starts at s[0], reading at most
 * `avail` bytes (avail >= 1).
 *
 * Returns the sequence length (2, 3 or 4) and stores the code point in *cp.
 * Returns 0, leaving *cp untouched, when s[0] is not a 2/3/4-byte lead, the
 * sequence is truncated, a continuation byte is not of the form 10xxxxxx, or
 * the value exceeds U+10FFFF. Overlong forms and encoded surrogates are not
 * rejected.
 */
static size_t lept_utf8_decode(const unsigned char* s, size_t avail, unsigned int* cp) {
    unsigned int value;
    size_t need;

    if (s[0] >= 0xc0 && s[0] <= 0xdf) {
        value = s[0] & 0x1fu;
        need = 2;
    } else if (s[0] >= 0xe0 && s[0] <= 0xef) {
        value = s[0] & 0x0fu;
        need = 3;
    } else if (s[0] >= 0xf0 && s[0] <= 0xf7) {
        value = s[0] & 0x07u; /* 3 payload bits: binary 111, not hex 0x111 */
        need = 4;
    } else {
        return 0;
    }

    if (need > avail) {
        return 0; /* truncated: never read past the caller's length */
    }
    for (size_t k = 1; k < need; k++) {
        if ((s[k] & 0xc0) != 0x80) {
            return 0;
        }
        value = (value << 6) | (s[k] & 0x3fu);
    }
    if (value > 0x10ffff) {
        return 0; /* not representable as a surrogate pair */
    }

    *cp = value;
    return need;
}

/*
 * Emit the `len` bytes at `s` (expected to be UTF-8) as a double-quoted,
 * ASCII-only JSON string literal, using the PUTC/PUTS macros on context `c`.
 *
 * - '"' and '\\' are written with a leading backslash.
 * - \b, \f, \n, \r and \t are written as their two-character escapes; every
 *   other byte below 0x20 (including NUL) is written as \u00XX rather than
 *   being dropped.
 * - Other ASCII bytes (0x20..0x7F) are written unchanged.
 * - A well-formed multi-byte UTF-8 sequence is written as \uXXXX, or as a
 *   \uD8xx\uDCxx surrogate pair for code points above U+FFFF.
 * - A byte that does not start a decodable sequence is written unchanged,
 *   so no input byte is lost and nothing is read beyond s[len - 1].
 */
static void lept_stringify_string(lept_context* c, const char* s, size_t len) {
    PUTC(c, '"');
    for (size_t i = 0; i < len; ++i) {
        const unsigned char ch = (unsigned char)s[i];
        unsigned int cp = 0;
        size_t used;

        if (ch < 0x80) {
            switch (ch) {
            case '\"': PUTS(c, "\\\"", 2); break;
            case '\\': PUTS(c, "\\\\", 2); break;
            case '\b': PUTS(c, "\\b", 2); break;
            case '\f': PUTS(c, "\\f", 2); break;
            case '\n': PUTS(c, "\\n", 2); break;
            case '\r': PUTS(c, "\\r", 2); break;
            case '\t': PUTS(c, "\\t", 2); break;
            default:
                if (ch < 0x20) {
                    /* JSON forbids raw control characters inside strings. */
                    lept_stringify_u_escape(c, ch);
                } else {
                    PUTC(c, s[i]);
                }
                break;
            }
            continue;
        }

        used = lept_utf8_decode((const unsigned char*)s + i, len - i, &cp);
        if (used == 0) {
            /* Stray continuation byte or malformed sequence: pass it through. */
            PUTC(c, s[i]);
            continue;
        }
        i += used - 1; /* the loop increment consumes the lead byte */

        if (cp >= 0x10000) {
            /* Split the 20-bit offset into two 10-bit halves. */
            const unsigned int offset = cp - 0x10000;
            lept_stringify_u_escape(c, (unsigned short)(0xd800 + (offset >> 10)));
            lept_stringify_u_escape(c, (unsigned short)(0xdc00 + (offset & 0x3ff)));
        } else {
            lept_stringify_u_escape(c, (unsigned short)cp);
        }
    }
    PUTC(c, '"');
}

smalluncle avatar Jul 23 '22 06:07 smalluncle