tutorial07 中关于将字符串类型的数据转换为 JSON 文本字符串的问题
问题:`TEST_ROUNDTRIP("\"\\uD834\\uDD1E\"")` 不通过,输出如下: D:\code\CPP_projects\json\json-tutorial-master\tutorial07_answer\test.c:414: expect: "\uD834\uDD1E" actual: "饾劄" 394/395 (99.75%) passed
1、在json解析器中,像 "\uD834\uDD1E" 这样的JSON字符串会被解析成UTF-8字节序列 "\xF0\x9D\x84\x9E"; 2、在json生成器中,传入 "\xF0\x9D\x84\x9E" 这样的字符串去生成json字符串文本时,代码遇到不小于0x20的字节ch(双引号和反斜杠除外)会直接PUTC(c, ch)原样输出,这是不是有问题呢?是不是应该考虑转义成 \uXXXX 形式的unicode转义序列。
下面是answer中未优化的代码
/*
 * Appends the len bytes at s to the output held by c as a JSON string
 * literal, surrounded by double quotes.
 *
 * The quote, the backslash and the control characters \b \f \n \r \t are
 * written as two-character escapes; any other byte below 0x20 is written
 * as a six-character \u00XX escape (upper-case hex). Every remaining byte,
 * including bytes >= 0x80, is copied through unchanged.
 *
 * s must not be NULL (checked with assert).
 */
static void lept_stringify_string(lept_context* c, const char* s, size_t len) {
    const char* p;
    const char* end;
    assert(s != NULL);
    end = s + len;
    PUTC(c, '"');
    for (p = s; p != end; p++) {
        unsigned char byte = (unsigned char)*p;
        const char* escape = NULL;   /* two-character escape, if one applies */
        switch (byte) {
            case '\"': escape = "\\\""; break;
            case '\\': escape = "\\\\"; break;
            case '\b': escape = "\\b";  break;
            case '\f': escape = "\\f";  break;
            case '\n': escape = "\\n";  break;
            case '\r': escape = "\\r";  break;
            case '\t': escape = "\\t";  break;
            default:   break;
        }
        if (escape != NULL) {
            PUTS(c, escape, 2);
        }
        else if (byte < 0x20) {
            /* Remaining control characters have no short form. */
            char hex[7];
            sprintf(hex, "\\u%04X", byte);
            PUTS(c, hex, 6);
        }
        else {
            PUTC(c, *p);
        }
    }
    PUTC(c, '"');
}
下面是我写的代码,对于上述问题的测试是通过了的,不知道我理解的对不对
/*
 * Writes the low `len` hexadecimal digits of `code` into `str` as upper-case
 * ASCII, most significant digit first, and stores a terminating '\0' at
 * str[len]. The caller must provide room for len + 1 bytes.
 * Digits beyond the width of `code` come out as '0'.
 */
static void hex2str(unsigned short code, char* str, int len) {
    static const char digits[] = "0123456789ABCDEF";
    int pos = len;

    str[pos] = '\0';
    /* Fill from the right: each step consumes the lowest remaining nibble. */
    while (pos > 0) {
        pos--;
        str[pos] = digits[code & 0xf];
        code >>= 4;
    }
}
/*
 * Decodes one UTF-8 sequence starting at p, looking at no more than `avail`
 * bytes. On success stores the code point in *cp and returns the number of
 * bytes consumed (1..4). Returns 0 when the sequence is malformed: a stray
 * continuation byte, an invalid lead byte, a truncated sequence, an overlong
 * encoding, an encoded surrogate, or a value above U+10FFFF.
 */
static size_t lept_utf8_decode(const unsigned char* p, size_t avail, unsigned long* cp) {
    /* Smallest code point that legitimately needs a sequence of N bytes. */
    static const unsigned long min_cp[5] = { 0, 0, 0x80, 0x800, 0x10000 };
    unsigned long value;
    size_t need;
    size_t k;

    if (avail == 0) {
        return 0;
    }
    if (p[0] < 0x80) {
        *cp = p[0];
        return 1;
    }
    else if ((p[0] & 0xE0) == 0xC0) { need = 2; value = p[0] & 0x1Fu; }
    else if ((p[0] & 0xF0) == 0xE0) { need = 3; value = p[0] & 0x0Fu; }
    else if ((p[0] & 0xF8) == 0xF0) { need = 4; value = p[0] & 0x07u; }
    else {
        return 0;
    }
    if (avail < need) {
        return 0;
    }
    for (k = 1; k < need; k++) {
        if ((p[k] & 0xC0) != 0x80) {
            return 0;
        }
        value = (value << 6) | (p[k] & 0x3Fu);
    }
    if (value < min_cp[need] || value > 0x10FFFF ||
        (value >= 0xD800 && value <= 0xDFFF)) {
        return 0;
    }
    *cp = value;
    return need;
}

/* Appends the six-character escape \uXXXX (upper-case hex) for one UTF-16 code unit. */
static void lept_put_u_escape(lept_context* c, unsigned short unit) {
    char hex[5];   /* four digits plus the terminator written by hex2str */
    hex2str(unit, hex, 4);
    PUTS(c, "\\u", 2);
    PUTS(c, hex, 4);
}

/*
 * Appends the len bytes at s (expected to be UTF-8) to the output held by c
 * as a JSON string literal that contains only ASCII characters.
 *
 *  - '"' and '\\' are backslash-escaped; \b \f \n \r \t use their short
 *    escapes; every other byte below 0x20 (including NUL) becomes \u00XX.
 *  - Other ASCII bytes (0x20..0x7F) are copied unchanged.
 *  - Non-ASCII code points are decoded from UTF-8 and written as \uXXXX;
 *    code points above U+FFFF are written as a UTF-16 surrogate pair.
 *  - Each byte that does not start a well-formed UTF-8 sequence is replaced
 *    by \uFFFD (U+FFFD REPLACEMENT CHARACTER); the input is never read past
 *    s + len.
 */
static void lept_stringify_string(lept_context* c, const char* s, size_t len) {
    const unsigned char* bytes = (const unsigned char*)s;
    size_t i = 0;

    PUTC(c, '"');
    while (i < len) {
        unsigned char ch = bytes[i];
        unsigned long cp = 0;
        size_t used;

        if (ch < 0x80) {
            switch (ch) {
                case '\"': PUTS(c, "\\\"", 2); break;
                case '\\': PUTS(c, "\\\\", 2); break;
                case '\b': PUTS(c, "\\b", 2);  break;
                case '\f': PUTS(c, "\\f", 2);  break;
                case '\n': PUTS(c, "\\n", 2);  break;
                case '\r': PUTS(c, "\\r", 2);  break;
                case '\t': PUTS(c, "\\t", 2);  break;
                default:
                    if (ch < 0x20) {
                        /* JSON forbids raw control characters in strings. */
                        lept_put_u_escape(c, ch);
                    }
                    else {
                        PUTC(c, (char)ch);
                    }
                    break;
            }
            i++;
            continue;
        }

        used = lept_utf8_decode(bytes + i, len - i, &cp);
        if (used == 0) {
            /* Malformed input: substitute U+FFFD and resynchronise on the next byte. */
            cp = 0xFFFD;
            used = 1;
        }

        if (cp >= 0x10000) {
            /* Split the 20-bit offset into a high and a low surrogate (10 bits each). */
            unsigned long offset = cp - 0x10000;
            lept_put_u_escape(c, (unsigned short)(0xD800 + (offset >> 10)));
            lept_put_u_escape(c, (unsigned short)(0xDC00 + (offset & 0x3FF)));
        }
        else {
            lept_put_u_escape(c, (unsigned short)cp);
        }
        i += used;
    }
    PUTC(c, '"');
}