eval/decode: Fix surrogate pairs processing

This commit is contained in:
ZyX
2016-02-03 21:46:01 +03:00
parent ea82270d30
commit 5814e29cdb
4 changed files with 51 additions and 17 deletions

View File

@@ -340,12 +340,12 @@ int json_decode_string(const char *const buf, const size_t len,
goto json_decode_string_fail;
}
char *str = xmalloc(len + 1);
uint16_t fst_in_pair = 0;
int fst_in_pair = 0;
char *str_end = str;
for (const char *t = s; t < p; t++) {
if (t[0] != '\\' || t[1] != 'u') {
if (fst_in_pair != 0) {
str_end += utf_char2bytes((int) fst_in_pair, (char_u *) str_end);
str_end += utf_char2bytes(fst_in_pair, (char_u *) str_end);
fst_in_pair = 0;
}
}
@@ -353,20 +353,21 @@ int json_decode_string(const char *const buf, const size_t len,
t++;
switch (*t) {
case 'u': {
char ubuf[] = { t[1], t[2], t[3], t[4], 0 };
const char ubuf[] = { t[1], t[2], t[3], t[4], 0 };
t += 4;
unsigned long ch;
vim_str2nr((char_u *) ubuf, NULL, NULL, 0, 0, 2, NULL, &ch);
if (0xD800UL <= ch && ch <= 0xDB7FUL) {
fst_in_pair = (uint16_t) ch;
} else if (0xDC00ULL <= ch && ch <= 0xDB7FUL) {
if (fst_in_pair != 0) {
int full_char = (
(int) (ch - 0xDC00UL)
+ (((int) (fst_in_pair - 0xD800)) << 10)
);
str_end += utf_char2bytes(full_char, (char_u *) str_end);
}
if (SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) {
fst_in_pair = (int) ch;
} else if (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END
&& fst_in_pair != 0) {
const int full_char = (
(int) (ch - SURROGATE_LO_START)
+ ((fst_in_pair - SURROGATE_HI_START) << 10)
+ SURROGATE_FIRST_CHAR
);
str_end += utf_char2bytes(full_char, (char_u *) str_end);
fst_in_pair = 0;
} else {
str_end += utf_char2bytes((int) ch, (char_u *) str_end);
}

View File

@@ -970,7 +970,7 @@ static inline int convert_to_json_string(garray_T *const gap,
default: {
if (vim_isprintc(ch)) {
ga_concat_len(gap, buf + i, shift);
} else if (ch <= 0xFFFF) {
} else if (ch < SURROGATE_FIRST_CHAR) {
ga_concat_len(gap, ((const char []) {
'\\', 'u',
xdigits[(ch >> (4 * 3)) & 0xF],
@@ -979,9 +979,9 @@ static inline int convert_to_json_string(garray_T *const gap,
xdigits[(ch >> (4 * 0)) & 0xF],
}), sizeof("\\u1234") - 1);
} else {
uint32_t tmp = (uint32_t) ch - 0x010000;
uint16_t hi = 0xD800 + ((tmp >> 10) & 0x03FF);
uint16_t lo = 0xDC00 + ((tmp >> 0) & 0x03FF);
uint32_t tmp = (uint32_t) ch - SURROGATE_FIRST_CHAR;
uint16_t hi = SURROGATE_HI_START + ((tmp >> 10) & ((1 << 10) - 1));
uint16_t lo = SURROGATE_LO_END + ((tmp >> 0) & ((1 << 10) - 1));
ga_concat_len(gap, ((const char []) {
'\\', 'u',
xdigits[(hi >> (4 * 3)) & 0xF],

View File

@@ -54,6 +54,21 @@ static inline ListReaderState encode_init_lrstate(const list_T *const list)
/// Array mapping values from SpecialVarValue enum to names
extern const char *const encode_special_var_names[];
/// First codepoint in high surrogates block
///
/// A value in SURROGATE_HI_START..SURROGATE_HI_END is the first half of a
/// surrogate pair; it contributes `(value - SURROGATE_HI_START) << 10` to the
/// resulting character.
#define SURROGATE_HI_START 0xD800
/// Last codepoint in high surrogates block
#define SURROGATE_HI_END 0xDBFF
/// First codepoint in low surrogates block
///
/// A value in SURROGATE_LO_START..SURROGATE_LO_END is the second half of a
/// surrogate pair; it contributes `value - SURROGATE_LO_START` to the
/// resulting character.
#define SURROGATE_LO_START 0xDC00
/// Last codepoint in low surrogates block
#define SURROGATE_LO_END 0xDFFF
/// First character that needs to be encoded as surrogate pair
///
/// Added to the combined high and low contributions when a pair is decoded,
/// and subtracted from the character before it is split into a pair.
#define SURROGATE_FIRST_CHAR 0x10000
#ifdef INCLUDE_GENERATED_DECLARATIONS
# include "eval/encode.h.generated.h"
#endif