|
|
json.c - json2tsv - JSON to TSV converter |
|
|
 |
git clone git://git.codemadness.org/json2tsv (git://git.codemadness.org) |
|
|
 |
Log |
|
|
 |
Files |
|
|
 |
Refs |
|
|
 |
README |
|
|
 |
LICENSE |
|
|
|
--- |
|
|
|
json.c (8152B) |
|
|
|
--- |
|
|
|
1 #include <errno.h> |
|
|
|
2 #include <stdint.h> |
|
|
|
3 #include <stdio.h> |
|
|
|
4 #include <stdlib.h> |
|
|
|
5 #include <string.h> |
|
|
|
6 |
|
|
|
7 #ifndef GETNEXT |
|
|
|
8 #define GETNEXT getchar_unlocked |
|
|
|
9 #endif |
|
|
|
10 |
|
|
|
11 #include "json.h" |
|
|
|
12 |
|
|
|
/*
 * ctype-like macros, but always compatible with ASCII / UTF-8.
 *
 * The argument is converted to unsigned before the subtraction, so values
 * below the start of the range (including negative ones such as EOF) wrap
 * around to a large number and fail the "< N" comparison.
 * The argument is parenthesized so that compound expressions are cast as a
 * whole. NOTE: ISXDIGIT() may evaluate its argument twice, do not pass an
 * expression with side effects.
 */
#define ISDIGIT(c) (((unsigned)(c)) - '0' < 10)
/* OR-ing with 32 folds ASCII upper-case letters onto lower-case */
#define ISXDIGIT(c) ((((unsigned)(c)) - '0' < 10) || (((unsigned)(c)) | 32) - 'a' < 6)
|
|
|
16 |
|
|
|
/*
 * Encode code point `r` as UTF-8 into `s`.
 *
 * `s` must have room for up to 4 bytes; no NUL terminator is written.
 * Returns the number of bytes written (1-4), or 0 when nothing was written:
 * that is the case for r == 0 (a NUL byte is never emitted) and for values
 * that cannot be encoded in 4 bytes (negative or above 0x10FFFF).
 * Surrogate values (0xD800-0xDFFF) are not rejected, they are encoded as a
 * regular 3-byte sequence.
 */
static int
codepointtoutf8(long r, char *s)
{
	if (r <= 0 || r > 0x10FFFF) {
		return 0; /* NUL byte or out of range: write nothing */
	} else if (r <= 0x7F) {
		/* 1 byte: 0aaaaaaa */
		s[0] = (char)r;
		return 1;
	} else if (r <= 0x07FF) {
		/* 2 bytes: 00000aaa aabbbbbb */
		s[0] = (char)(0xC0 | ((r & 0x0007C0) >> 6)); /* 110aaaaa */
		s[1] = (char)(0x80 | (r & 0x00003F)); /* 10bbbbbb */
		return 2;
	} else if (r <= 0xFFFF) {
		/* 3 bytes: aaaabbbb bbcccccc */
		s[0] = (char)(0xE0 | ((r & 0x00F000) >> 12)); /* 1110aaaa */
		s[1] = (char)(0x80 | ((r & 0x000FC0) >> 6)); /* 10bbbbbb */
		s[2] = (char)(0x80 | (r & 0x00003F)); /* 10cccccc */
		return 3;
	} else {
		/* 4 bytes: 000aaabb bbbbcccc ccdddddd */
		s[0] = (char)(0xF0 | ((r & 0x1C0000) >> 18)); /* 11110aaa */
		s[1] = (char)(0x80 | ((r & 0x03F000) >> 12)); /* 10bbbbbb */
		s[2] = (char)(0x80 | ((r & 0x000FC0) >> 6)); /* 10cccccc */
		s[3] = (char)(0x80 | (r & 0x00003F)); /* 10dddddd */
		return 4;
	}
}
|
|
|
46 |
|
|
|
/*
 * Convert one ASCII hexadecimal digit (0-9, a-f, A-F) to its value 0-15.
 * Any other character yields 0, which is indistinguishable from '0':
 * validate the input (for example with ISXDIGIT) before calling.
 */
static int
hexdigit(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	else if (c >= 'a' && c <= 'f')
		return 10 + (c - 'a');
	else if (c >= 'A' && c <= 'F')
		return 10 + (c - 'A');
	return 0;
}
|
|
|
58 |
|
|
|
/*
 * Make sure the buffer `*value` of `*sz` bytes can hold `cur + inc` bytes.
 *
 * When it is too small the buffer is reallocated: the new size starts at
 * the current size (at least 64) and is doubled until it is larger than the
 * required size. `*value` and `*sz` are updated only on success.
 *
 * Returns 0 on success. Returns -1 with errno set to ENOMEM when
 * `cur + inc` overflows or when the allocation fails; in that case `*value`
 * is left untouched and it is up to the caller to free it.
 */
static int
capacity(char **value, size_t *sz, size_t cur, size_t inc)
{
	size_t need, newsiz;
	char *newp;

	/* check for addition overflow */
	if (cur > SIZE_MAX - inc) {
		errno = ENOMEM;
		return -1;
	}
	need = cur + inc;

	if (need > *sz) {
		if (need > SIZE_MAX / 2) {
			/* doubling would overflow: ask for the maximum */
			newsiz = SIZE_MAX;
		} else {
			for (newsiz = *sz < 64 ? 64 : *sz; newsiz <= need; newsiz *= 2)
				;
		}
		if (!(newp = realloc(*value, newsiz))) {
			/* same errno as the overflow path, whatever the libc does */
			errno = ENOMEM;
			return -1; /* up to caller to free *value */
		}
		*value = newp;
		*sz = newsiz;
	}
	return 0;
}
|
|
|
86 |
|
|
|
/*
 * Sets of characters that are accepted as the next non-white-space
 * character, depending on what was parsed last.
 */
#define EXPECT_VALUE         "{[\"-0123456789tfn"
#define EXPECT_STRING        "\""
#define EXPECT_END           "}],"
#define EXPECT_OBJECT_STRING EXPECT_STRING "}"
#define EXPECT_OBJECT_KEY    ":"
#define EXPECT_ARRAY_VALUE   EXPECT_VALUE "]"

/*
 * Set the result to JSON_ERROR_INVALID and jump to the cleanup label.
 * Requires an `int ret` variable and an `end` label in the calling function.
 * There is deliberately no trailing semicolon: the caller supplies it, so
 * the macro behaves as one statement, also in front of an `else`.
 */
#define JSON_INVALID()       do { ret = JSON_ERROR_INVALID; goto end; } while (0)
|
|
|
95 |
|
|
|
96 int |
|
|
|
97 parsejson(void (*cb)(struct json_node *, size_t, const char *, size_t)) |
|
|
|
98 { |
|
|
|
99 struct json_node nodes[JSON_MAX_NODE_DEPTH] = { { 0 } }; |
|
|
|
100 size_t depth = 0, p = 0, len, sz = 0; |
|
|
|
101 long cp, hi, lo; |
|
|
|
102 char pri[128], *str = NULL; |
|
|
|
103 int c, i, escape, iskey = 0, ret = JSON_ERROR_MEM; |
|
|
|
104 const char *expect = EXPECT_VALUE; |
|
|
|
105 |
|
|
|
106 if (capacity(&(nodes[0].name), &(nodes[0].namesiz), 0, 1) == -1) |
|
|
|
107 goto end; |
|
|
|
108 nodes[0].name[0] = '\0'; |
|
|
|
109 |
|
|
|
110 while (1) { |
|
|
|
111 c = GETNEXT(); |
|
|
|
112 handlechr: |
|
|
|
113 if (c == EOF) |
|
|
|
114 break; |
|
|
|
115 |
|
|
|
116 /* skip JSON white-space, (NOTE: no \v, \f, \b etc) */ |
|
|
|
117 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') |
|
|
|
118 continue; |
|
|
|
119 |
|
|
|
120 if (!c || !strchr(expect, c)) |
|
|
|
121 JSON_INVALID(); |
|
|
|
122 |
|
|
|
123 switch (c) { |
|
|
|
124 case ':': |
|
|
|
125 iskey = 0; |
|
|
|
126 expect = EXPECT_VALUE; |
|
|
|
127 break; |
|
|
|
128 case '"': |
|
|
|
129 nodes[depth].type = JSON_TYPE_STRING; |
|
|
|
130 escape = 0; |
|
|
|
131 len = 0; |
|
|
|
132 while (1) { |
|
|
|
133 c = GETNEXT(); |
|
|
|
134 chr: |
|
|
|
135 /* EOF or control char: 0x7f is not defined as a control char in RFC 8259 */ |
|
|
|
136 if (c < 0x20) |
|
|
|
137 JSON_INVALID(); |
|
|
|
138 |
|
|
|
139 if (escape) { |
|
|
|
140 escchr: |
|
|
|
141 escape = 0; |
|
|
|
142 switch (c) { |
|
|
|
143 case '"': /* FALLTHROUGH */ |
|
|
|
144 case '\\': |
|
|
|
145 case '/': break; |
|
|
|
146 case 'b': c = '\b'; break; |
|
|
|
147 case 'f': c = '\f'; break; |
|
|
|
148 case 'n': c = '\n'; break; |
|
|
|
149 case 'r': c = '\r'; break; |
|
|
|
150 case 't': c = '\t'; break; |
|
|
|
151 case 'u': /* hex hex hex hex */ |
|
|
|
152 if (capacity(&str, &sz, len, 4) == -1) |
|
|
|
153 goto end; |
|
|
|
154 for (i = 12, cp = 0; i >= 0; i -= 4) { |
|
|
|
155 if ((c = GETNEXT()) == EOF || !ISXDIGIT(c)) |
|
|
|
156 JSON_INVALID(); /* invalid code point */ |
|
|
|
157 cp |= (hexdigit(c) << i); |
|
|
|
158 } |
|
|
|
159 /* RFC 8259 - 7. Strings - surrogates. |
|
|
|
160 * 0xd800 - 0xdbff - high surrogates */ |
|
|
|
161 if (cp >= 0xd800 && cp <= 0xdbff) { |
|
|
|
162 if ((c = GETNEXT()) != '\\') { |
|
|
|
163 len += codepointtoutf8(cp, &str[len]); |
|
|
|
164 goto chr; |
|
|
|
165 } |
|
|
|
166 if ((c = GETNEXT()) != 'u') { |
|
|
|
167 len += codepointtoutf8(cp, &str[len]); |
|
|
|
168 goto escchr; |
|
|
|
169 } |
|
|
|
170 for (hi = cp, i = 12, lo = 0; i >= 0; i -= 4) { |
|
|
|
171 if ((c = GETNEXT()) == EOF || !ISXDIGIT(c)) |
|
|
|
172 JSON_INVALID(); /* invalid code point */ |
|
|
|
173 lo |= (hexdigit(c) << i); |
|
|
|
174 } |
|
|
|
175 /* 0xdc00 - 0xdfff - low surrogates */ |
|
|
|
176 if (lo >= 0xdc00 && lo <= 0xdfff) { |
|
|
|
177 cp = (hi << 10) + lo - 56613888; /* - offset */ |
|
|
|
178 } else { |
|
|
|
179 /* handle graceful: raw invalid output bytes */ |
|
|
|
180 len += codepointtoutf8(hi, &str[len]); |
|
|
|
181 if (capacity(&str, &sz, len, 4) == -1) |
|
|
|
182 goto end; |
|
|
|
183 len += codepointtoutf8(lo, &str[len]); |
|
|
|
184 continue; |
|
|
|
185 } |
|
|
|
186 } |
|
|
|
187 len += codepointtoutf8(cp, &str[len]); |
|
|
|
188 continue; |
|
|
|
189 default: |
|
|
|
190 JSON_INVALID(); /* invalid escape char */ |
|
|
|
191 } |
|
|
|
192 if (capacity(&str, &sz, len, 1) == -1) |
|
|
|
193 goto end; |
|
|
|
194 str[len++] = c; |
|
|
|
195 } else if (c == '\\') { |
|
|
|
196 escape = 1; |
|
|
|
197 } else if (c == '"') { |
|
|
|
198 if (capacity(&str, &sz, len, 1) == -1) |
|
|
|
199 goto end; |
|
|
|
200 str[len++] = '\0'; |
|
|
|
201 |
|
|
|
202 if (iskey) { |
|
|
|
203 /* copy string as key, including NUL byte */ |
|
|
|
204 if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), len, 1) == -1) |
|
|
|
205 goto end; |
|
|
|
206 memcpy(nodes[depth].name, str, len); |
|
|
|
207 } else { |
|
|
|
208 cb(nodes, depth + 1, str, len - 1); /* length excluding NUL byte */ |
|
|
|
209 } |
|
|
|
210 break; |
|
|
|
211 } else { |
|
|
|
212 if (capacity(&str, &sz, len, 1) == -1) |
|
|
|
213 goto end; |
|
|
|
214 str[len++] = c; |
|
|
|
215 } |
|
|
|
216 } |
|
|
|
217 if (iskey) |
|
|
|
218 expect = EXPECT_OBJECT_KEY; |
|
|
|
219 else |
|
|
|
220 expect = EXPECT_END; |
|
|
|
221 break; |
|
|
|
222 case '[': |
|
|
|
223 case '{': |
|
|
|
224 if (depth + 1 >= JSON_MAX_NODE_DEPTH) |
|
|
|
225 JSON_INVALID(); /* too deep */ |
|
|
|
226 |
|
|
|
227 nodes[depth].index = 0; |
|
|
|
228 if (c == '[') { |
|
|
|
229 nodes[depth].type = JSON_TYPE_ARRAY; |
|
|
|
230 expect = EXPECT_ARRAY_VALUE; |
|
|
|
231 } else if (c == '{') { |
|
|
|
232 iskey = 1; |
|
|
|
233 nodes[depth].type = JSON_TYPE_OBJECT; |
|
|
|
234 expect = EXPECT_OBJECT_STRING; |
|
|
|
235 } |
|
|
|
236 |
|
|
|
237 cb(nodes, depth + 1, "", 0); |
|
|
|
238 |
|
|
|
239 depth++; |
|
|
|
240 nodes[depth].index = 0; |
|
|
|
241 if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), 0, 1) == -1) |
|
|
|
242 goto end; |
|
|
|
243 nodes[depth].name[0] = '\0'; |
|
|
|
244 break; |
|
|
|
245 case ']': |
|
|
|
246 case '}': |
|
|
|
247 if (!depth || |
|
|
|
248 (c == ']' && nodes[depth - 1].type != JSON_TYPE_ARRAY) || |
|
|
|
249 (c == '}' && nodes[depth - 1].type != JSON_TYPE_OBJECT)) |
|
|
|
250 JSON_INVALID(); /* unbalanced nodes */ |
|
|
|
251 |
|
|
|
252 depth--; |
|
|
|
253 nodes[depth].index++; |
|
|
|
254 expect = EXPECT_END; |
|
|
|
255 break; |
|
|
|
256 case ',': |
|
|
|
257 if (!depth) |
|
|
|
258 JSON_INVALID(); /* unbalanced nodes */ |
|
|
|
259 |
|
|
|
260 nodes[depth - 1].index++; |
|
|
|
261 if (nodes[depth - 1].type == JSON_TYPE_OBJECT) { |
|
|
|
262 iskey = 1; |
|
|
|
263 expect = EXPECT_STRING; |
|
|
|
264 } else { |
|
|
|
265 iskey = 0; |
|
|
|
266 expect = EXPECT_VALUE; |
|
|
|
267 } |
|
|
|
268 break; |
|
|
|
269 case 't': /* true */ |
|
|
|
270 if (GETNEXT() != 'r' || GETNEXT() != 'u' || GETNEXT() != 'e') |
|
|
|
271 JSON_INVALID(); |
|
|
|
272 nodes[depth].type = JSON_TYPE_BOOL; |
|
|
|
273 cb(nodes, depth + 1, "true", 4); |
|
|
|
274 expect = EXPECT_END; |
|
|
|
275 break; |
|
|
|
276 case 'f': /* false */ |
|
|
|
277 if (GETNEXT() != 'a' || GETNEXT() != 'l' || GETNEXT() != 's' || |
|
|
|
278 GETNEXT() != 'e') |
|
|
|
279 JSON_INVALID(); |
|
|
|
280 nodes[depth].type = JSON_TYPE_BOOL; |
|
|
|
281 cb(nodes, depth + 1, "false", 5); |
|
|
|
282 expect = EXPECT_END; |
|
|
|
283 break; |
|
|
|
284 case 'n': /* null */ |
|
|
|
285 if (GETNEXT() != 'u' || GETNEXT() != 'l' || GETNEXT() != 'l') |
|
|
|
286 JSON_INVALID(); |
|
|
|
287 nodes[depth].type = JSON_TYPE_NULL; |
|
|
|
288 cb(nodes, depth + 1, "null", 4); |
|
|
|
289 expect = EXPECT_END; |
|
|
|
290 break; |
|
|
|
291 default: /* number */ |
|
|
|
292 nodes[depth].type = JSON_TYPE_NUMBER; |
|
|
|
293 p = 0; |
|
|
|
294 pri[p++] = c; |
|
|
|
295 expect = EXPECT_END; |
|
|
|
296 while (1) { |
|
|
|
297 c = GETNEXT(); |
|
|
|
298 if (c == EOF || |
|
|
|
299 (!ISDIGIT(c) && c != 'e' && c != 'E' && |
|
|
|
300 c != '+' && c != '-' && c != '.') || |
|
|
|
301 p + 1 >= sizeof(pri)) { |
|
|
|
302 pri[p] = '\0'; |
|
|
|
303 cb(nodes, depth + 1, pri, p); |
|
|
|
304 goto handlechr; /* do not read next char, handle this */ |
|
|
|
305 } else { |
|
|
|
306 pri[p++] = c; |
|
|
|
307 } |
|
|
|
308 } |
|
|
|
309 } |
|
|
|
310 } |
|
|
|
311 if (depth) |
|
|
|
312 JSON_INVALID(); /* unbalanced nodes */ |
|
|
|
313 |
|
|
|
314 ret = 0; /* success */ |
|
|
|
315 end: |
|
|
|
316 for (depth = 0; depth < sizeof(nodes) / sizeof(nodes[0]); depth++) |
|
|
|
317 free(nodes[depth].name); |
|
|
|
318 free(str); |
|
|
|
319 |
|
|
|
320 return ret; |
|
|
|
321 } |
|