]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org> | |
3 | * | |
4 | * Jansson is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the MIT license. See LICENSE for details. | |
6 | */ | |
7 | ||
8 | #ifndef _GNU_SOURCE | |
9 | #define _GNU_SOURCE | |
10 | #endif | |
11 | ||
12 | #include "jansson_private.h" | |
13 | ||
14 | #include <errno.h> | |
15 | #include <limits.h> | |
16 | #include <stdio.h> | |
17 | #include <stdlib.h> | |
18 | #include <string.h> | |
19 | #include <assert.h> | |
20 | #ifdef HAVE_UNISTD_H | |
21 | #include <unistd.h> | |
22 | #endif | |
23 | ||
24 | #include "jansson.h" | |
25 | #include "strbuffer.h" | |
26 | #include "utf.h" | |
27 | ||
/* Values kept in stream_t.state and also returned by stream_get() in
   place of a byte once the stream has ended or failed. */
#define STREAM_STATE_OK 0
#define STREAM_STATE_EOF -1
#define STREAM_STATE_ERROR -2

/* Token kinds stored in lex_t.token.  Punctuation tokens are stored as
   their own character value (see lex_scan()), so the named kinds that
   carry a value start above the single-byte range. */
#define TOKEN_INVALID -1
#define TOKEN_EOF 0
#define TOKEN_STRING 256
#define TOKEN_INTEGER 257
#define TOKEN_REAL 258
#define TOKEN_TRUE 259
#define TOKEN_FALSE 260
#define TOKEN_NULL 261
40 | ||
/* Locale independent replacements for the <ctype.h> classification
   functions.  They compare against ASCII literals only.  Note that each
   macro evaluates its argument more than once. */
#define l_isupper(c) ((c) >= 'A' && (c) <= 'Z')
#define l_islower(c) ((c) >= 'a' && (c) <= 'z')
#define l_isalpha(c) (l_isupper(c) || l_islower(c))
#define l_isdigit(c) ((c) >= '0' && (c) <= '9')
#define l_isxdigit(c)                                                          \
    (l_isdigit(c) || ((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f'))
48 | ||
/* Byte source callback.  Reads one byte from the underlying input,
   converts it to unsigned char and then to int, and returns it; returns
   EOF when the input is exhausted.  This is the same contract as
   fgetc().  `data` is the opaque pointer given to stream_init(). */
typedef int (*get_func)(void *data);
53 | ||
54 | typedef struct { | |
55 | get_func get; | |
56 | void *data; | |
57 | char buffer[5]; | |
58 | size_t buffer_pos; | |
59 | int state; | |
60 | int line; | |
61 | int column, last_column; | |
62 | size_t position; | |
63 | } stream_t; | |
64 | ||
65 | typedef struct { | |
66 | stream_t stream; | |
67 | strbuffer_t saved_text; | |
68 | size_t flags; | |
69 | size_t depth; | |
70 | int token; | |
71 | union { | |
72 | struct { | |
73 | char *val; | |
74 | size_t len; | |
75 | } string; | |
76 | json_int_t integer; | |
77 | double real; | |
78 | } value; | |
79 | } lex_t; | |
80 | ||
81 | #define stream_to_lex(stream) container_of(stream, lex_t, stream) | |
82 | ||
83 | ||
84 | /*** error reporting ***/ | |
85 | ||
86 | static void error_set(json_error_t *error, const lex_t *lex, | |
87 | enum json_error_code code, | |
88 | const char *msg, ...) | |
89 | { | |
90 | va_list ap; | |
91 | char msg_text[JSON_ERROR_TEXT_LENGTH]; | |
92 | char msg_with_context[JSON_ERROR_TEXT_LENGTH + 28]; | |
93 | ||
94 | int line = -1, col = -1; | |
95 | size_t pos = 0; | |
96 | const char *result = msg_text; | |
97 | ||
98 | if(!error) | |
99 | return; | |
100 | ||
101 | va_start(ap, msg); | |
102 | vsnprintf(msg_text, JSON_ERROR_TEXT_LENGTH, msg, ap); | |
103 | msg_text[JSON_ERROR_TEXT_LENGTH - 1] = '\0'; | |
104 | va_end(ap); | |
105 | ||
106 | if(lex) | |
107 | { | |
108 | const char *saved_text = strbuffer_value(&lex->saved_text); | |
109 | ||
110 | line = lex->stream.line; | |
111 | col = lex->stream.column; | |
112 | pos = lex->stream.position; | |
113 | ||
114 | if(saved_text && saved_text[0]) | |
115 | { | |
116 | if(lex->saved_text.length <= 20) { | |
117 | snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH + 28, | |
118 | "%s near '%s'", msg_text, saved_text); | |
119 | msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0'; | |
120 | result = msg_with_context; | |
121 | } | |
122 | } | |
123 | else | |
124 | { | |
125 | if(code == json_error_invalid_syntax) { | |
126 | /* More specific error code for premature end of file. */ | |
127 | code = json_error_premature_end_of_input; | |
128 | } | |
129 | if(lex->stream.state == STREAM_STATE_ERROR) { | |
130 | /* No context for UTF-8 decoding errors */ | |
131 | result = msg_text; | |
132 | } | |
133 | else { | |
134 | snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH + 17, | |
135 | "%s near end of file", msg_text); | |
136 | msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0'; | |
137 | result = msg_with_context; | |
138 | } | |
139 | } | |
140 | } | |
141 | ||
142 | jsonp_error_set(error, line, col, pos, code, "%s", result); | |
143 | } | |
144 | ||
145 | ||
146 | /*** lexical analyzer ***/ | |
147 | ||
148 | static void | |
149 | stream_init(stream_t *stream, get_func get, void *data) | |
150 | { | |
151 | stream->get = get; | |
152 | stream->data = data; | |
153 | stream->buffer[0] = '\0'; | |
154 | stream->buffer_pos = 0; | |
155 | ||
156 | stream->state = STREAM_STATE_OK; | |
157 | stream->line = 1; | |
158 | stream->column = 0; | |
159 | stream->position = 0; | |
160 | } | |
161 | ||
162 | static int stream_get(stream_t *stream, json_error_t *error) | |
163 | { | |
164 | int c; | |
165 | ||
166 | if(stream->state != STREAM_STATE_OK) | |
167 | return stream->state; | |
168 | ||
169 | if(!stream->buffer[stream->buffer_pos]) | |
170 | { | |
171 | c = stream->get(stream->data); | |
172 | if(c == EOF) { | |
173 | stream->state = STREAM_STATE_EOF; | |
174 | return STREAM_STATE_EOF; | |
175 | } | |
176 | ||
177 | stream->buffer[0] = c; | |
178 | stream->buffer_pos = 0; | |
179 | ||
180 | if(0x80 <= c && c <= 0xFF) | |
181 | { | |
182 | /* multi-byte UTF-8 sequence */ | |
183 | size_t i, count; | |
184 | ||
185 | count = utf8_check_first(c); | |
186 | if(!count) | |
187 | goto out; | |
188 | ||
189 | assert(count >= 2); | |
190 | ||
191 | for(i = 1; i < count; i++) | |
192 | stream->buffer[i] = stream->get(stream->data); | |
193 | ||
194 | if(!utf8_check_full(stream->buffer, count, NULL)) | |
195 | goto out; | |
196 | ||
197 | stream->buffer[count] = '\0'; | |
198 | } | |
199 | else | |
200 | stream->buffer[1] = '\0'; | |
201 | } | |
202 | ||
203 | c = stream->buffer[stream->buffer_pos++]; | |
204 | ||
205 | stream->position++; | |
206 | if(c == '\n') { | |
207 | stream->line++; | |
208 | stream->last_column = stream->column; | |
209 | stream->column = 0; | |
210 | } | |
211 | else if(utf8_check_first(c)) { | |
212 | /* track the Unicode character column, so increment only if | |
213 | this is the first character of a UTF-8 sequence */ | |
214 | stream->column++; | |
215 | } | |
216 | ||
217 | return c; | |
218 | ||
219 | out: | |
220 | stream->state = STREAM_STATE_ERROR; | |
221 | error_set(error, stream_to_lex(stream), json_error_invalid_utf8, "unable to decode byte 0x%x", c); | |
222 | return STREAM_STATE_ERROR; | |
223 | } | |
224 | ||
225 | static void stream_unget(stream_t *stream, int c) | |
226 | { | |
227 | if(c == STREAM_STATE_EOF || c == STREAM_STATE_ERROR) | |
228 | return; | |
229 | ||
230 | stream->position--; | |
231 | if(c == '\n') { | |
232 | stream->line--; | |
233 | stream->column = stream->last_column; | |
234 | } | |
235 | else if(utf8_check_first(c)) | |
236 | stream->column--; | |
237 | ||
238 | assert(stream->buffer_pos > 0); | |
239 | stream->buffer_pos--; | |
240 | assert(stream->buffer[stream->buffer_pos] == c); | |
241 | } | |
242 | ||
243 | ||
244 | static int lex_get(lex_t *lex, json_error_t *error) | |
245 | { | |
246 | return stream_get(&lex->stream, error); | |
247 | } | |
248 | ||
249 | static void lex_save(lex_t *lex, int c) | |
250 | { | |
251 | strbuffer_append_byte(&lex->saved_text, c); | |
252 | } | |
253 | ||
254 | static int lex_get_save(lex_t *lex, json_error_t *error) | |
255 | { | |
256 | int c = stream_get(&lex->stream, error); | |
257 | if(c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR) | |
258 | lex_save(lex, c); | |
259 | return c; | |
260 | } | |
261 | ||
262 | static void lex_unget(lex_t *lex, int c) | |
263 | { | |
264 | stream_unget(&lex->stream, c); | |
265 | } | |
266 | ||
267 | static void lex_unget_unsave(lex_t *lex, int c) | |
268 | { | |
269 | if(c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR) { | |
270 | /* Since we treat warnings as errors, when assertions are turned | |
271 | * off the "d" variable would be set but never used. Which is | |
272 | * treated as an error by GCC. | |
273 | */ | |
274 | #ifndef NDEBUG | |
275 | char d; | |
276 | #endif | |
277 | stream_unget(&lex->stream, c); | |
278 | #ifndef NDEBUG | |
279 | d = | |
280 | #endif | |
281 | strbuffer_pop(&lex->saved_text); | |
282 | assert(c == d); | |
283 | } | |
284 | } | |
285 | ||
286 | static void lex_save_cached(lex_t *lex) | |
287 | { | |
288 | while(lex->stream.buffer[lex->stream.buffer_pos] != '\0') | |
289 | { | |
290 | lex_save(lex, lex->stream.buffer[lex->stream.buffer_pos]); | |
291 | lex->stream.buffer_pos++; | |
292 | lex->stream.position++; | |
293 | } | |
294 | } | |
295 | ||
296 | static void lex_free_string(lex_t *lex) | |
297 | { | |
298 | jsonp_free(lex->value.string.val); | |
299 | lex->value.string.val = NULL; | |
300 | lex->value.string.len = 0; | |
301 | } | |
302 | ||
/* Decode the four hex digits of a \uXXXX escape.
 *
 * str: points at the 'u' of the escape; the four characters after it
 *      are examined.
 *
 * Returns the code unit value (0..0xFFFF), or -1 if any of the four
 * characters is not a hexadecimal digit.  Only 0-9, a-f and A-F are
 * accepted; other letters are rejected instead of being folded into a
 * bogus value.  Scanning stops at the first invalid character, so a
 * string that ends early is never read past its terminator.
 */
static int32_t decode_unicode_escape(const char *str)
{
    int32_t value = 0;
    int i;

    assert(str[0] == 'u');

    for (i = 1; i <= 4; i++) {
        const char c = str[i];
        int digit;

        if ('0' <= c && c <= '9')
            digit = c - '0';
        else if ('a' <= c && c <= 'f')
            digit = c - 'a' + 10;
        else if ('A' <= c && c <= 'F')
            digit = c - 'A' + 10;
        else
            return -1;

        /* shift in one more nibble */
        value = (value << 4) | digit;
    }

    return value;
}
326 | ||
327 | static void lex_scan_string(lex_t *lex, json_error_t *error) | |
328 | { | |
329 | int c; | |
330 | const char *p; | |
331 | char *t; | |
332 | int i; | |
333 | ||
334 | lex->value.string.val = NULL; | |
335 | lex->token = TOKEN_INVALID; | |
336 | ||
337 | c = lex_get_save(lex, error); | |
338 | ||
339 | while(c != '"') { | |
340 | if(c == STREAM_STATE_ERROR) | |
341 | goto out; | |
342 | ||
343 | else if(c == STREAM_STATE_EOF) { | |
344 | error_set(error, lex, json_error_premature_end_of_input, "premature end of input"); | |
345 | goto out; | |
346 | } | |
347 | ||
348 | else if(0 <= c && c <= 0x1F) { | |
349 | /* control character */ | |
350 | lex_unget_unsave(lex, c); | |
351 | if(c == '\n') | |
352 | error_set(error, lex, json_error_invalid_syntax, "unexpected newline"); | |
353 | else | |
354 | error_set(error, lex, json_error_invalid_syntax, "control character 0x%x", c); | |
355 | goto out; | |
356 | } | |
357 | ||
358 | else if(c == '\\') { | |
359 | c = lex_get_save(lex, error); | |
360 | if(c == 'u') { | |
361 | c = lex_get_save(lex, error); | |
362 | for(i = 0; i < 4; i++) { | |
363 | if(!l_isxdigit(c)) { | |
364 | error_set(error, lex, json_error_invalid_syntax, "invalid escape"); | |
365 | goto out; | |
366 | } | |
367 | c = lex_get_save(lex, error); | |
368 | } | |
369 | } | |
370 | else if(c == '"' || c == '\\' || c == '/' || c == 'b' || | |
371 | c == 'f' || c == 'n' || c == 'r' || c == 't') | |
372 | c = lex_get_save(lex, error); | |
373 | else { | |
374 | error_set(error, lex, json_error_invalid_syntax, "invalid escape"); | |
375 | goto out; | |
376 | } | |
377 | } | |
378 | else | |
379 | c = lex_get_save(lex, error); | |
380 | } | |
381 | ||
382 | /* the actual value is at most of the same length as the source | |
383 | string, because: | |
384 | - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte | |
385 | - a single \uXXXX escape (length 6) is converted to at most 3 bytes | |
386 | - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair | |
387 | are converted to 4 bytes | |
388 | */ | |
389 | t = jsonp_malloc(lex->saved_text.length + 1); | |
390 | if(!t) { | |
391 | /* this is not very nice, since TOKEN_INVALID is returned */ | |
392 | goto out; | |
393 | } | |
394 | lex->value.string.val = t; | |
395 | ||
396 | /* + 1 to skip the " */ | |
397 | p = strbuffer_value(&lex->saved_text) + 1; | |
398 | ||
399 | while(*p != '"') { | |
400 | if(*p == '\\') { | |
401 | p++; | |
402 | if(*p == 'u') { | |
403 | size_t length; | |
404 | int32_t value; | |
405 | ||
406 | value = decode_unicode_escape(p); | |
407 | if(value < 0) { | |
408 | error_set(error, lex, json_error_invalid_syntax, "invalid Unicode escape '%.6s'", p - 1); | |
409 | goto out; | |
410 | } | |
411 | p += 5; | |
412 | ||
413 | if(0xD800 <= value && value <= 0xDBFF) { | |
414 | /* surrogate pair */ | |
415 | if(*p == '\\' && *(p + 1) == 'u') { | |
416 | int32_t value2 = decode_unicode_escape(++p); | |
417 | if(value2 < 0) { | |
418 | error_set(error, lex, json_error_invalid_syntax, "invalid Unicode escape '%.6s'", p - 1); | |
419 | goto out; | |
420 | } | |
421 | p += 5; | |
422 | ||
423 | if(0xDC00 <= value2 && value2 <= 0xDFFF) { | |
424 | /* valid second surrogate */ | |
425 | value = | |
426 | ((value - 0xD800) << 10) + | |
427 | (value2 - 0xDC00) + | |
428 | 0x10000; | |
429 | } | |
430 | else { | |
431 | /* invalid second surrogate */ | |
432 | error_set(error, lex, | |
433 | json_error_invalid_syntax, | |
434 | "invalid Unicode '\\u%04X\\u%04X'", | |
435 | value, value2); | |
436 | goto out; | |
437 | } | |
438 | } | |
439 | else { | |
440 | /* no second surrogate */ | |
441 | error_set(error, lex, json_error_invalid_syntax, "invalid Unicode '\\u%04X'", | |
442 | value); | |
443 | goto out; | |
444 | } | |
445 | } | |
446 | else if(0xDC00 <= value && value <= 0xDFFF) { | |
447 | error_set(error, lex, json_error_invalid_syntax, "invalid Unicode '\\u%04X'", value); | |
448 | goto out; | |
449 | } | |
450 | ||
451 | if(utf8_encode(value, t, &length)) | |
452 | assert(0); | |
453 | t += length; | |
454 | } | |
455 | else { | |
456 | switch(*p) { | |
457 | case '"': case '\\': case '/': | |
458 | *t = *p; break; | |
459 | case 'b': *t = '\b'; break; | |
460 | case 'f': *t = '\f'; break; | |
461 | case 'n': *t = '\n'; break; | |
462 | case 'r': *t = '\r'; break; | |
463 | case 't': *t = '\t'; break; | |
464 | default: assert(0); | |
465 | } | |
466 | t++; | |
467 | p++; | |
468 | } | |
469 | } | |
470 | else | |
471 | *(t++) = *(p++); | |
472 | } | |
473 | *t = '\0'; | |
474 | lex->value.string.len = t - lex->value.string.val; | |
475 | lex->token = TOKEN_STRING; | |
476 | return; | |
477 | ||
478 | out: | |
479 | lex_free_string(lex); | |
480 | } | |
481 | ||
/* Pick the string-to-integer conversion matching the integer width
   selected by JSON_INTEGER_IS_LONG_LONG.  Skipped entirely when
   building with CMake (JANSSON_USING_CMAKE defined). */
#ifndef JANSSON_USING_CMAKE
#if !JSON_INTEGER_IS_LONG_LONG
#define json_strtoint strtol
#elif defined(_MSC_VER) /* Microsoft Visual Studio */
#define json_strtoint _strtoi64
#else
#define json_strtoint strtoll
#endif
#endif
493 | ||
494 | static int lex_scan_number(lex_t *lex, int c, json_error_t *error) | |
495 | { | |
496 | const char *saved_text; | |
497 | char *end; | |
498 | double doubleval; | |
499 | ||
500 | lex->token = TOKEN_INVALID; | |
501 | ||
502 | if(c == '-') | |
503 | c = lex_get_save(lex, error); | |
504 | ||
505 | if(c == '0') { | |
506 | c = lex_get_save(lex, error); | |
507 | if(l_isdigit(c)) { | |
508 | lex_unget_unsave(lex, c); | |
509 | goto out; | |
510 | } | |
511 | } | |
512 | else if(l_isdigit(c)) { | |
513 | do | |
514 | c = lex_get_save(lex, error); | |
515 | while(l_isdigit(c)); | |
516 | } | |
517 | else { | |
518 | lex_unget_unsave(lex, c); | |
519 | goto out; | |
520 | } | |
521 | ||
522 | if(!(lex->flags & JSON_DECODE_INT_AS_REAL) && | |
523 | c != '.' && c != 'E' && c != 'e') | |
524 | { | |
525 | json_int_t intval; | |
526 | ||
527 | lex_unget_unsave(lex, c); | |
528 | ||
529 | saved_text = strbuffer_value(&lex->saved_text); | |
530 | ||
531 | errno = 0; | |
532 | intval = json_strtoint(saved_text, &end, 10); | |
533 | if(errno == ERANGE) { | |
534 | if(intval < 0) | |
535 | error_set(error, lex, json_error_numeric_overflow, "too big negative integer"); | |
536 | else | |
537 | error_set(error, lex, json_error_numeric_overflow, "too big integer"); | |
538 | goto out; | |
539 | } | |
540 | ||
541 | assert(end == saved_text + lex->saved_text.length); | |
542 | ||
543 | lex->token = TOKEN_INTEGER; | |
544 | lex->value.integer = intval; | |
545 | return 0; | |
546 | } | |
547 | ||
548 | if(c == '.') { | |
549 | c = lex_get(lex, error); | |
550 | if(!l_isdigit(c)) { | |
551 | lex_unget(lex, c); | |
552 | goto out; | |
553 | } | |
554 | lex_save(lex, c); | |
555 | ||
556 | do | |
557 | c = lex_get_save(lex, error); | |
558 | while(l_isdigit(c)); | |
559 | } | |
560 | ||
561 | if(c == 'E' || c == 'e') { | |
562 | c = lex_get_save(lex, error); | |
563 | if(c == '+' || c == '-') | |
564 | c = lex_get_save(lex, error); | |
565 | ||
566 | if(!l_isdigit(c)) { | |
567 | lex_unget_unsave(lex, c); | |
568 | goto out; | |
569 | } | |
570 | ||
571 | do | |
572 | c = lex_get_save(lex, error); | |
573 | while(l_isdigit(c)); | |
574 | } | |
575 | ||
576 | lex_unget_unsave(lex, c); | |
577 | ||
578 | if(jsonp_strtod(&lex->saved_text, &doubleval)) { | |
579 | error_set(error, lex, json_error_numeric_overflow, "real number overflow"); | |
580 | goto out; | |
581 | } | |
582 | ||
583 | lex->token = TOKEN_REAL; | |
584 | lex->value.real = doubleval; | |
585 | return 0; | |
586 | ||
587 | out: | |
588 | return -1; | |
589 | } | |
590 | ||
591 | static int lex_scan(lex_t *lex, json_error_t *error) | |
592 | { | |
593 | int c; | |
594 | ||
595 | strbuffer_clear(&lex->saved_text); | |
596 | ||
597 | if(lex->token == TOKEN_STRING) | |
598 | lex_free_string(lex); | |
599 | ||
600 | do | |
601 | c = lex_get(lex, error); | |
602 | while(c == ' ' || c == '\t' || c == '\n' || c == '\r'); | |
603 | ||
604 | if(c == STREAM_STATE_EOF) { | |
605 | lex->token = TOKEN_EOF; | |
606 | goto out; | |
607 | } | |
608 | ||
609 | if(c == STREAM_STATE_ERROR) { | |
610 | lex->token = TOKEN_INVALID; | |
611 | goto out; | |
612 | } | |
613 | ||
614 | lex_save(lex, c); | |
615 | ||
616 | if(c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',') | |
617 | lex->token = c; | |
618 | ||
619 | else if(c == '"') | |
620 | lex_scan_string(lex, error); | |
621 | ||
622 | else if(l_isdigit(c) || c == '-') { | |
623 | if(lex_scan_number(lex, c, error)) | |
624 | goto out; | |
625 | } | |
626 | ||
627 | else if(l_isalpha(c)) { | |
628 | /* eat up the whole identifier for clearer error messages */ | |
629 | const char *saved_text; | |
630 | ||
631 | do | |
632 | c = lex_get_save(lex, error); | |
633 | while(l_isalpha(c)); | |
634 | lex_unget_unsave(lex, c); | |
635 | ||
636 | saved_text = strbuffer_value(&lex->saved_text); | |
637 | ||
638 | if(strcmp(saved_text, "true") == 0) | |
639 | lex->token = TOKEN_TRUE; | |
640 | else if(strcmp(saved_text, "false") == 0) | |
641 | lex->token = TOKEN_FALSE; | |
642 | else if(strcmp(saved_text, "null") == 0) | |
643 | lex->token = TOKEN_NULL; | |
644 | else | |
645 | lex->token = TOKEN_INVALID; | |
646 | } | |
647 | ||
648 | else { | |
649 | /* save the rest of the input UTF-8 sequence to get an error | |
650 | message of valid UTF-8 */ | |
651 | lex_save_cached(lex); | |
652 | lex->token = TOKEN_INVALID; | |
653 | } | |
654 | ||
655 | out: | |
656 | return lex->token; | |
657 | } | |
658 | ||
659 | static char *lex_steal_string(lex_t *lex, size_t *out_len) | |
660 | { | |
661 | char *result = NULL; | |
662 | if(lex->token == TOKEN_STRING) { | |
663 | result = lex->value.string.val; | |
664 | *out_len = lex->value.string.len; | |
665 | lex->value.string.val = NULL; | |
666 | lex->value.string.len = 0; | |
667 | } | |
668 | return result; | |
669 | } | |
670 | ||
671 | static int lex_init(lex_t *lex, get_func get, size_t flags, void *data) | |
672 | { | |
673 | stream_init(&lex->stream, get, data); | |
674 | if(strbuffer_init(&lex->saved_text)) | |
675 | return -1; | |
676 | ||
677 | lex->flags = flags; | |
678 | lex->token = TOKEN_INVALID; | |
679 | return 0; | |
680 | } | |
681 | ||
682 | static void lex_close(lex_t *lex) | |
683 | { | |
684 | if(lex->token == TOKEN_STRING) | |
685 | lex_free_string(lex); | |
686 | strbuffer_close(&lex->saved_text); | |
687 | } | |
688 | ||
689 | ||
690 | /*** parser ***/ | |
691 | ||
692 | static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error); | |
693 | ||
694 | static json_t *parse_object(lex_t *lex, size_t flags, json_error_t *error) | |
695 | { | |
696 | json_t *object = json_object(); | |
697 | if(!object) | |
698 | return NULL; | |
699 | ||
700 | lex_scan(lex, error); | |
701 | if(lex->token == '}') | |
702 | return object; | |
703 | ||
704 | while(1) { | |
705 | char *key; | |
706 | size_t len; | |
707 | json_t *value; | |
708 | ||
709 | if(lex->token != TOKEN_STRING) { | |
710 | error_set(error, lex, json_error_invalid_syntax, "string or '}' expected"); | |
711 | goto error; | |
712 | } | |
713 | ||
714 | key = lex_steal_string(lex, &len); | |
715 | if(!key) | |
716 | return NULL; | |
717 | if (memchr(key, '\0', len)) { | |
718 | jsonp_free(key); | |
719 | error_set(error, lex, json_error_null_byte_in_key, "NUL byte in object key not supported"); | |
720 | goto error; | |
721 | } | |
722 | ||
723 | if(flags & JSON_REJECT_DUPLICATES) { | |
724 | if(json_object_get(object, key)) { | |
725 | jsonp_free(key); | |
726 | error_set(error, lex, json_error_duplicate_key, "duplicate object key"); | |
727 | goto error; | |
728 | } | |
729 | } | |
730 | ||
731 | lex_scan(lex, error); | |
732 | if(lex->token != ':') { | |
733 | jsonp_free(key); | |
734 | error_set(error, lex, json_error_invalid_syntax, "':' expected"); | |
735 | goto error; | |
736 | } | |
737 | ||
738 | lex_scan(lex, error); | |
739 | value = parse_value(lex, flags, error); | |
740 | if(!value) { | |
741 | jsonp_free(key); | |
742 | goto error; | |
743 | } | |
744 | ||
745 | if(json_object_set_new_nocheck(object, key, value)) { | |
746 | jsonp_free(key); | |
747 | goto error; | |
748 | } | |
749 | ||
750 | jsonp_free(key); | |
751 | ||
752 | lex_scan(lex, error); | |
753 | if(lex->token != ',') | |
754 | break; | |
755 | ||
756 | lex_scan(lex, error); | |
757 | } | |
758 | ||
759 | if(lex->token != '}') { | |
760 | error_set(error, lex, json_error_invalid_syntax, "'}' expected"); | |
761 | goto error; | |
762 | } | |
763 | ||
764 | return object; | |
765 | ||
766 | error: | |
767 | json_decref(object); | |
768 | return NULL; | |
769 | } | |
770 | ||
771 | static json_t *parse_array(lex_t *lex, size_t flags, json_error_t *error) | |
772 | { | |
773 | json_t *array = json_array(); | |
774 | if(!array) | |
775 | return NULL; | |
776 | ||
777 | lex_scan(lex, error); | |
778 | if(lex->token == ']') | |
779 | return array; | |
780 | ||
781 | while(lex->token) { | |
782 | json_t *elem = parse_value(lex, flags, error); | |
783 | if(!elem) | |
784 | goto error; | |
785 | ||
786 | if(json_array_append_new(array, elem)) { | |
787 | goto error; | |
788 | } | |
789 | ||
790 | lex_scan(lex, error); | |
791 | if(lex->token != ',') | |
792 | break; | |
793 | ||
794 | lex_scan(lex, error); | |
795 | } | |
796 | ||
797 | if(lex->token != ']') { | |
798 | error_set(error, lex, json_error_invalid_syntax, "']' expected"); | |
799 | goto error; | |
800 | } | |
801 | ||
802 | return array; | |
803 | ||
804 | error: | |
805 | json_decref(array); | |
806 | return NULL; | |
807 | } | |
808 | ||
809 | static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error) | |
810 | { | |
811 | json_t *json; | |
812 | ||
813 | lex->depth++; | |
814 | if(lex->depth > JSON_PARSER_MAX_DEPTH) { | |
815 | error_set(error, lex, json_error_stack_overflow, "maximum parsing depth reached"); | |
816 | return NULL; | |
817 | } | |
818 | ||
819 | switch(lex->token) { | |
820 | case TOKEN_STRING: { | |
821 | const char *value = lex->value.string.val; | |
822 | size_t len = lex->value.string.len; | |
823 | ||
824 | if(!(flags & JSON_ALLOW_NUL)) { | |
825 | if(memchr(value, '\0', len)) { | |
826 | error_set(error, lex, json_error_null_character, "\\u0000 is not allowed without JSON_ALLOW_NUL"); | |
827 | return NULL; | |
828 | } | |
829 | } | |
830 | ||
831 | json = jsonp_stringn_nocheck_own(value, len); | |
832 | lex->value.string.val = NULL; | |
833 | lex->value.string.len = 0; | |
834 | break; | |
835 | } | |
836 | ||
837 | case TOKEN_INTEGER: { | |
838 | json = json_integer(lex->value.integer); | |
839 | break; | |
840 | } | |
841 | ||
842 | case TOKEN_REAL: { | |
843 | json = json_real(lex->value.real); | |
844 | break; | |
845 | } | |
846 | ||
847 | case TOKEN_TRUE: | |
848 | json = json_true(); | |
849 | break; | |
850 | ||
851 | case TOKEN_FALSE: | |
852 | json = json_false(); | |
853 | break; | |
854 | ||
855 | case TOKEN_NULL: | |
856 | json = json_null(); | |
857 | break; | |
858 | ||
859 | case '{': | |
860 | json = parse_object(lex, flags, error); | |
861 | break; | |
862 | ||
863 | case '[': | |
864 | json = parse_array(lex, flags, error); | |
865 | break; | |
866 | ||
867 | case TOKEN_INVALID: | |
868 | error_set(error, lex, json_error_invalid_syntax, "invalid token"); | |
869 | return NULL; | |
870 | ||
871 | default: | |
872 | error_set(error, lex, json_error_invalid_syntax, "unexpected token"); | |
873 | return NULL; | |
874 | } | |
875 | ||
876 | if(!json) | |
877 | return NULL; | |
878 | ||
879 | lex->depth--; | |
880 | return json; | |
881 | } | |
882 | ||
883 | static json_t *parse_json(lex_t *lex, size_t flags, json_error_t *error) | |
884 | { | |
885 | json_t *result; | |
886 | ||
887 | lex->depth = 0; | |
888 | ||
889 | lex_scan(lex, error); | |
890 | if(!(flags & JSON_DECODE_ANY)) { | |
891 | if(lex->token != '[' && lex->token != '{') { | |
892 | error_set(error, lex, json_error_invalid_syntax, "'[' or '{' expected"); | |
893 | return NULL; | |
894 | } | |
895 | } | |
896 | ||
897 | result = parse_value(lex, flags, error); | |
898 | if(!result) | |
899 | return NULL; | |
900 | ||
901 | if(!(flags & JSON_DISABLE_EOF_CHECK)) { | |
902 | lex_scan(lex, error); | |
903 | if(lex->token != TOKEN_EOF) { | |
904 | error_set(error, lex, json_error_end_of_input_expected, "end of file expected"); | |
905 | json_decref(result); | |
906 | return NULL; | |
907 | } | |
908 | } | |
909 | ||
910 | if(error) { | |
911 | /* Save the position even though there was no error */ | |
912 | error->position = (int)lex->stream.position; | |
913 | } | |
914 | ||
915 | return result; | |
916 | } | |
917 | ||
/* Cursor over a NUL-terminated input string. */
typedef struct
{
    const char *data; /* the string being read */
    size_t pos;       /* index of the next byte to return */
} string_data_t;

/* get_func reading from a string_data_t: returns the next byte as an
   unsigned char value, or EOF at the terminating NUL, which is never
   consumed. */
static int string_get(void *data)
{
    string_data_t *cursor = (string_data_t *)data;
    const char next = cursor->data[cursor->pos];

    if (next == '\0')
        return EOF;

    cursor->pos++;
    return (unsigned char)next;
}
937 | ||
938 | json_t *json_loads(const char *string, size_t flags, json_error_t *error) | |
939 | { | |
940 | lex_t lex; | |
941 | json_t *result; | |
942 | string_data_t stream_data; | |
943 | ||
944 | jsonp_error_init(error, "<string>"); | |
945 | ||
946 | if (string == NULL) { | |
947 | error_set(error, NULL, json_error_invalid_argument, "wrong arguments"); | |
948 | return NULL; | |
949 | } | |
950 | ||
951 | stream_data.data = string; | |
952 | stream_data.pos = 0; | |
953 | ||
954 | if(lex_init(&lex, string_get, flags, (void *)&stream_data)) | |
955 | return NULL; | |
956 | ||
957 | result = parse_json(&lex, flags, error); | |
958 | ||
959 | lex_close(&lex); | |
960 | return result; | |
961 | } | |
962 | ||
/* Cursor over a memory buffer of known length. */
typedef struct
{
    const char *data; /* start of the buffer */
    size_t len;       /* number of bytes in the buffer */
    size_t pos;       /* index of the next byte to return */
} buffer_data_t;

/* get_func reading from a buffer_data_t: returns the next byte as an
   unsigned char value, or EOF once `len` bytes have been returned. */
static int buffer_get(void *data)
{
    buffer_data_t *cursor = data;

    if (cursor->pos < cursor->len) {
        const char next = cursor->data[cursor->pos];

        cursor->pos++;
        return (unsigned char)next;
    }

    return EOF;
}
981 | ||
982 | json_t *json_loadb(const char *buffer, size_t buflen, size_t flags, json_error_t *error) | |
983 | { | |
984 | lex_t lex; | |
985 | json_t *result; | |
986 | buffer_data_t stream_data; | |
987 | ||
988 | jsonp_error_init(error, "<buffer>"); | |
989 | ||
990 | if (buffer == NULL) { | |
991 | error_set(error, NULL, json_error_invalid_argument, "wrong arguments"); | |
992 | return NULL; | |
993 | } | |
994 | ||
995 | stream_data.data = buffer; | |
996 | stream_data.pos = 0; | |
997 | stream_data.len = buflen; | |
998 | ||
999 | if(lex_init(&lex, buffer_get, flags, (void *)&stream_data)) | |
1000 | return NULL; | |
1001 | ||
1002 | result = parse_json(&lex, flags, error); | |
1003 | ||
1004 | lex_close(&lex); | |
1005 | return result; | |
1006 | } | |
1007 | ||
1008 | json_t *json_loadf(FILE *input, size_t flags, json_error_t *error) | |
1009 | { | |
1010 | lex_t lex; | |
1011 | const char *source; | |
1012 | json_t *result; | |
1013 | ||
1014 | if(input == stdin) | |
1015 | source = "<stdin>"; | |
1016 | else | |
1017 | source = "<stream>"; | |
1018 | ||
1019 | jsonp_error_init(error, source); | |
1020 | ||
1021 | if (input == NULL) { | |
1022 | error_set(error, NULL, json_error_invalid_argument, "wrong arguments"); | |
1023 | return NULL; | |
1024 | } | |
1025 | ||
1026 | if(lex_init(&lex, (get_func)fgetc, flags, input)) | |
1027 | return NULL; | |
1028 | ||
1029 | result = parse_json(&lex, flags, error); | |
1030 | ||
1031 | lex_close(&lex); | |
1032 | return result; | |
1033 | } | |
1034 | ||
/* get_func reading single bytes from a file descriptor.
 *
 * data: points to the int holding the descriptor.
 *
 * Returns the byte read, or EOF when read() does not deliver exactly
 * one byte or when <unistd.h> is not available.  The parameter is a
 * void pointer so that the function has exactly the get_func type and
 * is never called through an incompatible function pointer.
 */
static int fd_get_func(void *data)
{
#ifdef HAVE_UNISTD_H
    const int *fd = data;
    uint8_t c;
    if (read(*fd, &c, 1) == 1)
        return c;
#else
    (void)data;
#endif
    return EOF;
}
1044 | ||
1045 | json_t *json_loadfd(int input, size_t flags, json_error_t *error) | |
1046 | { | |
1047 | lex_t lex; | |
1048 | const char *source; | |
1049 | json_t *result; | |
1050 | ||
1051 | #ifdef HAVE_UNISTD_H | |
1052 | if(input == STDIN_FILENO) | |
1053 | source = "<stdin>"; | |
1054 | else | |
1055 | #endif | |
1056 | source = "<stream>"; | |
1057 | ||
1058 | jsonp_error_init(error, source); | |
1059 | ||
1060 | if (input < 0) { | |
1061 | error_set(error, NULL, json_error_invalid_argument, "wrong arguments"); | |
1062 | return NULL; | |
1063 | } | |
1064 | ||
1065 | if(lex_init(&lex, (get_func)fd_get_func, flags, &input)) | |
1066 | return NULL; | |
1067 | ||
1068 | result = parse_json(&lex, flags, error); | |
1069 | ||
1070 | lex_close(&lex); | |
1071 | return result; | |
1072 | } | |
1073 | ||
1074 | json_t *json_load_file(const char *path, size_t flags, json_error_t *error) | |
1075 | { | |
1076 | json_t *result; | |
1077 | FILE *fp; | |
1078 | ||
1079 | jsonp_error_init(error, path); | |
1080 | ||
1081 | if (path == NULL) { | |
1082 | error_set(error, NULL, json_error_invalid_argument, "wrong arguments"); | |
1083 | return NULL; | |
1084 | } | |
1085 | ||
1086 | fp = fopen(path, "rb"); | |
1087 | if(!fp) | |
1088 | { | |
1089 | error_set(error, NULL, json_error_cannot_open_file, "unable to open %s: %s", | |
1090 | path, strerror(errno)); | |
1091 | return NULL; | |
1092 | } | |
1093 | ||
1094 | result = json_loadf(fp, flags, error); | |
1095 | ||
1096 | fclose(fp); | |
1097 | return result; | |
1098 | } | |
1099 | ||
1100 | #define MAX_BUF_LEN 1024 | |
1101 | ||
1102 | typedef struct | |
1103 | { | |
1104 | char data[MAX_BUF_LEN]; | |
1105 | size_t len; | |
1106 | size_t pos; | |
1107 | json_load_callback_t callback; | |
1108 | void *arg; | |
1109 | } callback_data_t; | |
1110 | ||
1111 | static int callback_get(void *data) | |
1112 | { | |
1113 | char c; | |
1114 | callback_data_t *stream = data; | |
1115 | ||
1116 | if(stream->pos >= stream->len) { | |
1117 | stream->pos = 0; | |
1118 | stream->len = stream->callback(stream->data, MAX_BUF_LEN, stream->arg); | |
1119 | if(stream->len == 0 || stream->len == (size_t)-1) | |
1120 | return EOF; | |
1121 | } | |
1122 | ||
1123 | c = stream->data[stream->pos]; | |
1124 | stream->pos++; | |
1125 | return (unsigned char)c; | |
1126 | } | |
1127 | ||
1128 | json_t *json_load_callback(json_load_callback_t callback, void *arg, size_t flags, json_error_t *error) | |
1129 | { | |
1130 | lex_t lex; | |
1131 | json_t *result; | |
1132 | ||
1133 | callback_data_t stream_data; | |
1134 | ||
1135 | memset(&stream_data, 0, sizeof(stream_data)); | |
1136 | stream_data.callback = callback; | |
1137 | stream_data.arg = arg; | |
1138 | ||
1139 | jsonp_error_init(error, "<callback>"); | |
1140 | ||
1141 | if (callback == NULL) { | |
1142 | error_set(error, NULL, json_error_invalid_argument, "wrong arguments"); | |
1143 | return NULL; | |
1144 | } | |
1145 | ||
1146 | if(lex_init(&lex, (get_func)callback_get, flags, &stream_data)) | |
1147 | return NULL; | |
1148 | ||
1149 | result = parse_json(&lex, flags, error); | |
1150 | ||
1151 | lex_close(&lex); | |
1152 | return result; | |
1153 | } |