2 * Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
4 * Jansson is free software; you can redistribute it and/or modify
5 * it under the terms of the MIT license. See LICENSE for details.
12 #include "jansson_private.h"
25 #include "strbuffer.h"
/* Stream states. stream_init() starts a stream in STREAM_STATE_OK; once
   the state is anything else, stream_get() returns that state on every
   later call instead of reading more input. */
#define STREAM_STATE_OK 0
#define STREAM_STATE_EOF -1
#define STREAM_STATE_ERROR -2
/* Token types stored in lex->token. Structural characters are compared
   against lex->token as plain character literals (e.g. '{', ']', ':'),
   so the named multi-character tokens use values from 256 upwards and
   TOKEN_INVALID is negative. */
#define TOKEN_INVALID -1
#define TOKEN_STRING 256
#define TOKEN_INTEGER 257
#define TOKEN_REAL 258
#define TOKEN_TRUE 259
#define TOKEN_FALSE 260
#define TOKEN_NULL 261
/* Locale-independent versions of the isxxx() functions.
   Every macro expands its argument more than once, so only pass
   expressions without side effects. */
#define l_isupper(c) ((c) >= 'A' && (c) <= 'Z')
#define l_islower(c) ((c) >= 'a' && (c) <= 'z')
#define l_isalpha(c) (l_isupper(c) || l_islower(c))
#define l_isdigit(c) ((c) >= '0' && (c) <= '9')
#define l_isxdigit(c)                                                        \
    (l_isdigit(c) || ((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f'))
/* Input callback type: read one byte from the source identified by
   `data`, convert it to unsigned char and then to int, and return it.
   Return EOF on end of file. This corresponds to the behaviour of
   fgetc(). */
typedef int (*get_func)(void *data);
61 int column
, last_column
;
67 strbuffer_t saved_text
;
/* Given a pointer to the `stream` member, yield the lex_t named as its
   container type. Relies on container_of, which is defined outside this
   file's visible text. */
#define stream_to_lex(stream) container_of(stream, lex_t, stream)
84 /*** error reporting ***/
86 static void error_set(json_error_t
*error
, const lex_t
*lex
,
87 enum json_error_code code
,
91 char msg_text
[JSON_ERROR_TEXT_LENGTH
];
92 char msg_with_context
[JSON_ERROR_TEXT_LENGTH
];
94 int line
= -1, col
= -1;
96 const char *result
= msg_text
;
102 vsnprintf(msg_text
, JSON_ERROR_TEXT_LENGTH
, msg
, ap
);
103 msg_text
[JSON_ERROR_TEXT_LENGTH
- 1] = '\0';
108 const char *saved_text
= strbuffer_value(&lex
->saved_text
);
110 line
= lex
->stream
.line
;
111 col
= lex
->stream
.column
;
112 pos
= lex
->stream
.position
;
114 if(saved_text
&& saved_text
[0])
116 if(lex
->saved_text
.length
<= 20) {
117 snprintf(msg_with_context
, JSON_ERROR_TEXT_LENGTH
,
118 "%s near '%s'", msg_text
, saved_text
);
119 msg_with_context
[JSON_ERROR_TEXT_LENGTH
- 1] = '\0';
120 result
= msg_with_context
;
125 if(code
== json_error_invalid_syntax
) {
126 /* More specific error code for premature end of file. */
127 code
= json_error_premature_end_of_input
;
129 if(lex
->stream
.state
== STREAM_STATE_ERROR
) {
130 /* No context for UTF-8 decoding errors */
134 snprintf(msg_with_context
, JSON_ERROR_TEXT_LENGTH
,
135 "%s near end of file", msg_text
);
136 msg_with_context
[JSON_ERROR_TEXT_LENGTH
- 1] = '\0';
137 result
= msg_with_context
;
142 jsonp_error_set(error
, line
, col
, pos
, code
, "%s", result
);
146 /*** lexical analyzer ***/
149 stream_init(stream_t
*stream
, get_func get
, void *data
)
153 stream
->buffer
[0] = '\0';
154 stream
->buffer_pos
= 0;
156 stream
->state
= STREAM_STATE_OK
;
159 stream
->position
= 0;
162 static int stream_get(stream_t
*stream
, json_error_t
*error
)
166 if(stream
->state
!= STREAM_STATE_OK
)
167 return stream
->state
;
169 if(!stream
->buffer
[stream
->buffer_pos
])
171 c
= stream
->get(stream
->data
);
173 stream
->state
= STREAM_STATE_EOF
;
174 return STREAM_STATE_EOF
;
177 stream
->buffer
[0] = c
;
178 stream
->buffer_pos
= 0;
180 if(0x80 <= c
&& c
<= 0xFF)
182 /* multi-byte UTF-8 sequence */
185 count
= utf8_check_first(c
);
191 for(i
= 1; i
< count
; i
++)
192 stream
->buffer
[i
] = stream
->get(stream
->data
);
194 if(!utf8_check_full(stream
->buffer
, count
, NULL
))
197 stream
->buffer
[count
] = '\0';
200 stream
->buffer
[1] = '\0';
203 c
= stream
->buffer
[stream
->buffer_pos
++];
208 stream
->last_column
= stream
->column
;
211 else if(utf8_check_first(c
)) {
212 /* track the Unicode character column, so increment only if
213 this is the first character of a UTF-8 sequence */
220 stream
->state
= STREAM_STATE_ERROR
;
221 error_set(error
, stream_to_lex(stream
), json_error_invalid_utf8
, "unable to decode byte 0x%x", c
);
222 return STREAM_STATE_ERROR
;
225 static void stream_unget(stream_t
*stream
, int c
)
227 if(c
== STREAM_STATE_EOF
|| c
== STREAM_STATE_ERROR
)
233 stream
->column
= stream
->last_column
;
235 else if(utf8_check_first(c
))
238 assert(stream
->buffer_pos
> 0);
239 stream
->buffer_pos
--;
240 assert(stream
->buffer
[stream
->buffer_pos
] == c
);
244 static int lex_get(lex_t
*lex
, json_error_t
*error
)
246 return stream_get(&lex
->stream
, error
);
249 static void lex_save(lex_t
*lex
, int c
)
251 strbuffer_append_byte(&lex
->saved_text
, c
);
254 static int lex_get_save(lex_t
*lex
, json_error_t
*error
)
256 int c
= stream_get(&lex
->stream
, error
);
257 if(c
!= STREAM_STATE_EOF
&& c
!= STREAM_STATE_ERROR
)
262 static void lex_unget(lex_t
*lex
, int c
)
264 stream_unget(&lex
->stream
, c
);
267 static void lex_unget_unsave(lex_t
*lex
, int c
)
269 if(c
!= STREAM_STATE_EOF
&& c
!= STREAM_STATE_ERROR
) {
270 /* Since we treat warnings as errors, when assertions are turned
271 * off the "d" variable would be set but never used. Which is
272 * treated as an error by GCC.
277 stream_unget(&lex
->stream
, c
);
281 strbuffer_pop(&lex
->saved_text
);
286 static void lex_save_cached(lex_t
*lex
)
288 while(lex
->stream
.buffer
[lex
->stream
.buffer_pos
] != '\0')
290 lex_save(lex
, lex
->stream
.buffer
[lex
->stream
.buffer_pos
]);
291 lex
->stream
.buffer_pos
++;
292 lex
->stream
.position
++;
296 static void lex_free_string(lex_t
*lex
)
298 jsonp_free(lex
->value
.string
.val
);
299 lex
->value
.string
.val
= NULL
;
300 lex
->value
.string
.len
= 0;
303 /* assumes that str points to 'u' plus at least 4 valid hex digits */
304 static int32_t decode_unicode_escape(const char *str
)
309 assert(str
[0] == 'u');
311 for(i
= 1; i
<= 4; i
++) {
316 else if(l_islower(c
))
317 value
+= c
- 'a' + 10;
318 else if(l_isupper(c
))
319 value
+= c
- 'A' + 10;
327 static void lex_scan_string(lex_t
*lex
, json_error_t
*error
)
334 lex
->value
.string
.val
= NULL
;
335 lex
->token
= TOKEN_INVALID
;
337 c
= lex_get_save(lex
, error
);
340 if(c
== STREAM_STATE_ERROR
)
343 else if(c
== STREAM_STATE_EOF
) {
344 error_set(error
, lex
, json_error_premature_end_of_input
, "premature end of input");
348 else if(0 <= c
&& c
<= 0x1F) {
349 /* control character */
350 lex_unget_unsave(lex
, c
);
352 error_set(error
, lex
, json_error_invalid_syntax
, "unexpected newline");
354 error_set(error
, lex
, json_error_invalid_syntax
, "control character 0x%x", c
);
359 c
= lex_get_save(lex
, error
);
361 c
= lex_get_save(lex
, error
);
362 for(i
= 0; i
< 4; i
++) {
364 error_set(error
, lex
, json_error_invalid_syntax
, "invalid escape");
367 c
= lex_get_save(lex
, error
);
370 else if(c
== '"' || c
== '\\' || c
== '/' || c
== 'b' ||
371 c
== 'f' || c
== 'n' || c
== 'r' || c
== 't')
372 c
= lex_get_save(lex
, error
);
374 error_set(error
, lex
, json_error_invalid_syntax
, "invalid escape");
379 c
= lex_get_save(lex
, error
);
382 /* the actual value is at most of the same length as the source
384 - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte
385 - a single \uXXXX escape (length 6) is converted to at most 3 bytes
386 - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair
387 are converted to 4 bytes
389 t
= jsonp_malloc(lex
->saved_text
.length
+ 1);
391 /* this is not very nice, since TOKEN_INVALID is returned */
394 lex
->value
.string
.val
= t
;
396 /* + 1 to skip the " */
397 p
= strbuffer_value(&lex
->saved_text
) + 1;
406 value
= decode_unicode_escape(p
);
408 error_set(error
, lex
, json_error_invalid_syntax
, "invalid Unicode escape '%.6s'", p
- 1);
413 if(0xD800 <= value
&& value
<= 0xDBFF) {
415 if(*p
== '\\' && *(p
+ 1) == 'u') {
416 int32_t value2
= decode_unicode_escape(++p
);
418 error_set(error
, lex
, json_error_invalid_syntax
, "invalid Unicode escape '%.6s'", p
- 1);
423 if(0xDC00 <= value2
&& value2
<= 0xDFFF) {
424 /* valid second surrogate */
426 ((value
- 0xD800) << 10) +
431 /* invalid second surrogate */
432 error_set(error
, lex
,
433 json_error_invalid_syntax
,
434 "invalid Unicode '\\u%04X\\u%04X'",
440 /* no second surrogate */
441 error_set(error
, lex
, json_error_invalid_syntax
, "invalid Unicode '\\u%04X'",
446 else if(0xDC00 <= value
&& value
<= 0xDFFF) {
447 error_set(error
, lex
, json_error_invalid_syntax
, "invalid Unicode '\\u%04X'", value
);
451 if(utf8_encode(value
, t
, &length
))
457 case '"': case '\\': case '/':
459 case 'b': *t
= '\b'; break;
460 case 'f': *t
= '\f'; break;
461 case 'n': *t
= '\n'; break;
462 case 'r': *t
= '\r'; break;
463 case 't': *t
= '\t'; break;
474 lex
->value
.string
.len
= t
- lex
->value
.string
.val
;
475 lex
->token
= TOKEN_STRING
;
479 lex_free_string(lex
);
482 #ifndef JANSSON_USING_CMAKE /* disabled if using cmake */
483 #if JSON_INTEGER_IS_LONG_LONG
484 #ifdef _MSC_VER /* Microsoft Visual Studio */
485 #define json_strtoint _strtoi64
487 #define json_strtoint strtoll
490 #define json_strtoint strtol
494 static int lex_scan_number(lex_t
*lex
, int c
, json_error_t
*error
)
496 const char *saved_text
;
500 lex
->token
= TOKEN_INVALID
;
503 c
= lex_get_save(lex
, error
);
506 c
= lex_get_save(lex
, error
);
508 lex_unget_unsave(lex
, c
);
512 else if(l_isdigit(c
)) {
514 c
= lex_get_save(lex
, error
);
518 lex_unget_unsave(lex
, c
);
522 if(!(lex
->flags
& JSON_DECODE_INT_AS_REAL
) &&
523 c
!= '.' && c
!= 'E' && c
!= 'e')
527 lex_unget_unsave(lex
, c
);
529 saved_text
= strbuffer_value(&lex
->saved_text
);
532 intval
= json_strtoint(saved_text
, &end
, 10);
533 if(errno
== ERANGE
) {
535 error_set(error
, lex
, json_error_numeric_overflow
, "too big negative integer");
537 error_set(error
, lex
, json_error_numeric_overflow
, "too big integer");
541 assert(end
== saved_text
+ lex
->saved_text
.length
);
543 lex
->token
= TOKEN_INTEGER
;
544 lex
->value
.integer
= intval
;
549 c
= lex_get(lex
, error
);
557 c
= lex_get_save(lex
, error
);
561 if(c
== 'E' || c
== 'e') {
562 c
= lex_get_save(lex
, error
);
563 if(c
== '+' || c
== '-')
564 c
= lex_get_save(lex
, error
);
567 lex_unget_unsave(lex
, c
);
572 c
= lex_get_save(lex
, error
);
576 lex_unget_unsave(lex
, c
);
578 if(jsonp_strtod(&lex
->saved_text
, &doubleval
)) {
579 error_set(error
, lex
, json_error_numeric_overflow
, "real number overflow");
583 lex
->token
= TOKEN_REAL
;
584 lex
->value
.real
= doubleval
;
591 static int lex_scan(lex_t
*lex
, json_error_t
*error
)
595 strbuffer_clear(&lex
->saved_text
);
597 if(lex
->token
== TOKEN_STRING
)
598 lex_free_string(lex
);
601 c
= lex_get(lex
, error
);
602 while(c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r');
604 if(c
== STREAM_STATE_EOF
) {
605 lex
->token
= TOKEN_EOF
;
609 if(c
== STREAM_STATE_ERROR
) {
610 lex
->token
= TOKEN_INVALID
;
616 if(c
== '{' || c
== '}' || c
== '[' || c
== ']' || c
== ':' || c
== ',')
620 lex_scan_string(lex
, error
);
622 else if(l_isdigit(c
) || c
== '-') {
623 if(lex_scan_number(lex
, c
, error
))
627 else if(l_isalpha(c
)) {
628 /* eat up the whole identifier for clearer error messages */
629 const char *saved_text
;
632 c
= lex_get_save(lex
, error
);
634 lex_unget_unsave(lex
, c
);
636 saved_text
= strbuffer_value(&lex
->saved_text
);
638 if(strcmp(saved_text
, "true") == 0)
639 lex
->token
= TOKEN_TRUE
;
640 else if(strcmp(saved_text
, "false") == 0)
641 lex
->token
= TOKEN_FALSE
;
642 else if(strcmp(saved_text
, "null") == 0)
643 lex
->token
= TOKEN_NULL
;
645 lex
->token
= TOKEN_INVALID
;
649 /* save the rest of the input UTF-8 sequence to get an error
650 message of valid UTF-8 */
651 lex_save_cached(lex
);
652 lex
->token
= TOKEN_INVALID
;
659 static char *lex_steal_string(lex_t
*lex
, size_t *out_len
)
662 if(lex
->token
== TOKEN_STRING
) {
663 result
= lex
->value
.string
.val
;
664 *out_len
= lex
->value
.string
.len
;
665 lex
->value
.string
.val
= NULL
;
666 lex
->value
.string
.len
= 0;
671 static int lex_init(lex_t
*lex
, get_func get
, size_t flags
, void *data
)
673 stream_init(&lex
->stream
, get
, data
);
674 if(strbuffer_init(&lex
->saved_text
))
678 lex
->token
= TOKEN_INVALID
;
682 static void lex_close(lex_t
*lex
)
684 if(lex
->token
== TOKEN_STRING
)
685 lex_free_string(lex
);
686 strbuffer_close(&lex
->saved_text
);
692 static json_t
*parse_value(lex_t
*lex
, size_t flags
, json_error_t
*error
);
694 static json_t
*parse_object(lex_t
*lex
, size_t flags
, json_error_t
*error
)
696 json_t
*object
= json_object();
700 lex_scan(lex
, error
);
701 if(lex
->token
== '}')
709 if(lex
->token
!= TOKEN_STRING
) {
710 error_set(error
, lex
, json_error_invalid_syntax
, "string or '}' expected");
714 key
= lex_steal_string(lex
, &len
);
717 if (memchr(key
, '\0', len
)) {
719 error_set(error
, lex
, json_error_null_byte_in_key
, "NUL byte in object key not supported");
723 if(flags
& JSON_REJECT_DUPLICATES
) {
724 if(json_object_get(object
, key
)) {
726 error_set(error
, lex
, json_error_duplicate_key
, "duplicate object key");
731 lex_scan(lex
, error
);
732 if(lex
->token
!= ':') {
734 error_set(error
, lex
, json_error_invalid_syntax
, "':' expected");
738 lex_scan(lex
, error
);
739 value
= parse_value(lex
, flags
, error
);
745 if(json_object_set_new_nocheck(object
, key
, value
)) {
752 lex_scan(lex
, error
);
753 if(lex
->token
!= ',')
756 lex_scan(lex
, error
);
759 if(lex
->token
!= '}') {
760 error_set(error
, lex
, json_error_invalid_syntax
, "'}' expected");
771 static json_t
*parse_array(lex_t
*lex
, size_t flags
, json_error_t
*error
)
773 json_t
*array
= json_array();
777 lex_scan(lex
, error
);
778 if(lex
->token
== ']')
782 json_t
*elem
= parse_value(lex
, flags
, error
);
786 if(json_array_append_new(array
, elem
)) {
790 lex_scan(lex
, error
);
791 if(lex
->token
!= ',')
794 lex_scan(lex
, error
);
797 if(lex
->token
!= ']') {
798 error_set(error
, lex
, json_error_invalid_syntax
, "']' expected");
809 static json_t
*parse_value(lex_t
*lex
, size_t flags
, json_error_t
*error
)
814 if(lex
->depth
> JSON_PARSER_MAX_DEPTH
) {
815 error_set(error
, lex
, json_error_stack_overflow
, "maximum parsing depth reached");
821 const char *value
= lex
->value
.string
.val
;
822 size_t len
= lex
->value
.string
.len
;
824 if(!(flags
& JSON_ALLOW_NUL
)) {
825 if(memchr(value
, '\0', len
)) {
826 error_set(error
, lex
, json_error_null_character
, "\\u0000 is not allowed without JSON_ALLOW_NUL");
831 json
= jsonp_stringn_nocheck_own(value
, len
);
833 lex
->value
.string
.val
= NULL
;
834 lex
->value
.string
.len
= 0;
839 case TOKEN_INTEGER
: {
840 json
= json_integer(lex
->value
.integer
);
845 json
= json_real(lex
->value
.real
);
862 json
= parse_object(lex
, flags
, error
);
866 json
= parse_array(lex
, flags
, error
);
870 error_set(error
, lex
, json_error_invalid_syntax
, "invalid token");
874 error_set(error
, lex
, json_error_invalid_syntax
, "unexpected token");
885 static json_t
*parse_json(lex_t
*lex
, size_t flags
, json_error_t
*error
)
891 lex_scan(lex
, error
);
892 if(!(flags
& JSON_DECODE_ANY
)) {
893 if(lex
->token
!= '[' && lex
->token
!= '{') {
894 error_set(error
, lex
, json_error_invalid_syntax
, "'[' or '{' expected");
899 result
= parse_value(lex
, flags
, error
);
903 if(!(flags
& JSON_DISABLE_EOF_CHECK
)) {
904 lex_scan(lex
, error
);
905 if(lex
->token
!= TOKEN_EOF
) {
906 error_set(error
, lex
, json_error_end_of_input_expected
, "end of file expected");
913 /* Save the position even though there was no error */
914 error
->position
= (int)lex
->stream
.position
;
926 static int string_get(void *data
)
929 string_data_t
*stream
= (string_data_t
*)data
;
930 c
= stream
->data
[stream
->pos
];
936 return (unsigned char)c
;
940 json_t
*json_loads(const char *string
, size_t flags
, json_error_t
*error
)
944 string_data_t stream_data
;
946 jsonp_error_init(error
, "<string>");
948 if (string
== NULL
) {
949 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
953 stream_data
.data
= string
;
956 if(lex_init(&lex
, string_get
, flags
, (void *)&stream_data
))
959 result
= parse_json(&lex
, flags
, error
);
972 static int buffer_get(void *data
)
975 buffer_data_t
*stream
= data
;
976 if(stream
->pos
>= stream
->len
)
979 c
= stream
->data
[stream
->pos
];
981 return (unsigned char)c
;
984 json_t
*json_loadb(const char *buffer
, size_t buflen
, size_t flags
, json_error_t
*error
)
988 buffer_data_t stream_data
;
990 jsonp_error_init(error
, "<buffer>");
992 if (buffer
== NULL
) {
993 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
997 stream_data
.data
= buffer
;
999 stream_data
.len
= buflen
;
1001 if(lex_init(&lex
, buffer_get
, flags
, (void *)&stream_data
))
1004 result
= parse_json(&lex
, flags
, error
);
1010 json_t
*json_loadf(FILE *input
, size_t flags
, json_error_t
*error
)
1019 source
= "<stream>";
1021 jsonp_error_init(error
, source
);
1023 if (input
== NULL
) {
1024 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1028 if(lex_init(&lex
, (get_func
)fgetc
, flags
, input
))
1031 result
= parse_json(&lex
, flags
, error
);
1037 static int fd_get_func(int *fd
)
1040 #ifdef HAVE_UNISTD_H
1041 if (read(*fd
, &c
, 1) == 1)
1047 json_t
*json_loadfd(int input
, size_t flags
, json_error_t
*error
)
1053 #ifdef HAVE_UNISTD_H
1054 if(input
== STDIN_FILENO
)
1058 source
= "<stream>";
1060 jsonp_error_init(error
, source
);
1063 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1067 if(lex_init(&lex
, (get_func
)fd_get_func
, flags
, &input
))
1070 result
= parse_json(&lex
, flags
, error
);
1076 json_t
*json_load_file(const char *path
, size_t flags
, json_error_t
*error
)
1081 jsonp_error_init(error
, path
);
1084 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1088 fp
= fopen(path
, "rb");
1091 error_set(error
, NULL
, json_error_cannot_open_file
, "unable to open %s: %s",
1092 path
, strerror(errno
));
1096 result
= json_loadf(fp
, flags
, error
);
1102 #define MAX_BUF_LEN 1024
1106 char data
[MAX_BUF_LEN
];
1109 json_load_callback_t callback
;
1113 static int callback_get(void *data
)
1116 callback_data_t
*stream
= data
;
1118 if(stream
->pos
>= stream
->len
) {
1120 stream
->len
= stream
->callback(stream
->data
, MAX_BUF_LEN
, stream
->arg
);
1121 if(stream
->len
== 0 || stream
->len
== (size_t)-1)
1125 c
= stream
->data
[stream
->pos
];
1127 return (unsigned char)c
;
1130 json_t
*json_load_callback(json_load_callback_t callback
, void *arg
, size_t flags
, json_error_t
*error
)
1135 callback_data_t stream_data
;
1137 memset(&stream_data
, 0, sizeof(stream_data
));
1138 stream_data
.callback
= callback
;
1139 stream_data
.arg
= arg
;
1141 jsonp_error_init(error
, "<callback>");
1143 if (callback
== NULL
) {
1144 error_set(error
, NULL
, json_error_invalid_argument
, "wrong arguments");
1148 if(lex_init(&lex
, (get_func
)callback_get
, flags
, &stream_data
))
1151 result
= parse_json(&lex
, flags
, error
);