From a62f0d4ae50089905072439610d93d0190887e41 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 25 Nov 2024 14:24:33 +0100 Subject: [PATCH] Add a notion of maximum depth to the parser state and `jq_state`. It's not sufficient to just add the state to the parser, since we might call `jv_load_file` from a variety of other functions. A few helper functions defer to `DEFAULT_MAX_PARSING_DEPTH`, as they are given neither a `jq_state` nor `jv_parser`. Added tests in `tests/shtest` to confirm that the maximum depth limit is appropriately enforced. --- docs/content/manual/dev/manual.yml | 7 +++++++ jq.1.prebuilt | 6 ++++++ src/execute.c | 13 +++++++++++++ src/jq.h | 3 +++ src/jq_test.c | 2 +- src/jv.h | 7 +++++-- src/jv_file.c | 4 ++-- src/jv_parse.c | 24 +++++++++++------------- src/linker.c | 6 +++--- src/main.c | 22 +++++++++++++++++++--- tests/shtest | 24 ++++++++++++++++++++++++ 11 files changed, 94 insertions(+), 24 deletions(-) diff --git a/docs/content/manual/dev/manual.yml b/docs/content/manual/dev/manual.yml index 895deab9e5..4f9eddf288 100644 --- a/docs/content/manual/dev/manual.yml +++ b/docs/content/manual/dev/manual.yml @@ -266,6 +266,13 @@ sections: available in the program and has a string whose contents are to the texts in the file named `bar`. + * `--depth n`: + + This option sets the maximum parsing depth (of objects and arrays) to + `n`. Exceeding the parsing depth causes `jq` to exit early with an error. + If you set `n` to 0, the parser will go arbitrarily deep. The default + value is 256. + * `--args`: Remaining arguments are positional string arguments. These are diff --git a/jq.1.prebuilt b/jq.1.prebuilt index 1a2c3cbc82..73019231a6 100644 --- a/jq.1.prebuilt +++ b/jq.1.prebuilt @@ -202,6 +202,12 @@ This option reads all the JSON texts in the named file and binds an array of the This option reads in the named file and binds its contents to the given global variable\. 
If you run jq with \fB\-\-rawfile foo bar\fR, then \fB$foo\fR is available in the program and has a string whose contents are to the texts in the file named \fBbar\fR\. . .TP +\fB\-\-depth n\fR: +. +.IP +This option sets the maximum parsing depth (of objects and arrays) to \fBn\fR\. Exceeding the parsing depth causes \fBjq\fR to exit early with an error\. If you set \fBn\fR to 0, the parser will go arbitrarily deep\. The default value is 256\. +. +.TP \fB\-\-args\fR: . .IP diff --git a/src/execute.c b/src/execute.c index cf255e49ec..cd897fce99 100644 --- a/src/execute.c +++ b/src/execute.c @@ -38,6 +38,7 @@ struct jq_state { int subexp_nest; int debug_trace_enabled; int initial_execution; + int parser_maxdepth; unsigned next_label; int halted; @@ -1072,6 +1073,8 @@ jq_state *jq_init(void) { jq->exit_code = jv_invalid(); jq->error_message = jv_invalid(); + jq->parser_maxdepth = DEFAULT_MAX_PARSING_DEPTH; + jq->input_cb = NULL; jq->input_cb_data = NULL; @@ -1347,3 +1350,13 @@ jv jq_get_error_message(jq_state *jq) { return jv_copy(jq->error_message); } + +int jq_get_parser_maxdepth(jq_state *jq) +{ + return jq->parser_maxdepth; +} + +void jq_set_parser_maxdepth(jq_state *jq, int parser_maxdepth) +{ + jq->parser_maxdepth = parser_maxdepth; +} diff --git a/src/jq.h b/src/jq.h index 8e9a7b8cf8..7a10303807 100644 --- a/src/jq.h +++ b/src/jq.h @@ -35,6 +35,9 @@ int jq_halted(jq_state *); jv jq_get_exit_code(jq_state *); jv jq_get_error_message(jq_state *); +int jq_get_parser_maxdepth(jq_state *); +void jq_set_parser_maxdepth(jq_state *, int); + typedef jv (*jq_input_cb)(jq_state *, void *); void jq_set_input_cb(jq_state *, jq_input_cb, void *); void jq_get_input_cb(jq_state *, jq_input_cb *, void **); diff --git a/src/jq_test.c b/src/jq_test.c index dd6920a420..46deb02c51 100644 --- a/src/jq_test.c +++ b/src/jq_test.c @@ -354,7 +354,7 @@ static void *test_pthread_run(void *ptr) { return NULL; } - struct jv_parser *parser = jv_parser_new(0); + struct jv_parser *parser = 
jv_parser_new(0, jq_get_parser_maxdepth(jq)); jv_parser_set_buf(parser, buf, strlen(buf), 0); rv = test_pthread_jq_parse(jq, parser); diff --git a/src/jv.h b/src/jv.h index ef6f70121e..cd2ebfb99e 100644 --- a/src/jv.h +++ b/src/jv.h @@ -246,10 +246,13 @@ jv jv_parse_custom_flags(const char* string, int flags); typedef void (*jv_nomem_handler_f)(void *); void jv_nomem_handler(jv_nomem_handler_f, void *); -jv jv_load_file(const char *, int); +jv jv_load_file(const char *, int, int); +#ifndef DEFAULT_MAX_PARSING_DEPTH +#define DEFAULT_MAX_PARSING_DEPTH (256) +#endif typedef struct jv_parser jv_parser; -jv_parser* jv_parser_new(int); +jv_parser* jv_parser_new(int, int); void jv_parser_set_buf(jv_parser*, const char*, int, int); int jv_parser_remaining(jv_parser*); jv jv_parser_next(jv_parser*); diff --git a/src/jv_file.c b/src/jv_file.c index b10bcc0b5c..baceda03f1 100644 --- a/src/jv_file.c +++ b/src/jv_file.c @@ -9,7 +9,7 @@ #include "jv.h" #include "jv_unicode.h" -jv jv_load_file(const char* filename, int raw) { +jv jv_load_file(const char* filename, int raw, int maxdepth) { struct stat sb; int fd = open(filename, O_RDONLY); if (fd == -1) { @@ -36,7 +36,7 @@ jv jv_load_file(const char* filename, int raw) { data = jv_string(""); } else { data = jv_array(); - parser = jv_parser_new(0); + parser = jv_parser_new(0, maxdepth); } // To avoid mangling UTF-8 multi-byte sequences that cross the end of our read diff --git a/src/jv_parse.c b/src/jv_parse.c index 519c2047f2..04eee109e5 100644 --- a/src/jv_parse.c +++ b/src/jv_parse.c @@ -10,10 +10,6 @@ typedef const char* presult; -#ifndef MAX_PARSING_DEPTH -#define MAX_PARSING_DEPTH (256) -#endif - #define TRY(x) do {presult msg__ = (x); if (msg__) return msg__; } while(0) #ifdef __GNUC__ #define pfunc __attribute__((warn_unused_result)) presult @@ -39,6 +35,7 @@ struct jv_parser { unsigned bom_strip_position; int flags; + int maxdepth; jv* stack; // parser int stackpos; // parser @@ -66,7 +63,7 @@ struct jv_parser { }; 
-static void parser_init(struct jv_parser* p, int flags) { +static void parser_init(struct jv_parser* p, int flags, int maxdepth) { p->flags = flags; if ((p->flags & JV_PARSE_STREAMING)) { p->path = jv_array(); @@ -74,6 +71,7 @@ static void parser_init(struct jv_parser* p, int flags) { p->path = jv_invalid(); p->flags &= ~(JV_PARSE_STREAM_ERRORS); } + p->maxdepth = maxdepth; p->stack = 0; p->stacklen = p->stackpos = 0; p->last_seen = JV_LAST_NONE; @@ -156,13 +154,13 @@ static void push(struct jv_parser* p, jv v) { static pfunc parse_token(struct jv_parser* p, char ch) { switch (ch) { case '[': - if (p->stackpos >= MAX_PARSING_DEPTH) return "Exceeds depth limit for parsing"; + if (p->maxdepth > 0 && p->stackpos >= p->maxdepth) return "Exceeds depth limit for parsing (set with --depth)"; if (jv_is_valid(p->next)) return "Expected separator between values"; push(p, jv_array()); break; case '{': - if (p->stackpos >= MAX_PARSING_DEPTH) return "Exceeds depth limit for parsing"; + if (p->maxdepth > 0 && p->stackpos >= p->maxdepth) return "Exceeds depth limit for parsing (set with --depth)"; if (jv_is_valid(p->next)) return "Expected separator between values"; push(p, jv_object()); break; @@ -707,9 +705,9 @@ static pfunc scan(struct jv_parser* p, char ch, jv* out) { return answer; } -struct jv_parser* jv_parser_new(int flags) { +struct jv_parser* jv_parser_new(int flags, int maxdepth) { struct jv_parser* p = jv_mem_alloc(sizeof(struct jv_parser)); - parser_init(p, flags); + parser_init(p, flags, maxdepth); p->flags = flags; return p; } @@ -861,9 +859,9 @@ jv jv_parser_next(struct jv_parser* p) { } } -jv jv_parse_sized_custom_flags(const char* string, int length, int flags) { +jv jv_parse_sized_custom_flags(const char* string, int length, int flags, int maxdepth) { struct jv_parser parser; - parser_init(&parser, flags); + parser_init(&parser, flags, maxdepth); jv_parser_set_buf(&parser, string, length, 0); jv value = jv_parser_next(&parser); if (jv_is_valid(value)) { @@ 
-901,7 +899,7 @@ jv jv_parse_sized_custom_flags(const char* string, int length, int flags) { } jv jv_parse_sized(const char* string, int length) { - return jv_parse_sized_custom_flags(string, length, 0); + return jv_parse_sized_custom_flags(string, length, 0, DEFAULT_MAX_PARSING_DEPTH); } jv jv_parse(const char* string) { @@ -909,5 +907,5 @@ jv jv_parse(const char* string) { } jv jv_parse_custom_flags(const char* string, int flags) { - return jv_parse_sized_custom_flags(string, strlen(string), flags); + return jv_parse_sized_custom_flags(string, strlen(string), flags, DEFAULT_MAX_PARSING_DEPTH); } diff --git a/src/linker.c b/src/linker.c index a4006b22b8..54ef4634eb 100644 --- a/src/linker.c +++ b/src/linker.c @@ -334,9 +334,9 @@ static int load_library(jq_state *jq, jv lib_path, int is_data, int raw, int opt block program; jv data; if (is_data && !raw) - data = jv_load_file(jv_string_value(lib_path), 0); + data = jv_load_file(jv_string_value(lib_path), 0, jq_get_parser_maxdepth(jq)); else - data = jv_load_file(jv_string_value(lib_path), 1); + data = jv_load_file(jv_string_value(lib_path), 1, jq_get_parser_maxdepth(jq)); int state_idx; if (!jv_is_valid(data)) { program = gen_noop(); @@ -386,7 +386,7 @@ jv load_module_meta(jq_state *jq, jv mod_relpath) { if (!jv_is_valid(lib_path)) return lib_path; jv meta = jv_null(); - jv data = jv_load_file(jv_string_value(lib_path), 1); + jv data = jv_load_file(jv_string_value(lib_path), 1, jq_get_parser_maxdepth(jq)); if (jv_is_valid(data)) { block program; struct locfile* src = locfile_init(jq, jv_string_value(lib_path), jv_string_value(data), jv_string_length_bytes(jv_copy(data))); diff --git a/src/main.c b/src/main.c index 2f25105292..d4eac77753 100644 --- a/src/main.c +++ b/src/main.c @@ -101,6 +101,7 @@ static void usage(int code, int keep_it_short) { " --slurpfile name file set $name to an array of JSON values read\n" " from the file;\n" " --rawfile name file set $name to string contents of file;\n" + " --depth n set 
parser maxdepth to n (0 for unbounded);\n" " --args consume remaining arguments as positional\n" " string values;\n" " --jsonargs consume remaining arguments as positional\n" @@ -296,6 +297,7 @@ int main(int argc, char* argv[]) { int ret = JQ_OK_NO_OUTPUT; int compiled = 0; int parser_flags = 0; + int parser_maxdepth = DEFAULT_MAX_PARSING_DEPTH; int nfiles = 0; int last_result = -1; /* -1 = no result, 0=null or false, 1=true */ int badwrite; @@ -441,6 +443,20 @@ int main(int argc, char* argv[]) { i++; } else if (isoption(&text, 0, "seq", is_short)) { options |= SEQ; + } else if (isoption(&text, 0, "depth", is_short)) { + if (i >= argc - 1) { + fprintf(stderr, "%s: --depth takes one parameter\n", progname); + die(); + } + /* Parse with strtol rather than atoi: atoi maps non-numeric input to 0, which would silently select "unbounded" depth. Reject empty input, trailing junk, negatives, and values that do not fit in an int. */ + char *depth_end = NULL; + long depth = strtol(argv[i+1], &depth_end, 10); + if (depth_end == argv[i+1] || *depth_end != '\0' || depth < 0 || depth != (int)depth) { + fprintf(stderr, "%s: --depth takes a non-negative number\n", progname); + die(); + } + parser_maxdepth = (int)depth; + i++; } else if (isoption(&text, 0, "stream", is_short)) { parser_flags |= JV_PARSE_STREAMING; } else if (isoption(&text, 0, "stream-errors", is_short)) { @@ -483,7 +499,7 @@ int main(int argc, char* argv[]) { die(); } if (!jv_object_has(jv_copy(program_arguments), jv_string(argv[i+1]))) { - jv data = jv_load_file(argv[i+2], raw); + jv data = jv_load_file(argv[i+2], raw, parser_maxdepth); if (!jv_is_valid(data)) { data = jv_invalid_get_msg(data); fprintf(stderr, "%s: Bad JSON in --%s %s %s: %s\n", progname, which, @@ -591,6 +607,8 @@ int main(int argc, char* argv[]) { if (!program) usage(2, 1); + jq_set_parser_maxdepth(jq, parser_maxdepth); + if (options & FROM_FILE) { char *program_origin = strdup(program); if (program_origin == NULL) { @@ -598,7 +616,7 @@ int main(int argc, char* argv[]) { exit(2); } - jv data = jv_load_file(program, 1); + jv data = jv_load_file(program, 1, parser_maxdepth); if (!jv_is_valid(data)) { data = jv_invalid_get_msg(data); fprintf(stderr, "%s: %s\n", progname, jv_string_value(data)); @@ -644,7 +662,7 @@ int main(int argc, char* argv[]) { if ((options & 
RAW_INPUT)) jq_util_input_set_parser(input_state, NULL, (options & SLURP) ? 1 : 0); else - jq_util_input_set_parser(input_state, jv_parser_new(parser_flags), (options & SLURP) ? 1 : 0); + jq_util_input_set_parser(input_state, jv_parser_new(parser_flags, parser_maxdepth), (options & SLURP) ? 1 : 0); // Let jq program read from inputs jq_set_input_cb(jq, jq_util_input_next_input_cb, input_state); diff --git a/tests/shtest b/tests/shtest index 507c44395f..0c94e687c3 100755 --- a/tests/shtest +++ b/tests/shtest @@ -725,4 +725,28 @@ $VALGRIND $Q $JQ . <<\NUM -10E-1000000001 NUM +# --depth flag +echo '[["doubly nested"]]' >$d/depth2.json + +s='"inner"' +i=0 +while [ "$i" -lt 257 ]; do + s="[$s]" + i=$((i + 1)) +done +echo "$s" >$d/depth257.json + +$JQ -- '.' $d/depth2.json # should work +if $JQ --depth 1 -- '.' $d/depth2.json; then + echo "setting --depth 1 did not crash when given deep input" + exit 1 +fi + +if $JQ -- '.' $d/depth257.json; then + echo "default depth of 256 did not crash when given deep input" + exit 1 +fi +$JQ --depth 257 -- '.' $d/depth257.json # should work +$JQ --depth 0 -- '.' $d/depth257.json # should work + exit 0