From 9ad3ebaf978a6305589ea6a06192b1790e7874b6 Mon Sep 17 00:00:00 2001 From: Lynn Date: Mon, 16 Oct 2023 00:43:05 -0700 Subject: [PATCH 1/5] WIP sql stuff --- .github/workflows/config.yml | 2 +- README.md | 10 ++-- data/sql_input_0.sql | 5 ++ data/sql_output_0.json | 3 ++ snippets/python/sql_test.py | 10 ++++ src/python/sql_test.py | 25 ++++++++++ tasks.py | 91 +++++++++++++++++------------------- 7 files changed, 94 insertions(+), 52 deletions(-) create mode 100644 data/sql_input_0.sql create mode 100644 data/sql_output_0.json create mode 100644 snippets/python/sql_test.py create mode 100644 src/python/sql_test.py diff --git a/.github/workflows/config.yml b/.github/workflows/config.yml index e20b43d..aa10317 100644 --- a/.github/workflows/config.yml +++ b/.github/workflows/config.yml @@ -28,4 +28,4 @@ jobs: uses: actions/checkout@v3 - run: pip install invoke pyyaml - - run: invoke test ${{ matrix.language }} any + - run: invoke test ${{ matrix.language }} any any diff --git a/README.md b/README.md index eaad6c0..2ccf097 100644 --- a/README.md +++ b/README.md @@ -18,10 +18,12 @@ pip install invoke pyyaml Then run any of the algos: ```bash -invoke test $language $script -invoke test python insertion_sort -invoke test python any # "any" is a wildcard keyword -invoke test rust selection_sort +invoke test $language $script $data_index +invoke test python insertion_sort any +invoke test python any any +invoke test rust selection_sort any +invoke test python sql any +invoke test python sql 0 ``` You will get output like so: diff --git a/data/sql_input_0.sql b/data/sql_input_0.sql new file mode 100644 index 0000000..79ce2da --- /dev/null +++ b/data/sql_input_0.sql @@ -0,0 +1,5 @@ +-- https://cratedb.com/docs/sql-99/en/latest/chapters/01.html +-- https://www.postgresql.org/docs/16/sql-createtable.html +-- https://www.postgresql.org/docs/16/sql-select.html +CREATE TABLE city (); +SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'; diff --git a/data/sql_output_0.json b/data/sql_output_0.json new file mode 100644 index 0000000..eaa5d04 --- /dev/null +++ b/data/sql_output_0.json @@ -0,0 +1,3 @@ +{ + "table_name": ["city"] +} diff --git a/snippets/python/sql_test.py b/snippets/python/sql_test.py new file mode 100644 index 0000000..5304b97 --- /dev/null +++ b/snippets/python/sql_test.py @@ -0,0 +1,10 @@ + +import json + + +def run_sql(input_sql: list[str]) -> list[str]: + output = [] + output = {"table_name": ["city"]} + + return [json.dumps(output)] + diff --git a/src/python/sql_test.py b/src/python/sql_test.py new file mode 100644 index 0000000..3ac0af7 --- /dev/null +++ b/src/python/sql_test.py @@ -0,0 +1,25 @@ +import helpers + + +# pylint: disable=wrong-import-order + + +######################## +# business logic start # +######################## + + +import json + + +def run_sql(input_sql: list[str]) -> list[str]: + output = {"table_name": ["city"]} + return [json.dumps(output)] + + +###################### +# business logic end # +###################### + +if __name__ == "__main__": + helpers.run(run_sql) diff --git a/tasks.py b/tasks.py index 4a16873..982d7e3 100644 --- a/tasks.py +++ b/tasks.py @@ -65,7 +65,7 @@ class TestRunnerContexts: snippet_start_text = "business logic start" snippet_end_text = "business logic end" - def __init__(self, language) -> None: + def __init__(self, language, input_data_index) -> None: # get the language specific config with open(f"{self.base_directory}/config.yml", "r", encoding="utf-8") as obj: data = obj.read() @@ -79,18 +79,24 @@ def __init__(self, language) -> None: script_type = script_path.split("/")[-1].split("_")[0] # ignore helpers, metadata files, etc - if config.get("ignoreFiles") and script_path.split("/")[-1] in config.get( - "ignoreFiles" - ): + if config.get("ignoreFiles") and script_path.split("/")[-1] in config.get("ignoreFiles"): continue # ignore directories, generally compiled code if not os.path.isfile(script_path): continue - for input_file_path in glob.glob( - f"{self.data_folder_path}/{script_type}_input_*.txt" - ): + for input_file_path in glob.glob(f"{self.data_folder_path}/{script_type}_input_*"): + # given "data/sort_input_1.txt" => return "1" + input_file_index = input_file_path.split("_")[-1].split(".")[0] + + # skip this input file if it's not the one we want to run + if inputs_are_truthy_and_different( + clean_string(input_file_index), + clean_string(input_data_index), + ): + continue + # generate a context for this particular script if ctx := self.generate(language, config, script_path, input_file_path): self.ctxs.append(ctx) @@ -114,17 +120,28 @@ def generate(self, language, config, script_path, input_file_path): # this path is used in various places later script_relative_path = f"./src/{language}/{script_name_with_file_type}" - # get the path of the file that's been prepared in advance - # and has the output we would be expecting from out script - prepared_file_path = input_file_path.replace("input", "output") + # given "./data/sql_input_1.txt" => return "data/sql_output_1" + partial_output_file_path = "." + input_file_path.replace("input", "output").split(".")[1] + + # get the actual output file path + potentional_output_file_paths = glob.glob(f"{partial_output_file_path}.*") + if len(potentional_output_file_paths) == 0: + raise Exception(f"could not find output file for input file {input_file_path}") + if len(potentional_output_file_paths) > 1: + raise Exception( + f"Found multiple output files for a single input file: {potentional_output_file_paths}. " + f"The input file was {input_file_path}." + ) + prepared_file_path = potentional_output_file_paths[0] # given "data/sort_input_1.txt" => return "1" prepared_file_index = prepared_file_path.split("_")[-1].split(".")[0] + # given "data/sql_output_0.json" => return "json" + prepared_file_type = prepared_file_path.split(".")[-1] + # our scripts write their output files to this path - script_output_file_name = ( - f"output_{language}_{script_name}_{prepared_file_index}.txt" - ) + script_output_file_name = f"output_{language}_{script_name}_{prepared_file_index}.{prepared_file_type}" script_output_file_path = f"{self.data_folder_path}/{script_output_file_name}" # script_invoker is command that we run in a subprocess to invoke our script @@ -194,13 +211,9 @@ def generate(self, language, config, script_path, input_file_path): ) snippet_end_line = idx - snippet_end_line_offset if snippet_start_line == 0: - raise Exception( - f'could not find the text "{self.snippet_start_text}" in {script_relative_path}' - ) + raise Exception(f'could not find the text "{self.snippet_start_text}" in {script_relative_path}') if snippet_end_line == 0: - raise Exception( - f'could not find the text "{self.snippet_end_text}" in {script_relative_path}' - ) + raise Exception(f'could not find the text "{self.snippet_end_text}" in {script_relative_path}') # return the fully constructed context return TestRunnerContext( @@ -226,9 +239,9 @@ class TestRunner: invoke: invoke.Context ctxs: TestRunnerContexts - def __init__(self, _invoke, language) -> None: + def __init__(self, _invoke, language, input_data_index) -> None: self.invoke = _invoke - self.ctxs = TestRunnerContexts(language) + self.ctxs = TestRunnerContexts(language, input_data_index) def run_tests(self, input_script): # run every test @@ -266,41 +279,29 @@ def run_tests(self, input_script): # check if the script invoke failed if output.exited != 0: self.set_success_status(False) - print( - f"\tšŸ”“ {ctx.script_relative_path} on {ctx.input_file_path} failed, reason:" - ) + print(f"\tšŸ”“ {ctx.script_relative_path} on {ctx.input_file_path} failed, reason:") print(f'\t\t the exit code "{output.exited}" was not 0') # check if the output file was created if not os.path.exists(ctx.script_output_file_path): self.set_success_status(False) - print( - f"\tšŸ”“ {ctx.script_relative_path} on {ctx.input_file_path} failed, reason:" - ) - print( - f"\t\t the output {ctx.script_output_file_name} file was not created" - ) + print(f"\tšŸ”“ {ctx.script_relative_path} on {ctx.input_file_path} failed, reason:") + print(f"\t\t the output {ctx.script_output_file_name} file was not created") # check if the output file matches the prepared file if os.path.exists(ctx.script_output_file_path) and filecmp.cmp( ctx.prepared_file_path, ctx.script_output_file_path ): self.set_success_status(True) - print( - f"\tšŸŸ¢ {ctx.script_relative_path} on {ctx.input_file_path} succeeded" - ) + print(f"\tšŸŸ¢ {ctx.script_relative_path} on {ctx.input_file_path} succeeded") # check if the output file does not match the prepared file if os.path.exists(ctx.script_output_file_path) and not filecmp.cmp( ctx.prepared_file_path, ctx.script_output_file_path ): self.set_success_status(False) - print( - f"\tšŸ”“ {ctx.script_relative_path} on {ctx.input_file_path} failed, reason:" - ) - print( - f"\t\t output file {ctx.script_output_file_name} has does not match the prepared file" - ) + print(f"\tšŸ”“ {ctx.script_relative_path} on {ctx.input_file_path} failed, reason:") + print(f"\t\t output file {ctx.script_output_file_name} has does not match the prepared file") # catch any errors, mark the test as failed, and continue except Exception as exc: @@ -325,9 +326,7 @@ def generate_snippets(self, input_script): "r", encoding="utf-8", ) as reader: - snippet = reader.readlines()[ - ctx.snippet_start_line : ctx.snippet_end_line - ] + snippet = reader.readlines()[ctx.snippet_start_line : ctx.snippet_end_line] # write the snippet with open( @@ -347,9 +346,7 @@ def generate_snippets(self, input_script): # Check if there are unsaved changes on the snippets. if output.exited != 0: self.set_success_status(False) - print( - f"šŸ”“ snippets/{ctx.language}/{ctx.script_name_with_file_type} has uncommitted changes" - ) + print(f"šŸ”“ snippets/{ctx.language}/{ctx.script_name_with_file_type} has uncommitted changes") # catch any errors, mark the test as failed, and continue except Exception as exc: @@ -383,10 +380,10 @@ def show_results(self): @invoke.task -def test(ctx: invoke.Context, language, input_script): +def test(ctx: invoke.Context, language, input_script, input_data_index): # language is the programming language to run scripts in # input_script is the name of a script you want to run - runner = TestRunner(ctx, language) + runner = TestRunner(ctx, language, input_data_index) runner.run_tests(input_script) runner.generate_snippets(input_script) runner.show_results() From 362defdc79476f3339b5d27ed63106f2468dc748 Mon Sep 17 00:00:00 2001 From: Lynn Date: Mon, 16 Oct 2023 00:43:38 -0700 Subject: [PATCH 2/5] syntax --- snippets/python/sql_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/snippets/python/sql_test.py b/snippets/python/sql_test.py index 5304b97..1a07517 100644 --- a/snippets/python/sql_test.py +++ b/snippets/python/sql_test.py @@ -3,8 +3,6 @@ def run_sql(input_sql: list[str]) -> list[str]: - output = [] output = {"table_name": ["city"]} - return [json.dumps(output)] From 8c51b41c603ddb695ba7bed68a893f21f7953dca Mon Sep 17 00:00:00 2001 From: Lynn Date: Mon, 16 Oct 2023 01:19:04 -0700 Subject: [PATCH 3/5] json comp --- tasks.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/tasks.py b/tasks.py index 982d7e3..db3215d 100644 --- a/tasks.py +++ b/tasks.py @@ -49,6 +49,7 @@ class TestRunnerContext: script_output_file_name: str input_file_path: str prepared_file_path: str + prepared_file_type: str snippet_start_line: int snippet_end_line: int @@ -228,6 +229,7 @@ def generate(self, language, config, script_path, input_file_path): script_output_file_name=script_output_file_name, input_file_path=input_file_path, prepared_file_path=prepared_file_path, + prepared_file_type=prepared_file_type, snippet_start_line=snippet_start_line, snippet_end_line=snippet_end_line, ) @@ -287,18 +289,28 @@ def run_tests(self, input_script): self.set_success_status(False) print(f"\tšŸ”“ {ctx.script_relative_path} on {ctx.input_file_path} failed, reason:") print(f"\t\t the output {ctx.script_output_file_name} file was not created") + continue + + # check if the output file matches the prepared file, when both files are json + if ctx.prepared_file_type == "json": + with open(ctx.prepared_file_path, "r", encoding="utf-8") as reader: + prepared_file_data = json.load(reader) + with open(ctx.script_output_file_path, "r", encoding="utf-8") as reader: + script_output_file_data = json.load(reader) + if prepared_file_data == script_output_file_data: + self.set_success_status(True) + print(f"\tšŸŸ¢ {ctx.script_relative_path} on {ctx.input_file_path} succeeded") + else: + self.set_success_status(False) + print(f"\tšŸ”“ {ctx.script_relative_path} on {ctx.input_file_path} failed, reason:") + print(f"\t\t output file {ctx.script_output_file_name} has does not match the prepared file") + continue # check if the output file matches the prepared file - if os.path.exists(ctx.script_output_file_path) and filecmp.cmp( - ctx.prepared_file_path, ctx.script_output_file_path - ): + if filecmp.cmp(ctx.prepared_file_path, ctx.script_output_file_path): self.set_success_status(True) print(f"\tšŸŸ¢ {ctx.script_relative_path} on {ctx.input_file_path} succeeded") - - # check if the output file does not match the prepared file - if os.path.exists(ctx.script_output_file_path) and not filecmp.cmp( - ctx.prepared_file_path, ctx.script_output_file_path - ): + else: self.set_success_status(False) print(f"\tšŸ”“ {ctx.script_relative_path} on {ctx.input_file_path} failed, reason:") print(f"\t\t output file {ctx.script_output_file_name} has does not match the prepared file") From e7747abc057ee6eebb6ccd32c6a6108f4ea3758c Mon Sep 17 00:00:00 2001 From: Lynn Date: Thu, 19 Oct 2023 19:34:08 -0700 Subject: [PATCH 4/5] thing thats kind sql is working now --- .github/workflows/config.yml | 2 +- data/sql_input_1.sql | 6 +++ data/sql_output_1.json | 3 ++ snippets/python/sql_test.py | 9 +++- src/python/sql_test.py | 82 ++++++++++++++++++++++++++++++++++-- tasks.py | 18 ++++---- 6 files changed, 105 insertions(+), 15 deletions(-) create mode 100644 data/sql_input_1.sql create mode 100644 data/sql_output_1.json diff --git a/.github/workflows/config.yml b/.github/workflows/config.yml index aa10317..887c634 100644 --- a/.github/workflows/config.yml +++ b/.github/workflows/config.yml @@ -28,4 +28,4 @@ jobs: uses: actions/checkout@v3 - run: pip install invoke pyyaml - - run: invoke test ${{ matrix.language }} any any + - run: invoke test ${{ matrix.language }} any any --snippets diff --git a/data/sql_input_1.sql b/data/sql_input_1.sql new file mode 100644 index 0000000..bb65967 --- /dev/null +++ b/data/sql_input_1.sql @@ -0,0 +1,6 @@ +-- https://cratedb.com/docs/sql-99/en/latest/chapters/01.html +-- https://www.postgresql.org/docs/16/sql-createtable.html +-- https://www.postgresql.org/docs/16/sql-select.html +CREATE TABLE city (); +CREATE TABLE town (); +SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'; diff --git a/data/sql_output_1.json b/data/sql_output_1.json new file mode 100644 index 0000000..86970c5 --- /dev/null +++ b/data/sql_output_1.json @@ -0,0 +1,3 @@ +{ + "table_name": ["city", "town"] +} diff --git a/snippets/python/sql_test.py b/snippets/python/sql_test.py index 1a07517..7bac216 100644 --- a/snippets/python/sql_test.py +++ b/snippets/python/sql_test.py @@ -3,6 +3,13 @@ def run_sql(input_sql: list[str]) -> list[str]: - output = {"table_name": ["city"]} + output = {} + lines = [] + + for line in input_sql: + if line.startswith("--"): + continue + lines.append(line) + return [json.dumps(output)] diff --git a/src/python/sql_test.py b/src/python/sql_test.py index 3ac0af7..7f41e13 100644 --- a/src/python/sql_test.py +++ b/src/python/sql_test.py @@ -12,9 +12,83 @@ import json -def run_sql(input_sql: list[str]) -> list[str]: - output = {"table_name": ["city"]} - return [json.dumps(output)] +class SQL: + data: dict = {} + + def __init__(self) -> None: + self.data = {} + + def information_schema_tables(self) -> list[dict]: + return [data["metadata"] for data in self.data.values()] + + def create_table(self, *args, table_schema="public") -> dict: + table_name = args[2] + if not self.data.get(table_name): + self.data[table_name] = { + "metadata": { + "table_name": table_name, + "table_schema": table_schema, + }, + } + return {} + + create_table.sql = "CREATE TABLE" + + def select(self, *args) -> dict: + output = {} + + from_index = None + where_index = None + for i, arg in enumerate(args): + if arg == "FROM": + from_index = i + if arg == "WHERE": + where_index = i + + # get select keys by getting the slice of args before FROM + select_keys = " ".join(args[1:from_index]).split(",") + + # get where keys by getting the slice of args after WHERE + from_value = args[from_index + 1] + + # consider "information_schema.tables" a special case until + # we figure out why its so different from the others + if from_value == "information_schema.tables": + target = self.information_schema_tables() + + # fmt: off + output = { + key: [ + value for data in target + for key, value in data.items() + if key in select_keys + ] + for key in select_keys + } + # fmt: on + + return output + + select.sql = "SELECT" + + sql_map = { + create_table.sql: create_table, + select.sql: select, + } + + def run(self, input_sql: list[str]) -> list[str]: + output = {} + + for line in input_sql: + if not line.startswith("--"): + words = line.split(" ") + for i in reversed(range(len(words))): + key = " ".join(words[:i]) + if func := self.sql_map.get(key): + output = func(self, *words) + break + + return [json.dumps(output)] ###################### @@ -22,4 +96,4 @@ def run_sql(input_sql: list[str]) -> list[str]: ###################### if __name__ == "__main__": - helpers.run(run_sql) + helpers.run(SQL().run) diff --git a/tasks.py b/tasks.py index db3215d..f104be7 100644 --- a/tasks.py +++ b/tasks.py @@ -1,4 +1,5 @@ # builtin packages +import unittest import filecmp import glob import os @@ -163,6 +164,8 @@ def generate(self, language, config, script_path, input_file_path): docker_run_test_list = [ "docker", "run", + "--rm", + f"--name={language}", f"--volume={self.base_directory}:/workdir", "-w=/workdir", ] @@ -297,13 +300,9 @@ def run_tests(self, input_script): prepared_file_data = json.load(reader) with open(ctx.script_output_file_path, "r", encoding="utf-8") as reader: script_output_file_data = json.load(reader) - if prepared_file_data == script_output_file_data: - self.set_success_status(True) - print(f"\tšŸŸ¢ {ctx.script_relative_path} on {ctx.input_file_path} succeeded") - else: - self.set_success_status(False) - print(f"\tšŸ”“ {ctx.script_relative_path} on {ctx.input_file_path} failed, reason:") - print(f"\t\t output file {ctx.script_output_file_name} has does not match the prepared file") + unittest.TestCase().assertDictEqual(prepared_file_data, script_output_file_data) + self.set_success_status(True) + print(f"\tšŸŸ¢ {ctx.script_relative_path} on {ctx.input_file_path} succeeded") continue # check if the output file matches the prepared file @@ -392,12 +391,13 @@ def show_results(self): @invoke.task -def test(ctx: invoke.Context, language, input_script, input_data_index): +def test(ctx: invoke.Context, language, input_script, input_data_index, snippets=False): # language is the programming language to run scripts in # input_script is the name of a script you want to run runner = TestRunner(ctx, language, input_data_index) runner.run_tests(input_script) - runner.generate_snippets(input_script) + if snippets: + runner.generate_snippets(input_script) runner.show_results() From ee9db61e41de3cc1d357bb314364540770e4d1cc Mon Sep 17 00:00:00 2001 From: Lynn Date: Thu, 19 Oct 2023 19:36:36 -0700 Subject: [PATCH 5/5] snips --- snippets/python/sql_test.py | 83 +++++++++++++++++++++++++++++++++---- 1 file changed, 75 insertions(+), 8 deletions(-) diff --git a/snippets/python/sql_test.py b/snippets/python/sql_test.py index 7bac216..b9d290e 100644 --- a/snippets/python/sql_test.py +++ b/snippets/python/sql_test.py @@ -2,14 +2,81 @@ import json -def run_sql(input_sql: list[str]) -> list[str]: - output = {} - lines = [] +class SQL: + data: dict = {} - for line in input_sql: - if line.startswith("--"): - continue - lines.append(line) + def __init__(self) -> None: + self.data = {} - return [json.dumps(output)] + def information_schema_tables(self) -> list[dict]: + return [data["metadata"] for data in self.data.values()] + + def create_table(self, *args, table_schema="public") -> dict: + table_name = args[2] + if not self.data.get(table_name): + self.data[table_name] = { + "metadata": { + "table_name": table_name, + "table_schema": table_schema, + }, + } + return {} + + create_table.sql = "CREATE TABLE" + + def select(self, *args) -> dict: + output = {} + + from_index = None + where_index = None + for i, arg in enumerate(args): + if arg == "FROM": + from_index = i + if arg == "WHERE": + where_index = i + + # get select keys by getting the slice of args before FROM + select_keys = " ".join(args[1:from_index]).split(",") + + # get where keys by getting the slice of args after WHERE + from_value = args[from_index + 1] + + # consider "information_schema.tables" a special case until + # we figure out why its so different from the others + if from_value == "information_schema.tables": + target = self.information_schema_tables() + + # fmt: off + output = { + key: [ + value for data in target + for key, value in data.items() + if key in select_keys + ] + for key in select_keys + } + # fmt: on + + return output + + select.sql = "SELECT" + + sql_map = { + create_table.sql: create_table, + select.sql: select, + } + + def run(self, input_sql: list[str]) -> list[str]: + output = {} + + for line in input_sql: + if not line.startswith("--"): + words = line.split(" ") + for i in reversed(range(len(words))): + key = " ".join(words[:i]) + if func := self.sql_map.get(key): + output = func(self, *words) + break + + return [json.dumps(output)]