From b36ed3fe558d93f29f7accb925768049e562f93e Mon Sep 17 00:00:00 2001 From: Talion Date: Sun, 4 Aug 2024 07:58:44 -0400 Subject: [PATCH 1/7] add test cases --- .../zbf_yml_missing_chat | 109 ++++++++++++++++++ .../zbf_yml_missing_code | 13 +++ tests/core/test_salvage_correct_hunks.py | 8 ++ 3 files changed, 130 insertions(+) create mode 100644 tests/core/improve_function_test_cases/zbf_yml_missing_chat create mode 100644 tests/core/improve_function_test_cases/zbf_yml_missing_code diff --git a/tests/core/improve_function_test_cases/zbf_yml_missing_chat b/tests/core/improve_function_test_cases/zbf_yml_missing_chat new file mode 100644 index 0000000000..60b7eba9a6 --- /dev/null +++ b/tests/core/improve_function_test_cases/zbf_yml_missing_chat @@ -0,0 +1,109 @@ +To achieve the separation of environments (stage and local) and to use H2 DB for the local environment, we need to make the following changes: + +1. Create a new `application-local.yml` configuration file for the local environment. +2. Modify the existing `application.yml` to be used for the stage environment. +3. Update the `run.sh` script to handle both environments. + +Let's start with the changes: + +### 1. Create `application-local.yml` for the local environment + +```diff +--- /dev/null ++++ src/main/resources/application-local.yml +@@ -0,0 +1,13 @@ ++spring: ++ datasource: ++ url: jdbc:h2:mem:testdb ++ driver-class-name: org.h2.Driver ++ username: sa ++ password: password ++ h2: ++ console: ++ enabled: true ++ jpa: ++ hibernate: ++ ddl-auto: update ++ show-sql: true +``` + +### 2. Modify `application.yml` to be used for the stage environment + +```diff +--- src/main/resources/application.yml ++++ src/main/resources/application.yml +@@ -1,13 +1,13 @@ + spring: + datasource: +- url: jdbc:postgresql://:5432/ +- username: +- password: ++ url: jdbc:postgresql://stage-rds-endpoint:5432/stage_db ++ username: stage_user ++ password: stage_password + driver-class-name: org.postgresql.Driver + jpa: + hibernate: + ddl-auto: update + show-sql: true + properties: + hibernate: + format_sql: true +``` + + + +### 6. Update `application.yml` to include profile-specific configurations + +```diff +--- src/main/resources/application.yml ++++ src/main/resources/application.yml +@@ -1,13 +1,15 @@ + spring: + profiles: +- active: stage ++ active: stage + datasource: +- url: jdbc:postgresql://:5432/ +- username: +- password: ++ url: jdbc:postgresql://stage-rds-endpoint:5432/stage_db ++ username: stage_user ++ password: stage_password + driver-class-name: org.postgresql.Driver + jpa: + hibernate: + ddl-auto: update + show-sql: true + properties: + hibernate: + format_sql: true +``` + +### 7. Update `application-local.yml` to include profile-specific configurations + +```diff +--- src/main/resources/application-local.yml ++++ src/main/resources/application-local.yml +@@ -1,13 +1,15 @@ + spring: + profiles: +- active: local ++ active: local + datasource: +- url: jdbc:h2:mem:testdb +- driver-class-name: org.h2.Driver +- username: sa +- password: password ++ url: jdbc:h2:mem:testdb ++ driver-class-name: org.h2.Driver ++ username: sa ++ password: password + h2: + console: + enabled: true + jpa: + hibernate: + ddl-auto: update + show-sql: true +``` diff --git a/tests/core/improve_function_test_cases/zbf_yml_missing_code b/tests/core/improve_function_test_cases/zbf_yml_missing_code new file mode 100644 index 0000000000..577ae6c2e1 --- /dev/null +++ b/tests/core/improve_function_test_cases/zbf_yml_missing_code @@ -0,0 +1,13 @@ +spring: + datasource: + url: jdbc:postgresql://:5432/ + username: + password: + driver-class-name: org.postgresql.Driver + jpa: + hibernate: + ddl-auto: update + show-sql: true + properties: + hibernate: + format_sql: true diff --git a/tests/core/test_salvage_correct_hunks.py b/tests/core/test_salvage_correct_hunks.py index aeebd378c5..ed02a9f585 100644 --- a/tests/core/test_salvage_correct_hunks.py +++ b/tests/core/test_salvage_correct_hunks.py @@ -89,6 +89,14 @@ def test_theo_case(): print(updated_files["run.py"]) +def test_zbf_yml_missing(): + files = FilesDict({"application.yml": get_file_content("zbf_yml_missing_code")}) + updated_files, _ = salvage_correct_hunks( + message_builder("zbf_yml_missing_chat"), files, memory + ) + print(updated_files["application.yml"]) + + def test_clean_up_folder(clean_up_folder): # The folder should be deleted after the test is run assert True From 3867b01bc99d4adefdf45336f86c5292a57b94cb Mon Sep 17 00:00:00 2001 From: Talion Date: Sun, 4 Aug 2024 07:59:25 -0400 Subject: [PATCH 2/7] update preprompt to avoid sequential diffs on a single file --- gpt_engineer/preprompts/improve | 1 + 1 file changed, 1 insertion(+) diff --git a/gpt_engineer/preprompts/improve b/gpt_engineer/preprompts/improve index a810b42666..976fade688 100644 --- a/gpt_engineer/preprompts/improve +++ b/gpt_engineer/preprompts/improve @@ -10,6 +10,7 @@ Follow a language and framework appropriate best practice file naming convention Make sure that files contain all imports, types etc. The code should be fully functional. Make sure that code in different files are compatible with each other. Ensure to implement all code, if you are unsure, write a plausible implementation. Include module dependency or package manager dependency definition file. +Ensure to provide all changes in a single diff chunk per file to avoid multiple diffs on the same file. Before you finish, double check that all parts of the architecture is present in the files. When you are done, write finish with "this concludes a fully working implementation". From be4b75660ddafdaa7823409c5ad203e22e6f5715 Mon Sep 17 00:00:00 2001 From: Talion Date: Mon, 5 Aug 2024 08:52:50 -0400 Subject: [PATCH 3/7] functionality change: discard consecutive diffs and only keep the first --- gpt_engineer/core/chat_to_files.py | 15 ++++++++++----- tests/core/test_salvage_correct_hunks.py | 7 +++++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/gpt_engineer/core/chat_to_files.py b/gpt_engineer/core/chat_to_files.py index 7459bf3b38..86176c6cd5 100644 --- a/gpt_engineer/core/chat_to_files.py +++ b/gpt_engineer/core/chat_to_files.py @@ -26,8 +26,6 @@ from typing import Dict, Tuple -from regex import regex - from gpt_engineer.core.diff import ADD, REMOVE, RETAIN, Diff, Hunk from gpt_engineer.core.files_dict import FilesDict, file_to_lines_dict @@ -131,18 +129,25 @@ def parse_diffs(diff_string: str) -> dict: - dict: A dictionary of Diff objects keyed by filename. """ # Regex to match individual diff blocks - diff_block_pattern = regex.compile( + diff_block_pattern = re.compile( r"```.*?\n\s*?--- .*?\n\s*?\+\+\+ .*?\n(?:@@ .*? @@\n(?:[-+ ].*?\n)*?)*?```", re.DOTALL, ) diffs = {} try: - for block in diff_block_pattern.finditer(diff_string, timeout=1): + for block in diff_block_pattern.finditer(diff_string): diff_block = block.group() # Parse individual diff blocks and update the diffs dictionary - diffs.update(parse_diff_block(diff_block)) + diff = parse_diff_block(diff_block) + for filename, diff_obj in diff.items(): + if filename not in diffs: + diffs[filename] = diff_obj + else: + print( + f"\nMultiple diffs found for {filename}. Only the first one is kept." + ) except TimeoutError: print("gpt-engineer timed out while parsing git diff") diff --git a/tests/core/test_salvage_correct_hunks.py b/tests/core/test_salvage_correct_hunks.py index ed02a9f585..218ace422c 100644 --- a/tests/core/test_salvage_correct_hunks.py +++ b/tests/core/test_salvage_correct_hunks.py @@ -90,11 +90,14 @@ def test_theo_case(): def test_zbf_yml_missing(): - files = FilesDict({"application.yml": get_file_content("zbf_yml_missing_code")}) + files = FilesDict( + {"src/main/resources/application.yml": get_file_content("zbf_yml_missing_code")} + ) updated_files, _ = salvage_correct_hunks( message_builder("zbf_yml_missing_chat"), files, memory ) - print(updated_files["application.yml"]) + print(updated_files["src/main/resources/application.yml"]) + print(updated_files["src/main/resources/application-local.yml"]) def test_clean_up_folder(clean_up_folder): From a7b3ea8d01c757c7be2efb276cdafcff27c6c11f Mon Sep 17 00:00:00 2001 From: Talion Date: Mon, 5 Aug 2024 11:26:07 -0400 Subject: [PATCH 4/7] move the single diff logic to file_format_diff --- gpt_engineer/preprompts/file_format_diff | 4 +++- gpt_engineer/preprompts/improve | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/gpt_engineer/preprompts/file_format_diff b/gpt_engineer/preprompts/file_format_diff index bcf34e2921..486e76a2e9 100644 --- a/gpt_engineer/preprompts/file_format_diff +++ b/gpt_engineer/preprompts/file_format_diff @@ -30,10 +30,12 @@ Example of a git diff creating a new file: + +Last example line ``` + +RULES: -A program will apply the diffs you generate exactly to the code, so diffs must be precise and unambiguous! -Every diff must be fenced with triple backtick ```. -The file names at the beginning of a diff, (lines starting with --- and +++) is the relative path to the file before and after the diff. -LINES TO BE REMOVED (starting with single -) AND LINES TO BE RETAIN (no starting symbol) HAVE TO REPLICATE THE DIFFED HUNK OF THE CODE EXACTLY LINE BY LINE. KEEP THE NUMBER OF RETAIN LINES SMALL IF POSSIBLE. -EACH LINE IN THE SOURCE FILES STARTS WITH A LINE NUMBER, WHICH IS NOT PART OF THE SOURCE CODE. NEVER TRANSFER THESE LINE NUMBERS TO THE DIFF HUNKS. -AVOID STARTING A HUNK WITH AN EMPTY LINE. --ALL DIFFS WILL BE APPLIED SIMULTANEOUSLY, AVOID SEQUENTIAL HUNKS. +-ENSURE ALL CHANGES ARE PROVIDED IN A SINGLE DIFF CHUNK PER FILE TO PREVENT MULTIPLE DIFFS ON THE SAME FILE. diff --git a/gpt_engineer/preprompts/improve b/gpt_engineer/preprompts/improve index 976fade688..a810b42666 100644 --- a/gpt_engineer/preprompts/improve +++ b/gpt_engineer/preprompts/improve @@ -10,7 +10,6 @@ Follow a language and framework appropriate best practice file naming convention Make sure that files contain all imports, types etc. The code should be fully functional. Make sure that code in different files are compatible with each other. Ensure to implement all code, if you are unsure, write a plausible implementation. Include module dependency or package manager dependency definition file. -Ensure to provide all changes in a single diff chunk per file to avoid multiple diffs on the same file. Before you finish, double check that all parts of the architecture is present in the files. When you are done, write finish with "this concludes a fully working implementation". From 951404ebcca24bb8a1bc56e2c5da5c1c4216177a Mon Sep 17 00:00:00 2001 From: Talion Date: Mon, 5 Aug 2024 11:26:31 -0400 Subject: [PATCH 5/7] add test for the discarding consecutive diffs --- tests/core/test_chat_to_files.py | 56 ++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tests/core/test_chat_to_files.py b/tests/core/test_chat_to_files.py index 2762219e33..df5e867de1 100644 --- a/tests/core/test_chat_to_files.py +++ b/tests/core/test_chat_to_files.py @@ -147,6 +147,32 @@ class Calculator: Conclusion: *** """ +single_diff = """ +``` +--- a/file1.txt ++++ a/file1.txt +@@ -1,3 +1,3 @@ +-old line ++new line +``` +""" +multi_diff = """ +``` +--- a/file1.txt ++++ a/file1.txt +@@ -1,3 +1,3 @@ +-old line ++new line +``` +``` +--- a/file1.txt ++++ a/file1.txt +@@ -2,3 +2,3 @@ +-another old line ++another new line +``` +""" + # Single function tests def test_basic_similarity(): @@ -289,6 +315,36 @@ def parse_chats_with_regex( return diff_content, code_content, diffs +def capture_print_output(func): + import io + import sys + + captured_output = io.StringIO() + sys.stdout = captured_output + func() + sys.stdout = sys.__stdout__ + return captured_output + + +def test_single_diff(): + diffs = parse_diffs(single_diff) + correct_diff = "\n".join(single_diff.strip().split("\n")[1:-1]) + assert diffs["a/file1.txt"].diff_to_string() == correct_diff + + +def test_multi_diff_discard(): + captured_output = capture_print_output(lambda: parse_diffs(multi_diff)) + diffs = parse_diffs(multi_diff) + correct_diff = "\n".join(multi_diff.strip().split("\n")[1:8]).replace( + "```\n```", "" + ) + assert ( + "Multiple diffs found for a/file1.txt. Only the first one is kept." + in captured_output.getvalue() + ) + assert diffs["a/file1.txt"].diff_to_string().strip() == correct_diff.strip() + + # test parse diff def test_controller_diff(): parse_chats_with_regex("controller_chat", "controller_code") From 1401ba725b5035846a64ca318719ab89d9fac2b2 Mon Sep 17 00:00:00 2001 From: Talion Date: Mon, 5 Aug 2024 12:31:41 -0400 Subject: [PATCH 6/7] change back to regex from re --- gpt_engineer/core/chat_to_files.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gpt_engineer/core/chat_to_files.py b/gpt_engineer/core/chat_to_files.py index 86176c6cd5..0b4c28f6f9 100644 --- a/gpt_engineer/core/chat_to_files.py +++ b/gpt_engineer/core/chat_to_files.py @@ -26,6 +26,8 @@ from typing import Dict, Tuple +from regex import regex + from gpt_engineer.core.diff import ADD, REMOVE, RETAIN, Diff, Hunk from gpt_engineer.core.files_dict import FilesDict, file_to_lines_dict @@ -129,7 +131,7 @@ def parse_diffs(diff_string: str) -> dict: - dict: A dictionary of Diff objects keyed by filename. """ # Regex to match individual diff blocks - diff_block_pattern = re.compile( + diff_block_pattern = regex.compile( r"```.*?\n\s*?--- .*?\n\s*?\+\+\+ .*?\n(?:@@ .*? @@\n(?:[-+ ].*?\n)*?)*?```", re.DOTALL, ) From acad5fa2e78581bf8005b35d33b8c7fbe5bbe85b Mon Sep 17 00:00:00 2001 From: Talion Date: Mon, 5 Aug 2024 12:33:43 -0400 Subject: [PATCH 7/7] restore timeout to avoid conflicts --- gpt_engineer/core/chat_to_files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpt_engineer/core/chat_to_files.py b/gpt_engineer/core/chat_to_files.py index 0b4c28f6f9..e9a5bc7443 100644 --- a/gpt_engineer/core/chat_to_files.py +++ b/gpt_engineer/core/chat_to_files.py @@ -138,7 +138,7 @@ def parse_diffs(diff_string: str) -> dict: diffs = {} try: - for block in diff_block_pattern.finditer(diff_string): + for block in diff_block_pattern.finditer(diff_string, timeout=1): diff_block = block.group() # Parse individual diff blocks and update the diffs dictionary