From 7fa07e7c746833b9b9cac62195314c17d1ec1e5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?T=C5=91berling=20J=C3=A1nos?=
 <jtoberling@localhost.localdomain>
Date: Sat, 27 Jul 2024 21:24:40 +0200
Subject: [PATCH] Diff regexp timeout as command line parameter

---
 gpt_engineer/applications/cli/cli_agent.py |  9 ++++++++-
 gpt_engineer/applications/cli/main.py      |  9 ++++++++-
 gpt_engineer/core/chat_to_files.py         |  4 ++--
 gpt_engineer/core/default/steps.py         | 23 ++++++++++++----------
 gpt_engineer/tools/custom_steps.py         |  3 ++-
 tests/applications/cli/test_main.py        | 15 ++++++++++++++
 6 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/gpt_engineer/applications/cli/cli_agent.py b/gpt_engineer/applications/cli/cli_agent.py
index 85933378f3..daae9addaf 100644
--- a/gpt_engineer/applications/cli/cli_agent.py
+++ b/gpt_engineer/applications/cli/cli_agent.py
@@ -109,6 +109,7 @@ def with_default_config(
         improve_fn: ImproveType = improve_fn,
         process_code_fn: CodeProcessor = execute_entrypoint,
         preprompts_holder: PrepromptsHolder = None,
+        diff_timeout=3,
     ):
         """
         Creates a new instance of CliAgent with default configurations for memory, execution environment,
@@ -186,6 +187,7 @@ def improve(
         files_dict: FilesDict,
         prompt: Prompt,
         execution_command: Optional[str] = None,
+        diff_timeout=3,
     ) -> FilesDict:
         """
         Improves an existing piece of code using the AI and step bundle based on the provided prompt.
@@ -206,7 +208,12 @@ def improve(
         """
 
         files_dict = self.improve_fn(
-            self.ai, prompt, files_dict, self.memory, self.preprompts_holder
+            self.ai,
+            prompt,
+            files_dict,
+            self.memory,
+            self.preprompts_holder,
+            diff_timeout=diff_timeout,
         )
         # entrypoint = gen_entrypoint(
         #     self.ai, prompt, files_dict, self.memory, self.preprompts_holder
diff --git a/gpt_engineer/applications/cli/main.py b/gpt_engineer/applications/cli/main.py
index c8ac7717da..5a0c4135b7 100644
--- a/gpt_engineer/applications/cli/main.py
+++ b/gpt_engineer/applications/cli/main.py
@@ -373,6 +373,11 @@ def main(
         "--sysinfo",
         help="Output system information for debugging",
     ),
+    diff_timeout: int = typer.Option(
+        3,
+        "--diff_timeout",
+        help="Diff regexp timeout. Default: 3. Increase if regexp search timeouts.",
+    ),
 ):
     """
     The main entry point for the CLI tool that generates or improves a project.
@@ -517,7 +522,9 @@ def main(
             if is_linting:
                 files_dict_before = files.linting(files_dict_before)
 
-            files_dict = handle_improve_mode(prompt, agent, memory, files_dict_before)
+            files_dict = handle_improve_mode(
+                prompt, agent, memory, files_dict_before, diff_timeout=diff_timeout
+            )
             if not files_dict or files_dict_before == files_dict:
                 print(
                     f"No changes applied. Could you please upload the debug_log_file.txt in {memory.path}/logs folder in a github issue?"
diff --git a/gpt_engineer/core/chat_to_files.py b/gpt_engineer/core/chat_to_files.py
index fb31c03a55..08b1397b7a 100644
--- a/gpt_engineer/core/chat_to_files.py
+++ b/gpt_engineer/core/chat_to_files.py
@@ -120,7 +120,7 @@ def apply_diffs(diffs: Dict[str, Diff], files: FilesDict) -> FilesDict:
     return files
 
 
-def parse_diffs(diff_string: str) -> dict:
+def parse_diffs(diff_string: str, diff_timeout=3) -> dict:
     """
     Parses a diff string in the unified git diff format.
 
@@ -138,7 +138,7 @@ def parse_diffs(diff_string: str) -> dict:
 
     diffs = {}
     try:
-        for block in diff_block_pattern.finditer(diff_string, timeout=10):
+        for block in diff_block_pattern.finditer(diff_string, timeout=diff_timeout):
             diff_block = block.group()
 
             # Parse individual diff blocks and update the diffs dictionary
diff --git a/gpt_engineer/core/default/steps.py b/gpt_engineer/core/default/steps.py
index d778948b65..6b46263424 100644
--- a/gpt_engineer/core/default/steps.py
+++ b/gpt_engineer/core/default/steps.py
@@ -274,6 +274,7 @@ def improve_fn(
     files_dict: FilesDict,
     memory: BaseMemory,
     preprompts_holder: PrepromptsHolder,
+    diff_timeout=3,
 ) -> FilesDict:
     """
     Improves the code based on user input and returns the updated files.
@@ -308,14 +309,16 @@ def improve_fn(
         DEBUG_LOG_FILE,
         "UPLOADED FILES:\n" + files_dict.to_log() + "\nPROMPT:\n" + prompt.text,
     )
-    return _improve_loop(ai, files_dict, memory, messages)
+    return _improve_loop(ai, files_dict, memory, messages, diff_timeout=diff_timeout)
 
 
 def _improve_loop(
-    ai: AI, files_dict: FilesDict, memory: BaseMemory, messages: List
+    ai: AI, files_dict: FilesDict, memory: BaseMemory, messages: List, diff_timeout=3
 ) -> FilesDict:
     messages = ai.next(messages, step_name=curr_fn())
-    files_dict, errors = salvage_correct_hunks(messages, files_dict, memory)
+    files_dict, errors = salvage_correct_hunks(
+        messages, files_dict, memory, diff_timeout=diff_timeout
+    )
 
     retries = 0
     while errors and retries < MAX_EDIT_REFINEMENT_STEPS:
@@ -327,21 +330,21 @@ def _improve_loop(
             )
         )
         messages = ai.next(messages, step_name=curr_fn())
-        files_dict, errors = salvage_correct_hunks(messages, files_dict, memory)
+        files_dict, errors = salvage_correct_hunks(
+            messages, files_dict, memory, diff_timeout
+        )
         retries += 1
 
     return files_dict
 
 
 def salvage_correct_hunks(
-    messages: List,
-    files_dict: FilesDict,
-    memory: BaseMemory,
+    messages: List, files_dict: FilesDict, memory: BaseMemory, diff_timeout=3
 ) -> tuple[FilesDict, List[str]]:
     error_messages = []
     ai_response = messages[-1].content.strip()
 
-    diffs = parse_diffs(ai_response)
+    diffs = parse_diffs(ai_response, diff_timeout=diff_timeout)
     # validate and correct diffs
 
     for _, diff in diffs.items():
@@ -370,13 +373,13 @@ def flush(self):
             file.flush()
 
 
-def handle_improve_mode(prompt, agent, memory, files_dict):
+def handle_improve_mode(prompt, agent, memory, files_dict, diff_timeout=3):
     captured_output = io.StringIO()
     old_stdout = sys.stdout
     sys.stdout = Tee(sys.stdout, captured_output)
 
     try:
-        files_dict = agent.improve(files_dict, prompt)
+        files_dict = agent.improve(files_dict, prompt, diff_timeout=diff_timeout)
     except Exception as e:
         print(
             f"Error while improving the project: {e}\nCould you please upload the debug_log_file.txt in {memory.path}/logs folder to github?\nFULL STACK TRACE:\n"
diff --git a/gpt_engineer/tools/custom_steps.py b/gpt_engineer/tools/custom_steps.py
index 827fbb6850..8e4f7cb930 100644
--- a/gpt_engineer/tools/custom_steps.py
+++ b/gpt_engineer/tools/custom_steps.py
@@ -44,6 +44,7 @@ def self_heal(
     prompt: Prompt = None,
     preprompts_holder: PrepromptsHolder = None,
     memory: BaseMemory = None,
+    diff_timeout=3,
 ) -> FilesDict:
     """
     Attempts to execute the code from the entrypoint and if it fails, sends the error output back to the AI with instructions to fix.
@@ -111,7 +112,7 @@ def self_heal(
                 f"A program with this specification was requested:\n{prompt}\n, but running it produced the following output:\n{stdout_full}\n and the following errors:\n{stderr_full}. Please change it so that it fulfills the requirements."
             )
             files_dict = improve_fn(
-                ai, new_prompt, files_dict, memory, preprompts_holder
+                ai, new_prompt, files_dict, memory, preprompts_holder, diff_timeout
             )
         else:
             break
diff --git a/tests/applications/cli/test_main.py b/tests/applications/cli/test_main.py
index ca9364872c..161a84052d 100644
--- a/tests/applications/cli/test_main.py
+++ b/tests/applications/cli/test_main.py
@@ -107,6 +107,21 @@ def test_improve_existing_project_skip_file_selection(self, tmp_path, monkeypatc
         args()
         assert args.skip_file_selection, "Skip_file_selection not set"
 
+    #  Runs gpt-engineer with improve mode and improves an existing project in the specified path, with a custom diff_timeout
+    def test_improve_existing_project_diff_timeout(self, tmp_path, monkeypatch):
+        p = tmp_path / "projects/example"
+        p.mkdir(parents=True)
+        (p / "prompt").write_text(prompt_text)
+        args = DefaultArgumentsMain(
+            str(p),
+            improve_mode=True,
+            llm_via_clipboard=True,
+            no_execution=True,
+            diff_timeout=99,
+        )
+        args()
+        assert args.diff_timeout == 99, "Diff timeout not set"
+
         # def improve_generator():
         #     yield "y"
         #     while True: