Skip to content

Commit

Permalink
Merge pull request #813 from TheoMcCabe/task/increase-chat-to-files-c…
Browse files Browse the repository at this point in the history
…overage

Increase chat to files coverage
  • Loading branch information
ATheorell authored Oct 22, 2023
2 parents f67c1bd + f169040 commit fb8b6ca
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 43 deletions.
68 changes: 26 additions & 42 deletions gpt_engineer/core/chat_to_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
Functions:
- parse_chat: Extracts code blocks from chat messages.
- to_files: Parses a chat and adds the extracted files to a workspace.
- overwrite_files: Parses a chat and overwrites files in the workspace.
- get_code_strings: Reads a file list and returns filenames and their content.
- format_file_to_input: Formats a file's content for input to an AI agent.
- to_files_and_memory: Saves chat content to memory and adds extracted files to a workspace.
- to_files: Adds extracted files to a workspace.
- get_code_strings: Retrieves file names and their content.
- format_file_to_input: Formats file content for AI input.
- overwrite_files_with_edits: Overwrites workspace files based on parsed edits from chat.
- apply_edits: Applies file edits to a workspace.
"""

import os
Expand Down Expand Up @@ -116,27 +118,6 @@ def to_files(chat: str, workspace: DB):
workspace[file_name] = file_content


def overwrite_files(chat: str, dbs: DBs) -> None:
"""
Parse the chat and overwrite all files in the workspace.
Parameters
----------
chat : str
The chat containing the AI files.
dbs : DBs
The database containing the workspace.
"""
dbs.memory["all_output_overwrite.txt"] = chat

files = parse_chat(chat)
for file_name, file_content in files:
if file_name == "README.md":
dbs.memory["LAST_MODIFICATION_README.md"] = file_content
else:
dbs.workspace[file_name] = file_content


def get_code_strings(workspace: DB, metadata_db: DB) -> dict[str, str]:
"""
Read file_list.txt and return file names and their content.
Expand All @@ -150,19 +131,12 @@ def get_code_strings(workspace: DB, metadata_db: DB) -> dict[str, str]:
A dictionary mapping file names to their content.
"""

def get_all_files_in_dir(directory):
for root, dirs, files in os.walk(directory):
for file in files:
yield os.path.join(root, file)
for dir in dirs:
yield from get_all_files_in_dir(os.path.join(root, dir))

files_paths = metadata_db[FILE_LIST_NAME].strip().split("\n")
files = []

for full_file_path in files_paths:
if os.path.isdir(full_file_path):
for file_path in get_all_files_in_dir(full_file_path):
for file_path in _get_all_files_in_dir(full_file_path):
files.append(file_path)
else:
files.append(full_file_path)
Expand All @@ -177,15 +151,7 @@ def get_all_files_in_dir(directory):
file_name = os.path.relpath(path, workspace.path)

if file_name in workspace:
try:
with open(path, "r", encoding="utf-8") as f:
file_content = f.read()
except UnicodeDecodeError:
raise ValueError(
f"Non-text file detected: {file_name}, gpt-engineer currently only supports utf-8 decodable text files."
)

files_dict[file_name] = file_content
files_dict[file_name] = _open_file(path)

return files_dict

Expand Down Expand Up @@ -285,3 +251,21 @@ def apply_edits(edits: List[Edit], workspace: DB):
workspace[filename] = workspace[filename].replace(
edit.before, edit.after
) # existing file


def _get_all_files_in_dir(directory):
for root, dirs, files in os.walk(directory):
for file in files:
yield os.path.join(root, file)
for dir in dirs:
yield from _get_all_files_in_dir(os.path.join(root, dir))


def _open_file(file_path) -> str:
try:
with open(file_path, "r", encoding="utf-8") as f:
return f.read()
except UnicodeDecodeError:
raise ValueError(
f"Non-text file detected: {file_path}, gpt-engineer currently only supports utf-8 decodable text files."
)
36 changes: 35 additions & 1 deletion tests/test_chat_to_files.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import textwrap

from gpt_engineer.core.chat_to_files import to_files_and_memory
from gpt_engineer.core.chat_to_files import to_files_and_memory, get_code_strings
from gpt_engineer.cli.file_selector import FILE_LIST_NAME

from unittest.mock import MagicMock


class DummyDBs:
Expand Down Expand Up @@ -208,3 +211,34 @@ def test_files_with_newline_between_header():

for file_name, file_content in expected_files.items():
assert dbs.workspace[file_name] == file_content


def test_get_code_strings(monkeypatch):
# arrange
mock_db = MagicMock()
mock_db.path = "path/to"
data = {"file1.txt": "This is file 1 content", "file2.txt": "This is file 2 content"}
mock_db.__getitem__ = lambda self, x: data.get(x)
mock_db.__contains__ = lambda self, x: x in data

mock_metadata_db = {FILE_LIST_NAME: "path/to/file1.txt\npath/to/file2.txt"}

def mock_get_all_files_in_dir(directory):
return ["path/to/file1.txt", "path/to/file2.txt"]

def mock_open_file(path):
return f"File Data for file: {path}"

monkeypatch.setattr(
"gpt_engineer.core.chat_to_files._get_all_files_in_dir", mock_get_all_files_in_dir
)

monkeypatch.setattr("gpt_engineer.core.chat_to_files._open_file", mock_open_file)

# act
result = get_code_strings(mock_db, mock_metadata_db)

print(result)
# assert
assert result["file1.txt"] == "File Data for file: path/to/file1.txt"
assert result["file2.txt"] == "File Data for file: path/to/file2.txt"

0 comments on commit fb8b6ca

Please sign in to comment.