Skip to content

Commit

Permalink
Merge branch 'semantic-similarity' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
Daethyra authored Dec 8, 2023
2 parents 42d3daf + b63c9eb commit b36324f
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 3 deletions.
2 changes: 0 additions & 2 deletions src/conv_html_to_markdown.py
Original file line number Diff line number Diff line change
@@ -262,7 +262,6 @@ def main():

chunks = list(chunk_dataset(original_data, chunk_size))
formatted_contents = []

logging.info("Processing and saving dataset in chunks.")
with ThreadPoolExecutor(max_workers=max_threads) as executor:
results = executor.map(process_chunk, chunks)
@@ -272,7 +271,6 @@ def main():
output_file_name = "gpt-crawler-curated_markdown.md"
save_output_in_chunks(output_file_name, formatted_contents)
logging.info("Content formatted and saved in chunks successfully.")

logging.info("\nConversion process successful. Exiting program.")
except Exception as e:
logging.error("An error occurred in the main function: %s", e)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_conv_html_to_markdown.py
Original file line number Diff line number Diff line change
@@ -73,4 +73,4 @@ def test_process_chunk(self):


if __name__ == "__main__":
- unittest.main
+ unittest.main

0 comments on commit b36324f

Please sign in to comment.