Skip to content

Commit

Permalink
Moved cache files, tested it, minor cleanups
Browse files Browse the repository at this point in the history
  • Loading branch information
leokim-l committed Sep 4, 2024
1 parent 070f74c commit 2913dc7
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 21 deletions.
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ packages = [{include = "malco", from = "src"}]
python = "^3.10"
pheval = "^0.3.2"
setuptools = "^69.5.1"
shelved-cache = "^0.3.1"


[tool.poetry.plugins."pheval.plugins"]
Expand All @@ -20,7 +21,7 @@ pytest = "^7.1.2"
pylint = "^2.15.6"
pycodestyle = "^2.10.0"
coverage = "^6.5.0"
ontogpt = {git = "https://github.com/monarch-initiative/ontogpt.git", branch = "main"}
ontogpt = {git = "https://github.com/monarch-initiative/ontogpt.git", tag = "v1.0.3"}

[tool.poetry.group.dev.dependencies]
tox = "^4.15.0"
Expand Down
25 changes: 15 additions & 10 deletions src/malco/post_process/ranking_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,24 @@ def mondo_adapter() -> OboGraphInterface:
return get_adapter("sqlite:obo:mondo")

def compute_mrr_and_ranks(
comparing,
output_dir,
prompt_dir,
correct_answer_file,
comparing: str,
output_dir: Path,
out_subdir: str,
prompt_dir: str,
correct_answer_file: str,
) -> Path:

# Read in results TSVs from output_dir that match glob results*tsv
out_caches = output_dir / "caches"
out_caches.mkdir(exist_ok=True)
output_dir = output_dir / out_subdir
results_data = []
results_files = []
num_ppkt = 0
pc2_cache_file = str(output_dir / "score_grounded_result_cache")
pc2 = PersistentCache(LRUCache, pc2_cache_file, maxsize=4096)
pc1_cache_file = str(output_dir / "omim_mappings_cache")
pc1 = PersistentCache(LRUCache, pc1_cache_file, maxsize=16384)
pc2_cache_file = str(out_caches / "score_grounded_result_cache")
pc2 = PersistentCache(LRUCache, pc2_cache_file, maxsize=524288)
pc1_cache_file = str(out_caches / "omim_mappings_cache")
pc1 = PersistentCache(LRUCache, pc1_cache_file, maxsize=524288)
# Treat hits and misses as run-specific counters and write them to cache_log.txt
pc1.hits = pc1.misses = 0
pc2.hits = pc2.misses = 0
Expand Down Expand Up @@ -78,7 +83,7 @@ def compute_mrr_and_ranks(
header = [comparing, "n1", "n2", "n3", "n4", "n5", "n6", "n7", "n8", "n9", "n10", "n10p", "nf"]
rank_df = pd.DataFrame(0, index=np.arange(len(results_files)), columns=header)

cache_file = output_dir / "cache_log.txt"
cache_file = out_caches / "cache_log.txt"

with cache_file.open('a', newline = '') as cf:
now_is = datetime.now().strftime("%Y%m%d-%H%M%S")
Expand Down Expand Up @@ -120,7 +125,7 @@ def compute_mrr_and_ranks(
# Save full data frame
full_df_path = output_dir / results_files[i].split("/")[0]
full_df_filename = "full_df_results.tsv"
safe_save_tsv(full_df_path, df, full_df_filename)
safe_save_tsv(full_df_path, full_df_filename, df)

# Calculate MRR for this file
mrr = df.groupby("label")["reciprocal_rank"].max().mean()
Expand Down
15 changes: 5 additions & 10 deletions src/malco/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,7 @@ class MalcoRunner(PhEvalRunner):
#languages: tuple
#models: tuple
#just_run: bool
#just_postprocess: bool

#languages = ("en", "es", "nl", "it", "de")
#models = ("gpt-3.5-turbo", "gpt-4", "gpt-4-turbo", "gpt-4o") # Decide on list of models: Claude-Sonnet (Anthropic key),
#models = ("gpt-3.5-turbo", "gpt-4-turbo") # Decide on list of models: Claude-Sonnet (Anthropic key),
#just_run = 0 # only run the run part of the code
#just_postprocess = 1 # only run the postprocess part of the code

#just_postprocess: bool


def prepare(self):
Expand Down Expand Up @@ -75,7 +68,8 @@ def post_process(self,
'''
comparing = "language"
mrr_file, plot_dir, num_ppkt, topn_aggr_file = compute_mrr_and_ranks(comparing,
output_dir=self.output_dir / "multilingual" ,
output_dir=self.output_dir,
out_subdir="multilingual",
prompt_dir=os.path.join(self.input_dir, prompts_subdir_name),
correct_answer_file=correct_answer_file)
Expand All @@ -85,7 +79,8 @@ def post_process(self,
'''
comparing = "model"
mrr_file, data_dir, num_ppkt, topn_aggr_file = compute_mrr_and_ranks(comparing,
output_dir=self.output_dir / "multimodel" ,
output_dir=self.output_dir,
out_subdir="multimodel",
prompt_dir=os.path.join(self.input_dir, prompts_subdir_name),
correct_answer_file=correct_answer_file)

Expand Down

0 comments on commit 2913dc7

Please sign in to comment.