Skip to content

Commit

Permalink
more xgrammar scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Dec 6, 2024
1 parent d8e239f commit 02c0f72
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 1 deletion.
97 changes: 97 additions & 0 deletions json_stats/xgr_multi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/env python3

import sys
import json
import glob
import os
import random
import time
import concurrent.futures
import subprocess
from typing import List

# Base directories for input schemas and test outputs, rooted at the user's
# home directory.  os.path.expanduser("~") is used instead of
# os.environ.get("HOME") so this does not crash with a TypeError when HOME
# is unset (e.g. on Windows or in a stripped-down CI environment).
positive_base = os.path.expanduser("~") + "/src/json-data/positive"
output_base = os.path.expanduser("~") + "/src/json-data/xgr_output"

def process_file(files: List[str], timeout: int = 60):
    """Run ``xgr_test.py`` on one batch of files in a subprocess.

    Parameters:
        files: paths of the .json schema files to process in this batch.
        timeout: seconds to wait before the subprocess is killed
            (generalized from the previously hard-coded 60).

    Returns a dict; callers can distinguish the outcomes by key:
      * normal exit   -> {"output": combined stdout+stderr, "return_code": int}
      * timeout       -> {"output": partial output, "error": "TimeoutExpired",
                          "message": ...}
      * other failure -> {"error": "ExecutionError", "message": ...}

    Never raises: every failure is reported through the returned dict so
    the thread-pool driver can keep submitting batches.
    """
    try:
        command = ["python", "xgr_test.py"] + files

        # stderr is merged into stdout so the log captures one
        # chronologically ordered stream per batch.
        result = subprocess.run(
            command,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            encoding="utf-8",
            timeout=timeout,
        )

        return {
            "output": result.stdout,
            "return_code": result.returncode,
        }
    except subprocess.TimeoutExpired as e:
        # e.output may still be bytes depending on how far the child got
        # before being killed, so decode defensively.
        output = e.output or ""
        if isinstance(output, bytes):
            output = output.decode("utf-8", errors="replace")
        return {
            "output": output,
            "error": "TimeoutExpired",
            "message": f"The process exceeded the {timeout}-second timeout.",
        }
    except Exception as e:
        # e.g. FileNotFoundError when "python" is not on PATH.
        return {
            "error": "ExecutionError",
            "message": str(e),
        }

# ---- command-line handling --------------------------------------------
# Each argument is either a single .json file or a directory to scan for
# .json files (non-recursively).
files = []
for arg in sys.argv[1:]:
    if arg.endswith(".json"):
        files.append(arg)
    else:
        files.extend(glob.glob(arg + "/*.json"))
print(len(files), file=sys.stderr)

# Keep only files that do not already have an output file, so the script
# can be re-run to resume interrupted work.
missing_files = []
for path in files:
    file_base = path.split("/")[-1]
    output_name = f"{output_base}/{file_base}"
    if not os.path.exists(output_name):
        missing_files.append(path)
print(len(missing_files), file=sys.stderr)
# Shuffle so repeated runs don't all start on the same (possibly
# problematic) files.
random.shuffle(missing_files)

# With fewer than 10 files left, batching isn't worth it: print the
# remainder so they can be handled manually and stop.
if len(missing_files) < 10:
    print(missing_files)
    sys.exit(0)

# Split the remaining work into batches of 10 files per subprocess.
chunk_size = 10
chunks = []
for i in range(0, len(missing_files), chunk_size):
    chunks.append(missing_files[i:i + chunk_size])

log_file = f"{output_base}/log.txt"
cnt = 0  # running count of files handed off, printed as a progress meter

with concurrent.futures.ThreadPoolExecutor(max_workers=40) as executor:
    # NOTE: renamed the loop/result variables so they no longer shadow the
    # module-level `files` list or the file handle `f` used elsewhere.
    futures = {executor.submit(process_file, chunk): chunk for chunk in chunks}
    for future in concurrent.futures.as_completed(futures):
        batch = futures[future]
        try:
            r = future.result()
            cnt += len(batch)
            print(cnt)
            rs = json.dumps(r)
            with open(log_file, "a") as log:
                log.write(f"FILES: {batch}\n{rs}\n")
        except Exception as e:
            # BUG FIX: the original write had no trailing newline, so
            # consecutive error records ran together in the log.
            with open(log_file, "a") as log:
                log.write(f"ERROR {batch}: {repr(e)}\n")
            print(f"ERROR: {batch}", repr(e))
25 changes: 25 additions & 0 deletions json_stats/xgr_process.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env python3

import json
import sys

# Each line of the input file (sys.argv[1]) is a JSON record produced by
# xgr_multi.py, whose "output" field holds the combined stdout of one
# xgr_test.py batch.  Lines of that output starting with "RESULT: " carry
# a JSON status object; text between two RESULT lines is collected as
# warnings belonging to the *following* result.
objs = []

RESULT_PREFIX = "RESULT: "

with open(sys.argv[1]) as f:
    for line in f:
        # BUG FIX: the original checked `if not l:`, which is never true
        # for lines read from a file (they keep their "\n"), so a blank
        # line would crash json.loads.  Skip whitespace-only lines.
        if not line.strip():
            continue
        j = json.loads(line)
        buf = ""  # warning text accumulated before the next RESULT line
        for outline in j["output"].split("\n"):
            if outline.startswith(RESULT_PREFIX):
                parsed = json.loads(outline[len(RESULT_PREFIX):])
                parsed["warnings"] = buf
                objs.append(parsed)
                print(parsed.get("compile_time", 0), parsed["file"])
                buf = ""
            else:
                buf += outline + "\n"

# Write all collected result objects as one pretty-printed JSON array.
with open(sys.argv[2], "w") as f:
    f.write(json.dumps(objs, indent=4))
3 changes: 2 additions & 1 deletion json_stats/xgr_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,9 @@ def process_file(file: str):
if os.path.exists(output_name):
return

print("PROCESSING: " + file, file=sys.stderr)
status = do_process(file)
print(status, file=sys.stderr)
print("RESULT: " + json.dumps(status), file=sys.stderr)
with open(output_name, "w") as f:
f.write(json.dumps(status, indent=4))

Expand Down

0 comments on commit 02c0f72

Please sign in to comment.