From 3d2182c930b15b186cdabfab41dfa7eb054a7d00 Mon Sep 17 00:00:00 2001 From: Tyson Smith Date: Thu, 16 May 2024 11:42:15 -0700 Subject: [PATCH] Use mmap to simplify searching for tracebacks in logs --- grizzly/common/status_reporter.py | 33 +++++++++++--------------- grizzly/common/test_status_reporter.py | 4 ++-- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/grizzly/common/status_reporter.py b/grizzly/common/status_reporter.py index deac872c..6fe8d484 100644 --- a/grizzly/common/status_reporter.py +++ b/grizzly/common/status_reporter.py @@ -9,7 +9,8 @@ from functools import partial from itertools import zip_longest from logging import DEBUG, INFO, basicConfig, getLogger -from os import SEEK_CUR, getenv +from mmap import ACCESS_READ, mmap +from os import getenv from pathlib import Path from platform import system from re import match @@ -73,24 +74,18 @@ def from_file( token = b"Traceback (most recent call last):" assert len(token) < cls.READ_LIMIT try: - with log_file.open("rb") as in_fp: - for chunk in iter(partial(in_fp.read, cls.READ_LIMIT), b""): - idx = chunk.find(token) - if idx > -1: - # calculate offset of data in the file - pos = in_fp.tell() - len(chunk) + idx - break - if len(chunk) == cls.READ_LIMIT: - # seek back to avoid missing beginning of token - in_fp.seek(len(token) * -1, SEEK_CUR) - else: - # no traceback here, move along - return None - # seek back 2KB to collect preceding lines - in_fp.seek(max(pos - 2048, 0)) - data = in_fp.read(cls.READ_LIMIT) - except OSError: # pragma: no cover - # in case the file goes away + with log_file.open("rb") as lfp: + with mmap(lfp.fileno(), 0, access=ACCESS_READ) as lmm: + idx = lmm.find(token) + if idx == -1: + # no traceback here, move along + return None + # seek back 2KB to collect preceding lines + lmm.seek(max(idx - len(token) - 2048, 0)) + data = lmm.read(cls.READ_LIMIT) + except (OSError, ValueError): # pragma: no cover + # OSError: in case the file goes away + # ValueError: cannot mmap an empty file on Windows return None data_lines = data.decode("ascii", errors="ignore").splitlines() diff --git a/grizzly/common/test_status_reporter.py b/grizzly/common/test_status_reporter.py index 87329ca6..f6a850e0 100644 --- a/grizzly/common/test_status_reporter.py +++ b/grizzly/common/test_status_reporter.py @@ -484,7 +484,7 @@ def test_status_reporter_09(mocker, tmp_path): test_fp.write( ' File "some/long/path/name/foobar.py", line 5000, in \n' ) - test_fp.write(f" some_long_name_for_a_func_{j:0>4d}()\n") + test_fp.write(f" some_long_name_for_a_func_{j:04d}()\n") test_fp.write("IndexError: list index out of range\n") rptr = StatusReporter.load(db_file, tb_path=tmp_path) rptr._sys_info = _fake_sys_info @@ -598,7 +598,7 @@ def test_traceback_report_04(tmp_path): test_fp.write(" second()\n") for i in reversed(range(TracebackReport.MAX_LINES)): test_fp.write(' File "foo.py", line 5, in \n') - test_fp.write(f" func_{i:0>2d}()\n") + test_fp.write(f" func_{i:02d}()\n") test_fp.write("END_WITH_BLANK_LINE\n\n") test_fp.write("end junk\n") tbr = TracebackReport.from_file(test_log)