-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Pseudo-Keys Part II: history metadata #738
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,10 @@ | |
from onyo.lib.ui import ui | ||
|
||
if TYPE_CHECKING: | ||
from typing import Iterable | ||
from typing import ( | ||
Generator, | ||
Iterable, | ||
) | ||
|
||
log: logging.Logger = logging.getLogger('onyo.git') | ||
|
||
|
@@ -401,3 +404,88 @@ | |
|
||
# TODO: git check-ignore --no-index --stdin (or via command call) -> lazy, check GitRepo.files once. (Same invalidation) | ||
# -> OnyoRepo would use it to maintain a ignore list from a (top-level .onyoignore)? .onyo/ignore ? Both? | ||
|
||
def _parse_log_output(self, lines: list[str]) -> dict: | ||
"""Generate a dict from the output of ``git log`` for one commit. | ||
|
||
Internal helper that converts a list of git-log-output lines of | ||
a single commit to a dictionary. | ||
""" | ||
import datetime | ||
import re | ||
regex_author = re.compile(r"Author:\s+(?P<name>\b.*\b)\s+<(?P<email>[^\s]+)>$") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If I understand this correctly, we're not going to get much of a win doing the compile here, since we're only using it once per function invocation. Thus, it's effectively the same as using Moving the compilation out of the function would probably lead to a notable performance win. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, you understand correctly. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think that depends where you put it. If it's something that gets imported (similar to consts) then it wouldn't be. Just something to keep in mind. We'll see when we get there. And we have benchmarking now. :-D |
||
regex_committer = re.compile(r"Commit:\s+(?P<name>\b.*\b)\s+<(?P<email>[^\s]+)>$") | ||
commit = dict() | ||
for line in lines: | ||
if line.startswith('commit '): | ||
commit['hexsha'] = line.split()[1] | ||
continue | ||
elif line.startswith('Author:'): | ||
try: | ||
commit['author'] = re.match(regex_author, line).groupdict() # pyre-ignore [16] AttributeError is caught | ||
except AttributeError as e: | ||
if str(e).endswith("'groupdict'"): | ||
raise RuntimeError(f"Unable to parse author-line:\n{line}") from e | ||
raise | ||
continue | ||
elif line.startswith('AuthorDate:'): | ||
continue | ||
elif line.startswith('Commit:'): | ||
try: | ||
commit['committer'] = re.match(regex_committer, line).groupdict() | ||
except AttributeError as e: | ||
if str(e).endswith("'groupdict'"): | ||
raise RuntimeError(f"Unable to parse committer-line:\n{line}") from e | ||
raise | ||
continue | ||
elif line.startswith('CommitDate:'): | ||
commit['time'] = datetime.datetime.fromisoformat(line.split(maxsplit=1)[1]) | ||
continue | ||
else: | ||
# This line is part of the message. | ||
if 'message' not in commit: | ||
commit['message'] = [line] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This retains the 4 leading spaces added by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right. Need to check whether that's guaranteed to be 4 spaces, though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree. I don't have a good way to detect the indenting though. I don't want to strip intentional leading spaces. |
||
else: | ||
commit['message'].append(line) | ||
|
||
return commit | ||
|
||
def history(self, path: Path | None = None, n: int | None = None) -> Generator[dict, None, None]: | ||
"""Yields commit dicts representing the history of ``path``. | ||
|
||
History according to git log (git log --follow if a path is given). | ||
|
||
Parameters | ||
---------- | ||
path: | ||
What file to follow. If `None`, get the history of HEAD instead. | ||
n: | ||
Limit history going back ``n`` commits. | ||
``None`` for no limit (default). | ||
""" | ||
# TODO: Something like this may allow us to efficiently deal with multiline strings in git-log's output, | ||
# rather than splitting lines and iterating over them when assembling output that belongs to a single | ||
# commit, parsing the git data and then the operations record (also removes leading spaces for the commit | ||
# message): | ||
# --pretty='format:commit: %H%nAuthor: %an (%ae)%nCommitter: %cn (%ce)%nCommitDate: %cI%nMessage:%n%B' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice. +1 |
||
limit = [f'-n{n}'] if n is not None else [] | ||
pathspec = ['--follow', '--', str(path)] if path else [] | ||
cmd = ['log', '--date=iso-strict', '--pretty=fuller'] + limit + pathspec | ||
output = self._git(cmd) | ||
|
||
# yield output on a per-commit-basis | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hah. Nice solution. :-) As an alternate idea, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That could work indeed. I'll try what that would look like. I do lean towards postponing that part, though. Need to implement the actual pseudo-keys and test they work and are queryable. And then there's the refactoring to be done, fully utilizing |
||
commit_output = [] | ||
for line in output.splitlines(): | ||
if line.startswith('commit '): | ||
# This is the first line of a new commit. | ||
# 1. store previous commit output | ||
if commit_output: | ||
# we just finished the previous commit. | ||
yield self._parse_log_output(commit_output) | ||
# 2. start new commit output | ||
commit_output = [line] | ||
else: | ||
# add to current commit output | ||
commit_output.append(line) | ||
if commit_output: | ||
yield self._parse_log_output(commit_output) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
from __future__ import annotations | ||
|
||
from copy import deepcopy | ||
from pathlib import Path | ||
from typing import TYPE_CHECKING | ||
|
||
|
@@ -51,7 +52,10 @@ def __init__(self, | |
self.update(onyo.lib.pseudokeys.PSEUDO_KEYS) | ||
self._path = None | ||
|
||
if isinstance(item, Path): | ||
if isinstance(item, Item): | ||
self._path = item._path | ||
self.data = deepcopy(item.data) | ||
elif isinstance(item, Path): | ||
assert item.is_absolute() # currently no support for relative. This is how all existing code should work ATM. | ||
self.update_from_path(item) | ||
elif item is not None: | ||
|
@@ -95,15 +99,51 @@ def update_from_path(self, path: Path): | |
loader = self.repo.get_asset_content if self.repo else get_asset_content | ||
self.update(loader(path)) | ||
|
||
# TODO: git metadata: | ||
# def fill_created(self, what: str | None): | ||
# # self.repo.git.xxx (self.get('onyo.path.file')) | ||
# # fill in all created keys | ||
# # switch `what` -> return whatever part was requesting this | ||
# pass#raise NotImplementedError | ||
# | ||
# def fill_modified(self, what: str | None): | ||
# pass#raise NotImplementedError | ||
def fill_created(self, what: str | None): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I propose I havn't read the rest of the code, so I'm not sure how this function is being used yet, but I feel like this might not be the right way to do it. I imagine a getter for the key, and then if the key is unset, running I'm not sold on the |
||
"""Initializer for the 'onyo.was.modified' pseudo-keys. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
Fills in the entire sub-dict and returns the value specified by ``what``. | ||
|
||
Note/TODO: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IMO this is not so much a TODO. It's a note that says: official onyo commands do this right. It is possible for others to generate commits that are simply too insane for Onyo to anticipate. They need to take care. You have been warned. |
||
---------- | ||
This is currently based on ``git log --follow <path>``. Looking back, the first appearance | ||
of a 'new_assets'/'new_directories' operation should be it, assuming the history | ||
was created by onyo commands. | ||
However, if the history was created using the python interface, that assumption wouldn't hold | ||
and we'd have to trace back moves and renames in order to know what path or file name we are looking | ||
to match against these operations. | ||
""" | ||
if self.repo and self['onyo.path.absolute']: | ||
for commit in self.repo.get_history(self['onyo.path.file']): # pyre-ignore[16] | ||
if 'operations' in commit: | ||
if (self['onyo.is.asset'] and commit['operations']['new_assets']) or \ | ||
(self['onyo.is.directory'] and commit['operations']['new_directories']): | ||
self['onyo.was.created'] = commit.data | ||
return commit[what] if what else None | ||
return None | ||
|
||
def fill_modified(self, what: str | None): | ||
"""Initializer for the 'onyo.was.modified' pseudo-keys. | ||
|
||
Fills in the entire sub-dict and returns the value specified by ``what``. | ||
|
||
Note/TODO: | ||
---------- | ||
See ``fill_created``. | ||
""" | ||
if self.repo and self['onyo.path.absolute']: | ||
for commit in self.repo.get_history(self['onyo.path.file']): # pyre-ignore[16] | ||
if 'operations' in commit: | ||
if (self['onyo.is.asset'] and | ||
(commit['operations']['modify_assets'] or | ||
commit['operations']['new_assets'])) or \ | ||
(self['onyo.is.directory'] and | ||
(commit['operations']['new_directories'] or | ||
commit['operations']['move_directories'] or | ||
commit['operations']['rename_directories'])): | ||
self['onyo.was.modified'] = commit.data | ||
return commit[what] if what else None | ||
return None | ||
|
||
def get_path_absolute(self): | ||
"""Initializer for the 'onyo.path.absolute' pseudo-key.""" | ||
|
@@ -125,7 +165,14 @@ def get_path_parent(self): | |
|
||
def get_path_file(self): | ||
"""Initializer for the 'onyo.path.file' pseudo-key.""" | ||
return self._path.relative_to(self.repo.git.root) if self.repo and self['onyo.is.asset'] else None | ||
if self.repo and self['onyo.path.absolute']: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know why, but this function evokes very positive emotions. This was definitely the right design decision. :-D |
||
if self['onyo.is.asset'] and not self['onyo.is.directory']: | ||
return self['onyo.path.relative'] | ||
if self['onyo.is.asset'] and self['onyo.is.directory']: | ||
return self['onyo.path.relative'] / onyo.lib.onyo.OnyoRepo.ASSET_DIR_FILE_NAME | ||
if self['onyo.is.directory'] and not self['onyo.is.asset']: | ||
return self['onyo.path.relative'] / onyo.lib.onyo.OnyoRepo.ANCHOR_FILE_NAME | ||
return None | ||
|
||
def is_asset(self) -> bool | None: | ||
"""Initializer for the 'onyo.is.asset' pseudo-key.""" | ||
|
@@ -147,7 +194,10 @@ def is_template(self) -> bool | None: | |
|
||
def is_empty(self) -> bool | None: | ||
"""Initializer for the 'onyo.is.empty' pseudo-key.""" | ||
return None # TODO: Unclear what exactly this needs to consider. Probably child items? Or just assets? | ||
if self['onyo.is.directory'] and self.repo and self._path: | ||
# TODO: This likely can be faster when redoing/enhancing caching of repo paths. | ||
return not any(p.parent == self._path for p in self.repo.get_asset_paths()) | ||
return None | ||
|
||
# TODO/Notes for next PR(s): | ||
# - Bug/Missing feature: pseudo-keys that are supposed to be settable by commands, are not yet | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
from pathlib import Path | ||
|
||
from onyo.lib.inventory import OPERATIONS_MAPPING | ||
|
||
|
||
def parse_operations_record(record: list[str]) -> dict: | ||
|
||
if not record[0].strip() == "--- Inventory Operations ---": | ||
raise RuntimeError("Invalid operations record.") | ||
parsed_record = {k: [] for k in OPERATIONS_MAPPING.keys()} | ||
collecting_key = None | ||
for line in record[1:]: | ||
line = line.strip() | ||
if not line: | ||
continue | ||
match line: | ||
case "New assets:": | ||
collecting_key = "new_assets" | ||
case "New directories:": | ||
collecting_key = "new_directories" | ||
case "Removed assets:": | ||
collecting_key = "remove_assets" | ||
case "Removed directories:": | ||
collecting_key = "remove_directories" | ||
case "Moved assets:": | ||
collecting_key = "move_assets" | ||
case "Moved directories:": | ||
collecting_key = "move_directories" | ||
case "Renamed directories:": | ||
collecting_key = "rename_directories" | ||
case "Renamed assets:": | ||
collecting_key = "rename_assets" | ||
case "Modified assets:": | ||
collecting_key = "modify_assets" | ||
case _: | ||
if not collecting_key or line and not line.startswith("- "): | ||
raise RuntimeError("Invalid operations record.") | ||
cleaned_line = line[2:].split(" -> ") | ||
if len(cleaned_line) > 2: | ||
raise RuntimeError(f"Invalid operations record:{line}") | ||
if len(cleaned_line) == 1: | ||
parsed_record[collecting_key].append(Path(cleaned_line[0])) | ||
else: | ||
parsed_record[collecting_key].append( | ||
(Path(cleaned_line[0]), Path(cleaned_line[1])) | ||
) | ||
return parsed_record |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Proposal: this function can be simplified if all lines are put into one large string.
That string is then split into two pieces, splitting on the first entirely empty line.
The first half is the header. The second half is the message.
The regex already checks for "Author", "Commit", etc. Have them all run in one large try statement on the header text, and all get caught by the same exception handling.
Then the message is dealt with separately, and simply assigns the second half.