-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Pseudo-Keys Part II: history metadata #738
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -408,8 +408,8 @@ def check_ignore(self, ignore: Path, paths: list[Path]) -> list[Path]: | |
def _parse_log_output(self, lines: list[str]) -> dict: | ||
"""Generate a dict from the output of ``git log`` for one commit. | ||
|
||
Internal helper that gets a list of git-log-output lines, | ||
that represent a single commit and generates a dictionary from it. | ||
Internal helper that converts a list of git-log-output lines of | ||
a single commit to a dictionary. | ||
""" | ||
import datetime | ||
import re | ||
|
@@ -418,7 +418,7 @@ def _parse_log_output(self, lines: list[str]) -> dict: | |
commit = dict() | ||
for line in lines: | ||
if line.startswith('commit '): | ||
commit['commit'] = line.split()[1] | ||
commit['hexsha'] = line.split()[1] | ||
continue | ||
elif line.startswith('Author:'): | ||
try: | ||
|
@@ -450,21 +450,24 @@ def _parse_log_output(self, lines: list[str]) -> dict: | |
|
||
return commit | ||
|
||
def follow(self, path: Path | None = None, n: int | None = None) -> Generator[dict, None, None]: | ||
def history(self, path: Path | None = None, n: int | None = None) -> Generator[dict, None, None]: | ||
"""Yields commit dicts representing the history of ``path``. | ||
|
||
History according to ``git log --follow`` if a path is given | ||
or ``git log`` otherwise. | ||
History according to git log (git log --follow if a path is given). | ||
|
||
Parameters | ||
---------- | ||
path: Path, optional | ||
path: | ||
What file to follow. If `None`, get the history of HEAD instead. | ||
n: int, optional | ||
n: | ||
Limit history going back ``n`` commits. | ||
``None`` for no limit (default). | ||
""" | ||
# TODO: Do we want to distinguish methods for `git log` (path optional) vs `git log --follow` (requires a path)? | ||
# TODO: Something like this may allow us to efficiently deal with multiline strings in git-log's output, | ||
# rather than splitting lines and iterating over them when assembling output that belongs to a single | ||
# commit, parsing the git data and then the operations record (also removes leading spaces for the commit | ||
# message): | ||
# --pretty='format:commit: %H%nAuthor: %an (%ae)%nCommitter: %cn (%ce)%nCommitDate: %cI%nMessage:%n%B' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice. +1 |
||
limit = [f'-n{n}'] if n is not None else [] | ||
pathspec = ['--follow', '--', str(path)] if path else [] | ||
cmd = ['log', '--date=iso-strict', '--pretty=fuller'] + limit + pathspec | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
from __future__ import annotations | ||
|
||
from copy import deepcopy | ||
from pathlib import Path | ||
from typing import TYPE_CHECKING | ||
|
||
|
@@ -51,7 +52,10 @@ def __init__(self, | |
self.update(onyo.lib.pseudokeys.PSEUDO_KEYS) | ||
self._path = None | ||
|
||
if isinstance(item, Path): | ||
if isinstance(item, Item): | ||
self._path = item._path | ||
self.data = deepcopy(item.data) | ||
elif isinstance(item, Path): | ||
assert item.is_absolute() # currently no support for relative. This is how all existing code should work ATM. | ||
self.update_from_path(item) | ||
elif item is not None: | ||
|
@@ -95,15 +99,51 @@ def update_from_path(self, path: Path): | |
loader = self.repo.get_asset_content if self.repo else get_asset_content | ||
self.update(loader(path)) | ||
|
||
# TODO: git metadata: | ||
# def fill_created(self, what: str | None): | ||
# # self.repo.git.xxx (self.get('onyo.path.file')) | ||
# # fill in all created keys | ||
# # switch `what` -> return whatever part was requesting this | ||
# pass#raise NotImplementedError | ||
# | ||
# def fill_modified(self, what: str | None): | ||
# pass#raise NotImplementedError | ||
def fill_created(self, what: str | None): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I propose I havn't read the rest of the code, so I'm not sure how this function is being used yet, but I feel like this might not be the right way to do it. I imagine a getter for the key, and then if the key is unset, running I'm not sold on the |
||
"""Initializer for the 'onyo.was.modified' pseudo-keys. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
Fills in the entire sub-dict and returns the value specified by ``what``. | ||
|
||
Note/TODO: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IMO this is not so much a TODO. It's a note that says: official onyo commands do this right. It is possible for others to generate commits that are simply too insane for Onyo to anticipate. They need to take care. You have been warned. |
||
---------- | ||
This is currently based on ``git log --follow <path>``. Looking back, the first appearance | ||
of a 'new_assets'/'new_directories' operation should be it, assuming the history | ||
was created by onyo commands. | ||
However, if the history was created using the python interface, that assumption wouldn't hold | ||
and we'd have to trace back moves and renames in order to know what path or file name we are looking | ||
to match against these operations. | ||
""" | ||
if self.repo and self['onyo.path.absolute']: | ||
for commit in self.repo.get_history(self['onyo.path.file']): # pyre-ignore[16] | ||
if 'operations' in commit: | ||
if (self['onyo.is.asset'] and commit['operations']['new_assets']) or \ | ||
(self['onyo.is.directory'] and commit['operations']['new_directories']): | ||
self['onyo.was.created'] = commit.data | ||
return commit[what] if what else None | ||
return None | ||
|
||
def fill_modified(self, what: str | None): | ||
"""Initializer for the 'onyo.was.modified' pseudo-keys. | ||
|
||
Fills in the entire sub-dict and returns the value specified by ``what``. | ||
|
||
Note/TODO: | ||
---------- | ||
See ``fill_created``. | ||
""" | ||
if self.repo and self['onyo.path.absolute']: | ||
for commit in self.repo.get_history(self['onyo.path.file']): # pyre-ignore[16] | ||
if 'operations' in commit: | ||
if (self['onyo.is.asset'] and | ||
(commit['operations']['modify_assets'] or | ||
commit['operations']['new_assets'])) or \ | ||
(self['onyo.is.directory'] and | ||
(commit['operations']['new_directories'] or | ||
commit['operations']['move_directories'] or | ||
commit['operations']['rename_directories'])): | ||
self['onyo.was.modified'] = commit.data | ||
return commit[what] if what else None | ||
return None | ||
|
||
def get_path_absolute(self): | ||
"""Initializer for the 'onyo.path.absolute' pseudo-key.""" | ||
|
@@ -125,7 +165,14 @@ def get_path_parent(self): | |
|
||
def get_path_file(self): | ||
"""Initializer for the 'onyo.path.file' pseudo-key.""" | ||
return self._path.relative_to(self.repo.git.root) if self.repo and self['onyo.is.asset'] else None | ||
if self.repo and self['onyo.path.absolute']: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know why, but this function evokes very positive emotions. This was definitely the right design decision. :-D |
||
if self['onyo.is.asset'] and not self['onyo.is.directory']: | ||
return self['onyo.path.relative'] | ||
if self['onyo.is.asset'] and self['onyo.is.directory']: | ||
return self['onyo.path.relative'] / onyo.lib.onyo.OnyoRepo.ASSET_DIR_FILE_NAME | ||
if self['onyo.is.directory'] and not self['onyo.is.asset']: | ||
return self['onyo.path.relative'] / onyo.lib.onyo.OnyoRepo.ANCHOR_FILE_NAME | ||
return None | ||
|
||
def is_asset(self) -> bool | None: | ||
"""Initializer for the 'onyo.is.asset' pseudo-key.""" | ||
|
@@ -147,7 +194,10 @@ def is_template(self) -> bool | None: | |
|
||
def is_empty(self) -> bool | None: | ||
"""Initializer for the 'onyo.is.empty' pseudo-key.""" | ||
return None # TODO: Unclear what exactly this needs to consider. Probably child items? Or just assets? | ||
if self['onyo.is.directory'] and self.repo and self._path: | ||
# TODO: This likely can be faster when redoing/enhancing caching of repo paths. | ||
return not any(p.parent == self._path for p in self.repo.get_asset_paths()) | ||
return None | ||
|
||
# TODO/Notes for next PR(s): | ||
# - Bug/Missing feature: pseudo-keys that are supposed to be settable by commands, are not yet | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,12 @@ class PseudoKey: | |
This Callable is expected to have a single parameter of type `Item`. | ||
""" | ||
|
||
def __eq__(self, other): | ||
if not isinstance(other, PseudoKey): | ||
return False | ||
|
||
# TODO: This isn't clean yet, since it relies on `implementation` being a `partial`: | ||
return self.description == other.description and self.implementation.func == other.implementation.func | ||
|
||
def delegate(self: Item, attribute: str, *args, **kwargs): | ||
# This is to avoid circular imports ATM. | ||
|
@@ -56,6 +62,48 @@ def delegate(self: Item, attribute: str, *args, **kwargs): | |
'onyo.is.empty': PseudoKey(description="Is the directory empty. <unset> if the item is not a directory.", | ||
implementation=partial(delegate, attribute='is_empty') | ||
), | ||
'onyo.was.modified.hexsha': PseudoKey(description="SHA of the last commit that modified the item.", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: "last" is just vague enough to be problematic. In this case, you mean "last" as in "most recent". |
||
implementation=partial(delegate, attribute='fill_modified', what='hexsha') | ||
), | ||
'onyo.was.modified.time': PseudoKey(description="Time of the last commit that modified the item.", | ||
implementation=partial(delegate, attribute='fill_modified', what='time')), | ||
'onyo.was.modified.author.name': PseudoKey( | ||
description="Name of the author of the last commit that modified the item.", | ||
implementation=partial(delegate, attribute='fill_modified', what='author.time') | ||
), | ||
'onyo.was.modified.author.email': PseudoKey( | ||
description="Email of the author of the last commit that modified the item.", | ||
implementation=partial(delegate, attribute='fill_modified', what='author.email') | ||
), | ||
'onyo.was.modified.committer.name': PseudoKey( | ||
description="Name of the committer of the last commit that modified the item.", | ||
implementation=partial(delegate, attribute='fill_modified', what='committer.name') | ||
), | ||
'onyo.was.modified.committer.email': PseudoKey( | ||
description="Email of the committer of the last commit that modified the item.", | ||
implementation=partial(delegate, attribute='fill_modified', what='committer.email') | ||
), | ||
'onyo.was.created.hexsha': PseudoKey(description="SHA of the last commit that created the item.", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps drop "last" here? I see your programmatic point. Onyo is searching for the most recent commit... but if I would read this, then I would want to read the 1-2 sentences that explain the corner cases this is avoiding. |
||
implementation=partial(delegate, attribute='fill_created', what='hexsha') | ||
), | ||
'onyo.was.created.time': PseudoKey(description="Time of the last commit that created the item.", | ||
implementation=partial(delegate, attribute='fill_created', what='time')), | ||
'onyo.was.created.author.name': PseudoKey( | ||
description="Name of the author of the last commit that created the item.", | ||
implementation=partial(delegate, attribute='fill_created', what='author.time') | ||
), | ||
'onyo.was.created.author.email': PseudoKey( | ||
description="Email of the author of the last commit that created the item.", | ||
implementation=partial(delegate, attribute='fill_created', what='author.email') | ||
), | ||
'onyo.was.created.committer.name': PseudoKey( | ||
description="Name of the committer of the last commit that created the item.", | ||
implementation=partial(delegate, attribute='fill_created', what='committer.name') | ||
), | ||
'onyo.was.created.committer.email': PseudoKey( | ||
description="Email of the committer of the last commit that created the item.", | ||
implementation=partial(delegate, attribute='fill_created', what='committer.email') | ||
), | ||
} | ||
r"""Pseudo-Keys are key names that are addressable but not written to disk in asset YAML. | ||
|
||
|
@@ -66,33 +114,6 @@ def delegate(self: Item, attribute: str, *args, **kwargs): | |
RESERVED_KEYS | ||
""" | ||
|
||
# 'onyo.git.created.time': PseudoKey(description="Datetime the inventory item was created.", | ||
# implementation=partial(delegate, | ||
# attribute='fill_created', | ||
# what='time') | ||
# # or onyo.git.created.time for an "return self.get(what)" in implementation? | ||
# ), | ||
# 'onyo.git.created.commit': PseudoKey(description="Commit SHA of the commit the object was created in", | ||
# implementation=partial(delegate, | ||
# attribute='fill_created', | ||
# what='SHA') | ||
# ), | ||
|
||
# 'onyo.git.created.committer.name': None, | ||
# 'onyo.git.created.committer.email': None, | ||
# 'onyo.git.created.author.name': None, | ||
# 'onyo.git.created.author.email': None, | ||
# 'onyo.git.modified.time': None, | ||
# 'onyo.git.modified.commit': None, | ||
# 'onyo.git.modified.committer.name': None, | ||
# 'onyo.git.modified.committer.email': None, | ||
# 'onyo.git.modified.author.name': None, | ||
# 'onyo.git.modified.author.email': None, | ||
# | ||
# }, | ||
# } | ||
# } | ||
|
||
# Hardcode aliases for now: | ||
# Introduction of proper aliases requires config cache first. | ||
PSEUDOKEY_ALIASES: Dict[str, str] = { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Proposal: this function can be simplified if all lines are put into one large string.
That string is then split into two pieces, splitting on the first entirely empty line.
The first half is the header. The second half is the message.
The regex already checks for "Author", "Commit", etc. Have them all run in one large try statement on the header text, and all get caught by the same exception handling.
Then the message is dealt with separately, and simply assigns the second half.