Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pseudo-Keys Part II: history metadata #738

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions onyo/lib/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,8 +408,8 @@ def check_ignore(self, ignore: Path, paths: list[Path]) -> list[Path]:
def _parse_log_output(self, lines: list[str]) -> dict:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Proposal: this function can be simplified if all lines are put into one large string.

That string is then split into two pieces, splitting on the first entirely empty line.

The first half is the header. The second half is the message.

The regex already checks for "Author", "Commit", etc. Have them all run in one large try statement on the header text, and all get caught by the same exception handling.

Then the message is dealt with separately, and simply assigns the second half.

"""Generate a dict from the output of ``git log`` for one commit.

Internal helper that gets a list of git-log-output lines,
that represent a single commit and generates a dictionary from it.
Internal helper that converts a list of git-log-output lines of
a single commit to a dictionary.
"""
import datetime
import re
Expand All @@ -418,7 +418,7 @@ def _parse_log_output(self, lines: list[str]) -> dict:
commit = dict()
for line in lines:
if line.startswith('commit '):
commit['commit'] = line.split()[1]
commit['hexsha'] = line.split()[1]
continue
elif line.startswith('Author:'):
try:
Expand Down Expand Up @@ -450,21 +450,24 @@ def _parse_log_output(self, lines: list[str]) -> dict:

return commit

def follow(self, path: Path | None = None, n: int | None = None) -> Generator[dict, None, None]:
def history(self, path: Path | None = None, n: int | None = None) -> Generator[dict, None, None]:
"""Yields commit dicts representing the history of ``path``.

History according to ``git log --follow`` if a path is given
or ``git log`` otherwise.
History according to git log (git log --follow if a path is given).

Parameters
----------
path: Path, optional
path:
What file to follow. If `None`, get the history of HEAD instead.
n: int, optional
n:
Limit history going back ``n`` commits.
``None`` for no limit (default).
"""
# TODO: Do we want to distinguish methods for `git log` (path optional) vs `git log --follow` (requires a path)?
# TODO: Something like this may allow us to efficiently deal with multiline strings in git-log's output,
# rather than splitting lines and iterating over them when assembling output that belongs to a single
# commit, parsing the git data and then the operations record (also removes leading spaces for the commit
# message):
# --pretty='format:commit: %H%nAuthor: %an (%ae)%nCommitter: %cn (%ce)%nCommitDate: %cI%nMessage:%n%B'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice. +1

limit = [f'-n{n}'] if n is not None else []
pathspec = ['--follow', '--', str(path)] if path else []
cmd = ['log', '--date=iso-strict', '--pretty=fuller'] + limit + pathspec
Expand Down
3 changes: 1 addition & 2 deletions onyo/lib/inventory.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,5 @@ def raise_empty_keys(self, asset: dict | UserDict) -> None:
# Hence, `None` as a key would show up here as 'None'.
raise ValueError("Keys are not allowed to be empty or None-values.")

def get_history(self, path: Path | None = None, n: int | None = None):
# TODO: Not sure we need this one yet. We get the data from pseudo-keys.
def get_history(self, path: Path | None = None, n: int | None = None) -> Generator[UserDict, None, None]:
yield from self.repo.get_history(path, n)
74 changes: 62 additions & 12 deletions onyo/lib/items.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from copy import deepcopy
from pathlib import Path
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -51,7 +52,10 @@ def __init__(self,
self.update(onyo.lib.pseudokeys.PSEUDO_KEYS)
self._path = None

if isinstance(item, Path):
if isinstance(item, Item):
self._path = item._path
self.data = deepcopy(item.data)
elif isinstance(item, Path):
assert item.is_absolute() # currently no support for relative. This is how all existing code should work ATM.
self.update_from_path(item)
elif item is not None:
Expand Down Expand Up @@ -95,15 +99,51 @@ def update_from_path(self, path: Path):
loader = self.repo.get_asset_content if self.repo else get_asset_content
self.update(loader(path))

# TODO: git metadata:
# def fill_created(self, what: str | None):
# # self.repo.git.xxx (self.get('onyo.path.file'))
# # fill in all created keys
# # switch `what` -> return whatever part was requesting this
# pass#raise NotImplementedError
#
# def fill_modified(self, what: str | None):
# pass#raise NotImplementedError
def fill_created(self, what: str | None):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I propose what -> key.

I havn't read the rest of the code, so I'm not sure how this function is being used yet, but I feel like this might not be the right way to do it.

I imagine a getter for the key, and then if the key is unset, running fill_created() (without an argument) which populates the fields, and then the getter looks up/returns the (now populated) key.

I'm not sold on the what (aka key) argument.

"""Initializer for the 'onyo.was.modified' pseudo-keys.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'onyo.was.modified' -> 'onyo.was.created'


Fills in the entire sub-dict and returns the value specified by ``what``.

Note/TODO:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO this is not so much a TODO.

It's a note that says: official onyo commands do this right. It is possible for others to generate commits that are simply too insane for Onyo to anticipate. They need to take care. You have been warned.

----------
This is currently based on ``git log --follow <path>``. Looking back, the first appearance
of a 'new_assets'/'new_directories' operation should be it, assuming the history
was created by onyo commands.
However, if the history was created using the python interface, that assumption wouldn't hold
and we'd have to trace back moves and renames in order to know what path or file name we are looking
to match against these operations.
"""
if self.repo and self['onyo.path.absolute']:
for commit in self.repo.get_history(self['onyo.path.file']): # pyre-ignore[16]
if 'operations' in commit:
if (self['onyo.is.asset'] and commit['operations']['new_assets']) or \
(self['onyo.is.directory'] and commit['operations']['new_directories']):
self['onyo.was.created'] = commit.data
return commit[what] if what else None
return None

def fill_modified(self, what: str | None):
"""Initializer for the 'onyo.was.modified' pseudo-keys.

Fills in the entire sub-dict and returns the value specified by ``what``.

Note/TODO:
----------
See ``fill_created``.
"""
if self.repo and self['onyo.path.absolute']:
for commit in self.repo.get_history(self['onyo.path.file']): # pyre-ignore[16]
if 'operations' in commit:
if (self['onyo.is.asset'] and
(commit['operations']['modify_assets'] or
commit['operations']['new_assets'])) or \
(self['onyo.is.directory'] and
(commit['operations']['new_directories'] or
commit['operations']['move_directories'] or
commit['operations']['rename_directories'])):
self['onyo.was.modified'] = commit.data
return commit[what] if what else None
return None

def get_path_absolute(self):
"""Initializer for the 'onyo.path.absolute' pseudo-key."""
Expand All @@ -125,7 +165,14 @@ def get_path_parent(self):

def get_path_file(self):
"""Initializer for the 'onyo.path.file' pseudo-key."""
return self._path.relative_to(self.repo.git.root) if self.repo and self['onyo.is.asset'] else None
if self.repo and self['onyo.path.absolute']:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know why, but this function evokes very positive emotions.

This was definitely the right design decision. :-D

if self['onyo.is.asset'] and not self['onyo.is.directory']:
return self['onyo.path.relative']
if self['onyo.is.asset'] and self['onyo.is.directory']:
return self['onyo.path.relative'] / onyo.lib.onyo.OnyoRepo.ASSET_DIR_FILE_NAME
if self['onyo.is.directory'] and not self['onyo.is.asset']:
return self['onyo.path.relative'] / onyo.lib.onyo.OnyoRepo.ANCHOR_FILE_NAME
return None

def is_asset(self) -> bool | None:
"""Initializer for the 'onyo.is.asset' pseudo-key."""
Expand All @@ -147,7 +194,10 @@ def is_template(self) -> bool | None:

def is_empty(self) -> bool | None:
"""Initializer for the 'onyo.is.empty' pseudo-key."""
return None # TODO: Unclear what exactly this needs to consider. Probably child items? Or just assets?
if self['onyo.is.directory'] and self.repo and self._path:
# TODO: This likely can be faster when redoing/enhancing caching of repo paths.
return not any(p.parent == self._path for p in self.repo.get_asset_paths())
return None

# TODO/Notes for next PR(s):
# - Bug/Missing feature: pseudo-keys that are supposed to be settable by commands, are not yet
Expand Down
15 changes: 10 additions & 5 deletions onyo/lib/onyo.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@
from .utils import get_asset_content, write_asset_file

if TYPE_CHECKING:
from typing import Iterable, List
from typing import (
Generator,
Iterable,
List,
)
from collections import UserDict

log: logging.Logger = logging.getLogger('onyo.onyo')
Expand Down Expand Up @@ -739,14 +743,15 @@ def commit(self, paths: Iterable[Path] | Path, message: str):
self.git.commit(paths=paths, message=message)
self.clear_cache()

def get_history(self, path: Path | None = None, n: int | None = None):
def get_history(self, path: Path | None = None, n: int | None = None) -> Generator[UserDict, None, None]:
# TODO: This isn't quite right yet. operations records are defined in Inventory.
# But Inventory shouldn't talk to GitRepo directly. So, either pass the record
# from Inventory to OnyoRepo and turn it into a commit-message part only,
# or have sort of a proxy in OnyoRepo.
# -> May be: get_history(Item) in Inventory and get_history(path) in OnyoRepo.
from onyo.lib.decoder import parse_operations_record
for commit in self.git.follow(path, n):
from onyo.lib.parser import parse_operations_record
from onyo.lib.utils import DotNotationWrapper
for commit in self.git.history(path, n):
record = []
start = False
for line in commit['message']:
Expand All @@ -757,4 +762,4 @@ def get_history(self, path: Path | None = None, n: int | None = None):
if record:

commit['operations'] = parse_operations_record(record)
yield commit
yield DotNotationWrapper(commit)
2 changes: 1 addition & 1 deletion onyo/lib/decoder.py → onyo/lib/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from onyo.lib.inventory import OPERATIONS_MAPPING


def parse_operations_record(record: list[str]):
def parse_operations_record(record: list[str]) -> dict:

if not record[0].strip() == "--- Inventory Operations ---":
raise RuntimeError("Invalid operations record.")
Expand Down
75 changes: 48 additions & 27 deletions onyo/lib/pseudokeys.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ class PseudoKey:
This Callable is expected to have a single parameter of type `Item`.
"""

def __eq__(self, other):
if not isinstance(other, PseudoKey):
return False

# TODO: This isn't clean yet, since it relies on `implementation` being a `partial`:
return self.description == other.description and self.implementation.func == other.implementation.func

def delegate(self: Item, attribute: str, *args, **kwargs):
# This is to avoid circular imports ATM.
Expand Down Expand Up @@ -56,6 +62,48 @@ def delegate(self: Item, attribute: str, *args, **kwargs):
'onyo.is.empty': PseudoKey(description="Is the directory empty. <unset> if the item is not a directory.",
implementation=partial(delegate, attribute='is_empty')
),
'onyo.was.modified.hexsha': PseudoKey(description="SHA of the last commit that modified the item.",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: "last" is just vague enough to be problematic. In this case, you mean "last" as in "most recent".

implementation=partial(delegate, attribute='fill_modified', what='hexsha')
),
'onyo.was.modified.time': PseudoKey(description="Time of the last commit that modified the item.",
implementation=partial(delegate, attribute='fill_modified', what='time')),
'onyo.was.modified.author.name': PseudoKey(
description="Name of the author of the last commit that modified the item.",
implementation=partial(delegate, attribute='fill_modified', what='author.time')
),
'onyo.was.modified.author.email': PseudoKey(
description="Email of the author of the last commit that modified the item.",
implementation=partial(delegate, attribute='fill_modified', what='author.email')
),
'onyo.was.modified.committer.name': PseudoKey(
description="Name of the committer of the last commit that modified the item.",
implementation=partial(delegate, attribute='fill_modified', what='committer.name')
),
'onyo.was.modified.committer.email': PseudoKey(
description="Email of the committer of the last commit that modified the item.",
implementation=partial(delegate, attribute='fill_modified', what='committer.email')
),
'onyo.was.created.hexsha': PseudoKey(description="SHA of the last commit that created the item.",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps drop "last" here? I see your programmatic point. Onyo is searching for the most recent commit... but if I would read this, then I would want to read the 1-2 sentences that explain the corner cases this is avoiding.

implementation=partial(delegate, attribute='fill_created', what='hexsha')
),
'onyo.was.created.time': PseudoKey(description="Time of the last commit that created the item.",
implementation=partial(delegate, attribute='fill_created', what='time')),
'onyo.was.created.author.name': PseudoKey(
description="Name of the author of the last commit that created the item.",
implementation=partial(delegate, attribute='fill_created', what='author.time')
),
'onyo.was.created.author.email': PseudoKey(
description="Email of the author of the last commit that created the item.",
implementation=partial(delegate, attribute='fill_created', what='author.email')
),
'onyo.was.created.committer.name': PseudoKey(
description="Name of the committer of the last commit that created the item.",
implementation=partial(delegate, attribute='fill_created', what='committer.name')
),
'onyo.was.created.committer.email': PseudoKey(
description="Email of the committer of the last commit that created the item.",
implementation=partial(delegate, attribute='fill_created', what='committer.email')
),
}
r"""Pseudo-Keys are key names that are addressable but not written to disk in asset YAML.

Expand All @@ -66,33 +114,6 @@ def delegate(self: Item, attribute: str, *args, **kwargs):
RESERVED_KEYS
"""

# 'onyo.git.created.time': PseudoKey(description="Datetime the inventory item was created.",
# implementation=partial(delegate,
# attribute='fill_created',
# what='time')
# # or onyo.git.created.time for an "return self.get(what)" in implementation?
# ),
# 'onyo.git.created.commit': PseudoKey(description="Commit SHA of the commit the object was created in",
# implementation=partial(delegate,
# attribute='fill_created',
# what='SHA')
# ),

# 'onyo.git.created.committer.name': None,
# 'onyo.git.created.committer.email': None,
# 'onyo.git.created.author.name': None,
# 'onyo.git.created.author.email': None,
# 'onyo.git.modified.time': None,
# 'onyo.git.modified.commit': None,
# 'onyo.git.modified.committer.name': None,
# 'onyo.git.modified.committer.email': None,
# 'onyo.git.modified.author.name': None,
# 'onyo.git.modified.author.email': None,
#
# },
# }
# }

# Hardcode aliases for now:
# Introduction of proper aliases requires config cache first.
PSEUDOKEY_ALIASES: Dict[str, str] = {
Expand Down
8 changes: 6 additions & 2 deletions onyo/lib/tests/test_commands_new.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,12 @@ def test_onyo_new_clones(inventory: Inventory) -> None:
assert inventory.repo.is_asset_path(new_asset_path1)
new_asset = inventory.get_asset(new_asset_path1)
# equals existing asset except for path-pseudo-keys and serial:
# Actually: history differs as well. onyo.is. doesn't, though
for k, v in existing_asset.items():
if k != "serial" and not k.startswith('onyo.path.') and not k.startswith('onyo.was.'):
assert v == new_asset[k], f"{k}: {v} != {new_asset[k]}"
assert all(v == new_asset[k] for k, v in existing_asset.items()
if k != "serial" and not k.startswith('onyo.path'))
if k != "serial" and not k.startswith('onyo.path') and not k.startswith('onyo.was.'))
assert new_asset['serial'] == 'ANOTHER'

# second new asset
Expand All @@ -275,7 +279,7 @@ def test_onyo_new_clones(inventory: Inventory) -> None:
new_asset = inventory.get_asset(new_asset_path2)
# equals existing asset except for path-pseudo-keys and serial:
assert all(v == new_asset[k] for k, v in existing_asset.items()
if k != "serial" and not k.startswith('onyo.path'))
if k != "serial" and not k.startswith('onyo.path') and not k.startswith('onyo.was.'))
assert new_asset['serial'] == 'whatever'
assert inventory.repo.git.is_clean_worktree()

Expand Down
Loading