Skip to content

Commit

Permalink
Expand validation scripts into 'codetidy' package
Browse files Browse the repository at this point in the history
We're expanding this project to manage all kinds of code tidy up actions
like: validate doctrings, validate return types, find poor names etc.

The name 'codetidy' seems like a suitable package name for this group of
activities.

--

In more detail

We were getting some duplication across scripts so it is time to
refactor. We extracted `CodeBlock` into its own class so we could get
access to the function definitions, nodes, tree etc for a specific code
block.

We also use the strategy pattern to combine filters together and filter
the function definitions by those filters. For instance we want to find
all docstrings that are adjacent to the code. Or find any standalone
functions or methods that are missing return types.

Note: The `ast.FunctionDef` discovers both standalone functions and
methods inside classes. In the ast module, `ast.FunctionDef` is used to
represent both functions and methods. In Python's abstract syntax tree
(AST), there is no separate node type for methods; both functions and
methods are represented by ast.FunctionDef nodes.

We've also added a script to find poor names in the code base so we can
tidy those up.

Now we have three types of validation:
- Find poor names
- Validate docstrings
- Validate return types

The rest can be done via Pylance and the pyrightconfig.json config file.
  • Loading branch information
webventurer committed Nov 19, 2024
1 parent 01c8881 commit 5a21146
Show file tree
Hide file tree
Showing 10 changed files with 391 additions and 133 deletions.
38 changes: 0 additions & 38 deletions .vscode/tasks.json

This file was deleted.

7 changes: 7 additions & 0 deletions admin/codetidy/.inspectignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
admin
venv
.venv
.git
.vscode
__pycache__
thirdparty
23 changes: 23 additions & 0 deletions admin/codetidy/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# ruff: noqa

from typing import Callable

__all__: list[str] = []


def export(defn: Callable) -> Callable:
"""Add this decorator to any func you wish to import into this namespace"""

globals()[defn.__name__] = defn
__all__.append(defn.__name__)
return defn


# Needed for Pylance type checks or it will complain about missing imports
def __getattr__(name: str) -> Callable:
if name in globals():
return globals()[name]
raise AttributeError(f"module {__name__} has no attribute {name}")


from . import inspector
40 changes: 40 additions & 0 deletions admin/codetidy/find_poor_names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Find poor variable name patterns in the codebase"""

__package__ = __import__("config").infer_package(__file__) # Allow rel imports


import click

from . import inspector

filters = [
inspector.vars_that_start_with_df,
inspector.has_type_prefix,
inspector.has_type_suffix,
inspector.shadows_builtin,
inspector.is_meaningless_name,
inspector.has_bad_plural,
]

filters_for_later = [
inspector.is_generic_name,
inspector.has_number_in_name,
inspector.is_too_long,
inspector.is_confusing_abbreviation,
]


def find_poor_names(file: str):
block = inspector.CodeBlock.from_file(file)
for name in block.find_matching_variables(filters):
print(f"{file:<60} Poor name: {name}")


@click.command()
@click.argument("path", required=True, default=".")
def main(path: str):
inspector.process_files(path, find_poor_names)


if __name__ == "__main__":
main()
204 changes: 204 additions & 0 deletions admin/codetidy/inspector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
"""Inspect Python code for for specific patterns we can fix manually"""

import ast
import os
import sys
from typing import Callable, Self, TypeGuard

from . import export


class CodeBlock:
@classmethod
def from_file(cls, filename: str) -> Self:
with open(filename, "r") as f:
return cls(f.read())

def __init__(self, code: str):
self.code = code

@property
def tree(self) -> ast.Module:
return ast.parse(self.code)

@property
def lines(self) -> list[str]:
return self.code.splitlines()

@property
def nodes(self) -> list[ast.AST]:
return list(ast.walk(self.tree))

@property
def func_defs(self) -> list[ast.FunctionDef]:
return [n for n in self.nodes if isinstance(n, ast.FunctionDef)]

def match_all_cond(self, f: ast.FunctionDef, conds: list[Callable]) -> bool:
return all(cond(f, self.lines) for cond in conds)

def is_variable_node(self, node: ast.AST) -> TypeGuard[ast.Name]:
return isinstance(node, ast.Name) and isinstance(node.ctx, ast.Store)

def matches_any_pattern(self, node: ast.AST, patterns: list) -> bool:
return any(pattern(node) for pattern in patterns)

def get_variable_nodes(self) -> list[ast.Name]:
return [node for node in self.nodes if self.is_variable_node(node)]

def get_matching_names(self, nodes: list[ast.Name], patterns: list) -> list:
return [n.id for n in nodes if self.matches_any_pattern(n, patterns)]

def find_matching_functions(self, funcs: list[Callable]) -> list[str]:
return [f.name for f in self.func_defs if self.match_all_cond(f, funcs)]

def find_matching_variables(self, patterns: list[Callable]) -> list[str]:
return self.get_matching_names(self.get_variable_nodes(), patterns)


# Function definition filters


@export
def has_docstring(node: ast.FunctionDef, *args: tuple) -> bool:
return ast.get_docstring(node) is not None


@export
def docstring_adjacent_to_code(node: ast.FunctionDef, lines: list[str]) -> bool:
if (docstring_end_lineno := node.body[0].end_lineno) is None:
return False
return bool(
docstring_end_lineno < len(lines)
and lines[docstring_end_lineno].strip()
)


def is_node_with_lineno(node: ast.AST) -> TypeGuard[ast.stmt | ast.expr]:
"""Only ast.stmt and ast.expr types guaranteed to have lineno attribute."""

return isinstance(node, (ast.stmt, ast.expr))


@export
def no_type_ignore_comment(node: ast.AST, lines: list[str]) -> bool:
if not is_node_with_lineno(node):
return False
return "# type: ignore" not in lines[node.lineno - 1]


@export
def callables_with_return_values(node: ast.FunctionDef, *args: tuple) -> bool:
return any(
isinstance(subnode, ast.Return) and subnode.value is not None
for subnode in ast.walk(node)
)


@export
def no_return_type_annotation(node: ast.FunctionDef, *args: tuple) -> bool:
return node.returns is None


# Variable filters


@export
def vars_that_start_with_df(node: ast.AST, starts_with: str = "df_") -> bool:
return isinstance(node, ast.Name) and node.id.startswith(starts_with)


@export
def has_type_prefix(node: ast.AST) -> bool:
pf = ("str_", "dict_", "list_", "int_", "float_", "bool_")
return isinstance(node, ast.Name) and any(node.id.startswith(p) for p in pf)


@export
def has_type_suffix(node: ast.AST) -> bool:
sf = ("_str", "_dict", "_list", "_int", "_float", "_bool")
return isinstance(node, ast.Name) and any(node.id.endswith(p) for p in sf)


@export
def is_generic_name(node: ast.AST) -> bool:
generic_names = {"data", "info", "result", "temp"}
return isinstance(node, ast.Name) and node.id in generic_names


@export
def shadows_builtin(node: ast.AST) -> bool:
builtin_names = {"list", "dict", "str", "min", "max", "id", "type", "def"}
return isinstance(node, ast.Name) and node.id in builtin_names


@export
def is_meaningless_name(node: ast.AST) -> bool:
meaningless = {"thing", "stuff", "xyz"}
return isinstance(node, ast.Name) and node.id in meaningless


@export
def has_number_in_name(node: ast.AST) -> bool:
return isinstance(node, ast.Name) and any(c.isdigit() for c in node.id)


@export
def is_too_long(node: ast.AST, max_length: int = 30) -> bool:
return isinstance(node, ast.Name) and len(node.id) > max_length


@export
def is_confusing_abbreviation(node: ast.AST) -> bool:
abbreviations = {"num", "val", "calc", "arr", "tmp", "usr", "cfg"}
return isinstance(node, ast.Name) and node.id in abbreviations


@export
def has_bad_plural(node: ast.AST) -> bool:
bad_plurals = {"datas", "informations", "content_stuff"}
return isinstance(node, ast.Name) and node.id in bad_plurals


@export
def has_mixed_case(node: ast.AST) -> bool:
if not isinstance(node, ast.Name):
return False
has_camel = any(c.isupper() for c in node.id[1:])
has_screaming = node.id.isupper() and "_" in node.id
return has_camel or has_screaming


# File processing


def is_test_context() -> bool:
test_runners = {"test_", "unittest", "nose2", "pytest"}
path_parts = sys.argv[0].split(os.sep)
return any(runner in path for path in path_parts for runner in test_runners)


def read_ignore_file(path: str) -> list[str]:
with open(path, "r") as f:
return [line.strip() for line in f]


def filter_ignore_lines(lines: list[str]) -> list[str]:
return [line for line in lines if not line.startswith("#")]


def load_ignore_paths(path: str = ".inspectignore") -> list[str]:
return filter_ignore_lines(read_ignore_file(path))


IGNORE_PATHS = [] if is_test_context() else load_ignore_paths()


def filter_dirs(dirs: list[str]) -> list[str]:
return [d for d in dirs if d not in IGNORE_PATHS]


def process_files(directory: str, file_processor: Callable):
for root, dirs, files in os.walk(directory):
dirs[:] = filter_dirs(dirs)
for file in (f for f in files if f.endswith(".py")):
file_processor(os.path.join(root, file))
16 changes: 16 additions & 0 deletions admin/type_checks/foo.py → admin/codetidy/test_file.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""Some test functions to run the validator scripts against"""


def a_function(a: int, b: int):
return a + b

Expand All @@ -6,6 +9,19 @@ def function_returns_none():
pass


def a_func_with_a_docstring_and_no_space_after(a: int, b: int) -> int:
"""
This is a docstring.
"""
return a + b


def a_func_with_a_docstring_and_a_newline_after(a: int, b: int) -> int:
"""This is a docstring"""

return a + b


class Foo:
def __init__(self, a: int, b: int):
self.a = a
Expand Down
30 changes: 30 additions & 0 deletions admin/codetidy/validate_docstrings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""
A simple script to find all functions in a Python project that have a docstring
but do not have a blank line immediately following the docstring. We can then
add the blank line manually for better readability.
"""

__package__ = __import__("config").infer_package(__file__) # Allow rel imports


import click

from . import inspector

filters = [inspector.has_docstring, inspector.docstring_adjacent_to_code]


def find_functions_without_blank_line(file: str):
block = inspector.CodeBlock.from_file(file)
if callables := block.find_matching_functions(filters):
print(f"{file:<40} No blank line after docstring: {callables}")


@click.command()
@click.argument("path", required=True, default=".")
def main(path: str):
inspector.process_files(path, find_functions_without_blank_line)


if __name__ == "__main__":
main()
Loading

0 comments on commit 5a21146

Please sign in to comment.