diff --git a/.vscode/tasks.json b/.vscode/tasks.json deleted file mode 100644 index 15810d5..0000000 --- a/.vscode/tasks.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "version": "2.0.0", - "tasks": [ - { - "label": "Check for missing return types", - "type": "shell", - "command": ".venv/bin/python", - "args": [ - "admin/type_checks/callables_with_no_return_types.py", - "." - ], - "isBackground": true, - "problemMatcher": { - "owner": "custom", - "fileLocation": [ - "relative", - "${workspaceFolder}" - ], - "pattern": [ - { - "regexp": "^(.+?)\\s{2,}(.*)$", - "file": 1, - "line": 1, - "message": 2 - } - ], - "background": { - "beginsPattern": "^.*Starting.*$", - "endsPattern": "^.*Finished.*$" - } - }, - "group": { - "kind": "build", - "isDefault": true - } - } - ] -} diff --git a/admin/codetidy/.inspectignore b/admin/codetidy/.inspectignore new file mode 100644 index 0000000..78ca896 --- /dev/null +++ b/admin/codetidy/.inspectignore @@ -0,0 +1,7 @@ +admin +venv +.venv +.git +.vscode +__pycache__ +thirdparty diff --git a/admin/codetidy/__init__.py b/admin/codetidy/__init__.py new file mode 100644 index 0000000..9831548 --- /dev/null +++ b/admin/codetidy/__init__.py @@ -0,0 +1,23 @@ +# ruff: noqa + +from typing import Callable + +__all__: list[str] = [] + + +def export(defn: Callable) -> Callable: + """Add this decorator to any func you wish to import into this namespace""" + + globals()[defn.__name__] = defn + __all__.append(defn.__name__) + return defn + + +# Needed for Pylance type checks or it will complain about missing imports +def __getattr__(name: str) -> Callable: + if name in globals(): + return globals()[name] + raise AttributeError(f"module {__name__} has no attribute {name}") + + +from . import inspector diff --git a/admin/codetidy/find_poor_names.py b/admin/codetidy/find_poor_names.py new file mode 100644 index 0000000..f971a65 --- /dev/null +++ b/admin/codetidy/find_poor_names.py @@ -0,0 +1,40 @@ +"""Find poor variable name patterns in the codebase""" + +__package__ = __import__("config").infer_package(__file__) # Allow rel imports + + +import click + +from . import inspector + +filters = [ + inspector.vars_that_start_with_df, + inspector.has_type_prefix, + inspector.has_type_suffix, + inspector.shadows_builtin, + inspector.is_meaningless_name, + inspector.has_bad_plural, +] + +filters_for_later = [ + inspector.is_generic_name, + inspector.has_number_in_name, + inspector.is_too_long, + inspector.is_confusing_abbreviation, +] + + +def find_poor_names(file: str): + block = inspector.CodeBlock.from_file(file) + for name in block.find_matching_variables(filters): + print(f"{file:<60} Poor name: {name}") + + +@click.command() +@click.argument("path", required=True, default=".") +def main(path: str): + inspector.process_files(path, find_poor_names) + + +if __name__ == "__main__": + main() diff --git a/admin/codetidy/inspector.py b/admin/codetidy/inspector.py new file mode 100644 index 0000000..2fb9623 --- /dev/null +++ b/admin/codetidy/inspector.py @@ -0,0 +1,204 @@ +"""Inspect Python code for for specific patterns we can fix manually""" + +import ast +import os +import sys +from typing import Callable, Self, TypeGuard + +from . import export + + +class CodeBlock: + @classmethod + def from_file(cls, filename: str) -> Self: + with open(filename, "r") as f: + return cls(f.read()) + + def __init__(self, code: str): + self.code = code + + @property + def tree(self) -> ast.Module: + return ast.parse(self.code) + + @property + def lines(self) -> list[str]: + return self.code.splitlines() + + @property + def nodes(self) -> list[ast.AST]: + return list(ast.walk(self.tree)) + + @property + def func_defs(self) -> list[ast.FunctionDef]: + return [n for n in self.nodes if isinstance(n, ast.FunctionDef)] + + def match_all_cond(self, f: ast.FunctionDef, conds: list[Callable]) -> bool: + return all(cond(f, self.lines) for cond in conds) + + def is_variable_node(self, node: ast.AST) -> TypeGuard[ast.Name]: + return isinstance(node, ast.Name) and isinstance(node.ctx, ast.Store) + + def matches_any_pattern(self, node: ast.AST, patterns: list) -> bool: + return any(pattern(node) for pattern in patterns) + + def get_variable_nodes(self) -> list[ast.Name]: + return [node for node in self.nodes if self.is_variable_node(node)] + + def get_matching_names(self, nodes: list[ast.Name], patterns: list) -> list: + return [n.id for n in nodes if self.matches_any_pattern(n, patterns)] + + def find_matching_functions(self, funcs: list[Callable]) -> list[str]: + return [f.name for f in self.func_defs if self.match_all_cond(f, funcs)] + + def find_matching_variables(self, patterns: list[Callable]) -> list[str]: + return self.get_matching_names(self.get_variable_nodes(), patterns) + + +# Function definition filters + + +@export +def has_docstring(node: ast.FunctionDef, *args: tuple) -> bool: + return ast.get_docstring(node) is not None + + +@export +def docstring_adjacent_to_code(node: ast.FunctionDef, lines: list[str]) -> bool: + if (docstring_end_lineno := node.body[0].end_lineno) is None: + return False + return bool( + docstring_end_lineno < len(lines) + and lines[docstring_end_lineno].strip() + ) + + +def is_node_with_lineno(node: ast.AST) -> TypeGuard[ast.stmt | ast.expr]: + """Only ast.stmt and ast.expr types guaranteed to have lineno attribute.""" + + return isinstance(node, (ast.stmt, ast.expr)) + + +@export +def no_type_ignore_comment(node: ast.AST, lines: list[str]) -> bool: + if not is_node_with_lineno(node): + return False + return "# type: ignore" not in lines[node.lineno - 1] + + +@export +def callables_with_return_values(node: ast.FunctionDef, *args: tuple) -> bool: + return any( + isinstance(subnode, ast.Return) and subnode.value is not None + for subnode in ast.walk(node) + ) + + +@export +def no_return_type_annotation(node: ast.FunctionDef, *args: tuple) -> bool: + return node.returns is None + + +# Variable filters + + +@export +def vars_that_start_with_df(node: ast.AST, starts_with: str = "df_") -> bool: + return isinstance(node, ast.Name) and node.id.startswith(starts_with) + + +@export +def has_type_prefix(node: ast.AST) -> bool: + pf = ("str_", "dict_", "list_", "int_", "float_", "bool_") + return isinstance(node, ast.Name) and any(node.id.startswith(p) for p in pf) + + +@export +def has_type_suffix(node: ast.AST) -> bool: + sf = ("_str", "_dict", "_list", "_int", "_float", "_bool") + return isinstance(node, ast.Name) and any(node.id.endswith(p) for p in sf) + + +@export +def is_generic_name(node: ast.AST) -> bool: + generic_names = {"data", "info", "result", "temp"} + return isinstance(node, ast.Name) and node.id in generic_names + + +@export +def shadows_builtin(node: ast.AST) -> bool: + builtin_names = {"list", "dict", "str", "min", "max", "id", "type", "def"} + return isinstance(node, ast.Name) and node.id in builtin_names + + +@export +def is_meaningless_name(node: ast.AST) -> bool: + meaningless = {"thing", "stuff", "xyz"} + return isinstance(node, ast.Name) and node.id in meaningless + + +@export +def has_number_in_name(node: ast.AST) -> bool: + return isinstance(node, ast.Name) and any(c.isdigit() for c in node.id) + + +@export +def is_too_long(node: ast.AST, max_length: int = 30) -> bool: + return isinstance(node, ast.Name) and len(node.id) > max_length + + +@export +def is_confusing_abbreviation(node: ast.AST) -> bool: + abbreviations = {"num", "val", "calc", "arr", "tmp", "usr", "cfg"} + return isinstance(node, ast.Name) and node.id in abbreviations + + +@export +def has_bad_plural(node: ast.AST) -> bool: + bad_plurals = {"datas", "informations", "content_stuff"} + return isinstance(node, ast.Name) and node.id in bad_plurals + + +@export +def has_mixed_case(node: ast.AST) -> bool: + if not isinstance(node, ast.Name): + return False + has_camel = any(c.isupper() for c in node.id[1:]) + has_screaming = node.id.isupper() and "_" in node.id + return has_camel or has_screaming + + +# File processing + + +def is_test_context() -> bool: + test_runners = {"test_", "unittest", "nose2", "pytest"} + path_parts = sys.argv[0].split(os.sep) + return any(runner in path for path in path_parts for runner in test_runners) + + +def read_ignore_file(path: str) -> list[str]: + with open(path, "r") as f: + return [line.strip() for line in f] + + +def filter_ignore_lines(lines: list[str]) -> list[str]: + return [line for line in lines if not line.startswith("#")] + + +def load_ignore_paths(path: str = ".inspectignore") -> list[str]: + return filter_ignore_lines(read_ignore_file(path)) + + +IGNORE_PATHS = [] if is_test_context() else load_ignore_paths() + + +def filter_dirs(dirs: list[str]) -> list[str]: + return [d for d in dirs if d not in IGNORE_PATHS] + + +def process_files(directory: str, file_processor: Callable): + for root, dirs, files in os.walk(directory): + dirs[:] = filter_dirs(dirs) + for file in (f for f in files if f.endswith(".py")): + file_processor(os.path.join(root, file)) diff --git a/admin/type_checks/foo.py b/admin/codetidy/test_file.py similarity index 53% rename from admin/type_checks/foo.py rename to admin/codetidy/test_file.py index f395f10..2baeb96 100644 --- a/admin/type_checks/foo.py +++ b/admin/codetidy/test_file.py @@ -1,3 +1,6 @@ +"""Some test functions to run the validator scripts against""" + + def a_function(a: int, b: int): return a + b @@ -6,6 +9,19 @@ def function_returns_none(): pass +def a_func_with_a_docstring_and_no_space_after(a: int, b: int) -> int: + """ + This is a docstring. + """ + return a + b + + +def a_func_with_a_docstring_and_a_newline_after(a: int, b: int) -> int: + """This is a docstring""" + + return a + b + + class Foo: def __init__(self, a: int, b: int): self.a = a diff --git a/admin/codetidy/tests/__init__.py b/admin/codetidy/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/admin/codetidy/tests/test_codetidy.py b/admin/codetidy/tests/test_codetidy.py new file mode 100644 index 0000000..4a9312e --- /dev/null +++ b/admin/codetidy/tests/test_codetidy.py @@ -0,0 +1,353 @@ +__package__ = "admin.codetidy.tests" + +import ast +import dataclasses +import sys +import unittest +from typing import Callable + +from .. import inspector + +this = sys.modules[__name__] + + +@dataclasses.dataclass +class TestCase: + filters: list[Callable] + expected: list[str] + code: str + + +func_with_docstring = TestCase( + filters=[inspector.has_docstring], + expected=["foo"], + code=''' +def foo(): + """This is a docstring.""" + pass +''', +) + +docstring_adjacent_to_code = TestCase( + filters=[inspector.has_docstring, inspector.docstring_adjacent_to_code], + expected=["foo"], + code=''' +def foo(): + """This is a docstring.""" + pass +''', +) + +class_with_docstring_adjacent_to_code = TestCase( + filters=[inspector.has_docstring, inspector.docstring_adjacent_to_code], + expected=["__init__"], + code=''' +class Foo: + def __init__(self): + """This is a docstring for __init__""" + pass +''', +) + + +class TestCaseMixin: + """Mixin for test cases that check names against patterns""" + + @property + def test_names(self) -> list[str]: + raise NotImplementedError + + @property + def filter_func(self) -> str: + raise NotImplementedError + + def check_names(self, test_key: str): + test_case = getattr(this, test_key) + + block = inspector.CodeBlock(test_case.code) + poor_names = getattr(block, self.filter_func)(test_case.filters) + + self.assertEqual(test_case.expected, poor_names) # type: ignore + + def test_all_cases(self): + if self.__class__.__name__ == "TestCaseMixin": + return + for test_name in self.test_names: + self.check_names(test_name) + + +class CodeBlockTest(unittest.TestCase): + def extract_names(self, funcs: list[ast.FunctionDef]) -> list[str]: + return [f.name for f in funcs] + + def test_fetch_functions(self): + block = inspector.CodeBlock(func_with_docstring.code) + + self.assertEqual(["foo"], self.extract_names(block.func_defs)) + + def test_fetch_functions_in_class(self): + block = inspector.CodeBlock(class_with_docstring_adjacent_to_code.code) + + self.assertEqual(["__init__"], self.extract_names(block.func_defs)) + + +class DocstringTest(TestCaseMixin, unittest.TestCase): + @property + def test_names(self) -> list[str]: + return [ + "func_with_docstring", + "docstring_adjacent_to_code", + "class_with_docstring_adjacent_to_code", + ] + + @property + def filter_func(self) -> str: + return "find_matching_functions" + + +func_with_no_return_value = TestCase( + filters=[inspector.callables_with_return_values], + expected=[], + code=""" +def foo(): + pass +""", +) + +func_with_return_value = TestCase( + filters=[inspector.callables_with_return_values], + expected=["foo"], + code=""" +def foo(): + return 1 +""", +) + +func_with_no_return_type_annotation = TestCase( + filters=[inspector.no_return_type_annotation], + expected=["foo"], + code=""" +def foo(): + pass +""", +) + +func_with_return_type_annotation = TestCase( + filters=[inspector.no_return_type_annotation], + expected=[], + code=""" +def foo() -> int: + return 1 +""", +) + +func_with_type_ignore_comment = TestCase( + filters=[inspector.no_type_ignore_comment], + expected=[], + code=""" +def foo(): # type: ignore + pass +""", +) + + +func_with_no_type_ignore_comment = TestCase( + filters=[inspector.no_type_ignore_comment], + expected=["foo"], + code=""" +def foo(): + pass +""", +) + + +class ReturnValuesTest(TestCaseMixin, unittest.TestCase): + @property + def test_names(self): + return [ + "func_with_no_return_value", + "func_with_return_value", + "func_with_no_return_type_annotation", + "func_with_return_type_annotation", + "func_with_type_ignore_comment", + "func_with_no_type_ignore_comment", + ] + + @property + def filter_func(self) -> str: + return "find_matching_functions" + + +# Type prefix/suffix patterns +db_prefix = TestCase( + filters=[inspector.vars_that_start_with_df], + expected=["df_data"], + code=""" + +def foo() -> int: + df_data = pd.DataFrame() + + """, +) + +type_prefix = TestCase( + filters=[inspector.has_type_prefix], + expected=["str_name", "list_items", "dict_config"], + code=""" + +def foo() -> int: + str_name = "foo" + list_items = [] + dict_config = {} + + """, +) + +type_suffix = TestCase( + filters=[inspector.has_type_suffix], + expected=["name_str", "items_list", "config_dict"], + code=""" + +def foo() -> int: + name_str = "foo" + items_list = [] + config_dict = {} + + """, +) + +# Generic/meaningless patterns +generic_names = TestCase( + filters=[inspector.is_generic_name], + expected=["data", "info", "result", "temp"], + code=""" + +def foo() -> int: + data = 1 + info = {} + result = [] + temp = 0 + +""", +) + +meaningless_names = TestCase( + filters=[inspector.is_meaningless_name], + expected=["thing", "stuff"], + code=""" +def foo() -> int: + foo = 1 + bar = 2 + thing = 3 + stuff = 4 +""", +) + +# Built-in conflicts +builtin_shadows = TestCase( + filters=[inspector.shadows_builtin], + expected=["list", "dict", "str", "min"], + code=""" +def foo() -> int: + list = [] + dict = {} + str = "text" + min = 0 + """, +) + +# Bad naming patterns +numbered_names = TestCase( + filters=[inspector.has_number_in_name], + expected=["item1", "string2", "data3"], + code=""" +def foo() -> int: + item1 = "first" + string2 = "second" + data3 = "third" +""", +) + +long_names = TestCase( + filters=[inspector.is_too_long], + expected=[ + "calculate_total_sum_of_all_user_transactions", + "get_normalized_customer_address_data", + ], + code=""" +def foo() -> int: + calculate_total_sum_of_all_user_transactions = 0 + get_normalized_customer_address_data = "" +""", +) + +# Poor naming conventions +abbreviated_names = TestCase( + filters=[inspector.is_confusing_abbreviation], + expected=["num", "val", "calc", "arr"], + code=""" +def foo() -> int: + num = 1 + val = 2 + calc = 3 + arr = [] +""", +) + +bad_plurals = TestCase( + filters=[inspector.has_bad_plural], + expected=["datas", "informations", "content_stuff"], + code=""" +def foo() -> int: + datas = [] + informations = {} + content_stuff = "" +""", +) + +mixed_case = TestCase( + filters=[inspector.has_mixed_case], + expected=["userId", "customerNAME", "DB_Config"], + code=""" +def foo() -> int: + userId = 1 + customerNAME = "test" + DB_Config = {} + """, +) + + +class VariableNameTest(TestCaseMixin, unittest.TestCase): + @property + def type_pattern_names(self) -> list[str]: + return ["db_prefix", "type_prefix", "type_suffix"] + + @property + def naming_pattern_names(self) -> list[str]: + return ["generic_names", "meaningless_names", "builtin_shadows"] + + @property + def bad_name_pattern_names(self) -> list[str]: + return [ + "numbered_names", + "long_names", + "abbreviated_names", + "bad_plurals", + "mixed_case", + ] + + @property + def test_names(self) -> list[str]: + return [ + *self.type_pattern_names, + *self.naming_pattern_names, + *self.bad_name_pattern_names, + ] + + @property + def filter_func(self) -> str: + return "find_matching_variables" + + +if __name__ == "__main__": + unittest.main() diff --git a/admin/codetidy/validate_docstrings.py b/admin/codetidy/validate_docstrings.py new file mode 100644 index 0000000..03ed87f --- /dev/null +++ b/admin/codetidy/validate_docstrings.py @@ -0,0 +1,30 @@ +""" +A simple script to find all functions in a Python project that have a docstring +but do not have a blank line immediately following the docstring. We can then +add the blank line manually for better readability. +""" + +__package__ = __import__("config").infer_package(__file__) # Allow rel imports + + +import click + +from . import inspector + +filters = [inspector.has_docstring, inspector.docstring_adjacent_to_code] + + +def find_functions_without_blank_line(file: str): + block = inspector.CodeBlock.from_file(file) + if callables := block.find_matching_functions(filters): + print(f"{file:<40} No blank line after docstring: {callables}") + + +@click.command() +@click.argument("path", required=True, default=".") +def main(path: str): + inspector.process_files(path, find_functions_without_blank_line) + + +if __name__ == "__main__": + main() diff --git a/admin/codetidy/validate_return_types.py b/admin/codetidy/validate_return_types.py new file mode 100644 index 0000000..a48ced4 --- /dev/null +++ b/admin/codetidy/validate_return_types.py @@ -0,0 +1,59 @@ +""" +A simple script to find all callables in a Python project that do have something +to return, however there is no return type annotation. We can then add return +type annotations to these callables manually. We specifically avoid adding "-> +None:" to functions/methods which return nothing as it clutters up the page while not adding anything helpful. + +Note: Also do a manual search for "-> None:" so you can remove them for +readability. + +It's easier than running mypy.ini and Pylance doesn't (yet) have a way to report +on missing return type annotations. +""" + +__package__ = __import__("config").infer_package(__file__) # Allow rel imports + +import re + +import click + +from . import inspector + +filters = [ + inspector.no_type_ignore_comment, + inspector.no_return_type_annotation, + inspector.callables_with_return_values, +] + + +SKIP_PATTERNS = [ + "^test_.*", + "^setUp$", + "^tearDown$", + "^root$", +] + + +def matches_skip_pattern(callable_name: str) -> bool: + return any(re.match(pattern, callable_name) for pattern in SKIP_PATTERNS) + + +def skip_callables_that_match_regex(callables: list[str]) -> list[str]: + return [c for c in callables if not matches_skip_pattern(c)] + + +def find_missing_return_types(file: str): + block = inspector.CodeBlock.from_file(file) + callables = block.find_matching_functions(filters) + if filtered_callables := skip_callables_that_match_regex(callables): + print(f"{file:<40} No return type: {filtered_callables}") + + +@click.command() +@click.argument("path", required=True, default=".") +def main(path: str): + inspector.process_files(path, find_missing_return_types) + + +if __name__ == "__main__": + main() diff --git a/admin/set_github_secret.sh b/admin/set_github_secret.sh new file mode 100755 index 0000000..234ebc8 --- /dev/null +++ b/admin/set_github_secret.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +cd .. + +if [ -f .env ]; then + export $(grep -v '^#' .env.keys | xargs) +else + echo ".env.keys file not found!" + exit 1 +fi + +gh secret set DOTENV_PRIVATE_KEY_VAULT -b"$DOTENV_PRIVATE_KEY_VAULT" diff --git a/admin/type_checks/callables_with_no_return_types.py b/admin/type_checks/callables_with_no_return_types.py deleted file mode 100644 index 0672dcc..0000000 --- a/admin/type_checks/callables_with_no_return_types.py +++ /dev/null @@ -1,95 +0,0 @@ -""" -A simple script to find all callables in a Python project that do have something -to return, however there is no return type annotation. We can then add return -type annotations to these callables manually. We specifically avoid adding "-> -None:" to functions/methods which return nothing as it clutters up the page -while not adding anything helpful. - -Note: Also do a manual search for "-> None:" so you can remove them for -readability. - -It's easier than running mypy.ini and Pylance doesn't (yet) have a way to report -on missing return type annotations. -""" - -import ast -import os -import re - -import click - - -def load_file(filename: str) -> str: - with open(filename, "r") as f: - return f.read() - - -def callables_with_no_return_values(node: ast.FunctionDef) -> bool: - """Check if a function/method has a return statement that is not None.""" - - return any( - isinstance(subnode, ast.Return) and subnode.value is not None - for subnode in ast.walk(node) - ) - - -def no_return_type_annotation(node: ast.FunctionDef) -> bool: - """Return True if function/method has no return type annotation""" - - return node.returns is None - - -def get_callables_without_return_type(tree: ast.AST) -> list[str]: - return [ - node.name - for node in ast.walk(tree) - if isinstance(node, ast.FunctionDef) # Both functions and class methods - and no_return_type_annotation(node) - and callables_with_no_return_values(node) - ] - - -SKIP_PATTERNS = [ - "^test_.*", - "^setUp$", - "^tearDown$", - "^root$", -] - - -def skip_callables_that_match_regex(callables: list[str]) -> list[str]: - return [ - c for c in callables if not any(re.match(r, c) for r in SKIP_PATTERNS) - ] - - -def find_missing_return_types(file: str): - tree = ast.parse(load_file(file)) - callables = get_callables_without_return_type(tree) - if filtered_callables := skip_callables_that_match_regex(callables): - print(f"{file:<40} Missing return types: {filtered_callables}") - - -IGNORE_PATHS = ["venv", ".venv", "type_checks"] - - -def filter_dirs(dirs: list[str]) -> list[str]: - return [d for d in dirs if d not in IGNORE_PATHS] - - -def process_files(directory: str): - for root, dirs, files in os.walk(directory): - dirs[:] = filter_dirs(dirs) - for file in files: - if file.endswith(".py"): - find_missing_return_types(os.path.join(root, file)) - - -@click.command() -@click.argument("path", required=True, default=".") -def main(path: str): - process_files(path) - - -if __name__ == "__main__": - main()