Skip to content

Commit

Permalink
feat(script): Verify and rollback doc_url for gone checkers (e.g., …
Browse files Browse the repository at this point in the history
…dealpha)

The checker label configuration at `/config/labels/analyzers` most often
contains a `doc_url` entry which points to the documentation URL of the
checker, as shown in the UI.
When the user clicks this, the browser redirects them to this page,
however, these external links are very susceptible to link rot,
especially when analysers entirely decommission checkers (e.g.,
`clang-tidy/cert-dcl21-cpp`) or checkers change name during a
dealphafication (e.g., `alpha.cplusplus.EnumCastOutOfRange` ->
`optin.core.EnumCastOutOfRange`).
In these cases, older analysis results stored with the older (or still
extant) check will have a `doc_url` that points to nowhere in the
upstream.
In addition, there were several identified cases where the links were
recognised as broken (both by this tool and by an actual browser) but
the checker was still extant, simply because of a typo:
`cplusplus.PlacementNew`, `#wdeprecated-deprecated-coroutine` (instead
of `#wdeprecated-coroutine`), `#wclang-diagnostic-unsafe-buffer-usage`
(instead of `#wunsafe-buffer-usage`).

This patch adds an opt-in, developer-only tool under `/scripts/labels`,
which automatically checks (by way of HTTP requests and HTML DOM
scraping) whether the existing URLs still point to live pages, and
reports this status.
If there is analyser-specific additional knowledge (e.g., ClangSA and
Clang-Tidy are implemented as such as of now), it uses additional
heuristics (most of which are available through reusable library
components for future development!) to figure out a fixed version of the
`doc_url` by normalising `#anchors` to fix typos, and looking up earlier
releases in which the checker under verification was still extant.
  • Loading branch information
whisperity committed Apr 18, 2024
1 parent 9a85f26 commit c9e798c
Show file tree
Hide file tree
Showing 27 changed files with 2,425 additions and 0 deletions.
1 change: 1 addition & 0 deletions scripts/labels/compiler_warnings.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# FIXME: Subsume into the newer label_tool package.
import argparse
import json
import urllib3
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# FIXME: Subsume into the newer label_tool/doc_url package!
import argparse
import json
import sys
Expand Down
29 changes: 29 additions & 0 deletions scripts/labels/label_tool/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# -------------------------------------------------------------------------
#
# Part of the CodeChecker project, under the Apache License v2.0 with
# LLVM Exceptions. See LICENSE for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# -------------------------------------------------------------------------
"""
This library ships reusable components and user-facing tools to verify,
generate, and adapt the checker labels in the CodeChecker configuration
structure.
"""
from . import \
checker_labels, \
http_, \
multiprocess, \
output, \
transformer, \
util


# Public submodules of the package; the names listed here are what a
# `from <package> import *` exposes, and they mirror the relative import
# statement above.
__all__ = [
    "checker_labels",
    "http_",
    "multiprocess",
    "output",
    "transformer",
    "util",
]
68 changes: 68 additions & 0 deletions scripts/labels/label_tool/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/usr/bin/env python3
# -------------------------------------------------------------------------
#
# Part of the CodeChecker project, under the Apache License v2.0 with
# LLVM Exceptions. See LICENSE for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# -------------------------------------------------------------------------
"""Dispatching to the top-level tools implemented in the package."""
import argparse
import sys

try:
from .doc_url.verify_tool import __main__ as doc_url_verify
except ModuleNotFoundError as e:
import traceback
traceback.print_exc()

print("\nFATAL: Failed to import some required modules! "
"Please make sure you also install the contents of the "
"'requirements.txt' of this tool into your virtualenv:\n"
"\tpip install -r scripts/requirements.txt",
file=sys.stderr)
sys.exit(1)


def args() -> argparse.ArgumentParser:
    """
    Builds the top-level command-line parser of the package.

    Every tool is registered as a mandatory subcommand. The tool's own
    `args()` callback populates its subparser, and the tool's `main`
    function is stored in the parsed namespace under the `__main` default so
    that the caller knows what to execute.
    """
    help_formatter = argparse.ArgumentDefaultsHelpFormatter

    root = argparse.ArgumentParser(
        prog=__package__,
        description="""
Tooling related to creating, managing, verifying, and updating the checker
labels in a CodeChecker config directory.
This main script is the union of several independent tools using a common
internal library.
""",
        formatter_class=help_formatter)
    commands = root.add_subparsers(
        title="subcommands",
        description="Please select a subcommand to continue.",
        dest="subcommand",
        required=True)

    # Subcommand name -> module implementing that tool.
    tools = {
        "doc_url_verify": doc_url_verify,
    }
    for name, tool in tools.items():
        tool_parser = commands.add_parser(
            name,
            prog=tool.__package__,
            help=tool.short_help,
            description=tool.description,
            epilog=tool.epilogue,
            formatter_class=help_formatter)
        # The tool decorates (and hands back) its own parser.
        tool_parser = tool.args(tool_parser)
        tool_parser.set_defaults(__main=tool.main)

    return root


if __name__ == "__main__":
    def _main():
        """
        Parses the command line, dispatches to the entry point of the
        selected subcommand, and exits with its result (0 if it returned
        nothing truthy).
        """
        parsed = args().parse_args()
        options = vars(parsed)

        # Strip the dispatching-only entries so that the tool receives
        # nothing but its own arguments.
        options.pop("subcommand")
        entry_point = options.pop("__main")

        sys.exit(entry_point(parsed) or 0)

    _main()
137 changes: 137 additions & 0 deletions scripts/labels/label_tool/checker_labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# -------------------------------------------------------------------------
#
# Part of the CodeChecker project, under the Apache License v2.0 with
# LLVM Exceptions. See LICENSE for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# -------------------------------------------------------------------------
"""Provides I/O with the configuration files that describe checker labels."""
from collections import deque
import json
import pathlib
from typing import Dict, List, Optional, cast

from .output import Settings as OutputSettings, error, trace


# Shape of the "labels" section of a label config file as it is read from
# JSON: each checker name maps to the list of its label strings, which the
# functions below treat as "key:value" entries.
_ConfigFileLabels = Dict[str, List[str]]

# For one selected label key: each checker name maps to the value of that
# label, or to `None` if the checker does not have such a label.
SingleLabels = Dict[str, Optional[str]]
# NOTE(review): not used within this module; presumably maps each checker to
# a {label key: label value} dictionary -- confirm against the callers.
Labels = Dict[str, Dict[str, str]]


def _load_json(path: pathlib.Path) -> Dict:
    """
    Reads and parses the JSON document stored at `path`.

    The file is always decoded as UTF-8, instead of whatever the locale of
    the executing machine dictates, so that label files containing non-ASCII
    characters are read identically on every platform.

    On failure, the traceback and an error message are emitted, and the
    original exception is re-raised: `OSError` if the file can not be opened
    or read, `json.JSONDecodeError` if its contents are not valid JSON.
    """
    try:
        with path.open("r", encoding="utf-8") as file:
            return json.load(file)
    except OSError:
        import traceback
        traceback.print_exc()

        error("Failed to open label config file '%s'", path)
        raise
    except json.JSONDecodeError:
        import traceback
        traceback.print_exc()

        error("Failed to parse label config file '%s'", path)
        raise


def _save_json(path: pathlib.Path, data: Dict):
    """
    Serialises `data` as JSON (2-space indentation, trailing newline) and
    writes it to `path`, replacing the previous contents of the file.

    The document is fully serialised in memory *before* the file is opened
    for writing. Opening with "w" truncates the file immediately, so
    serialising straight into the file would leave a destroyed, half-written
    config behind whenever `data` turns out not to be encodable.

    On failure, the traceback and an error message are emitted, and the
    original exception is re-raised: `TypeError` or `ValueError` if `data`
    can not be encoded as JSON (the file is left untouched in this case),
    `OSError` if the file can not be written.
    """
    try:
        text = json.dumps(data, indent=2)
    except (TypeError, ValueError):
        import traceback
        traceback.print_exc()

        error("Failed to encode label config file '%s'", path)
        raise

    try:
        with path.open("w", encoding="utf-8") as file:
            file.write(text)
            file.write('\n')
    except OSError:
        import traceback
        traceback.print_exc()

        error("Failed to write label config file '%s'", path)
        raise


class MultipleLabelsError(Exception):
    """
    Raised by `get_checker_labels` and `update_checker_labels` if multiple
    labels exist for the same key on a single checker.

    The offending label key is available as the `key` attribute.
    """

    def __init__(self, key):
        # Unlike the logging functions, `Exception` does not %-interpolate
        # its arguments: passing the format string and the key separately
        # would make `str(exc)` render as a raw tuple. Format it up front.
        super().__init__(f"Multiple labels with key: {key}")
        self.key = key


def get_checker_labels(analyser: str, path: pathlib.Path, key: str) \
        -> SingleLabels:
    """
    Loads the checker label config file available at `path` and extracts,
    for every checker, the value of the `key` label.

    Returns a mapping from each checker in the file to the value of its
    `key` label (without the "key:" prefix), or to `None` if the checker has
    no such label. `analyser` is only used when reporting missing labels in
    trace output.

    Raises `MultipleLabelsError` if any checker has at least two labels with
    the same `key`, and `KeyError` if the file has no "labels" section.
    """
    try:
        raw_labels = cast(_ConfigFileLabels, _load_json(path)["labels"])
    except KeyError:
        error("'%s' is not a label config file", path)
        raise

    marker = f"{key}:"
    values_by_checker = {}
    for checker, labels in raw_labels.items():
        # Keep only the labels of the requested key, with the key cut off.
        values_by_checker[checker] = [label[len(marker):]
                                      for label in labels
                                      if label.startswith(marker)]

    if OutputSettings.trace():
        for checker, values in values_by_checker.items():
            if not values:
                trace("No '%s:' label found for '%s/%s'",
                      key, analyser, checker)

    for values in values_by_checker.values():
        if len(values) > 1:
            raise MultipleLabelsError(key)

    return {checker: (values[0] if values else None)
            for checker, values in values_by_checker.items()}


def update_checker_labels(analyser: str,
                          path: pathlib.Path,
                          key: str,
                          updates: SingleLabels):
    """
    Rewrites the `key` labels in the checker label config file at `path`.

    For every checker in `updates`, the existing `key` label is overwritten
    in place with the new value; if the checker has no such label yet, the
    new one is inserted at the front of its label list. The resulting
    document is then written back to `path`.

    Raises `MultipleLabelsError` if any checker in the file has more than one
    `key` label (as it would not be unique which one to overwrite), and
    `KeyError` if the file has no "labels" section or `updates` names a
    checker that is not present in the file. Nothing is written in these
    cases.
    """
    try:
        document = _load_json(path)
        labels_by_checker = cast(_ConfigFileLabels, document["labels"])
    except KeyError:
        error("'%s's '%s' is not a label config file", analyser, path)
        raise

    marker = f"{key}:"
    positions = {}
    for checker, labels in labels_by_checker.items():
        positions[checker] = [position
                              for position, label in enumerate(labels)
                              if label.startswith(marker)]

    # Validate the entire file up front, before touching anything.
    for found in positions.values():
        if len(found) > 1:
            raise MultipleLabelsError(key)

    for checker, new_label in updates.items():
        labels = labels_by_checker[checker]
        found = positions[checker]
        entry = f"{key}:{new_label}"
        if found:
            # Exactly one match is left at this point: replace it in place.
            labels[found[0]] = entry
        else:
            labels.insert(0, entry)

    _save_json(path, document)
20 changes: 20 additions & 0 deletions scripts/labels/label_tool/doc_url/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# -------------------------------------------------------------------------
#
# Part of the CodeChecker project, under the Apache License v2.0 with
# LLVM Exceptions. See LICENSE for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# -------------------------------------------------------------------------
"""
Verifies and generates fixed ``doc_url`` labels for checkers in the
configuration.
"""
from . import \
output, \
verifiers


# Public submodules of this subpackage; mirrors the relative import
# statement above.
__all__ = [
    "output",
    "verifiers",
]
42 changes: 42 additions & 0 deletions scripts/labels/label_tool/doc_url/output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# -------------------------------------------------------------------------
#
# Part of the CodeChecker project, under the Apache License v2.0 with
# LLVM Exceptions. See LICENSE for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# -------------------------------------------------------------------------
"""Tool-level output settings."""
from ..util import _Singleton


class Settings(_Singleton):
    """
    Tool-level output settings.

    Holds two boolean flags, `report_missing` and `report_ok`, both of which
    start out as `False`. The flags are read and written through the static
    accessor methods, which always go through `factory()`.
    """

    def __init__(self):
        """
        Sets up the default value of the flags.

        Only flags that are not yet present on the instance are initialised,
        so running this again on an object that already carries them does
        not reset their values.
        """
        for flag in ("_report_missing", "_report_ok"):
            if flag not in self.__dict__:
                setattr(self, flag, False)

    @staticmethod
    def factory():
        """Creates or fetches the `Settings` object via its constructor."""
        return Settings()

    @staticmethod
    def report_missing() -> bool:
        """Returns the current value of the `report_missing` flag."""
        settings = Settings.factory()
        return settings._report_missing  # type: ignore

    @staticmethod
    def set_report_missing(v: bool):
        """Sets the `report_missing` flag to `v`."""
        settings = Settings.factory()
        settings._report_missing = v  # type: ignore

    @staticmethod
    def report_ok() -> bool:
        """Returns the current value of the `report_ok` flag."""
        settings = Settings.factory()
        return settings._report_ok  # type: ignore

    @staticmethod
    def set_report_ok(v: bool):
        """Sets the `report_ok` flag to `v`."""
        settings = Settings.factory()
        settings._report_ok = v  # type: ignore
24 changes: 24 additions & 0 deletions scripts/labels/label_tool/doc_url/verifiers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# -------------------------------------------------------------------------
#
# Part of the CodeChecker project, under the Apache License v2.0 with
# LLVM Exceptions. See LICENSE for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# -------------------------------------------------------------------------
"""
Implements the logic for generic and analyser-specific verification and
translation of documentation URLs.
"""
from .analyser_selection import select_verifier
from .generic import Outcome, \
HTTPStatusCodeVerifier, HTMLAnchorVerifier
from .status import Status


# Public names of this subpackage; mirrors the names imported above from the
# `analyser_selection`, `generic`, and `status` submodules.
__all__ = [
    "select_verifier",
    "Outcome",
    "HTTPStatusCodeVerifier",
    "HTMLAnchorVerifier",
    "Status",
]
61 changes: 61 additions & 0 deletions scripts/labels/label_tool/doc_url/verifiers/analyser_selection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# -------------------------------------------------------------------------
#
# Part of the CodeChecker project, under the Apache License v2.0 with
# LLVM Exceptions. See LICENSE for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# -------------------------------------------------------------------------
"""
Selects the appropriate verification engine for the analyser configuration.
"""
from collections import defaultdict
from typing import Dict, Iterable, Tuple, Type, Union

from ...checker_labels import SingleLabels

from .generic import HTMLAnchorVerifier, HTTPStatusCodeVerifier

from .clang_diagnostic import ClangDiagnosticVerifier
from .clang_tidy import ClangTidyVerifier
from .clangsa import ClangSAVerifier


class _Generic:
    """
    Tag type standing in for "no analyser-specific verifier": the actual
    choice between the raw `HTTPStatusCodeVerifier` (for direct links) and
    the `HTMLAnchorVerifier` (for single-page multi-section links) is made
    by `select()`, based on the labels at hand.
    """

    @staticmethod
    def select(labels: SingleLabels) -> Type:
        """
        Returns `HTMLAnchorVerifier` if at least one non-empty label
        contains a '#', and `HTTPStatusCodeVerifier` otherwise.
        """
        for label in labels.values():
            if label and '#' in label:
                return HTMLAnchorVerifier
        return HTTPStatusCodeVerifier


# Maps the name of an analyser to the verifier class, or the tuple of
# verifier classes, to be used for it. Looking up an analyser without an
# explicit entry yields the `_Generic` tag type and, as usual for a
# `defaultdict`, stores it under that name.
AnalyserVerifiers: Dict[str, Union[Type, Tuple[Type, ...]]] = defaultdict(
    lambda: _Generic,
    {
        "clangsa": ClangSAVerifier,
        "clang-tidy": (ClangTidyVerifier, ClangDiagnosticVerifier,),
    }
)


def select_verifier(analyser: str, labels: SingleLabels) -> Iterable[Type]:
    """
    Looks up which verifier class(es) should be used for `analyser` and
    returns an iterator over them.

    If the analyser resolves to the `_Generic` tag, the concrete generic
    verifier is selected based on `labels`, and that selection is stored in
    `AnalyserVerifiers`, so later calls for the same analyser receive the
    same class irrespective of the labels passed then.
    """
    registered = AnalyserVerifiers[analyser]
    if not registered:
        return iter(())

    # Normalise the single-class form to a one-element tuple.
    candidates = registered if isinstance(registered, tuple) \
        else (registered,)

    if candidates[0] is _Generic:
        chosen = _Generic.select(labels)
        AnalyserVerifiers[analyser] = chosen
        candidates = (chosen,)

    return iter(candidates)

0 comments on commit c9e798c

Please sign in to comment.