-
Notifications
You must be signed in to change notification settings - Fork 352
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(script): Verify and rollback
doc_url
for gone checkers (e.g., …
…dealpha) The checker label configuration at `/config/labels/analyzers` most often contains a `doc_url` entry which points to the documentation URL of the checker, as shown in the UI. When the user clicks this, the browser redirects them to this page; however, these external links are very susceptible to link rot, especially when analysers entirely decommission checkers (e.g., `clang-tidy/cert-dcl21-cpp`) or checkers change name during a dealphafication (e.g., `alpha.cplusplus.EnumCastOutOfRange` -> `optin.core.EnumCastOutOfRange`). In these cases, older analysis results stored with the older (or still extant) check will have a `doc_url` that points to nowhere in the upstream. In addition, there were several identified cases where the links were recognised as broken (both by this tool and by an actual browser) but the checker was still extant, simply because of a typo: `cplusplus.PlacementNew`, `#wdeprecated-deprecated-coroutine` (instead of `#wdeprecated-coroutine`), `#wclang-diagnostic-unsafe-buffer-usage` (instead of `#wunsafe-buffer-usage`). This patch adds an opt-in, developer-only tool under `/scripts/labels`, which automatically checks (by way of HTTP requests and HTML DOM scraping) whether the existing URLs still point to alive links, and reports this status. If there is analyser-specific additional knowledge (e.g., ClangSA and Clang-Tidy are implemented as such as of now), it uses additional heuristics (most of which are available through reusable library components for future development!) to figure out a fixed version of the `doc_url` by normalising `#anchors` to fix typos, and looking up earlier releases in which the checker under verification was still extant.
- Loading branch information
1 parent
9a85f26
commit c9e798c
Showing
27 changed files
with
2,425 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
# FIXME: Subsume into the newer label_tool package. | ||
import argparse | ||
import json | ||
import urllib3 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
# FIXME: Subsume into the newer label_tool/doc_url package! | ||
import argparse | ||
import json | ||
import sys | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# ------------------------------------------------------------------------- | ||
# | ||
# Part of the CodeChecker project, under the Apache License v2.0 with | ||
# LLVM Exceptions. See LICENSE for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# ------------------------------------------------------------------------- | ||
""" | ||
This library ships reusable components and user-facing tools to verify, | ||
generate, and adapt the checker labels in the CodeChecker configuration | ||
structure. | ||
""" | ||
from . import \ | ||
checker_labels, \ | ||
http_, \ | ||
multiprocess, \ | ||
output, \ | ||
transformer, \ | ||
util | ||
|
||
|
||
__all__ = [ | ||
"checker_labels", | ||
"http_", | ||
"multiprocess", | ||
"output", | ||
"transformer", | ||
"util", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#!/usr/bin/env python3 | ||
# ------------------------------------------------------------------------- | ||
# | ||
# Part of the CodeChecker project, under the Apache License v2.0 with | ||
# LLVM Exceptions. See LICENSE for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# ------------------------------------------------------------------------- | ||
"""Dispatching to the top-level tools implemented in the package.""" | ||
import argparse | ||
import sys | ||
|
||
try:
    from .doc_url.verify_tool import __main__ as doc_url_verify
except ModuleNotFoundError:
    # Show the original import failure first, then explain how the missing
    # dependencies can be installed, and give up with a failing exit code.
    import traceback
    traceback.print_exc()

    _hint = ("\nFATAL: Failed to import some required modules! "
             "Please make sure you also install the contents of the "
             "'requirements.txt' of this tool into your virtualenv:\n"
             "\tpip install -r scripts/requirements.txt")
    print(_hint, file=sys.stderr)
    sys.exit(1)
|
||
|
||
def args() -> argparse.ArgumentParser:
    """
    Builds the top-level command-line parser of the package.

    Every tool is registered as a mandatory subcommand. The tool module
    supplies its own help texts (`short_help`, `description`, `epilogue`),
    populates its sub-parser through its `args()` function, and has its
    `main` stored under the `__main` default so that the caller can dispatch
    to it after parsing.
    """
    description = """
Tooling related to creating, managing, verifying, and updating the checker
labels in a CodeChecker config directory.
This main script is the union of several independent tools using a common
internal library.
"""
    parser = argparse.ArgumentParser(
        prog=__package__,
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    subparsers = parser.add_subparsers(
        title="subcommands",
        description="Please select a subcommand to continue.",
        dest="subcommand",
        required=True)

    # Subcommand name -> module implementing the tool.
    tools = {
        "doc_url_verify": doc_url_verify,
    }
    for command, package in tools.items():
        tool_parser = subparsers.add_parser(
            command,
            prog=package.__package__,
            help=package.short_help,
            description=package.description,
            epilog=package.epilogue,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        # The tool's own args() returns the parser to attach defaults to.
        tool_parser = package.args(tool_parser)
        tool_parser.set_defaults(__main=package.main)

    return parser
|
||
|
||
if __name__ == "__main__":
    def _main():
        """
        Parses the command line and hands over to the selected tool.

        The dispatching helpers ('subcommand' and '__main') are stripped
        from the parsed namespace first, so the tool only receives its own
        options. The return value of the tool becomes the exit code, with a
        falsy result mapped to 0.
        """
        parsed = args().parse_args()
        # vars() exposes the namespace's own __dict__, so popping from it
        # removes the attributes from 'parsed' itself.
        options = vars(parsed)
        options.pop("subcommand")
        entry_point = options.pop("__main")

        sys.exit(entry_point(parsed) or 0)
    _main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
# ------------------------------------------------------------------------- | ||
# | ||
# Part of the CodeChecker project, under the Apache License v2.0 with | ||
# LLVM Exceptions. See LICENSE for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# ------------------------------------------------------------------------- | ||
"""Provides I/O with the configuration files that describe checker labels.""" | ||
from collections import deque | ||
import json | ||
import pathlib | ||
from typing import Dict, List, Optional, cast | ||
|
||
from .output import Settings as OutputSettings, error, trace | ||
|
||
|
||
_ConfigFileLabels = Dict[str, List[str]] | ||
|
||
SingleLabels = Dict[str, Optional[str]] | ||
Labels = Dict[str, Dict[str, str]] | ||
|
||
|
||
def _load_json(path: pathlib.Path) -> Dict:
    """
    Reads and parses the JSON document stored in the file at `path`.

    The file is always decoded as UTF-8, the encoding JSON documents are
    exchanged in, instead of whatever the locale of the executing machine
    happens to default to.

    Prints the traceback, logs an error, and re-raises the original
    exception if the file can not be opened (`OSError`), or if its contents
    can not be decoded or parsed (`UnicodeDecodeError`,
    `json.JSONDecodeError`).
    """
    try:
        with path.open("r", encoding="utf-8") as file:
            return json.load(file)
    except OSError:
        import traceback
        traceback.print_exc()

        error("Failed to open label config file '%s'", path)
        raise
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Bytes that are not valid UTF-8 make the file just as unparseable
        # as malformed JSON does, so both are reported the same way.
        import traceback
        traceback.print_exc()

        error("Failed to parse label config file '%s'", path)
        raise
|
||
|
||
def _save_json(path: pathlib.Path, data: Dict):
    """
    Writes `data` as an indented JSON document, followed by a trailing
    newline, to the file at `path`.

    The document is serialised completely before the file is opened for
    writing. Opening with "w" truncates the file, so encoding on the fly
    would destroy the previous contents of the configuration whenever `data`
    turns out to be unencodable.

    Prints the traceback, logs an error, and re-raises the original
    exception if `data` can not be encoded (`TypeError`, `ValueError`) or if
    the file can not be written (`OSError`).
    """
    try:
        document = json.dumps(data, indent=2)
    except (TypeError, ValueError):
        import traceback
        traceback.print_exc()

        error("Failed to encode label config file '%s'", path)
        raise

    try:
        with path.open("w", encoding="utf-8") as file:
            file.write(document)
            file.write('\n')
    except OSError:
        import traceback
        traceback.print_exc()

        error("Failed to write label config file '%s'", path)
        raise
|
||
|
||
class MultipleLabelsError(Exception):
    """
    Raised by `get_checker_labels` and `update_checker_labels` if multiple
    labels exist for the same key.

    The offending label key is available in the `key` attribute.
    """

    def __init__(self, key):
        # Exception does not %-format its arguments like a logging call
        # does: passing the template and the key separately would make
        # str(error) render as a tuple instead of a readable message.
        super().__init__(f"Multiple labels with key: {key}")
        self.key = key

    def __reduce__(self):
        # Rebuild copies and unpickled instances from the key, not from the
        # already formatted message that is stored in 'args'.
        return (type(self), (self.key,))
|
||
|
||
def get_checker_labels(analyser: str, path: pathlib.Path, key: str) \
        -> SingleLabels:
    """
    Loads the checker label config file at `path` and returns, for every
    checker in it, the value of its `key` label with the "key:" prefix
    stripped, or `None` if the checker has no such label.

    `analyser` is only used in the trace messages emitted for checkers that
    lack the label.

    Raises `KeyError` if the file has no "labels" section, and
    `MultipleLabelsError` if at least one checker has two or more labels
    with the same `key`.
    """
    try:
        label_cfg = cast(_ConfigFileLabels, _load_json(path)["labels"])
    except KeyError:
        error("'%s' is not a label config file", path)
        raise

    prefix = f"{key}:"
    values_of = {}
    for checker, labels in label_cfg.items():
        # Only labels starting with the prefix are kept, so slicing the
        # prefix off is the same as replacing its first occurrence.
        values_of[checker] = [label[len(prefix):]
                              for label in labels
                              if label.startswith(prefix)]

    if OutputSettings.trace():
        for checker, values in values_of.items():
            if not values:
                trace("No '%s:' label found for '%s/%s'",
                      key, analyser, checker)

    result = {}
    for checker, values in values_of.items():
        if len(values) > 1:
            raise MultipleLabelsError(key)
        result[checker] = values[0] if values else None
    return result
|
||
|
||
def update_checker_labels(analyser: str,
                          path: pathlib.Path,
                          key: str,
                          updates: SingleLabels):
    """
    Rewrites the `key` labels in the checker label config file at `path`.

    For every checker in `updates`, the existing "key:..." label is
    overwritten in place, or, if the checker has no such label yet, the new
    one is inserted at the front of its label list. The whole configuration
    is written back to `path` afterwards.

    Raises `KeyError` if the file has no "labels" section or if `updates`
    names a checker missing from it, and `MultipleLabelsError` if a checker
    has several `key` labels, as it would be ambiguous which one to
    overwrite. Nothing is modified or written in these cases.
    """
    try:
        config = _load_json(path)
        label_cfg = cast(_ConfigFileLabels, config["labels"])
    except KeyError:
        error("'%s's '%s' is not a label config file", analyser, path)
        raise

    prefix = f"{key}:"

    # Position of the existing label for each checker, None if there is none.
    positions = {}
    for checker, labels in label_cfg.items():
        hits = [position for position, label in enumerate(labels)
                if label.startswith(prefix)]
        if len(hits) > 1:
            raise MultipleLabelsError(key)
        positions[checker] = hits[0] if hits else None

    for checker, new_label in updates.items():
        labels = label_cfg[checker]
        position = positions[checker]
        entry = f"{key}:{new_label}"
        if position is None:
            labels.insert(0, entry)
        else:
            labels[position] = entry

    _save_json(path, config)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# ------------------------------------------------------------------------- | ||
# | ||
# Part of the CodeChecker project, under the Apache License v2.0 with | ||
# LLVM Exceptions. See LICENSE for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# ------------------------------------------------------------------------- | ||
""" | ||
Verifies and generates fixed ``doc_url`` labels for checkers in the | ||
configuration. | ||
""" | ||
from . import \ | ||
output, \ | ||
verifiers | ||
|
||
|
||
__all__ = [ | ||
"output", | ||
"verifiers", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# ------------------------------------------------------------------------- | ||
# | ||
# Part of the CodeChecker project, under the Apache License v2.0 with | ||
# LLVM Exceptions. See LICENSE for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# ------------------------------------------------------------------------- | ||
"""Tool-level output settings.""" | ||
from ..util import _Singleton | ||
|
||
|
||
class Settings(_Singleton):
    """Tool-level output settings."""

    def __init__(self):
        """
        Gives both reporting flags their default value of `False`, unless
        the instance already carries them.

        NOTE(review): presumably `_Singleton` hands out the same instance on
        every construction, which is why already set flags must be kept here
        -- confirm against `..util`.
        """
        for flag in ("_report_missing", "_report_ok"):
            if flag not in self.__dict__:
                setattr(self, flag, False)

    @staticmethod
    def factory():
        """Returns the `Settings` instance the accessors below work on."""
        return Settings()

    @staticmethod
    def report_missing() -> bool:
        """Returns the current value of the 'report missing' flag."""
        settings = Settings.factory()
        return settings._report_missing  # type: ignore

    @staticmethod
    def set_report_missing(v: bool):
        """Sets the 'report missing' flag to `v`."""
        settings = Settings.factory()
        settings._report_missing = v  # type: ignore

    @staticmethod
    def report_ok() -> bool:
        """Returns the current value of the 'report OK' flag."""
        settings = Settings.factory()
        return settings._report_ok  # type: ignore

    @staticmethod
    def set_report_ok(v: bool):
        """Sets the 'report OK' flag to `v`."""
        settings = Settings.factory()
        settings._report_ok = v  # type: ignore
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# ------------------------------------------------------------------------- | ||
# | ||
# Part of the CodeChecker project, under the Apache License v2.0 with | ||
# LLVM Exceptions. See LICENSE for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# ------------------------------------------------------------------------- | ||
""" | ||
Implements the logic for generic and analyser-specific verification and | ||
translation of documentation URLs. | ||
""" | ||
from .analyser_selection import select_verifier | ||
from .generic import Outcome, \ | ||
HTTPStatusCodeVerifier, HTMLAnchorVerifier | ||
from .status import Status | ||
|
||
|
||
__all__ = [ | ||
"select_verifier", | ||
"Outcome", | ||
"HTTPStatusCodeVerifier", | ||
"HTMLAnchorVerifier", | ||
"Status", | ||
] |
61 changes: 61 additions & 0 deletions
61
scripts/labels/label_tool/doc_url/verifiers/analyser_selection.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# ------------------------------------------------------------------------- | ||
# | ||
# Part of the CodeChecker project, under the Apache License v2.0 with | ||
# LLVM Exceptions. See LICENSE for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# ------------------------------------------------------------------------- | ||
""" | ||
Selects the appropriate verification engine for the analyser configuration. | ||
""" | ||
from collections import defaultdict | ||
from typing import Dict, Iterable, Tuple, Type, Union | ||
|
||
from ...checker_labels import SingleLabels | ||
|
||
from .generic import HTMLAnchorVerifier, HTTPStatusCodeVerifier | ||
|
||
from .clang_diagnostic import ClangDiagnosticVerifier | ||
from .clang_tidy import ClangTidyVerifier | ||
from .clangsa import ClangSAVerifier | ||
|
||
|
||
class _Generic:
    """
    Tag type that decides between the raw `HTTPStatusCodeVerifier` for direct
    links and the `HTMLAnchorVerifier` for single-page multi-section links.
    """

    @staticmethod
    def select(labels: SingleLabels) -> Type:
        """
        Returns `HTMLAnchorVerifier` as soon as one of the non-empty label
        values contains a '#' anchor, and `HTTPStatusCodeVerifier` if none
        of them does.
        """
        for label in labels.values():
            if label and '#' in label:
                return HTMLAnchorVerifier
        return HTTPStatusCodeVerifier
|
||
|
||
# Maps the name of an analyser to the verifier class, or the tuple of verifier
# classes, used for its documentation URLs.
# Analysers without an entry default to the `_Generic` tag; `select_verifier`
# replaces that tag with the concrete class chosen by `_Generic.select` the
# first time such an analyser is looked up.
AnalyserVerifiers: Dict[str, Union[Type, Tuple[Type, ...]]] = defaultdict(
    lambda: _Generic,
    {
        "clangsa": ClangSAVerifier,
        "clang-tidy": (ClangTidyVerifier, ClangDiagnosticVerifier,),
    }
)
|
||
|
||
def select_verifier(analyser: str, labels: SingleLabels) -> Iterable[Type]:
    """
    Looks up which verifier class(es) should be used for `analyser` and
    returns an iterator over them. The iterator is empty if the registered
    entry is empty.

    If the analyser resolves to the `_Generic` tag, the concrete verifier is
    picked based on `labels`, and this choice is stored back into
    `AnalyserVerifiers`, so later calls for the same analyser reuse it.
    """
    registered = AnalyserVerifiers[analyser]
    if not registered:
        return iter(())

    candidates = registered if isinstance(registered, tuple) \
        else (registered,)
    if candidates[0] is _Generic:
        chosen = _Generic.select(labels)
        AnalyserVerifiers[analyser] = chosen
        candidates = (chosen,)

    return iter(candidates)
Oops, something went wrong.