diff --git a/config/seo.php b/config/seo.php
index b59c94d..f900dc9 100644
--- a/config/seo.php
+++ b/config/seo.php
@@ -188,5 +188,13 @@
     'broken_link_check' => [
         // Add status codes that should be considered as broken links. Empty array means all status codes starting with a 4, 5 or 0.
         'status_codes' => [],
+
+        // Links matching one of the entries below are skipped by the broken
+        // link check. An entry may be a full URL or just a path; without a
+        // wildcard it matches anywhere in the link, and `*` matches any
+        // sequence of characters (e.g. '/admin/*').
+        'exclude_paths' => [
+            //
+        ],
     ],
 ];
diff --git a/src/Checks/Content/BrokenLinkCheck.php b/src/Checks/Content/BrokenLinkCheck.php
index a3da9e7..cf7e605 100644
--- a/src/Checks/Content/BrokenLinkCheck.php
+++ b/src/Checks/Content/BrokenLinkCheck.php
@@ -53,15 +53,7 @@ public function validateContent(Crawler $crawler): bool
         $content = collect($content)->filter(fn ($value) => $value !== null)
             ->map(fn ($link) => addBaseIfRelativeUrl($link, $this->url))
             ->filter(function ($link) {
-                // Filter out all links that are mailto or tel
-                if (preg_match('/^mailto:/msi', $link) ||
-                    preg_match('/^tel:/msi', $link) ||
-                    filter_var($link, FILTER_VALIDATE_URL) === false
-                ) {
-                    return false;
-                }
-
-                return $link;
+                return $this->isValidLink($link) && ! $this->isExcludedLink($link);
             })
             ->filter(function ($link) {
                 return isBrokenLink($link) ? $link : false;
@@ -89,4 +81,82 @@ public function validateContent(Crawler $crawler): bool
 
         return true;
     }
+
+    /**
+     * Determine whether a link is a checkable URL.
+     *
+     * mailto: and tel: links, and anything PHP does not accept as a
+     * valid URL, are filtered out before the broken-link check runs.
+     */
+    private function isValidLink($link): bool
+    {
+        return ! preg_match('/^(?:mailto|tel):/i', $link) &&
+            filter_var($link, FILTER_VALIDATE_URL) !== false;
+    }
+
+    /**
+     * Determine whether a link matches one of the configured
+     * `seo.broken_link_check.exclude_paths` entries.
+     */
+    private function isExcludedLink($link): bool
+    {
+        // Cast so a missing key (null) or a single string entry both work.
+        $excludedPaths = (array) config('seo.broken_link_check.exclude_paths', []);
+
+        foreach ($excludedPaths as $path) {
+            // Skip empty or non-string entries: an empty string would
+            // otherwise match every link.
+            if (! is_string($path) || $path === '') {
+                continue;
+            }
+
+            if ($this->linkMatchesPath($link, $path)) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Determine whether a link matches a single exclusion pattern.
+     *
+     * Without a wildcard the pattern matches when it occurs anywhere in the
+     * link. With a wildcard, `*` matches any sequence of characters and the
+     * pattern must match the whole link or the whole path of the link, so
+     * both 'https://example.com/blog/*' and '/blog/*' work. A trailing `/*`
+     * also matches the prefix itself.
+     */
+    private function linkMatchesPath($link, $path): bool
+    {
+        if (! str_contains($path, '*')) {
+            return str_contains($link, $path);
+        }
+
+        // A trailing "/*" covers the prefix itself (optionally followed by a
+        // query string or fragment) as well as everything below it.
+        $coversPrefix = str_ends_with($path, '/*');
+        if ($coversPrefix) {
+            $path = substr($path, 0, -2);
+        }
+
+        // Quote the literal parts so only "*" acts as a wildcard.
+        $regex = implode('.*', array_map(
+            fn ($part) => preg_quote($part, '~'),
+            explode('*', $path)
+        ));
+
+        if ($coversPrefix) {
+            $regex .= '(?:[/?#].*)?';
+        }
+
+        // Patterns may be full URLs or bare paths, so try both forms.
+        foreach ([$link, parse_url($link, PHP_URL_PATH)] as $candidate) {
+            if (is_string($candidate) && preg_match('~\A'.$regex.'\z~s', $candidate) === 1) {
+                return true;
+            }
+        }
+
+        return false;
+    }
 }
diff --git a/tests/Checks/Content/BrokenLinkCheckTest.php b/tests/Checks/Content/BrokenLinkCheckTest.php
index f542c0a..c11b566 100644
--- a/tests/Checks/Content/BrokenLinkCheckTest.php
+++ b/tests/Checks/Content/BrokenLinkCheckTest.php
@@ -106,3 +106,35 @@
 
     $this->assertTrue($check->check(Http::get('vormkracht10.nl/admin/dashboard'), $crawler));
 });
+
+it('can exclude certain paths from the broken link check', function () {
+    $check = new BrokenLinkCheck();
+    $crawler = new Crawler();
+
+    config(['seo.broken_link_check.exclude_paths' => ['https://vormkracht10.nl/excluded']]);
+
+    // The page must actually contain the excluded link, otherwise the check
+    // passes without ever consulting the exclusion list.
+    Http::fake([
+        'vormkracht10.nl' => Http::response('<html><head></head><body><a href="https://vormkracht10.nl/excluded">Excluded Link</a></body></html>', 200),
+    ]);
+
+    $crawler->addHtmlContent(Http::get('vormkracht10.nl')->body());
+
+    $this->assertTrue($check->check(Http::get('vormkracht10.nl'), $crawler));
+});
+
+it('can exclude paths using a wildcard from the broken link check', function () {
+    $check = new BrokenLinkCheck();
+    $crawler = new Crawler();
+
+    config(['seo.broken_link_check.exclude_paths' => ['/excluded/*']]);
+
+    Http::fake([
+        'vormkracht10.nl' => Http::response('<html><head></head><body><a href="https://vormkracht10.nl/excluded/page">Excluded Link</a></body></html>', 200),
+    ]);
+
+    $crawler->addHtmlContent(Http::get('vormkracht10.nl')->body());
+
+    $this->assertTrue($check->check(Http::get('vormkracht10.nl'), $crawler));
+});