From 889178e494e7ef44f1c69b0dee5de19d59cc3f9a Mon Sep 17 00:00:00 2001 From: Bas van Dinther Date: Thu, 17 Aug 2023 15:21:46 +0200 Subject: [PATCH] Too long sentences check (#38) * wip * Improvements * Fix styling * Update README * Add pregmatch to make exploding sentences more accurate * Fix styling * Add tests * wip * Fix styling * If more than 20% of the sentences is too long, fail check * Update README * Fix styling * Improve TooLongSentenceCheck and make Action trait * Fix styling * Fix test * Improve information * Fix counting bug, should never be 0 * Update README --------- Co-authored-by: Baspa --- README.md | 160 +------------- resources/lang/en.json | 144 ++++++------- resources/lang/nl.json | 195 +++++++++--------- src/Checks/Content/TooLongSentenceCheck.php | 87 ++++++++ .../Content/TransitionWordRatioCheck.php | 29 +-- src/Traits/Actions.php | 31 +++ .../Content/TooLongSentenceCheckTest.php | 75 +++++++ 7 files changed, 373 insertions(+), 348 deletions(-) create mode 100644 src/Checks/Content/TooLongSentenceCheck.php create mode 100644 src/Traits/Actions.php create mode 100644 tests/Checks/Content/TooLongSentenceCheckTest.php diff --git a/README.md b/README.md index 8aef06c5..04f44b90 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ ## Introduction -This package is your guidance to get a better SEO score on search engines. Laravel SEO Scanner scans your code and crawls the routes from your app. The package has 23 checks that will check on performance, configurations, use of meta tags and content quality. +This package is your guidance to get a better SEO score on search engines. Laravel SEO Scanner scans your code and crawls the routes from your app. The package has 24 checks that will check on performance, configurations, use of meta tags and content quality. Easily configure which routes to scan, exclude or include specific checks or even add your own checks! 
Completing checks will further improve the SEO score and thus increase the chance of ranking higher at the search engines. @@ -80,162 +80,7 @@ php artisan migrate php artisan vendor:publish --tag="seo-config" ``` -This will be the contents of the published config file: - -```php -return [ - /* - |-------------------------------------------------------------------------- - | Cache - |-------------------------------------------------------------------------- - | - | The following array lists the cache options for the application. - | - */ - 'cache' => [ - // Only drivers that support tags are supported. - // These are: array, memcached and redis. - 'driver' => 'array', - ], - - /* - |-------------------------------------------------------------------------- - | Check classes - |-------------------------------------------------------------------------- - | - | The following array lists the "check" classes that will be registered - | with Laravel Seo. These checks run an check on the application via - | various methods. Feel free to customize it. - | - | An example of a check class: - | \Vormkracht10\Seo\Checks\Content\BrokenLinkCheck::class - | - */ - 'checks' => ['*'], - - // If you wish to skip running some checks, list the classes in the array below. - 'exclude_checks' => [], - - /* - |-------------------------------------------------------------------------- - | Check paths - |-------------------------------------------------------------------------- - | - | The following array lists the "checks" paths that will be searched - | recursively to find check classes. This option will only be used - | if the checks option above is set to the asterisk wildcard. The - | key is the base namespace to resolve the class name. 
- | - */ - 'check_paths' => [ - 'Vormkracht10\\Seo\\Checks' => base_path('vendor/vormkracht10/laravel-seo-scanner-scanner/src/Checks'), - ], - - /* - |-------------------------------------------------------------------------- - | Routes - |-------------------------------------------------------------------------- - | - | The following array lists the "checkable" routes that will be registered - | with Laravel Seo. These routes will be checked for SEO. Feel free to - | customize it. To check for specific routes, use the route name. - | - | An example of a checkable route: - | 'blog.index' - | - */ - 'check_routes' => true, - 'routes' => ['*'], - - // If you wish to skip running some checks on some routes, list the routes - // in the array below by using the route name. For example: - // 'blog.index' - 'exclude_routes' => [], - - // If you wish to skip running some checks on some paths, list the paths - // in the array below. - 'exclude_paths' => [ - 'admin/*', - 'nova/*', - 'horizon/*', - 'vapor-ui/*', - ], - - /* - |-------------------------------------------------------------------------- - | Database - |-------------------------------------------------------------------------- - | - | Here you can specify database related configurations like the connection - | that will be used to save the SEO scores. When you set the save - | option to true, the SEO score will be saved to the database. - | - */ - 'database' => [ - 'connection' => 'mysql', - 'save' => true, - 'prune' => [ - 'older_than_days' => 30, - ] - ], - - /* - |-------------------------------------------------------------------------- - | Models - |-------------------------------------------------------------------------- - | - | Here you can specify which models you want to check. When you specify a - | model, the SEO score will be saved to the database. This way you can - | check the SEO score of a specific page. 
- | - | An example of a model: - | \App\Models\BlogPost::class - | - */ - 'models' => [], - - 'http' => [ - /* - |-------------------------------------------------------------------------- - | Http client options - |-------------------------------------------------------------------------- - | - | Here you can specify the options of the http client. For example, in a - | local development environment you may want to disable the SSL - | certificate integrity check. - | - | An example of a http option: - | 'verify' => false - | - */ - 'options' => [], - - /* - |-------------------------------------------------------------------------- - | Http headers - |-------------------------------------------------------------------------- - | - | Here you can specify custom headers of the http client. - | - */ - 'headers' => [ - 'User-Agent' => 'Laravel SEO Scanner/1.0', - ], - ], - - /* - |-------------------------------------------------------------------------- - | Javascript rendering - |-------------------------------------------------------------------------- - | - | If your website uses javascript to render the content, you can enable - | javascript rendering. This will use a headless browser to render - | the content. - | - */ - 'javascript' => false, -]; -``` +Click here to see the [config file](https://github.com/vormkracht10/laravel-seo-scanner/blob/too-long-sentences-check/config/seo.php). ## Available checks @@ -255,6 +100,7 @@ These checks are available in the package. You can add or remove checks in the c ✅ The page contains no broken links.
✅ The page contains no broken images.
✅ Length of the content is at least 2100 characters.
+✅ No more than 20% of the sentences are too long (more than 20 words).
✅ A minimum of 30% of the sentences contain a transition word or phrase.
> Note: To change the locale of the transition words, you can publish the config file and change the locale in the config file. The default locale is `null` which uses the language of your `app` config. If set to `nl` or `en`, the transition words will be in Dutch or English. If you want to add more locales, you can create a pull request. diff --git a/resources/lang/en.json b/resources/lang/en.json index b27fb050..b659679d 100644 --- a/resources/lang/en.json +++ b/resources/lang/en.json @@ -1,4 +1,26 @@ { + "accordingly": "accordingly", + "additionally": "additionally", + "admittedly": "admittedly", + "after": "after", + "afterward": "afterward", + "all in all": "all in all", + "although": "although", + "an example of this is": "an example of this is", + "as a result": "as a result", + "as well as": "as well as", + "because": "because", + "before": "before", + "besides": "besides", + "but also": "but also", + "certainly": "certainly", + "compared to": "compared to", + "consequently": "consequently", + "conversely": "conversely", + "due to": "due to", + "during": "during", + "even though": "even though", + "eventually": "eventually", "failed.configuration.nofollow.meta": "The page contains a nofollow meta tag, while it should not.", "failed.configuration.nofollow.tag": "The page contains a nofollow tag, while it should not.", "failed.configuration.noindex.meta": "The page contains a noindex meta tag, while it should not.", @@ -15,9 +37,12 @@ "failed.content.no_heading": "The page does not contain any h1 tag, while it should.", "failed.content.no_title": "The page does not contain a title tag, while it should.", "failed.content.title_length": "The page title is :actualValue characters long. It should be max :expectedValue characters long.", + "failed.content.too_long_sentence": "The page has :actualValue too long sentences which exceeds 20% of the total sentences. 
Rectify :neededToFix sentences to meet the 20% limit.", + "failed.content.transition_words_ratio_check.no_phrases_found": "The page does not contain any transition words.", + "failed.content.transition_words_ratio_check.too_few_transition_words": "The page contains too few transition words. The recommended minimum is 30%, while the actual number is :actualValue%.", "failed.meta.description": "The page does not contain a description meta tag, while it should.", - "failed.meta.keyword_in_title_check": "The page title does not contain the focus keyword, while it should.", "failed.meta.keyword_in_first_paragraph_check": "The page does not contain the focus keyword in the first paragraph, while it should.", + "failed.meta.keyword_in_title_check": "The page title does not contain the focus keyword, while it should.", "failed.meta.no_lang": "The page does not contain a lang attribute, while it should.", "failed.meta.open_graph_image": "The page does not contain an open graph image, while it should.", "failed.meta.open_graph_image.broken": "The page contains a broken open graph image. This image was found: :actualValue.", @@ -31,87 +56,62 @@ "failed.performance.response": "The page returned a response code other than :expectedValue. The actual response code was :actualValue.", "failed.performance.ttfb": "The page took too long to load (max :expectedValuems). The actual time was :actualValuems.", "failed.performance.ttfb.missing_url": "We could not get the TTFB for this page.", - "failed.content.transition_words_ratio_check.too_few_transition_words": "The page contains too few transition words. 
The recommended minimum is 30%, while the actual number is :actualValue%.", - "failed.content.transition_words_ratio_check.no_phrases_found": "The page does not contain any transition words.", - "additionally": "additionally", - "moreover": "moreover", + "finally": "finally", + "first": "first", + "for example": "for example", + "for instance": "for instance", "furthermore": "furthermore", - "in addition": "in addition", - "not only": "not only", - "but also": "but also", - "as well as": "as well as", - "besides": "besides", - "what's more": "what's more", + "granted": "granted", "however": "however", - "nevertheless": "nevertheless", - "on the other hand": "on the other hand", + "in addition": "in addition", + "in comparison": "in comparison", + "in conclusion": "in conclusion", "in contrast": "in contrast", - "conversely": "conversely", - "although": "although", - "while": "while", - "yet": "yet", - "even though": "even though", + "in other words": "in other words", + "in particular": "in particular", + "in summary": "in summary", + "in the future": "in the future", + "in the meantime": "in the meantime", + "in the past": "in the past", + "in this case": "in this case", + "indeed": "indeed", + "just as": "just as", + "likewise": "likewise", + "meanwhile": "meanwhile", + "moreover": "moreover", + "naturally": "naturally", + "nevertheless": "nevertheless", + "next": "next", "nonetheless": "nonetheless", + "not only": "not only", + "of course": "of course", + "on the other hand": "on the other hand", + "overall": "overall", + "owing to": "owing to", + "second": "second", "similarly": "similarly", - "likewise": "likewise", - "in comparison": "in comparison", - "just as": "just as", - "compared to": "compared to", "similarly to": "similarly to", - "therefore": "therefore", - "thus": "thus", - "consequently": "consequently", - "as a result": "as a result", - "because": "because", "since": "since", "so": "so", - "due to": "due to", - "owing to": "owing to", - 
"accordingly": "accordingly", - "indeed": "indeed", - "certainly": "certainly", - "of course": "of course", - "undoubtedly": "undoubtedly", - "without a doubt": "without a doubt", - "naturally": "naturally", - "for example": "for example", - "for instance": "for instance", + "soon": "soon", + "specifically": "specifically", + "subsequently": "subsequently", "such as": "such as", - "to illustrate": "to illustrate", - "in particular": "in particular", - "first": "first", - "second": "second", - "third": "third", - "next": "next", + "that is to say": "that is to say", "then": "then", - "afterward": "afterward", - "finally": "finally", - "in the meantime": "in the meantime", - "subsequently": "subsequently", - "in conclusion": "in conclusion", + "therefore": "therefore", + "third": "third", + "thus": "thus", + "to clarify": "to clarify", + "to demonstrate": "to demonstrate", + "to illustrate": "to illustrate", "to sum up": "to sum up", "ultimately": "ultimately", - "in summary": "in summary", - "all in all": "all in all", - "overall": "overall", - "meanwhile": "meanwhile", - "before": "before", - "after": "after", - "during": "during", + "undoubtedly": "undoubtedly", "until": "until", - "eventually": "eventually", - "soon": "soon", - "in the past": "in the past", - "in the future": "in the future", - "in other words": "in other words", - "that is to say": "that is to say", - "specifically": "specifically", - "to clarify": "to clarify", - "in this case": "in this case", - "an example of this is": "an example of this is", - "to demonstrate": "to demonstrate", - "admittedly": "admittedly", - "granted": "granted", - "while it is true": "while it is true" -} - + "what's more": "what's more", + "while": "while", + "while it is true": "while it is true", + "without a doubt": "without a doubt", + "yet": "yet" +} \ No newline at end of file diff --git a/resources/lang/nl.json b/resources/lang/nl.json index e2ece158..40ef334c 100644 --- a/resources/lang/nl.json +++ 
b/resources/lang/nl.json @@ -1,114 +1,115 @@ { - "failed.configuration.nofollow.meta": "De pagina bevat een nofollow meta-tag, terwijl dat niet zou moeten.", - "failed.configuration.nofollow.tag": "De pagina bevat een nofollow-tag, terwijl dat niet zou moeten.", - "failed.configuration.noindex.meta": "De pagina bevat een noindex meta-tag, terwijl dat niet zou moeten.", - "failed.configuration.noindex.tag": "De pagina bevat een noindex-tag, terwijl dat niet zou moeten.", - "failed.configuration.robots.disallowed": "Het robots.txt-bestand voor deze pagina bevat een regel die deze pagina uitsluit.", - "failed.configuration.robots.missing_url": "We konden het robots.txt-bestand voor deze pagina niet ophalen.", - "failed.content.alt_tag": "De pagina bevat afbeeldingen zonder alt-tags of met lege alt-tags. Deze afbeeldingen zijn gevonden: :actualValue.", - "failed.content.broken_images": "De pagina bevat kapotte afbeeldingen. Deze afbeeldingen zijn gevonden: :actualValue.", - "failed.content.broken_links": "De pagina bevat kapotte links. Deze links zijn gevonden: :actualValue.", - "failed.content.length": "De inhoud is :actualValue tekens lang. Het zou minstens :expectedValue tekens lang moeten zijn.", - "failed.content.length.parse": "We konden de inhoud van deze pagina niet analyseren, probeer het opnieuw.", - "failed.content.mixed_content": "De pagina bevat links naar onveilige adressen, terwijl dat niet zou moeten. Deze links zijn gevonden: :actualValue.", - "failed.content.multiple_h1": "De pagina bevat meerdere h1-tags, terwijl dat niet zou moeten. Deze tags zijn gevonden: :actualValue.", - "failed.content.no_heading": "De pagina bevat geen enkele h1-tag, terwijl dat zou moeten.", - "failed.content.no_title": "De pagina bevat geen titel-tag, terwijl dat zou moeten.", - "failed.content.title_length": "De paginatitel is :actualValue tekens lang. 
Het zou maximaal :expectedValue tekens lang moeten zijn.", - "failed.meta.description": "De pagina bevat geen beschrijvende meta-tag, terwijl dat zou moeten.", - "failed.meta.no_lang": "De pagina bevat geen taalattribuut, terwijl dat zou moeten.", - "failed.meta.open_graph_image": "De pagina bevat geen Open Graph-afbeelding, terwijl dat zou moeten.", - "failed.meta.open_graph_image.broken": "De pagina bevat een kapotte Open Graph-afbeelding. Deze afbeelding is gevonden: :actualValue.", - "failed.meta.title": "De paginatitel bevat :actualValue in de titel, terwijl dat niet zou moeten.", - "failed.meta.title.no_content": "De paginatitel is leeg, terwijl dat niet zou moeten.", - "failed.performance.compression": "De pagina is niet gecomprimeerd (met gzip of deflate), terwijl dat zou moeten.", - "failed.performance.css_size": "De pagina bevat CSS-bestanden die te groot zijn (maximaal :expectedValue). Deze bestanden zijn gevonden: :actualValue.", - "failed.performance.html_size": "De pagina bevat HTML die te groot is (maximaal :expectedValue), de werkelijke grootte is :actualValue.", - "failed.performance.image_size": "De pagina bevat afbeeldingen die te groot zijn (maximaal :expectedValue). Deze afbeeldingen zijn gevonden: :actualValue.", - "failed.performance.javascript_size": "De pagina bevat Javascript-bestanden die te groot zijn (maximaal :expectedValue). Deze bestanden zijn gevonden: :actualValue.", - "failed.performance.response": "De pagina heeft een andere responscodes dan :expectedValue geretourneerd. De werkelijke responstijd was :actualValue.", - "failed.performance.ttfb": "De pagina heeft te lang geladen (maximaal :expectedValuems). De werkelijke tijd was :actualValuems.", - "failed.performance.ttfb.missing_url": "We konden de TTFB voor deze pagina niet ophalen.", - "failed.content.transition_words_ratio_check.too_few_transition_words": "De pagina bevat te weinig transitiewoorden. 
Het zou minstens 30% moeten zijn, maar het is :actualValue%.", - "failed.content.transition_words_ratio_check.no_phrases_found": "De pagina bevat geen zinnen.", + "accordingly": "overeenkomstig", "additionally": "daarnaast", - "moreover": "bovendien", - "furthermore": "verder", - "in addition": "daarnaast", - "not only": "niet alleen", - "but also": "maar ook", + "admittedly": "toegegeven", + "after": "na", + "afterward": "daarna", + "all in all": "alles bij elkaar genomen", + "although": "hoewel", + "an example of this is": "een voorbeeld hiervan is", + "as a result": "als resultaat", "as well as": "evenals", + "because": "omdat", + "before": "voordat", "besides": "bovendien", - "what's more": "wat meer is", - "however": "echter", - "nevertheless": "desalniettemin", - "on the other hand": "aan de andere kant", - "in contrast": "in tegenstelling", - "conversely": "omgekeerd", - "although": "hoewel", - "while": "terwijl", - "yet": "toch", - "even though": "zelfs als", - "nonetheless": "desondanks", - "similarly": "op dezelfde wijze", - "likewise": "evenzo", - "in comparison": "in vergelijking", - "just as": "net zoals", + "but also": "maar ook", + "certainly": "zeker", "compared to": "vergeleken met", - "similarly to": "net als", - "therefore": "daarom", - "thus": "zo", "consequently": "als gevolg", - "as a result": "als resultaat", - "because": "omdat", - "since": "aangezien", - "so": "dus", + "conversely": "omgekeerd", "due to": "vanwege", - "owing to": "te danken aan", - "accordingly": "overeenkomstig", - "indeed": "inderdaad", - "certainly": "zeker", - "of course": "natuurlijk", - "undoubtedly": "ongetwijfeld", - "without a doubt": "zonder twijfel", - "naturally": "natuurlijkerwijs", + "during": "tijdens", + "even though": "zelfs als", + "eventually": "uiteindelijk", + "failed.configuration.nofollow.meta": "The page contains a nofollow meta tag, while it should not.", + "failed.configuration.nofollow.tag": "The page contains a nofollow tag, while it should 
not.", + "failed.configuration.noindex.meta": "The page contains a noindex meta tag, while it should not.", + "failed.configuration.noindex.tag": "The page contains a noindex tag, while it should not.", + "failed.configuration.robots.disallowed": "The robots.txt file for this page contains a disallow rule for this page.", + "failed.configuration.robots.missing_url": "We could not get the robots.txt file for this page.", + "failed.content.alt_tag": "The page contains images without or empty alt tags. These images were found: :actualValue.", + "failed.content.broken_images": "The page contains broken images. These images were found: :actualValue.", + "failed.content.broken_links": "The page contains broken links. These links were found: :actualValue.", + "failed.content.length": "The content is :actualValue characters long. It should be at least :expectedValue characters long.", + "failed.content.length.parse": "We konden de inhoud van deze pagina niet analyseren, probeer het opnieuw.", + "failed.content.mixed_content": "The page contains links to insecure addresses, while it should not. These links were found :actualValue.", + "failed.content.multiple_h1": "The page contains multiple h1 tags, while it should not. These tags were found :actualValue.", + "failed.content.no_heading": "The page does not contain any h1 tag, while it should.", + "failed.content.no_title": "The page does not contain a title tag, while it should.", + "failed.content.title_length": "The page title is :actualValue characters long. It should be max :expectedValue characters long.", + "failed.content.too_long_sentence": "De pagina bevat :actualValue te lange zinnen welke meer dan 20% van het totale aantal zinnen uitmaken. 
Corrigeer :neededToFix zinnen om aan de limiet van 20% te voldoen.", + "failed.content.transition_words_ratio_check.no_phrases_found": "De pagina bevat geen zinnen.", + "failed.content.transition_words_ratio_check.too_few_transition_words": "De pagina bevat te weinig transitiewoorden. Het zou minstens 30% moeten zijn, maar het is :actualValue%.", + "failed.meta.description": "The page does not contain a description meta tag, while it should.", + "failed.meta.no_lang": "The page does not contain a lang attribute, while it should.", + "failed.meta.open_graph_image": "The page does not contain an open graph image, while it should.", + "failed.meta.open_graph_image.broken": "The page contains a broken open graph image. This image was found: :actualValue.", + "failed.meta.title": "The page title contains :actualValue in the title, while it should not.", + "failed.meta.title.no_content": "The page title is empty, while it should not be.", + "failed.performance.compression": "The page is not compressed (using either gzip or deflate), while it should be.", + "failed.performance.css_size": "The page contains CSS files that are too large (max :expectedValue). These files were found: :actualValue.", + "failed.performance.html_size": "The page contains HTML that is too large (max :expectedValue), the actual size is :actualValue.", + "failed.performance.image_size": "The page contains images that are too large (max :expectedValue). These images were found: :actualValue.", + "failed.performance.javascript_size": "The page contains Javascript files that are too large (max :expectedValue). These files were found: :actualValue.", + "failed.performance.response": "The page returned a response code other than :expectedValue. The actual response code was :actualValue.", + "failed.performance.ttfb": "The page took too long to load (max :expectedValuems). 
The actual time was :actualValuems.", + "failed.performance.ttfb.missing_url": "We could not get the TTFB for this page.", + "finally": "uiteindelijk", + "first": "eerst", "for example": "bijvoorbeeld", "for instance": "bijvoorbeeld", - "such as": "zoals", - "to illustrate": "ter illustratie", - "in particular": "in het bijzonder", - "first": "eerst", - "second": "tweede", - "third": "derde", - "next": "volgende", - "then": "dan", - "afterward": "daarna", - "finally": "uiteindelijk", - "in the meantime": "ondertussen", - "subsequently": "vervolgens", + "furthermore": "verder", + "granted": "toegegeven", + "however": "echter", + "in addition": "daarnaast", + "in comparison": "in vergelijking", "in conclusion": "ter conclusie", - "to sum up": "om samen te vatten", - "ultimately": "uiteindelijk", + "in contrast": "in tegenstelling", + "in other words": "met andere woorden", + "in particular": "in het bijzonder", "in summary": "samengevat", - "all in all": "alles bij elkaar genomen", - "overall": "over het geheel genomen", + "in the future": "in de toekomst", + "in the meantime": "ondertussen", + "in the past": "in het verleden", + "in this case": "in dit geval", + "indeed": "inderdaad", + "just as": "net zoals", + "likewise": "evenzo", "meanwhile": "ondertussen", - "before": "voordat", - "after": "na", - "during": "tijdens", - "until": "totdat", - "eventually": "uiteindelijk", + "moreover": "bovendien", + "naturally": "natuurlijkerwijs", + "nevertheless": "desalniettemin", + "next": "volgende", + "nonetheless": "desondanks", + "not only": "niet alleen", + "of course": "natuurlijk", + "on the other hand": "aan de andere kant", + "overall": "over het geheel genomen", + "owing to": "te danken aan", + "second": "tweede", + "similarly": "op dezelfde wijze", + "similarly to": "net als", + "since": "aangezien", + "so": "dus", "soon": "binnenkort", - "in the past": "in het verleden", - "in the future": "in de toekomst", - "in other words": "met andere woorden", - "that is to 
say": "dat wil zeggen", "specifically": "specifiek", + "subsequently": "vervolgens", + "such as": "zoals", + "that is to say": "dat wil zeggen", + "then": "dan", + "therefore": "daarom", + "third": "derde", + "thus": "zo", "to clarify": "om te verduidelijken", - "in this case": "in dit geval", - "an example of this is": "een voorbeeld hiervan is", "to demonstrate": "om te demonstreren", - "admittedly": "toegegeven", - "granted": "toegegeven", - "while it is true": "terwijl het waar is" + "to illustrate": "ter illustratie", + "to sum up": "om samen te vatten", + "ultimately": "uiteindelijk", + "undoubtedly": "ongetwijfeld", + "until": "totdat", + "what's more": "wat meer is", + "while": "terwijl", + "while it is true": "terwijl het waar is", + "without a doubt": "zonder twijfel", + "yet": "toch" } \ No newline at end of file diff --git a/src/Checks/Content/TooLongSentenceCheck.php b/src/Checks/Content/TooLongSentenceCheck.php new file mode 100644 index 00000000..503b542a --- /dev/null +++ b/src/Checks/Content/TooLongSentenceCheck.php @@ -0,0 +1,87 @@ +validateContent($response, $crawler)) { + return true; + } + + return false; + } + + public function validateContent(Response $response, Crawler $crawler): bool + { + $phrases = $this->extractPhrases( + $this->getTextContent($response, $crawler) + ); + + $sentencesWithTooManyWords = $this->calculateSentencesWithTooManyWords($phrases); + $this->actualValue = $sentencesWithTooManyWords; + + if (count($sentencesWithTooManyWords) === 0) { + return true; + } + + // If more than 20% of the total sentences are too long, fail + if (count($sentencesWithTooManyWords) / count($phrases) > 0.2) { + + // Count how many sentences needed to fix to fall below 20% + $sentencesNeededToFix = count($sentencesWithTooManyWords) - (count($phrases) * 0.2); + + if ($sentencesNeededToFix < 1) { + $sentencesNeededToFix = 1; + } + + $this->failureReason = __('failed.content.too_long_sentence', [ + 'actualValue' => count($this->actualValue), + 
'neededToFix' => round($sentencesNeededToFix, 0, PHP_ROUND_HALF_UP), + ]); + + return false; + } + + return true; + } + + private function calculateSentencesWithTooManyWords(array $sentences): array + { + $tooLongSentences = []; + + foreach ($sentences as $sentence) { + if (str_word_count($sentence) > 20) { + $tooLongSentences[] = $sentence; + } + } + + return $tooLongSentences; + } +} diff --git a/src/Checks/Content/TransitionWordRatioCheck.php b/src/Checks/Content/TransitionWordRatioCheck.php index a6d17989..a9f12eba 100644 --- a/src/Checks/Content/TransitionWordRatioCheck.php +++ b/src/Checks/Content/TransitionWordRatioCheck.php @@ -3,15 +3,16 @@ namespace Vormkracht10\Seo\Checks\Content; use Illuminate\Http\Client\Response; -use Readability\Readability; use Symfony\Component\DomCrawler\Crawler; use Vormkracht10\Seo\Helpers\TransitionWords; use Vormkracht10\Seo\Interfaces\Check; +use Vormkracht10\Seo\Traits\Actions; use Vormkracht10\Seo\Traits\PerformCheck; class TransitionWordRatioCheck implements Check { - use PerformCheck; + use PerformCheck, + Actions; public string $title = 'Transition word ratio check'; @@ -40,17 +41,7 @@ public function check(Response $response, Crawler $crawler): bool public function validateContent(Response $response, Crawler $crawler): bool { - $body = $response->body(); - - if ($this->useJavascript) { - $body = $crawler->filter('body')->html(); - } - - $readability = new Readability($body); - - $readability->init(); - - $content = $readability->getContent()->textContent; + $content = $this->getTextContent($response, $crawler); if ($content == 'Sorry, Readability was unable to parse this page for content.') { $this->failureReason = __('failed.content.length.parse'); @@ -75,15 +66,9 @@ public function validateContent(Response $response, Crawler $crawler): bool public function calculatePercentageOfTransitionWordsInContent($content, $transitionWords) { - // Get phrases seperate by new line, dot, exclamation mark or question mark - 
$phrases = preg_split('/\n|\.|\!|\?/', $content); - - // Count all phrases where it has more than 5 words - $totalPhrases = array_filter($phrases, function ($phrase) { - return str_word_count($phrase) > 5; - }); + $phrases = $this->extractPhrases($content); - if (count($totalPhrases) === 0) { + if (count($phrases) === 0) { $this->actualValue = 0; $this->failureReason = __('failed.content.transition_words_ratio_check.no_phrases_found'); @@ -96,7 +81,7 @@ public function calculatePercentageOfTransitionWordsInContent($content, $transit $phrasesWithTransitionWord += $this->calculateNumberOfPhrasesWithTransitionWord($content, $transitionWord); } - return round($phrasesWithTransitionWord / count($totalPhrases) * 100, 0, PHP_ROUND_HALF_UP); + return round($phrasesWithTransitionWord / count($phrases) * 100, 0, PHP_ROUND_HALF_UP); } public function calculateNumberOfPhrasesWithTransitionWord(string $content, string $transitionWord): int diff --git a/src/Traits/Actions.php b/src/Traits/Actions.php new file mode 100644 index 00000000..d2a291eb --- /dev/null +++ b/src/Traits/Actions.php @@ -0,0 +1,31 @@ +body(); + + if ($this->useJavascript) { + $body = $crawler->filter('body')->html(); + } + + $readability = new Readability($body); + + $readability->init(); + + return $readability->getContent()->textContent; + } + + private function extractPhrases(string $content): array + { + // Get phrases seperate by new line, dot, exclamation mark or question mark + return preg_split('/\n|\.|\!|\?/', $content); + } +} diff --git a/tests/Checks/Content/TooLongSentenceCheckTest.php b/tests/Checks/Content/TooLongSentenceCheckTest.php new file mode 100644 index 00000000..550ca22d --- /dev/null +++ b/tests/Checks/Content/TooLongSentenceCheckTest.php @@ -0,0 +1,75 @@ + Http::response( + ' + + Test + + +

'.$body.'

+ ', + 200), + ]); + + $crawler->addHtmlContent(Http::get('vormkracht10.nl')->body()); + + $this->assertFalse($check->check(Http::get('vormkracht10.nl'), $crawler)); +}); + +it('can perform the too long sentence check on page with no too long sentence', function () { + $check = new TooLongSentenceCheck(); + $crawler = new Crawler(); + + $body = 'One two three four five six seven eight nine ten eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen'; + + Http::fake([ + 'vormkracht10.nl' => Http::response( + ' + + Test + + +

'.$body.'

+ ', + 200), + ]); + + $crawler->addHtmlContent(Http::get('vormkracht10.nl')->body()); + + $check->check(Http::get('vormkracht10.nl'), $crawler); + + $this->assertTrue($check->check(Http::get('vormkracht10.nl'), $crawler)); +}); + +it('can perform the too long sentence check on page with no body', function () { + $check = new TooLongSentenceCheck(); + $crawler = new Crawler(); + + Http::fake([ + 'vormkracht10.nl' => Http::response( + ' + + Test + + ', + 200), + ]); + + $crawler->addHtmlContent(Http::get('vormkracht10.nl')->body()); + + $check->check(Http::get('vormkracht10.nl'), $crawler); + + $this->assertTrue($check->check(Http::get('vormkracht10.nl'), $crawler)); +});