Skip to content

Commit

Permalink
[5.x] Fix case insensitive Comb search for UTF-8 characters (#11363)
Browse files Browse the repository at this point in the history
  • Loading branch information
heidkaemper authored Jan 22, 2025
1 parent 36c3b22 commit 2b490c1
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 9 deletions.
18 changes: 9 additions & 9 deletions src/Search/Comb/Comb.php
Original file line number Diff line number Diff line change
Expand Up @@ -539,11 +539,11 @@ private function searchOverData($params, $raw_query)
$escaped_chunk = preg_quote($chunk, '#');
$chunk_is_word = ! preg_match('#\s#', $chunk);
$regex = [
'partial_anywhere' => '#'.$escaped_chunk.'#i',
'partial_from_start_anywhere' => '#(^|\s)'.$escaped_chunk.'#i',
'whole_anywhere' => '#(^|\s)'.$escaped_chunk.'($|\s)#i',
'partial_from_start' => '#^'.$escaped_chunk.'#i',
'whole' => '#^'.$escaped_chunk.'$#i',
'partial_anywhere' => '#'.$escaped_chunk.'#iu',
'partial_from_start_anywhere' => '#(^|\s)'.$escaped_chunk.'#iu',
'whole_anywhere' => '#(^|\s)'.$escaped_chunk.'($|\s)#iu',
'partial_from_start' => '#^'.$escaped_chunk.'#iu',
'whole' => '#^'.$escaped_chunk.'$#iu',
];

// loop over each data property
Expand Down Expand Up @@ -710,8 +710,8 @@ private function searchOverData($params, $raw_query)
*/
private function removeDisallowedMatches($params)
{
$disallowed = '#'.implode('|', $params['disallowed']).'#i';
$required = '#(?=.*'.implode(')(?=.*', $params['required']).')#i';
$disallowed = '#'.implode('|', $params['disallowed']).'#iu';
$required = '#(?=.*'.implode(')(?=.*', $params['required']).')#iu';
$new_data = [];

// this only applies to boolean mode
Expand Down Expand Up @@ -1058,7 +1058,7 @@ private function extractSnippets($value, $chunks)
$escaped_chunks = collect($chunks)
->map(fn ($chunk) => preg_quote($chunk, '#'))
->join('|');
$regex = '#(.*?)('.$escaped_chunks.')(.{0,'.$length.'}(?:\s|$))#i';
$regex = '#(.*?)('.$escaped_chunks.')(.{0,'.$length.'}(?:\s|$))#iu';
if (! preg_match_all($regex, $value, $matches, PREG_SET_ORDER)) {
return [];
}
Expand All @@ -1081,7 +1081,7 @@ private function extractSnippets($value, $chunks)
}
$snippets[] = trim($snippet);
}
if (preg_match('#('.$escaped_chunks.')#i', $surplus)) {
if (preg_match('#('.$escaped_chunks.')#iu', $surplus)) {
$snippets[] = trim($surplus);
}

Expand Down
14 changes: 14 additions & 0 deletions tests/Search/CombTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,20 @@ public function it_can_search_for_slashes()
$this->assertSame(1, $result['info']['total_results']);
}

#[Test]
public function it_can_search_for_umlauts()
{
    // Two entries: one contains an uppercase "Ü", the other a lowercase "ü".
    $entries = [
        ['content' => 'Üppercase umlaut'],
        ['content' => 'Lowercase ümlaut'],
    ];

    // Query with the lowercase umlaut only; matching is expected to ignore
    // case for multi-byte UTF-8 characters, so both entries should be found.
    $found = (new Comb($entries))->lookUp('ü');

    $this->assertIsArray($found);

    // The result array itself has two top-level elements
    // (presumably the matched data and the 'info' summary; confirm against Comb::lookUp).
    $this->assertCount(2, $found);

    // Both the uppercase and the lowercase entry must be counted as results.
    $this->assertSame(2, $found['info']['total_results']);
}

public static function searchesProvider()
{
return [
Expand Down

0 comments on commit 2b490c1

Please sign in to comment.