From 0182b5f52e9b57a5bc3b81ab84b28ad32817cda7 Mon Sep 17 00:00:00 2001 From: Frank Ulrich Weber Date: Mon, 6 Nov 2023 14:04:26 +0100 Subject: [PATCH 1/7] Remove index-time boosting from Indexer --- Classes/Common/Indexer.php | 40 ++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/Classes/Common/Indexer.php b/Classes/Common/Indexer.php index dc6b194cd..1347a5f96 100644 --- a/Classes/Common/Indexer.php +++ b/Classes/Common/Indexer.php @@ -56,8 +56,7 @@ class Indexer 'sortables' => [], 'indexed' => [], 'stored' => [], - 'tokenized' => [], - 'fieldboost' => [] + 'tokenized' => [] ]; /** @@ -305,11 +304,6 @@ protected static function loadIndexConf(int $pid): void if ($indexing['index_autocomplete']) { self::$fields['autocomplete'][] = $indexing['index_name']; } - if ($indexing['index_boost'] > 0.0) { - self::$fields['fieldboost'][$indexing['index_name']] = floatval($indexing['index_boost']); - } else { - self::$fields['fieldboost'][$indexing['index_name']] = false; - } } self::$fieldsLoaded = true; } @@ -337,7 +331,7 @@ protected static function processLogical(Document $document, array $logicalUnit) if (!empty($metadata)) { $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey, 'general'); $validator = new DocumentValidator($metadata, explode(',', $extConf['requiredMetadataFields'])); - + if ($validator->hasAllMandatoryMetadataFields()) { $metadata['author'] = self::removeAppendsFromAuthor($metadata['author']); // set Owner if available @@ -357,8 +351,8 @@ protected static function processLogical(Document $document, array $logicalUnit) } // There can be only one toplevel unit per UID, independently of backend configuration $solrDoc->setField('toplevel', $logicalUnit['id'] == $doc->toplevelId ? true : false); - $solrDoc->setField('title', $metadata['title'][0], self::$fields['fieldboost']['title']); - $solrDoc->setField('volume', $metadata['volume'][0], self::$fields['fieldboost']['volume']); + $solrDoc->setField('title', $metadata['title'][0]); + $solrDoc->setField('volume', $metadata['volume'][0]); // verify date formatting if(strtotime($metadata['date'][0])) { $solrDoc->setField('date', self::getFormattedDate($metadata['date'][0])); @@ -384,16 +378,16 @@ protected static function processLogical(Document $document, array $logicalUnit) in_array('collection', self::$fields['facets']) && empty($metadata['collection']) && !empty($doc->metadataArray[$doc->toplevelId]['collection']) - ) { - $solrDoc->setField('collection_faceting', $doc->metadataArray[$doc->toplevelId]['collection']); - } - try { - $updateQuery->addDocument($solrDoc); - self::$solr->service->update($updateQuery); - } catch (\Exception $e) { - self::handleException($e->getMessage()); - return false; - } + ) { + $solrDoc->setField('collection_faceting', $doc->metadataArray[$doc->toplevelId]['collection']); + } + try { + $updateQuery->addDocument($solrDoc); + self::$solr->service->update($updateQuery); + } catch (\Exception $e) { + self::handleException($e->getMessage()); + return false; + } } else { Helper::log('Tip: If "record_id" field is missing then there is possibility that METS file still contains it but with the wrong source type attribute in "recordIdentifier" element', LOG_SEVERITY_NOTICE); return false; @@ -445,7 +439,7 @@ protected static function processPhysical(Document $document, int $page, array $ } } $solrDoc->setField('toplevel', false); - $solrDoc->setField('type', $physicalUnit['type'], self::$fields['fieldboost']['type']); + $solrDoc->setField('type', $physicalUnit['type']); $solrDoc->setField('collection', $doc->metadataArray[$doc->toplevelId]['collection']); $solrDoc->setField('location', $document->getLocation()); @@ -520,7 +514,7 @@ private static function processMetadata($document, $metadata, &$solrDoc): array !empty($data) && substr($indexName, -8) !== '_sorting' ) { - $solrDoc->setField(self::getIndexFieldName($indexName, $document->getPid()), $data, self::$fields['fieldboost'][$indexName]); + $solrDoc->setField(self::getIndexFieldName($indexName, $document->getPid()), $data); if (in_array($indexName, self::$fields['sortables'])) { // Add sortable fields to index. $solrDoc->setField($indexName . '_sorting', $metadata[$indexName . '_sorting'][0]); @@ -626,7 +620,7 @@ private static function getSolrDocument(Query $updateQuery, Document $document, $solrDoc->setField('partof', $document->getPartof()); $solrDoc->setField('root', $document->getCurrentDocument()->rootId); $solrDoc->setField('sid', $unit['id']); - $solrDoc->setField('type', $unit['type'], self::$fields['fieldboost']['type']); + $solrDoc->setField('type', $unit['type']); $solrDoc->setField('collection', $document->getCurrentDocument()->metadataArray[$document->getCurrentDocument()->toplevelId]['collection']); $solrDoc->setField('fulltext', $fullText); return $solrDoc; From bc80ca4e30a8ea69444cc084992cfadc8fd4fbae Mon Sep 17 00:00:00 2001 From: Frank Ulrich Weber Date: Mon, 6 Nov 2023 14:08:19 +0100 Subject: [PATCH 2/7] Adds query-time boosting for search --- Classes/Common/Solr/SolrSearch.php | 26 +++++++++++++++++-- Classes/Controller/CollectionController.php | 5 +++- Classes/Controller/SearchController.php | 5 +++- .../Domain/Repository/DocumentRepository.php | 5 ++-- 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/Classes/Common/Solr/SolrSearch.php b/Classes/Common/Solr/SolrSearch.php index f991f9ca4..c6f6e5c37 100644 --- a/Classes/Common/Solr/SolrSearch.php +++ b/Classes/Common/Solr/SolrSearch.php @@ -57,7 +57,13 @@ class SolrSearch implements \Countable, \Iterator, \ArrayAccess, QueryResultInte * @access private * @var QueryResult|null */ - private ?QueryResult $listedMetadata; + private QueryResult $listedMetadata; + + /** + * @access private + * @var QueryResult|null + */ + private QueryResult $indexedMetadata; /** * @access private @@ -90,13 +96,14 @@ class SolrSearch implements \Countable, \Iterator, \ArrayAccess, QueryResultInte * * @return void */ - public function __construct(DocumentRepository $documentRepository, $collections, array $settings, array $searchParams, QueryResult $listedMetadata = null) + public function __construct(DocumentRepository $documentRepository, $collections, array $settings, array $searchParams, QueryResult $listedMetadata = null, QueryResult $indexedMetadata = null) { $this->documentRepository = $documentRepository; $this->collections = $collections; $this->settings = $settings; $this->searchParams = $searchParams; $this->listedMetadata = $listedMetadata; + $this->indexedMetadata = $indexedMetadata; } /** @@ -664,6 +671,21 @@ protected function searchSolr($parameters = [], $enableCache = true) if ($enableCache === false || ($entry = $cache->get($cacheIdentifier)) === false) { $selectQuery = $solr->service->createSelect($parameters); + $edismax = $selectQuery->getEDisMax(); + + $queryFields = ''; + + if ($this->indexedMetadata) { + foreach ($this->indexedMetadata as $metadata) { + if ($metadata->getIndexIndexed()) { + $listMetadataRecord = $metadata->getIndexName() . '_' . ($metadata->getIndexTokenized() ? 't' : 'u') . ($metadata->getIndexStored() ? 's' : 'u') . ($metadata->getIndexIndexed() ? 'i' : 'u'); + $queryFields .= $listMetadataRecord.'^'.$metadata->getIndexBoost().' '; + } + } + } + + $edismax->setQueryFields($queryFields); + $grouping = $selectQuery->getGrouping(); $grouping->addField('uid'); $grouping->setLimit(100); // Results in group (TODO: check) diff --git a/Classes/Controller/CollectionController.php b/Classes/Controller/CollectionController.php index c7ecb958b..abd1f98ee 100644 --- a/Classes/Controller/CollectionController.php +++ b/Classes/Controller/CollectionController.php @@ -151,13 +151,16 @@ public function showAction(Collection $collection): void // get all metadata records to be shown in results $listedMetadata = $this->metadataRepository->findByIsListed(true); + // get all indexed metadata fileds + $indexedMetadata = $this->metadataRepository->findByIndexIndexed(true); + // get all sortable metadata records $sortableMetadata = $this->metadataRepository->findByIsSortable(true); // get all documents of given collection $solrResults = null; if (is_array($searchParams) && !empty($searchParams)) { - $solrResults = $this->documentRepository->findSolrByCollection($collection, $this->settings, $searchParams, $listedMetadata); + $solrResults = $this->documentRepository->findSolrByCollection($collection, $this->settings, $searchParams, $listedMetadata, $indexedMetadata); $itemsPerPage = $this->settings['list']['paginate']['itemsPerPage']; if (empty($itemsPerPage)) { diff --git a/Classes/Controller/SearchController.php b/Classes/Controller/SearchController.php index ae69761d1..6f94d4160 100644 --- a/Classes/Controller/SearchController.php +++ b/Classes/Controller/SearchController.php @@ -172,11 +172,14 @@ public function mainAction(): void // get all metadata records to be shown in results $listedMetadata = $this->metadataRepository->findByIsListed(true); + // get all indexed metadata fileds + $indexedMetadata = $this->metadataRepository->findByIndexIndexed(true); + $solrResults = null; $numResults = 0; // Do not execute the Solr search if used together with ListView plugin. if (!$listViewSearch) { - $solrResults = $this->documentRepository->findSolrWithoutCollection($this->settings, $this->searchParams, $listedMetadata); + $solrResults = $this->documentRepository->findSolrWithoutCollection($this->settings, $this->searchParams, $listedMetadata, $indexedMetadata); $numResults = $solrResults->getNumFound(); $itemsPerPage = $this->settings['list']['paginate']['itemsPerPage']; diff --git a/Classes/Domain/Repository/DocumentRepository.php b/Classes/Domain/Repository/DocumentRepository.php index 7e09a5df7..cdb0294b7 100644 --- a/Classes/Domain/Repository/DocumentRepository.php +++ b/Classes/Domain/Repository/DocumentRepository.php @@ -579,6 +579,7 @@ public function findChildrenOfEach(array $uids) * @param array $settings * @param array $searchParams * @param QueryResult $listedMetadata + * @param QueryResult $indexedMetadata * * @return SolrSearch */ @@ -632,13 +633,13 @@ public function findSolrWithoutCollection($settings, $searchParams, $listedMetad * * @return SolrSearch */ - private function findSolr($collections, $settings, $searchParams, $listedMetadata = null): SolrSearch + private function findSolr($collections, $settings, $searchParams, $listedMetadata = null, $indexedMetadata = null): SolrSearch { // set settings global inside this repository // (may be necessary when SolrSearch calls back) $this->settings = $settings; - $search = new SolrSearch($this, $collections, $settings, $searchParams, $listedMetadata); + $search = new SolrSearch($this, $collections, $settings, $searchParams, $listedMetadata, $indexedMetadata); $search->prepare(); return $search; } From fbe0f2dee928eb28a3b99f7308aadb4417fbe73a Mon Sep 17 00:00:00 2001 From: Frank Ulrich Weber Date: Tue, 7 Nov 2023 10:42:25 +0100 Subject: [PATCH 3/7] Adds query-time boosting for search --- Classes/Controller/ListViewController.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Classes/Controller/ListViewController.php b/Classes/Controller/ListViewController.php index 66bb47835..17ecce76f 100644 --- a/Classes/Controller/ListViewController.php +++ b/Classes/Controller/ListViewController.php @@ -96,10 +96,13 @@ public function mainAction(): void // get all metadata records to be shown in results $listedMetadata = $this->metadataRepository->findByIsListed(true); + // get all indexed metadata fileds + $indexedMetadata = $this->metadataRepository->findByIndexIndexed(true); + $solrResults = null; $numResults = 0; if (is_array($this->searchParams) && !empty($this->searchParams)) { - $solrResults = $this->documentRepository->findSolrByCollections($collections, $this->settings, $this->searchParams, $listedMetadata); + $solrResults = $this->documentRepository->findSolrByCollections($collections, $this->settings, $this->searchParams, $listedMetadata, $indexedMetadata); $numResults = $solrResults->getNumFound(); $itemsPerPage = $this->settings['list']['paginate']['itemsPerPage']; From 633f4fbc96615002b7d276ef872273949f1688f2 Mon Sep 17 00:00:00 2001 From: Frank Ulrich Weber Date: Tue, 9 Jan 2024 11:41:26 +0100 Subject: [PATCH 4/7] Fix typo --- Classes/Controller/CollectionController.php | 2 +- Classes/Controller/ListViewController.php | 2 +- Classes/Controller/SearchController.php | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Classes/Controller/CollectionController.php b/Classes/Controller/CollectionController.php index abd1f98ee..c44822b5c 100644 --- a/Classes/Controller/CollectionController.php +++ b/Classes/Controller/CollectionController.php @@ -151,7 +151,7 @@ public function showAction(Collection $collection): void // get all metadata records to be shown in results $listedMetadata = $this->metadataRepository->findByIsListed(true); - // get all indexed metadata fileds + // get all indexed metadata fields $indexedMetadata = $this->metadataRepository->findByIndexIndexed(true); // get all sortable metadata records diff --git a/Classes/Controller/ListViewController.php b/Classes/Controller/ListViewController.php index 17ecce76f..324e5585a 100644 --- a/Classes/Controller/ListViewController.php +++ b/Classes/Controller/ListViewController.php @@ -96,7 +96,7 @@ public function mainAction(): void // get all metadata records to be shown in results $listedMetadata = $this->metadataRepository->findByIsListed(true); - // get all indexed metadata fileds + // get all indexed metadata fields $indexedMetadata = $this->metadataRepository->findByIndexIndexed(true); $solrResults = null; diff --git a/Classes/Controller/SearchController.php b/Classes/Controller/SearchController.php index 6f94d4160..127943c54 100644 --- a/Classes/Controller/SearchController.php +++ b/Classes/Controller/SearchController.php @@ -172,7 +172,7 @@ public function mainAction(): void // get all metadata records to be shown in results $listedMetadata = $this->metadataRepository->findByIsListed(true); - // get all indexed metadata fileds + // get all indexed metadata fields $indexedMetadata = $this->metadataRepository->findByIndexIndexed(true); $solrResults = null; From cd9dbcc9d98b660d44195b52f97e72b815db3285 Mon Sep 17 00:00:00 2001 From: Frank Ulrich Weber Date: Tue, 9 Jan 2024 18:00:33 +0100 Subject: [PATCH 5/7] Fixes some Codacy and PHPStan Errors --- .github/phpstan.neon | 1 + Classes/Common/Solr/SolrSearch.php | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/phpstan.neon b/.github/phpstan.neon index 7307b130f..86b7418e6 100644 --- a/.github/phpstan.neon +++ b/.github/phpstan.neon @@ -2,6 +2,7 @@ parameters: ignoreErrors: - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::countByPid\(\)\.#' - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findByIsListed\(\)\.#' + - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findByIndexIndexed\(\)\.#' - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findByIsSortable\(\)\.#' - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByFeUserId\(\)\.#' - '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByIndexName\(\)\.#' diff --git a/Classes/Common/Solr/SolrSearch.php b/Classes/Common/Solr/SolrSearch.php index c6f6e5c37..79f7c97d8 100644 --- a/Classes/Common/Solr/SolrSearch.php +++ b/Classes/Common/Solr/SolrSearch.php @@ -678,8 +678,8 @@ protected function searchSolr($parameters = [], $enableCache = true) if ($this->indexedMetadata) { foreach ($this->indexedMetadata as $metadata) { if ($metadata->getIndexIndexed()) { - $listMetadataRecord = $metadata->getIndexName() . '_' . ($metadata->getIndexTokenized() ? 't' : 'u') . ($metadata->getIndexStored() ? 's' : 'u') . ($metadata->getIndexIndexed() ? 'i' : 'u'); - $queryFields .= $listMetadataRecord.'^'.$metadata->getIndexBoost().' '; + $listMetadataRecord = $metadata->getIndexName() . '_' . ($metadata->getIndexTokenized() ? 't' : 'u') . ($metadata->getIndexStored() ? 's' : 'u') . 'i'; + $queryFields .= $listMetadataRecord . '^' . $metadata->getIndexBoost() . ' '; } } } From f7506767209693b1ea009cc29bfd9f2c1ede1f7d Mon Sep 17 00:00:00 2001 From: Frank Ulrich Weber Date: Tue, 18 Jun 2024 17:59:19 +0200 Subject: [PATCH 6/7] Fix rebase/merge issues --- Classes/Common/Indexer.php | 22 +++++++++---------- .../Domain/Repository/DocumentRepository.php | 14 +++++++----- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/Classes/Common/Indexer.php b/Classes/Common/Indexer.php index 1347a5f96..98fa6885d 100644 --- a/Classes/Common/Indexer.php +++ b/Classes/Common/Indexer.php @@ -331,7 +331,7 @@ protected static function processLogical(Document $document, array $logicalUnit) if (!empty($metadata)) { $extConf = GeneralUtility::makeInstance(ExtensionConfiguration::class)->get(self::$extKey, 'general'); $validator = new DocumentValidator($metadata, explode(',', $extConf['requiredMetadataFields'])); - + if ($validator->hasAllMandatoryMetadataFields()) { $metadata['author'] = self::removeAppendsFromAuthor($metadata['author']); // set Owner if available @@ -378,16 +378,16 @@ protected static function processLogical(Document $document, array $logicalUnit) in_array('collection', self::$fields['facets']) && empty($metadata['collection']) && !empty($doc->metadataArray[$doc->toplevelId]['collection']) - ) { - $solrDoc->setField('collection_faceting', $doc->metadataArray[$doc->toplevelId]['collection']); - } - try { - $updateQuery->addDocument($solrDoc); - self::$solr->service->update($updateQuery); - } catch (\Exception $e) { - self::handleException($e->getMessage()); - return false; - } + ) { + $solrDoc->setField('collection_faceting', $doc->metadataArray[$doc->toplevelId]['collection']); + } + try { + $updateQuery->addDocument($solrDoc); + self::$solr->service->update($updateQuery); + } catch (\Exception $e) { + self::handleException($e->getMessage()); + return false; + } } else { Helper::log('Tip: If "record_id" field is missing then there is possibility that METS file still contains it but with the wrong source type attribute in "recordIdentifier" element', LOG_SEVERITY_NOTICE); return false; diff --git a/Classes/Domain/Repository/DocumentRepository.php b/Classes/Domain/Repository/DocumentRepository.php index cdb0294b7..25785b878 100644 --- a/Classes/Domain/Repository/DocumentRepository.php +++ b/Classes/Domain/Repository/DocumentRepository.php @@ -583,9 +583,9 @@ public function findChildrenOfEach(array $uids) * * @return SolrSearch */ - public function findSolrByCollection(Collection $collection, $settings, $searchParams, $listedMetadata = null) + public function findSolrByCollection(Collection $collection, $settings, $searchParams, $listedMetadata = null, $indexedMetadata = null) { - return $this->findSolr([$collection], $settings, $searchParams, $listedMetadata); + return $this->findSolr([$collection], $settings, $searchParams, $listedMetadata, $indexedMetadata); } /** @@ -597,12 +597,13 @@ public function findSolrByCollection(Collection $collection, $settings, $searchP * @param array $settings * @param array $searchParams * @param QueryResult $listedMetadata + * @param QueryResult $indexedMetadata * * @return SolrSearch */ - public function findSolrByCollections($collections, $settings, $searchParams, $listedMetadata = null): SolrSearch + public function findSolrByCollections($collections, $settings, $searchParams, $listedMetadata = null, $indexedMetadata = null): SolrSearch { - return $this->findSolr($collections, $settings, $searchParams, $listedMetadata); + return $this->findSolr($collections, $settings, $searchParams, $listedMetadata, $indexedMetadata); } /** @@ -613,12 +614,13 @@ public function findSolrByCollections($collections, $settings, $searchParams, $l * @param array $settings * @param array $searchParams * @param QueryResult $listedMetadata + * @param QueryResult $indexedMetadata * * @return SolrSearch */ - public function findSolrWithoutCollection($settings, $searchParams, $listedMetadata = null): SolrSearch + public function findSolrWithoutCollection($settings, $searchParams, $listedMetadata = null, $indexedMetadata = null): SolrSearch { - return $this->findSolr([], $settings, $searchParams, $listedMetadata); + return $this->findSolr([], $settings, $searchParams, $listedMetadata, $indexedMetadata); } /** From 94d9425aaf6e4458ed2474ee39d1984bb600e7fb Mon Sep 17 00:00:00 2001 From: Frank Ulrich Weber Date: Fri, 28 Jun 2024 10:31:22 +0200 Subject: [PATCH 7/7] Fix PHPStan issues --- Classes/Common/Solr/SolrSearch.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Classes/Common/Solr/SolrSearch.php b/Classes/Common/Solr/SolrSearch.php index 79f7c97d8..466e4af8c 100644 --- a/Classes/Common/Solr/SolrSearch.php +++ b/Classes/Common/Solr/SolrSearch.php @@ -57,13 +57,13 @@ class SolrSearch implements \Countable, \Iterator, \ArrayAccess, QueryResultInte * @access private * @var QueryResult|null */ - private QueryResult $listedMetadata; + private ?QueryResult $listedMetadata; /** * @access private * @var QueryResult|null */ - private QueryResult $indexedMetadata; + private ?QueryResult $indexedMetadata; /** * @access private