Skip to content

Commit

Permalink
Merge branch 'master' into fix-page
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastian-meyer committed Jan 29, 2024
2 parents 856c70d + 93a4b45 commit 07f424d
Show file tree
Hide file tree
Showing 22 changed files with 853 additions and 534 deletions.
1 change: 1 addition & 0 deletions .github/phpstan.neon
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ parameters:
- '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findByIsSortable\(\)\.#'
- '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByFeUserId\(\)\.#'
- '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByIndexName\(\)\.#'
- '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByLocation\(\)\.#'
- '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByPid\(\)\.#'
- '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByRecordId\(\)\.#'
- '#Call to an undefined method Kitodo\\Dlf\\Domain\\Repository\\[a-zA-Z]+Repository::findOneByRoot\(\)\.#'
Expand Down
16 changes: 8 additions & 8 deletions Classes/Command/HarvestCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int

if ($this->storagePid == 0) {
$io->error('ERROR: No valid PID (' . $this->storagePid . ') given.');
exit(1);
return BaseCommand::FAILURE;
}

if (
Expand All @@ -133,15 +133,15 @@ protected function execute(InputInterface $input, OutputInterface $output): int
}
if (empty($output_solrCores)) {
$io->error('ERROR: No valid Solr core ("' . $input->getOption('solr') . '") given. No valid cores found on PID ' . $this->storagePid . ".\n");
exit(1);
return BaseCommand::FAILURE;
} else {
$io->error('ERROR: No valid Solr core ("' . $input->getOption('solr') . '") given. ' . "Valid cores are (<uid>:<index_name>):\n" . implode("\n", $output_solrCores) . "\n");
exit(1);
return BaseCommand::FAILURE;
}
}
} else {
$io->error('ERROR: Required parameter --solr|-s is missing or array.');
exit(1);
return BaseCommand::FAILURE;
}

if (MathUtility::canBeInterpretedAsInteger($input->getOption('lib'))) {
Expand All @@ -152,11 +152,11 @@ protected function execute(InputInterface $input, OutputInterface $output): int
$baseUrl = $this->owner->getOaiBase();
} else {
$io->error('ERROR: Required parameter --lib|-l is not a valid UID.');
exit(1);
return BaseCommand::FAILURE;
}
if (!GeneralUtility::isValidUrl($baseUrl)) {
$io->error('ERROR: No valid OAI Base URL set for library with given UID ("' . $input->getOption('lib') . '").');
exit(1);
return BaseCommand::FAILURE;
} else {
try {
$oai = Endpoint::build($baseUrl);
Expand Down Expand Up @@ -198,7 +198,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int
}
if (empty($set)) {
$io->error('ERROR: OAI interface does not provide a set with given setSpec ("' . $input->getOption('set') . '").');
exit(1);
return BaseCommand::FAILURE;
}
}

Expand Down Expand Up @@ -261,7 +261,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int

$io->success('All done!');

return 0;
return BaseCommand::SUCCESS;
}

/**
Expand Down
60 changes: 40 additions & 20 deletions Classes/Command/IndexCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int

if ($this->storagePid == 0) {
$io->error('ERROR: No valid PID (' . $this->storagePid . ') given.');
exit(1);
return BaseCommand::FAILURE;
}

if (
Expand All @@ -117,15 +117,15 @@ protected function execute(InputInterface $input, OutputInterface $output): int
}
if (empty($output_solrCores)) {
$io->error('ERROR: No valid Solr core ("' . $input->getOption('solr') . '") given. No valid cores found on PID ' . $this->storagePid . ".\n");
exit(1);
return BaseCommand::FAILURE;
} else {
$io->error('ERROR: No valid Solr core ("' . $input->getOption('solr') . '") given. ' . "Valid cores are (<uid>:<index_name>):\n" . implode("\n", $output_solrCores) . "\n");
exit(1);
return BaseCommand::FAILURE;
}
}
} else {
$io->error('ERROR: Required parameter --solr|-s is missing or array.');
exit(1);
return BaseCommand::FAILURE;
}

if (
Expand All @@ -137,7 +137,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int
)
) {
$io->error('ERROR: Required parameter --doc|-d is not a valid document UID or URL.');
exit(1);
return BaseCommand::FAILURE;
}

if (!empty($input->getOption('owner'))) {
Expand Down Expand Up @@ -168,24 +168,12 @@ protected function execute(InputInterface $input, OutputInterface $output): int
} else if (GeneralUtility::isValidUrl($input->getOption('doc'))) {
$doc = AbstractDocument::getInstance($input->getOption('doc'), ['storagePid' => $this->storagePid], true);

if ($doc->recordId) {
$document = $this->documentRepository->findOneByRecordId($doc->recordId);
}

if ($document === null) {
// create new Document object
$document = GeneralUtility::makeInstance(Document::class);
}

// now there must exist a document object
if ($document) {
$document->setLocation($input->getOption('doc'));
}
$document = $this->getDocumentFromUrl($doc, $input->getOption('doc'));
}

if ($doc === null) {
$io->error('ERROR: Document "' . $input->getOption('doc') . '" could not be loaded.');
exit(1);
return BaseCommand::FAILURE;
}

$document->setSolrcore($solrCoreUid);
Expand All @@ -205,7 +193,39 @@ protected function execute(InputInterface $input, OutputInterface $output): int

$io->success('All done!');

return 0;
return BaseCommand::SUCCESS;
}

/**
 * Get the Document record for a document that was loaded from a URL.
 *
 * The record is looked up by the record identifier of the loaded document when
 * it has one, otherwise by the given URL. If no record is found, a new Document
 * object is created. In every case the location of the returned record is set
 * to the given URL.
 *
 * @access private
 *
 * @param AbstractDocument $doc The loaded document
 * @param string $url The URL the document was loaded from
 *
 * @return Document The existing or newly created record, with its location set to $url
 */
private function getDocumentFromUrl($doc, string $url): Document
{
    if ($doc->recordId) {
        $document = $this->documentRepository->findOneByRecordId($doc->recordId);
    } else {
        // No record identifier available: fall back to a lookup by location.
        $document = $this->documentRepository->findOneByLocation($url);
    }

    if ($document === null) {
        // Nothing found by the lookup: create new Document object.
        $document = GeneralUtility::makeInstance(Document::class);
    }

    // A Document object is guaranteed to exist here, so no further check is needed.
    $document->setLocation($url);

    return $document;
}
}
14 changes: 7 additions & 7 deletions Classes/Command/ReindexCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int

if ($this->storagePid == 0) {
$io->error('ERROR: No valid PID (' . $this->storagePid . ') given.');
exit(1);
return BaseCommand::FAILURE;
}

if (
Expand All @@ -121,15 +121,15 @@ protected function execute(InputInterface $input, OutputInterface $output): int
}
if (empty($output_solrCores)) {
$io->error('ERROR: No valid Solr core ("' . $input->getOption('solr') . '") given. No valid cores found on PID ' . $this->storagePid . ".\n");
exit(1);
return BaseCommand::FAILURE;
} else {
$io->error('ERROR: No valid Solr core ("' . $input->getOption('solr') . '") given. ' . "Valid cores are (<uid>:<index_name>):\n" . implode("\n", $output_solrCores) . "\n");
exit(1);
return BaseCommand::FAILURE;
}
}
} else {
$io->error('ERROR: Required parameter --solr|-s is missing or array.');
exit(1);
return BaseCommand::FAILURE;
}

if (!empty($input->getOption('owner'))) {
Expand All @@ -152,13 +152,13 @@ protected function execute(InputInterface $input, OutputInterface $output): int
// "coll" may be a single integer or a comma-separated list of integers.
if (empty(array_filter(GeneralUtility::intExplode(',', $input->getOption('coll'), true)))) {
$io->error('ERROR: Parameter --coll|-c is not a valid comma-separated list of collection UIDs.');
exit(1);
return BaseCommand::FAILURE;
}
// Get all documents of given collections.
$documents = $this->documentRepository->findAllByCollectionsLimited(GeneralUtility::intExplode(',', $input->getOption('coll'), true), 0);
} else {
$io->error('ERROR: One of parameters --all|-a or --coll|-c must be given.');
exit(1);
return BaseCommand::FAILURE;
}

foreach ($documents as $id => $document) {
Expand Down Expand Up @@ -187,6 +187,6 @@ protected function execute(InputInterface $input, OutputInterface $output): int

$io->success('All done!');

return 0;
return BaseCommand::SUCCESS;
}
}
69 changes: 45 additions & 24 deletions Classes/Common/AbstractDocument.php
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ abstract public function getDownloadLocation(string $id): string;
* @abstract
*
* @param string $id The "@ID" attribute of the file node (METS) or the "@id" property of the IIIF resource
*
*
* @return array|null The set of file information
*/
abstract public function getFileInfo($id): ?array;
Expand Down Expand Up @@ -581,7 +581,7 @@ public static function &getInstance(string $location, array $settings = [], bool
}

// Sanitize input.
$pid = max(intval($settings['storagePid']), 0);
$pid = max((int) $settings['storagePid'], 0);
if ($documentFormat == 'METS') {
$instance = new MetsDocument($pid, $location, $xml, $settings);
} elseif ($documentFormat == 'IIIF') {
Expand All @@ -590,7 +590,7 @@ public static function &getInstance(string $location, array $settings = [], bool
$instance = new IiifManifest($pid, $location, $iiif);
}

if (!is_null($instance)) {
if ($instance !== null) {
self::setDocumentCache($location, $instance);
}

Expand Down Expand Up @@ -686,19 +686,7 @@ protected function getFullTextFromXml(string $id): string
if (!empty($fileContent) && !empty($this->formats[$textFormat])) {
$textMiniOcr = '';
if (!empty($this->formats[$textFormat]['class'])) {
$class = $this->formats[$textFormat]['class'];
// Get the raw text from class.
if (
class_exists($class)
&& ($obj = GeneralUtility::makeInstance($class)) instanceof FulltextInterface
) {
// Load XML from file.
$ocrTextXml = Helper::getXmlFileAsString($fileContent);
$textMiniOcr = $obj->getTextAsMiniOcr($ocrTextXml);
$this->rawTextArray[$id] = $textMiniOcr;
} else {
$this->logger->warning('Invalid class/method "' . $class . '->getRawText()" for text format "' . $textFormat . '"');
}
$textMiniOcr = $this->getRawTextFromClass($id, $fileContent, $textFormat);
}
$fullText = $textMiniOcr;
} else {
Expand All @@ -707,6 +695,38 @@ class_exists($class)
return $fullText;
}

/**
 * Get raw text from class for given format.
 *
 * Instantiates the class configured in $this->formats[$textFormat]['class'] and,
 * if it implements FulltextInterface, lets it convert the loaded XML to MiniOCR.
 * The converted text is also stored in $this->rawTextArray under the given $id.
 * When the class does not exist or does not implement FulltextInterface, a
 * warning is logged and an empty string is returned.
 *
 * @access private
 *
 * @param string $id Key under which the converted text is stored in $this->rawTextArray
 * @param mixed $fileContent Value handed to Helper::getXmlFileAsString() to load the XML
 * @param string $textFormat Key of the text format in $this->formats
 *
 * @return string The text as MiniOCR, or an empty string if it could not be produced
 */
private function getRawTextFromClass($id, $fileContent, $textFormat): string
{
    $class = $this->formats[$textFormat]['class'];

    if (!class_exists($class)) {
        $this->logger->warning('Class "' . $class . '" does not exist for text format "' . $textFormat . '"');
        return '';
    }

    $obj = GeneralUtility::makeInstance($class);
    if (!$obj instanceof FulltextInterface) {
        // The message names the method that is actually invoked on the class below.
        $this->logger->warning('Invalid class/method "' . $class . '->getTextAsMiniOcr()" for text format "' . $textFormat . '"');
        return '';
    }

    // Load XML from file.
    $ocrTextXml = Helper::getXmlFileAsString($fileContent);
    $textMiniOcr = $obj->getTextAsMiniOcr($ocrTextXml);
    // Remember the converted text under the given id.
    $this->rawTextArray[$id] = $textMiniOcr;

    return $textMiniOcr;
}

/**
* Get format of the OCR full text
*
Expand Down Expand Up @@ -744,7 +764,7 @@ public static function getTitle(int $uid, bool $recursive = false): string
{
$title = '';
// Sanitize input.
$uid = max(intval($uid), 0);
$uid = max($uid, 0);
if ($uid) {
$queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
->getQueryBuilderForTable('tx_dlf_documents');
Expand All @@ -762,15 +782,16 @@ public static function getTitle(int $uid, bool $recursive = false): string
->setMaxResults(1)
->execute();

if ($resArray = $result->fetchAssociative()) {
$resArray = $result->fetchAssociative();
if ($resArray) {
// Get title information.
$title = $resArray['title'];
$partof = $resArray['partof'];
// Search parent documents recursively for a title?
if (
$recursive
&& empty($title)
&& intval($partof)
&& (int) $partof
&& $partof != $uid
) {
$title = self::getTitle($partof, true);
Expand Down Expand Up @@ -950,7 +971,8 @@ public function registerNamespaces(&$obj): void
*
* @return array
*/
protected function initializeMetadata(string $format): array {
protected function initializeMetadata(string $format): array
{
return [
'title' => [],
'title_sorting' => [],
Expand Down Expand Up @@ -1161,7 +1183,7 @@ protected function magicGetTableOfContents(): array
*/
protected function _setCPid(int $value): void
{
$this->cPid = max(intval($value), 0);
$this->cPid = max($value, 0);
}

/**
Expand Down Expand Up @@ -1258,7 +1280,7 @@ public function __set(string $var, $value): void
*/
private static function getDocumentCache(string $location)
{
$cacheIdentifier = md5($location);
$cacheIdentifier = hash('md5', $location);
$cache = GeneralUtility::makeInstance(CacheManager::class)->getCache('tx_dlf_doc');
$cacheHit = $cache->get($cacheIdentifier);

Expand All @@ -1279,11 +1301,10 @@ private static function getDocumentCache(string $location)
*/
private static function setDocumentCache(string $location, AbstractDocument $currentDocument): void
{
$cacheIdentifier = md5($location);
$cacheIdentifier = hash('md5', $location);
$cache = GeneralUtility::makeInstance(CacheManager::class)->getCache('tx_dlf_doc');

// Save value in cache
$cache->set($cacheIdentifier, $currentDocument);
}

}
Loading

0 comments on commit 07f424d

Please sign in to comment.