Skip to content

Commit

Permalink
find the title from second page (#12157)
Browse files Browse the repository at this point in the history
* find the title from second page

find the title from second page

* fix method name

fix method name

* add a unit test file with a test cover page

add a unit test file with a test cover page

* change unit test cover page file

change unit test cover page file

* fix the PDF file to use a consistent cover page

fix the PDF file to use a consistent cover page
  • Loading branch information
leaf-soba authored Nov 25, 2024
1 parent a86adbb commit 77907c8
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ public ParserResult importDatabase(Path filePath) {

private static String extractTitleFromDocument(PDDocument document) throws IOException {
TitleExtractorByFontSize stripper = new TitleExtractorByFontSize();
- return stripper.getTitleFromFirstPage(document);
+ return stripper.getTitle(document);
}

private static class TitleExtractorByFontSize extends PDFTextStripper {
Expand All @@ -230,9 +230,9 @@ public TitleExtractorByFontSize() {
this.textPositionsList = new ArrayList<>();
}

- public String getTitleFromFirstPage(PDDocument document) throws IOException {
+ public String getTitle(PDDocument document) throws IOException {
this.setStartPage(1);
- this.setEndPage(1);
+ this.setEndPage(2);
this.writeText(document, new StringWriter());
return findLargestFontText(textPositionsList);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ void pdfTitleExtraction(String expectedTitle, String filePath) throws Exception
private static Stream<Arguments> providePdfData() {
return Stream.of(
Arguments.of("Fundamentals of Distributed Computing: A Practical Tour of Vector Clock Systems", "/pdfs/PdfContentImporter/Baldoni2002.pdf"),
+ Arguments.of("JabRef Example for Reference Parsing", "/pdfs/IEEE/ieee-paper-cover.pdf"),
Arguments.of("On How We Can Teach – Exploring New Ways in Professional Software Development for Students", "/pdfs/PdfContentImporter/Kriha2018.pdf"),
Arguments.of("JabRef Example for Reference Parsing", "/pdfs/IEEE/ieee-paper.pdf"),
Arguments.of("Paper Title", "/org/jabref/logic/importer/util/LNCS-minimal.pdf"),
Expand Down
Binary file not shown.

0 comments on commit 77907c8

Please sign in to comment.