Skip to content

Commit

Permalink
add pdf to bib (#12138)
Browse files Browse the repository at this point in the history
* add pdf to bib

1. remove double space in unit test
2. fix a logic bug in the space detection.
3. skip plain space characters when collecting the title characters
4. rename the pdf to use the author's last name instead of the first name

* fix EqualsAvoidsNull issue

fix EqualsAvoidsNull

* fix unit test issue

fix unit test issue

* remove doi

remove doi
  • Loading branch information
leaf-soba authored Oct 31, 2024
1 parent 36adca5 commit ddecd8f
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,9 @@ private boolean isUnwantedText(TextPosition previousTextPosition, TextPosition t
if (textPosition == null || previousTextPosition == null) {
return false;
}
if (StringUtil.isBlank(textPosition.getUnicode())) {
return true;
}
// The title is usually not in the bottom 10% of a page.
if ((textPosition.getPageHeight() - textPosition.getYDirAdj())
< (textPosition.getPageHeight() * 0.1)) {
Expand Down Expand Up @@ -294,11 +297,11 @@ private boolean isLegalTitle(String candidateText) {
}

/**
 * Decides whether a gap separates two text positions.
 *
 * <p>The horizontal gap is measured from the right edge of {@code previous}
 * (its X plus its width) to the X of {@code current}. The vertical gap is
 * measured from the Y of {@code previous} minus its height to the Y of
 * {@code current}.
 *
 * @param previous the text position that precedes {@code current}
 * @param current  the text position being examined
 * @return {@code true} if the magnitude of the horizontal gap exceeds the fixed
 *         threshold of 1, or the magnitude of the vertical gap exceeds the font
 *         size reported by {@code previous}
 */
private boolean isThereSpace(TextPosition previous, TextPosition current) {
    float xSpaceThreshold = 1F;
    // The vertical threshold scales with the font size of the previous glyph.
    float ySpaceThreshold = previous.getFontSizeInPt();
    float xGap = current.getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj());
    float yGap = current.getYDirAdj() - (previous.getYDirAdj() - previous.getHeightDir());
    // Compare magnitudes: a large negative gap (current lies before/above previous)
    // counts as a gap just like a large positive one.
    return Math.abs(xGap) > xSpaceThreshold || Math.abs(yGap) > ySpaceThreshold;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ void pdfTitleExtraction(String expectedTitle, String filePath) throws Exception

private static Stream<Arguments> providePdfData() {
return Stream.of(
Arguments.of("Fundamentals of Distributed Computing: A Practical Tour of Vector Clock Systems", "/pdfs/PdfContentImporter/Roberto2002.pdf"),
Arguments.of("Fundamentals of Distributed Computing: A Practical Tour of Vector Clock Systems", "/pdfs/PdfContentImporter/Baldoni2002.pdf"),
Arguments.of("On How We Can Teach – Exploring New Ways in Professional Software Development for Students", "/pdfs/PdfContentImporter/Kriha2018.pdf"),
Arguments.of("JabRef Example for Reference Parsing", "/pdfs/IEEE/ieee-paper.pdf"),
Arguments.of("Paper Title", "/org/jabref/logic/importer/util/LNCS-minimal.pdf"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,11 @@ @inproceedings{Keim2024TraceLinks
doi = {10.1145/3597503.3639130}
}

@inproceedings{Baldoni2002Fundamentals,
author = {Roberto Baldoni and Matthias Klusch},
abstract = {A distributed computation consists of a set of processes that cooperate to achieve a common goal. A main characteristic of these computations is that the processes do not already share a common global memory and that they communicate only by exchanging messages over a communication network. Moreover, message transfer delays are finite yet unpredictable. This computation model defines what is known as the asynchronous distributed system model, which includes systems that span large geographic areas and are subject to unpredictable loads.},
title = {Fundamentals of Distributed Computing: A Practical Tour of Vector Clock Systems},
url = {https://www.computer.org/csdl/magazine/ds/2002/02/o2001/13rRUEgs2Q8},
year = {2002},
file = {Baldoni2002.pdf}
}

0 comments on commit ddecd8f

Please sign in to comment.