Skip to content

Commit

Permalink
[MAINTENANCE] Added format unit tests. (#894)
Browse files Browse the repository at this point in the history
Co-authored-by: Oliver Stöhr <[email protected]>
  • Loading branch information
haogatyp and oliver-stoehr authored Oct 26, 2023
1 parent 8ba4471 commit c4aaf4f
Show file tree
Hide file tree
Showing 13 changed files with 1,218 additions and 0 deletions.
66 changes: 66 additions & 0 deletions Tests/Fixtures/Format/alto.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Test fixture: Tests/Fixtures/Format/alto.xml

  A complete ALTO v2 OCR document describing one page (Page1, 2546 x 1801,
  MeasurementUnit "pixel"). Presumably the baseline case for the format unit
  tests; confirm against the test class that loads it.

  Contents of the PrintSpace, in document order:
    * Page1_Block1           : full-page Illustration
    * Page1_Block2 (de)      : TextBlock with two TextLines
                               "Bürgertum und" / "Bürgerlichkeit in Dresden"
    * Page1_Block3 (de)      : TextBlock with two TextLines
                               "DRESDNER" / "HEFTE"
    * Page1_Block4 to Block9 : GraphicalElements
    * Page1_Block10, Block11 : Illustrations (Block10 carries a Shape/Polygon)

  Words are String elements (CONTENT = text, WC = word confidence attribute);
  SP elements are the spaces between words on the same TextLine.

  Note that Block2 (VPOS 2068) precedes Block3 (VPOS 307) in document order,
  so document order is not top-to-bottom page order.

  NOTE(review): expected values in the tests presumably depend on the exact
  CONTENT strings and coordinates below; do not normalise them.
-->
<alto xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v2# http://www.loc.gov/standards/alto/alto-v2.0.xsd"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://www.loc.gov/standards/alto/ns-v2#">
<Description>
<MeasurementUnit>pixel</MeasurementUnit>
<OCRProcessing ID="IdOcr">
<ocrProcessingStep>
<processingDateTime>2020-05-14</processingDateTime>
<processingSoftware>
<softwareCreator>ABBYY</softwareCreator>
<softwareName>ABBYY FineReader Engine</softwareName>
<softwareVersion>12</softwareVersion>
</processingSoftware>
</ocrProcessingStep>
</OCRProcessing>
</Description>
<Styles> </Styles>
<Layout>
<Page ID="Page1" PHYSICAL_IMG_NR="1" HEIGHT="2546" WIDTH="1801">
<PrintSpace HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0">
<Illustration ID="Page1_Block1" HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0"/>
<TextBlock ID="Page1_Block2" HEIGHT="241" WIDTH="1064" VPOS="2068" HPOS="470" language="de">
<Shape>
<Polygon POINTS="1506,2068 1533,2068 1533,2283 1534,2283 1534,2306 1509,2306 1509,2307 1104,2307 1104,2308 700,2308 700,2309 471,2309 471,2286 470,2286 470,2071 697,2071 697,2070 1101,2070 1101,2069 1506,2069 1506,2068"/>
</Shape>
<TextLine HEIGHT="102" WIDTH="628" VPOS="2076" HPOS="477">
<String WC="0.79777777194976807" CONTENT="Bürgertum" HEIGHT="95" WIDTH="437" VPOS="2083" HPOS="477"/>
<SP WIDTH="34" VPOS="2107" HPOS="915"/>
<String WC="0.66333335638046265" CONTENT="und" HEIGHT="76" WIDTH="155" VPOS="2076" HPOS="950"/>
</TextLine>
<TextLine HEIGHT="104" WIDTH="1051" VPOS="2199" HPOS="477">
<String WC="0.83142858743667603" CONTENT="Bürgerlichkeit" HEIGHT="102" WIDTH="574" VPOS="2201" HPOS="477"/>
<SP WIDTH="32" VPOS="2206" HPOS="1051"/>
<String WC="1." CONTENT="in" HEIGHT="68" WIDTH="74" VPOS="2205" HPOS="1084"/>
<SP WIDTH="34" VPOS="2204" HPOS="1159"/>
<String WC="0.8028571605682373" CONTENT="Dresden" HEIGHT="75" WIDTH="333" VPOS="2199" HPOS="1194"/>
</TextLine>
</TextBlock>
<TextBlock ID="Page1_Block3" HEIGHT="290" WIDTH="775" VPOS="307" HPOS="466" language="de">
<Shape>
<Polygon POINTS="1101,307 1241,307 1241,595 1104,595 1104,596 700,596 700,597 466,597 466,309 697,309 697,308 1101,308 1101,307"/>
</Shape>
<TextLine HEIGHT="98" WIDTH="752" VPOS="315" HPOS="473">
<String WC="0.75625002384185791" CONTENT="DRESDNER" HEIGHT="98" WIDTH="752" VPOS="315" HPOS="473"/>
</TextLine>
<TextLine HEIGHT="97" WIDTH="448" VPOS="492" HPOS="473">
<String WC="0.70399999618530273" CONTENT="HEFTE" HEIGHT="97" WIDTH="448" VPOS="492" HPOS="473"/>
</TextLine>
</TextBlock>
<GraphicalElement ID="Page1_Block4" HEIGHT="14" WIDTH="1674" VPOS="266" HPOS="55"/>
<GraphicalElement ID="Page1_Block5" HEIGHT="15" WIDTH="1674" VPOS="442" HPOS="55"/>
<GraphicalElement ID="Page1_Block6" HEIGHT="30" WIDTH="629" VPOS="680" HPOS="477"/>
<GraphicalElement ID="Page1_Block7" HEIGHT="8" WIDTH="170" VPOS="1963" HPOS="635"/>
<GraphicalElement ID="Page1_Block8" HEIGHT="141" WIDTH="11" VPOS="1019" HPOS="1197"/>
<GraphicalElement ID="Page1_Block9" HEIGHT="168" WIDTH="12" VPOS="948" HPOS="1411"/>
<Illustration ID="Page1_Block10" HEIGHT="175" WIDTH="88" VPOS="1469" HPOS="544">
<Shape>
<Polygon POINTS="544,1469 631,1469 631,1474 632,1474 632,1644 545,1644 545,1477 544,1477 544,1469"/>
</Shape>
</Illustration>
<Illustration ID="Page1_Block11" HEIGHT="207" WIDTH="61" VPOS="1657" HPOS="790"/>
</PrintSpace>
</Page>
</Layout>
</alto>
59 changes: 59 additions & 0 deletions Tests/Fixtures/Format/altoNoString.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Test fixture: Tests/Fixtures/Format/altoNoString.xml

  ALTO v2 OCR document for one page (Page1, 2546 x 1801, MeasurementUnit
  "pixel") in which no TextLine contains a String element:
    * Page1_Block2: both TextLines contain only SP (space) elements
    * Page1_Block3: both TextLines are empty

  TextBlocks, Shapes, Illustrations and GraphicalElements are all present,
  so the only thing missing is the recognised words themselves.

  NOTE(review): presumably exercises the "text lines without words" edge
  case of the ALTO format handling; confirm against the test that loads it.

  NOTE(review): a TextLine without any String is probably not valid against
  alto-v2.0.xsd. That looks deliberate for this fixture: do not "repair" it
  by adding String elements.
-->
<alto xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v2# http://www.loc.gov/standards/alto/alto-v2.0.xsd"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://www.loc.gov/standards/alto/ns-v2#">
<Description>
<MeasurementUnit>pixel</MeasurementUnit>
<OCRProcessing ID="IdOcr">
<ocrProcessingStep>
<processingDateTime>2020-05-14</processingDateTime>
<processingSoftware>
<softwareCreator>ABBYY</softwareCreator>
<softwareName>ABBYY FineReader Engine</softwareName>
<softwareVersion>12</softwareVersion>
</processingSoftware>
</ocrProcessingStep>
</OCRProcessing>
</Description>
<Styles> </Styles>
<Layout>
<Page ID="Page1" PHYSICAL_IMG_NR="1" HEIGHT="2546" WIDTH="1801">
<PrintSpace HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0">
<Illustration ID="Page1_Block1" HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0"/>
<TextBlock ID="Page1_Block2" HEIGHT="241" WIDTH="1064" VPOS="2068" HPOS="470" language="de">
<Shape>
<Polygon POINTS="1506,2068 1533,2068 1533,2283 1534,2283 1534,2306 1509,2306 1509,2307 1104,2307 1104,2308 700,2308 700,2309 471,2309 471,2286 470,2286 470,2071 697,2071 697,2070 1101,2070 1101,2069 1506,2069 1506,2068"/>
</Shape>
<TextLine HEIGHT="102" WIDTH="628" VPOS="2076" HPOS="477">
<SP WIDTH="34" VPOS="2107" HPOS="915"/>
</TextLine>
<TextLine HEIGHT="104" WIDTH="1051" VPOS="2199" HPOS="477">
<SP WIDTH="32" VPOS="2206" HPOS="1051"/>
<SP WIDTH="34" VPOS="2204" HPOS="1159"/>
</TextLine>
</TextBlock>
<TextBlock ID="Page1_Block3" HEIGHT="290" WIDTH="775" VPOS="307" HPOS="466" language="de">
<Shape>
<Polygon POINTS="1101,307 1241,307 1241,595 1104,595 1104,596 700,596 700,597 466,597 466,309 697,309 697,308 1101,308 1101,307"/>
</Shape>
<TextLine HEIGHT="98" WIDTH="752" VPOS="315" HPOS="473">
</TextLine>
<TextLine HEIGHT="97" WIDTH="448" VPOS="492" HPOS="473">
</TextLine>
</TextBlock>
<GraphicalElement ID="Page1_Block4" HEIGHT="14" WIDTH="1674" VPOS="266" HPOS="55"/>
<GraphicalElement ID="Page1_Block5" HEIGHT="15" WIDTH="1674" VPOS="442" HPOS="55"/>
<GraphicalElement ID="Page1_Block6" HEIGHT="30" WIDTH="629" VPOS="680" HPOS="477"/>
<GraphicalElement ID="Page1_Block7" HEIGHT="8" WIDTH="170" VPOS="1963" HPOS="635"/>
<GraphicalElement ID="Page1_Block8" HEIGHT="141" WIDTH="11" VPOS="1019" HPOS="1197"/>
<GraphicalElement ID="Page1_Block9" HEIGHT="168" WIDTH="12" VPOS="948" HPOS="1411"/>
<Illustration ID="Page1_Block10" HEIGHT="175" WIDTH="88" VPOS="1469" HPOS="544">
<Shape>
<Polygon POINTS="544,1469 631,1469 631,1474 632,1474 632,1644 545,1644 545,1477 544,1477 544,1469"/>
</Shape>
</Illustration>
<Illustration ID="Page1_Block11" HEIGHT="207" WIDTH="61" VPOS="1657" HPOS="790"/>
</PrintSpace>
</Page>
</Layout>
</alto>
38 changes: 38 additions & 0 deletions Tests/Fixtures/Format/altoNoTextBlock.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Test fixture: Tests/Fixtures/Format/altoNoTextBlock.xml

  ALTO v2 OCR document for one page (Page1, 2546 x 1801, MeasurementUnit
  "pixel") whose PrintSpace contains no TextBlock at all: only Illustration
  and GraphicalElement children are present.

  The block IDs jump from Page1_Block1 straight to Page1_Block4; Block2 and
  Block3 are absent from this file.

  NOTE(review): presumably exercises the "page without any text block" edge
  case of the ALTO format handling; confirm against the test that loads it.
  Do not add TextBlock elements to this file.
-->
<alto xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v2# http://www.loc.gov/standards/alto/alto-v2.0.xsd"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://www.loc.gov/standards/alto/ns-v2#">
<Description>
<MeasurementUnit>pixel</MeasurementUnit>
<OCRProcessing ID="IdOcr">
<ocrProcessingStep>
<processingDateTime>2020-05-14</processingDateTime>
<processingSoftware>
<softwareCreator>ABBYY</softwareCreator>
<softwareName>ABBYY FineReader Engine</softwareName>
<softwareVersion>12</softwareVersion>
</processingSoftware>
</ocrProcessingStep>
</OCRProcessing>
</Description>
<Styles> </Styles>
<Layout>
<Page ID="Page1" PHYSICAL_IMG_NR="1" HEIGHT="2546" WIDTH="1801">
<PrintSpace HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0">
<Illustration ID="Page1_Block1" HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0"/>
<GraphicalElement ID="Page1_Block4" HEIGHT="14" WIDTH="1674" VPOS="266" HPOS="55"/>
<GraphicalElement ID="Page1_Block5" HEIGHT="15" WIDTH="1674" VPOS="442" HPOS="55"/>
<GraphicalElement ID="Page1_Block6" HEIGHT="30" WIDTH="629" VPOS="680" HPOS="477"/>
<GraphicalElement ID="Page1_Block7" HEIGHT="8" WIDTH="170" VPOS="1963" HPOS="635"/>
<GraphicalElement ID="Page1_Block8" HEIGHT="141" WIDTH="11" VPOS="1019" HPOS="1197"/>
<GraphicalElement ID="Page1_Block9" HEIGHT="168" WIDTH="12" VPOS="948" HPOS="1411"/>
<Illustration ID="Page1_Block10" HEIGHT="175" WIDTH="88" VPOS="1469" HPOS="544">
<Shape>
<Polygon POINTS="544,1469 631,1469 631,1474 632,1474 632,1644 545,1644 545,1477 544,1477 544,1469"/>
</Shape>
</Illustration>
<Illustration ID="Page1_Block11" HEIGHT="207" WIDTH="61" VPOS="1657" HPOS="790"/>
</PrintSpace>
</Page>
</Layout>
</alto>
48 changes: 48 additions & 0 deletions Tests/Fixtures/Format/altoNoTextLine.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Test fixture: Tests/Fixtures/Format/altoNoTextLine.xml

  ALTO v2 OCR document for one page (Page1, 2546 x 1801, MeasurementUnit
  "pixel") in which the two TextBlocks (Page1_Block2 and Page1_Block3, both
  language="de") contain only a Shape/Polygon outline and no TextLine
  children.

  Illustrations and GraphicalElements are present as usual, so the only
  thing missing is the line and word level below each TextBlock.

  NOTE(review): presumably exercises the "text block without text lines"
  edge case of the ALTO format handling; confirm against the test that
  loads it. Do not add TextLine elements to this file.
-->
<alto xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v2# http://www.loc.gov/standards/alto/alto-v2.0.xsd"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://www.loc.gov/standards/alto/ns-v2#">
<Description>
<MeasurementUnit>pixel</MeasurementUnit>
<OCRProcessing ID="IdOcr">
<ocrProcessingStep>
<processingDateTime>2020-05-14</processingDateTime>
<processingSoftware>
<softwareCreator>ABBYY</softwareCreator>
<softwareName>ABBYY FineReader Engine</softwareName>
<softwareVersion>12</softwareVersion>
</processingSoftware>
</ocrProcessingStep>
</OCRProcessing>
</Description>
<Styles> </Styles>
<Layout>
<Page ID="Page1" PHYSICAL_IMG_NR="1" HEIGHT="2546" WIDTH="1801">
<PrintSpace HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0">
<Illustration ID="Page1_Block1" HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0"/>
<TextBlock ID="Page1_Block2" HEIGHT="241" WIDTH="1064" VPOS="2068" HPOS="470" language="de">
<Shape>
<Polygon POINTS="1506,2068 1533,2068 1533,2283 1534,2283 1534,2306 1509,2306 1509,2307 1104,2307 1104,2308 700,2308 700,2309 471,2309 471,2286 470,2286 470,2071 697,2071 697,2070 1101,2070 1101,2069 1506,2069 1506,2068"/>
</Shape>
</TextBlock>
<TextBlock ID="Page1_Block3" HEIGHT="290" WIDTH="775" VPOS="307" HPOS="466" language="de">
<Shape>
<Polygon POINTS="1101,307 1241,307 1241,595 1104,595 1104,596 700,596 700,597 466,597 466,309 697,309 697,308 1101,308 1101,307"/>
</Shape>
</TextBlock>
<GraphicalElement ID="Page1_Block4" HEIGHT="14" WIDTH="1674" VPOS="266" HPOS="55"/>
<GraphicalElement ID="Page1_Block5" HEIGHT="15" WIDTH="1674" VPOS="442" HPOS="55"/>
<GraphicalElement ID="Page1_Block6" HEIGHT="30" WIDTH="629" VPOS="680" HPOS="477"/>
<GraphicalElement ID="Page1_Block7" HEIGHT="8" WIDTH="170" VPOS="1963" HPOS="635"/>
<GraphicalElement ID="Page1_Block8" HEIGHT="141" WIDTH="11" VPOS="1019" HPOS="1197"/>
<GraphicalElement ID="Page1_Block9" HEIGHT="168" WIDTH="12" VPOS="948" HPOS="1411"/>
<Illustration ID="Page1_Block10" HEIGHT="175" WIDTH="88" VPOS="1469" HPOS="544">
<Shape>
<Polygon POINTS="544,1469 631,1469 631,1474 632,1474 632,1644 545,1644 545,1477 544,1477 544,1469"/>
</Shape>
</Illustration>
<Illustration ID="Page1_Block11" HEIGHT="207" WIDTH="61" VPOS="1657" HPOS="790"/>
</PrintSpace>
</Page>
</Layout>
</alto>
36 changes: 36 additions & 0 deletions Tests/Fixtures/Format/audioVideo.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Test fixture: Tests/Fixtures/Format/audioVideo.xml

  Minimal METS document with a single administrative metadata section
  (mets:amdSec ID="AMD") whose mets:mdWrap/mets:xmlData carries:
    * videomd:fileData   : color, compression (codec creator app and codec
                           name), frameRate 24 (mode="Fixed"), sound "Yes"
    * videomd:videoInfo  : aspectRatio 1.375:1, dimensions of 1808 frames,
                           duration 00:01:30.07, frame/frameRate 24
    * audiomd:audioInfo  : duration 01:10:35.08

  The video and audio durations differ, so a consumer reading both can tell
  which element a value came from.

  NOTE(review): the points below are observable deviations from what the
  METS / videoMD / audioMD schemas would normally suggest. They are
  presumably deliberate simplifications for the format unit tests; confirm
  before changing any of them, since the tests may rely on this structure.
    * mets:mdWrap is a direct child of mets:amdSec, with no mets:techMD (or
      sibling) wrapper in between.
    * audiomd:audioInfo sits inside a wrapper declared OTHERMDTYPE="VIDEOMD".
    * xsi:schemaLocation lists METS, MODS and videoMD but has no entry for
      the audioMD namespace.
    * The mods, dv and xlink prefixes are declared but not used in this file.
-->
<mets:mets xmlns:mets="http://www.loc.gov/METS/"
xmlns:mods="http://www.loc.gov/mods/v3"
xmlns:videomd="http://www.loc.gov/videoMD/"
xmlns:audiomd="http://www.loc.gov/audioMD/"
xmlns:dv="http://dfg-viewer.de/"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/mets.xsd http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/mods.xsd http://www.loc.gov/videoMD/ https://www.loc.gov/standards/amdvmd/videoMD.xsd">
<mets:amdSec ID="AMD">
<mets:mdWrap MDTYPE="OTHER" MIMETYPE="text/xml" OTHERMDTYPE="VIDEOMD">
<mets:xmlData>
<videomd:fileData>
<videomd:color>Color</videomd:color>
<videomd:compression>
<videomd:codecCreatorApp>Phoenix Finish</videomd:codecCreatorApp>
<videomd:codecName>Apple ProRes 4444</videomd:codecName>
</videomd:compression>
<videomd:frameRate mode="Fixed">24</videomd:frameRate>
<videomd:sound>Yes</videomd:sound>
</videomd:fileData>
<videomd:videoInfo>
<videomd:aspectRatio>1.375:1</videomd:aspectRatio>
<videomd:dimensions LENGTH="1808" UNITS="frames" />
<videomd:duration>00:01:30.07</videomd:duration>
<videomd:frame>
<videomd:frameRate>24</videomd:frameRate>
</videomd:frame>
</videomd:videoInfo>
<audiomd:audioInfo>
<audiomd:duration>01:10:35.08</audiomd:duration>
</audiomd:audioInfo>
</mets:xmlData>
</mets:mdWrap>
</mets:amdSec>
</mets:mets>
Loading

0 comments on commit c4aaf4f

Please sign in to comment.