-
Notifications
You must be signed in to change notification settings - Fork 1
/
index.js
28 lines (26 loc) · 876 Bytes
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
const pdfjs = require('pdfjs-dist');
const fs = require('fs');
// Script: reads one PDF from disk, extracts the text items of every page with
// pdfjs, groups them into rows and prints the resulting object.

// Raw bytes of the PDF, in the typed-array form handed to pdfjs.getDocument.
const data = new Uint8Array(fs.readFileSync('./raw/eng/Dean\'s_List_AY2010-11_Sem_2.pdf'));

/**
 * Appends the text of every item of one page to the row it belongs to.
 *
 * A row is identified by `<pageNum>-<item.transform[5]>`; items sharing that
 * key end up in the same array, in the order they appear in `items`.
 * NOTE(review): transform[5] is presumably the item's vertical position on
 * the page, so one key corresponds to one visual line - confirm against the
 * pdf.js text-content documentation.
 *
 * @param {Object<string, string[]>} rows - Accumulator, mutated in place.
 * @param {number} pageNum - 1-based page number the items came from.
 * @param {Array<{str: string, transform: number[]}>} items - Text items of the page.
 * @returns {void}
 */
function addPageItemsToRows(rows, pageNum, items) {
  items.forEach(function (item) {
    const rowId = `${pageNum}-${item.transform[5]}`;
    // Go through Object.prototype so the check cannot be shadowed by a key.
    if (!Object.prototype.hasOwnProperty.call(rows, rowId)) {
      rows[rowId] = [];
    }
    rows[rowId].push(item.str);
  });
}

// Depending on the pdfjs-dist version, getDocument returns either a loading
// task exposing `.promise` or a thenable; Promise.resolve handles both.
const loadingTask = pdfjs.getDocument(data);
Promise.resolve(loadingTask.promise || loadingTask).then(function (pdfDocument) {
  // Page numbers are 1-based: [1, 2, ..., numPages].
  const pageNums = Array.from({ length: pdfDocument.numPages }, function (unused, index) {
    return index + 1;
  });

  // Fetch all pages in parallel. Returning the chained promises (instead of
  // wrapping them in `new Promise`) lets any failure reject Promise.all.
  return Promise.all(pageNums.map(function (pageNum) {
    console.log(pageNum);
    return pdfDocument.getPage(pageNum)
      .then((page) => page.getTextContent())
      .then((content) => content.items);
  })).then(function (itemsPerPage) {
    // Merge only once every page is available, in page order, so the key
    // order of `rows` does not depend on which page finished loading first.
    const rows = {};
    itemsPerPage.forEach(function (items, index) {
      addPageItemsToRows(rows, pageNums[index], items);
    });
    console.log(rows);
  });
}).catch(function (err) {
  // Report the failure and signal it through the exit status instead of
  // leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});