-
Notifications
You must be signed in to change notification settings - Fork 1
/
fetch.js
82 lines (78 loc) · 2.65 KB
/
fetch.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
const fs = require("fs");
const ora = require("ora");
const fetchDataset = require("./fetchDataset");
const toJson = require("./toJson");
const makeIndex = require("./makeIndex");
const datasets = require("./datasets.json");
/**
 * Convert the XML files of a dataset into an array of JSON structures.
 *
 * Each entry handed over by `fetchDataset` is buffered in memory, decoded as
 * UTF-8, parsed with `toJson`, and stored with an `id` derived from the entry
 * path (the `.xml` suffix is stripped).
 *
 * @param {string} type - dataset name, only used in warning messages here
 * @param {string} url - dataset location, forwarded to `fetchDataset`
 * @returns {Promise<object[]>} the fiches collected once `fetchDataset` settles
 */
const getDatasetJson = async (type, url) => {
  const collected = [];

  // Decode one fully-buffered entry and append the parsed fiche to `collected`.
  const consumeEntry = (entry, downloadSpinner, buffers) => {
    const xml = Buffer.concat(buffers).toString("utf8");
    // Abort the whole run when the decoded text matches the corruption marker.
    if (/��/.test(xml)) {
      console.error(entry.path, "💀 💩 ☠️ ");
      process.exit(1);
    }
    try {
      const json = toJson(xml);
      const id = entry.path.replace(/\.xml$/, "");
      // `id` goes first so that keys coming from `json` keep precedence.
      collected.push({ id, ...json });
    } catch (err) {
      // A parse failure only skips this entry; the others are still processed.
      downloadSpinner.warn(
        `Error while parsing "${entry.path}" of "${type}": ${err}`
      );
    }
  };

  await fetchDataset(type, url, async (entry, downloadSpinner) => {
    const buffers = [];
    entry
      .on("data", (piece) => buffers.push(piece))
      .on("end", () => consumeEntry(entry, downloadSpinner, buffers));
  });

  return collected;
};
/**
 * Download every dataset listed in `datasets` and write its fiches to disk.
 *
 * For each `[type, url]` pair this creates `./data/<type>/`, writes one
 * `<id>.json` file per fiche, then writes `./data/<type>/index.json` holding
 * the ids of the fiches that were actually saved. Fiches that are too large
 * or that fail to save are reported on the spinner and left out of the index.
 *
 * @returns {Promise<void>} resolves once all datasets have been processed
 */
const fetchAll = async () => {
  // Per-file size cap in bytes (100MB, the git limit quoted in the warning).
  const maxFileBytes = 100000000;

  for (const [type, url] of Object.entries(datasets)) {
    fs.mkdirSync(`./data/${type}`, { recursive: true });
    const fiches = await getDatasetJson(type, url);
    const writeSpinner = ora(`Writing "${type}" fiches`).start();

    const savedIds = [];
    for (const fiche of fiches) {
      const fileName = `./data/${type}/${fiche.id}.json`;
      const fileContent = JSON.stringify(fiche, null, 2);
      // Measure encoded bytes: `String.length` counts UTF-16 code units and
      // under-reports the on-disk size of non-ASCII content.
      const fileBytes = Buffer.byteLength(fileContent, "utf8");
      if (fileBytes > maxFileBytes) {
        writeSpinner.warn(
          `Error saving "${fileName}": Size is too big ${
            fileBytes / 1000000
          }MB (git limitation is 100MB). If you need this file, please consider using git-lfs or compression.`
        );
        continue;
      }
      try {
        fs.writeFileSync(fileName, fileContent);
        // Only index fiches whose file really exists on disk.
        savedIds.push(fiche.id);
      } catch (err) {
        writeSpinner.warn(`Error saving "${fileName}": ${err.message}`);
      }
    }

    const indexName = `./data/${type}/index.json`;
    try {
      fs.writeFileSync(indexName, JSON.stringify(savedIds, null, 2));
    } catch (err) {
      writeSpinner.fail(`Error saving "${indexName}" : ${err.message}`);
      // Do not report success for a dataset whose index could not be written.
      continue;
    }
    writeSpinner.succeed(`Files "${type}" successfully written `);
  }
};
// Script entry point: runs only when this file is executed directly, not when
// it is loaded through `require`.
if (require.main === module) {
  fetchAll()
    .then(() => {
      // Build the global summary once every dataset has been processed.
      const index = makeIndex();
      fs.writeFileSync("./data/index.json", JSON.stringify(index, null, 2));
      console.log(`Summary dumped to data/index.json`);
    })
    .catch((err) => {
      // Report on stderr and exit non-zero so callers can detect the failure.
      console.error(err);
      process.exitCode = 1;
    });
}