Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move to libzim7 #1702

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# top-most EditorConfig file
root = true

# UTF-8 charset, Unix-style newlines, no trailing whitespace, and a newline ending every file
[*]
charset = utf-8
trim_trailing_whitespace = true
end_of_line = lf
insert_final_newline = true

# Tab indentation (no size specified)
[Makefile]
indent_style = tab

[*.{c,h,cpp,cpp,hpp}]
indent_size = 4

[*.{js,ts}]
indent_size = 2
7,509 changes: 2,700 additions & 4,809 deletions package-lock.json

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,12 @@
},
"dependencies": {
"@ladjs/country-language": "^1.0.0",
"@openzim/libzim": "2.4.4",
"@openzim/libzim": "github:openzim/node-libzim",
"@types/async": "^3.2.15",
"@types/backoff": "^2.5.1",
"@types/bluebird": "^3.5.32",
"@types/file-type": "^10.9.1",
"file-type": "^16.5.3",
"@types/html-minifier": "^4.0.0",
"@types/imagemin": "^7.0.0",
"@types/imagemin-gifsicle": "^7.0.0",
Expand Down
87 changes: 38 additions & 49 deletions src/mwoffliner.lib.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import urlParser from 'url';
import semver from 'semver';
import * as path from 'path';
import * as QueryStringParser from 'querystring';
import { ZimArticle, ZimCreator } from '@openzim/libzim';
import { Creator, StringItem, Compression } from '@openzim/libzim';

import {
articleDetailXId,
Expand Down Expand Up @@ -398,27 +398,31 @@ async function execute(argv: any) {
logger.log(`Writing zim to [${outZim}]`);
dump.outFile = outZim;

const zimCreator = new ZimCreator({
fileName: outZim,
fullTextIndexLanguage: dump.opts.withoutZimFullTextIndex ? '' : dump.mwMetaData.langIso3,
welcome: (dump.opts.mainPage ? dump.opts.mainPage : 'index'),
compression: 'zstd',
}, {
const language = dump.opts.withoutZimFullTextIndex ? '' : dump.mwMetaData.langIso3;
const zimCreator = new Creator();
zimCreator
.configIndexing(true, language)
.configCompression(Compression.Zstd)
.startZimCreation(outZim);

zimCreator.setMainPath(dump.opts.mainPage ?? 'index');

const metadata = {
Tags: dump.computeZimTags(),
Language: dump.mwMetaData.langIso3,
Date: (new Date()).toJSON().split('T')[0],
Title: dump.opts.customZimTitle || dump.mwMetaData.title,
Name: dump.computeFilenameRadical(false, true, true),
Flavour: dump.computeFlavour(),
Description: dump.opts.customZimDescription || dump.mwMetaData.subTitle,
Creator: dump.mwMetaData.creator,
Publisher: dump.opts.publisher,
});
const scraperArticle = new ZimArticle({
ns: 'M',
data: `mwoffliner ${packageJSON.version}`,
url: 'Scraper',
});
zimCreator.addArticle(scraperArticle);
Scraper: `mwoffliner ${packageJSON.version}`,
};
for (const [name, content] of Object.entries(metadata)) {
if(!content) continue;
zimCreator.addMetadata(name, content);
}

logger.info('Copying Static Resource Files');
await saveStaticFiles(config, zimCreator);
Expand All @@ -433,8 +437,8 @@ async function execute(argv: any) {
} = await getAndProcessStylesheets(downloader, stylesheetsToGet);
logger.log(`Downloaded stylesheets`);

const article = new ZimArticle({ url: `${config.output.dirs.mediawiki}/style.css`, data: finalCss, ns: '-' });
zimCreator.addArticle(article);
const item = new StringItem(`${config.output.dirs.mediawiki}/style.css`, 'text/css', '', {}, finalCss);
await zimCreator.addItem(item);
await saveFavicon(dump, zimCreator);

await getThumbnailsData();
Expand Down Expand Up @@ -475,7 +479,7 @@ async function execute(argv: any) {
await writeArticleRedirects(downloader, dump, zimCreator);

logger.log(`Finishing Zim Creation`);
await zimCreator.finalise();
await zimCreator.finishZimCreation();

logger.log(`Summary of scrape actions:`, JSON.stringify(dump.status, null, '\t'));
}
Expand All @@ -484,41 +488,35 @@ async function execute(argv: any) {
/* FUNCTIONS *********************** */
/* ********************************* */

async function writeArticleRedirects(downloader: Downloader, dump: Dump, zimCreator: ZimCreator) {
async function writeArticleRedirects(downloader: Downloader, dump: Dump, zimCreator: Creator) {
await redirectsXId.iterateItems(
downloader.speed,
async (redirects) => {
for (const [redirectId, { targetId, title }] of Object.entries(redirects)) {
if (redirectId !== targetId) {
const redirectArticle = new ZimArticle({
url: redirectId,
shouldIndex: true,
data: '',
ns: 'A',
mimeType: 'text/html',

// We fake a title, by just removing the underscores
title: String(redirectId).replace(/_/g, ' '),

redirectUrl: targetId,
});
zimCreator.addArticle(redirectArticle);
// We fake a title, by just removing the underscores
const title = String(redirectId).replace(/_/g, ' ');
zimCreator.addRedirection(
redirectId,
title,
targetId,
);
dump.status.redirects.written += 1;
}
}
},
);
}

async function saveFavicon(dump: Dump, zimCreator: ZimCreator): Promise<{}> {
async function saveFavicon(dump: Dump, zimCreator: Creator): Promise<void> {
logger.log('Saving favicon.png...');

async function saveFavicon(zimCreator: ZimCreator, faviconPath: string): Promise<{}> {
async function saveFavicon(zimCreator: Creator, faviconPath: string): Promise<void> {
try {
const source = await fs.promises.readFile(faviconPath);
const data = await sharp(source).resize(48, 48, { fit: sharp.fit.inside, withoutEnlargement: true }).png().toBuffer();
const article = new ZimArticle({ url: 'favicon', mimeType: 'image/png', data, ns: '-' });
return zimCreator.addArticle(article);
const data = await sharp(source).resize(48, 48, { fit: sharp.fit.inside, withoutEnlargement: true }).png().toBuffer().toString();
const item = new StringItem('favicon', 'image/png', '', {}, data);
return await zimCreator.addItem(item);
} catch (e) {
throw new Error('Failed to save favicon using sharp');
}
Expand All @@ -541,7 +539,7 @@ async function execute(argv: any) {
return await saveFavicon(zimCreator, faviconPath);
}

function getMainPage(dump: Dump, zimCreator: ZimCreator, downloader: Downloader) {
function getMainPage(dump: Dump, zimCreator: Creator, downloader: Downloader) {
async function createMainPage() {
logger.log('Creating main page...');
const doc = domino.createDocument(
Expand Down Expand Up @@ -580,22 +578,13 @@ async function execute(argv: any) {
}

/* Write the static html file */
const article = new ZimArticle({ url: 'index', data: doc.documentElement.outerHTML, ns: 'A', mimeType: 'text/html', title: 'Main Page' });
return zimCreator.addArticle(article);
const item = new StringItem('index', 'text/html', 'Main Page', {FRONT_ARTICLE: 1}, doc.documentElement.outerHTML);
return await zimCreator.addItem(item);
}

function createMainPageRedirect() {
logger.log(`Create main page redirection from [index] to [${'A/' + mainPage}]`);
const article = new ZimArticle({
url: 'index',
shouldIndex: true,
data: '',
ns: 'A',
mimeType: 'text/html',
title: mainPage,
redirectUrl: mainPage,
});
return zimCreator.addArticle(article);
zimCreator.addRedirection('index', mainPage, mainPage, {FRONT_ARTICLE: 1});
}

return mainPage ? createMainPageRedirect() : createMainPage();
Expand Down
53 changes: 26 additions & 27 deletions src/util/dump.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import Downloader from '../Downloader';
import { getFullUrl, jsPath, cssPath } from '.';
import { config } from '../config';
import MediaWiki from '../MediaWiki';
import { ZimCreator, ZimArticle } from '@openzim/libzim';
import { Creator, StringItem } from '@openzim/libzim';
import { Dump } from '../Dump';
import { filesToDownloadXPath } from '../stores';
import fs from 'fs'
Expand Down Expand Up @@ -83,7 +83,7 @@ export async function getAndProcessStylesheets(downloader: Downloader, links: Ar
});
}

export async function downloadAndSaveModule(zimCreator: ZimCreator, mw: MediaWiki, downloader: Downloader, dump: Dump, module: string, type: 'js' | 'css') {
export async function downloadAndSaveModule(zimCreator: Creator, mw: MediaWiki, downloader: Downloader, dump: Dump, module: string, type: 'js' | 'css') {
// param :
// module : string : the name of the module
// moduleUri : string : the path where the module will be saved into the zim
Expand Down Expand Up @@ -126,8 +126,9 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, mw: MediaWik
const articleId = type === 'js'
? jsPath(module, config.output.dirs.mediawiki)
: cssPath(module, config.output.dirs.mediawiki);
const article = new ZimArticle({ url: articleId, data: text, ns: '-' });
zimCreator.addArticle(article);
const mimeType = (type === 'js') ? 'application/javascript' : 'text/css';
const item = new StringItem(articleId, mimeType, '', {}, text);
await zimCreator.addItem(item);
logger.info(`Saved module [${module}]`);
} catch (e) {
logger.error(`Failed to get module with url [${moduleApiUrl}]\nYou may need to specify a custom --mwModulePath`, e);
Expand All @@ -136,29 +137,27 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, mw: MediaWik
}

// URLs should be kept the same as Kiwix JS relies on it.
export async function importPolyfillModules(zimCreator: ZimCreator) {
[
{ name: 'webpHeroPolyfill', path: 'webp-hero/dist-cjs/polyfills.js' },
{ name: 'webpHeroBundle', path: 'webp-hero/dist-cjs/webp-hero.bundle.js' }
].forEach( ({name, path}) => {
const article = new ZimArticle({
url: jsPath(name),
data: fs.readFileSync(require.resolve(path), 'utf8').toString(),
ns: '-'
});
zimCreator.addArticle(article);
export async function importPolyfillModules(zimCreator: Creator) {
const polyfills = [
{ name: 'webpHeroPolyfill', path: 'webp-hero/dist-cjs/polyfills.js' },
{ name: 'webpHeroBundle', path: 'webp-hero/dist-cjs/webp-hero.bundle.js' }
];
for(const {name, path} of polyfills) {
const url = jsPath(name);
const mimeType = 'application/javascript';
const data = fs.readFileSync(require.resolve(path), 'utf8').toString();
const item = new StringItem(url, mimeType, name, {}, data);
await zimCreator.addItem(item);
}

const content = await axios.get(WEBP_HANDLER_URL, {responseType: 'arraybuffer', timeout: 60000, validateStatus(status) { return ([200, 302, 304].indexOf(status) > -1); }})
.then((a) => a.data)
.catch((err) => {
throw new Error(`Failed to download webpHandler from [${WEBP_HANDLER_URL}]: ${err}`);
});

const content = await axios.get(WEBP_HANDLER_URL, {responseType: 'arraybuffer', timeout: 60000, validateStatus(status) { return ([200, 302, 304].indexOf(status) > -1); }})
.then((a) => a.data)
.catch((err) => {
throw new Error(`Failed to download webpHandler from [${WEBP_HANDLER_URL}]: ${err}`);
});

const article = new ZimArticle({
url: jsPath('webpHandler'),
data: content,
ns: '-'
});
zimCreator.addArticle(article);
const url = jsPath('webpHandler');
const mimeType = 'application/javascript';
const item = new StringItem(url, mimeType, '', {}, content);
await zimCreator.addItem(item);
}
21 changes: 12 additions & 9 deletions src/util/misc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import fs from 'fs';
import path from 'path';
import mkdirp from 'mkdirp';
import pathParser from 'path';
import { ZimCreator, ZimArticle } from '@openzim/libzim';
import { Creator, StringItem } from '@openzim/libzim';
import { Config, config } from '../config';
import logger from '../Logger';
import { LATEX_IMAGE_URL_REGEX, WIKIHIERO_IMAGE_URL_REGEX, IMAGE_THUMB_URL_REGEX, FIND_HTTP_REGEX, IMAGE_URL_REGEX, BITMAP_IMAGE_MIME_REGEX, IMAGE_MIME_REGEX,
Expand Down Expand Up @@ -145,24 +145,27 @@ export function interpolateTranslationString(str: string, parameters: { [key: st
return newString;
}

export function saveStaticFiles(config: Config, zimCreator: ZimCreator) {
export function saveStaticFiles(config: Config, zimCreator: Creator) {
const cssPromises = config.output.cssResources
.concat(config.output.mainPageCssResources)
.map(async (css) => {
try {
const cssCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/${css}.css`));
const article = new ZimArticle({ url: cssPath(css), data: cssCont, ns: '-' });
zimCreator.addArticle(article);
const cssCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/${css}.css`)).toString();
const url = cssPath(css);
const item = new StringItem(url, 'text/css', '', {}, cssCont);
await zimCreator.addItem(item);
} catch (error) {
logger.warn(`Could not create ${css} file : ${error}`);
}
});

const jsPromises = config.output.jsResources.map(async (js) => {
try {
const jsCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/${js}.js`));
const article = new ZimArticle({ url: jsPath(js), data: jsCont, ns: '-' });
zimCreator.addArticle(article);
const jsCont = await readFilePromise(pathParser.resolve(__dirname, `../../res/${js}.js`)).toString();
const url = jsPath(js);
const mimeType = 'application/javascript';
const item = new StringItem(url, mimeType, '', {}, jsCont);
await zimCreator.addItem(item);
} catch (error) {
logger.warn(`Could not create ${js} file : ${error}`);
}
Expand Down Expand Up @@ -360,4 +363,4 @@ export function isBitmapImageMimeType(mimeType: string): boolean {

export function isWebpCandidateImageMimeType(webp: boolean, content_type: string) {
return webp && WEBP_CANDIDATE_IMAGE_MIME_TYPE.test(content_type);
}
}
Loading