From fe301bd7d68ebcc3807a90667f27339bab7bf2cf Mon Sep 17 00:00:00 2001
From: chris <1010084+cloverich@users.noreply.github.com>
Date: Tue, 3 Dec 2024 08:42:12 -0800
Subject: [PATCH] Import non-Notion markdown directory (#273)

add experimental markdown / Obsidian importer

- add source type to importer to support notion vs non-notion
- conditionally strip and track notion Id from note title
- resolve and convert [[Wikilinks]]
- track and convert inline #tags
- skip title parsing from front matter; fallback to file name when non-Notion import
- use birthtime and mtime, not ctime, for default note creation date when front-matter not present
- refactor file moving and resolving to walk all files, then move if referenced by a note
- (bugfix) check for empty / null value when selecting new chronicles root - it implies the user clicked cancel
- drop legacy importer code (was unused and kept for reference)
---
 src/electron/migrations/20211005142122.sql    |   4 +-
 src/markdown/README.md                        |   2 +
 src/markdown/index.test.ts                    |  31 +-
 src/markdown/index.ts                         |  25 +-
 src/markdown/mdast-util-ofm-tag/index.ts      |  17 +
 .../mdast-util-ofm-tag/lib/fromMarkdown.ts    |  26 ++
 .../mdast-util-ofm-tag/lib/toMarkdown.ts      |  15 +
 .../micromark-extension-ofm-tag/index.ts      |  10 +
 .../micromark-extension-ofm-tag/lib/syntax.ts | 131 ++++++
 src/markdown/test-utils.ts                    |   8 +-
 src/preload/client/importer.ts                | 441 ++++++++----------
 .../client/importer/FilesImportResolver.ts    | 283 +++++++++++
 src/preload/client/importer/SourceType.ts     |   5 +
 src/preload/client/importer/frontmatter.ts    |  23 +-
 src/preload/client/importer/importer.test.ts  |  43 +-
 src/preload/client/preferences.ts             |   3 +-
 .../importer/legacy/importChronicles.ts       | 164 -------
 src/preload/importer/legacy/indexer.ts        | 274 -----------
 src/views/preferences/index.tsx               |  39 +-
 19 files changed, 777 insertions(+), 767 deletions(-)
 create mode 100644 src/markdown/mdast-util-ofm-tag/index.ts
 create mode 100644 src/markdown/mdast-util-ofm-tag/lib/fromMarkdown.ts
 create mode 100644 src/markdown/mdast-util-ofm-tag/lib/toMarkdown.ts
 create mode 100644 src/markdown/micromark-extension-ofm-tag/index.ts
 create mode 100644 src/markdown/micromark-extension-ofm-tag/lib/syntax.ts
 create mode 100644 src/preload/client/importer/FilesImportResolver.ts
 create mode 100644 src/preload/client/importer/SourceType.ts
 delete mode 100644 src/preload/importer/legacy/importChronicles.ts
 delete mode 100644 src/preload/importer/legacy/indexer.ts

diff --git a/src/electron/migrations/20211005142122.sql b/src/electron/migrations/20211005142122.sql
index e6e010e..2fd4a5f 100644
--- a/src/electron/migrations/20211005142122.sql
+++ b/src/electron/migrations/20211005142122.sql
@@ -58,9 +58,10 @@ CREATE TABLE IF NOT EXISTS "imports" (
 CREATE TABLE IF NOT EXISTS "import_files" (
   "importerId" TEXT NOT NULL,
-  "sourcePathResolved" TEXT NOT NULL PRIMARY KEY,
   "status" TEXT NOT NULL DEFAULT "pending",
   "chroniclesId" TEXT NOT NULL,
+  "sourcePathResolved" TEXT NOT NULL PRIMARY KEY,
+  "filename" TEXT NOT NULL, -- filename without extension
   "extension" TEXT NOT NULL,
   "error" TEXT
 );
@@ -71,6 +72,7 @@ CREATE TABLE IF NOT EXISTS "import_notes" (
   "status" TEXT NOT NULL, -- success, error
   "chroniclesId" TEXT NOT NULL,
   "chroniclesPath" TEXT NOT NULL,
+  -- todo: sourcePath + hash of content
   "sourcePath" TEXT NOT NULL PRIMARY KEY,
   "sourceId" TEXT,
   "error" BOOLEAN,
diff --git a/src/markdown/README.md b/src/markdown/README.md
index 78db0ba..626f61c 100644
--- a/src/markdown/README.md
+++ b/src/markdown/README.md
@@ -13,3 +13,5 @@
 https://github.com/inokawa/remark-slate-transformer/issues/67
 # mdast-util-ofm
 Partial fork of https://github.com/MoritzRS/obsidian-ext, initially because of ESM import issues. No modifications are needed right now; if / once that project completes its ESM updates, it may be preferable to move this back to a dependency.
+
+ofm-\* packages are Copyright Moritz R. Schulz and MIT licensed
diff --git a/src/markdown/index.test.ts b/src/markdown/index.test.ts
index 5464af3..995a77b 100644
--- a/src/markdown/index.test.ts
+++ b/src/markdown/index.test.ts
@@ -4,7 +4,7 @@ import { describe, it } from "mocha";
 import path from "path";
 
 import { slateToString, stringToSlate } from "./index.js";
-import { dig, parseMarkdown } from "./test-utils.js";
+import { dig, parseMarkdown, parseMarkdownForImport } from "./test-utils.js";
 
 // Tests can structure the data this way and use runTests to
 // test the various conversions.
@@ -41,7 +41,7 @@ function outputMarkdown(markdown: string | { in: string; out: string }) {
 // but is sometimes configurable (ex: options -> bullet)
 // - markdown (string)->mdast
 // - markdown (string)->slate
-function runTests(doc: TestDoc) {
+function runTests(doc: TestDoc, parser = parseMarkdown) {
   it("roundtrips", function () {
     const result = slateToString(stringToSlate(inputMarkdown(doc.markdown)));
 
@@ -54,14 +54,14 @@ function runTests(doc: TestDoc) {
   // round trip properly if it does not parse at all (ex: wikilinks without a handler)
   if (doc.mdast) {
     it("markdown->mdast", function () {
-      const result = parseMarkdown(inputMarkdown(doc.markdown));
+      const result = parser(inputMarkdown(doc.markdown));
       expect(result).to.deep.equal(doc.mdast);
     });
   }
 
   if (doc.slate) {
     it("markdown->slate", function () {
-      const result = stringToSlate(outputMarkdown(doc.markdown));
+      const result = stringToSlate(outputMarkdown(doc.markdown), parser);
       expect(result).to.deep.equal(doc.slate);
     });
   }
@@ -477,7 +477,28 @@ describe("[[Wikilinks]]", function () {
     ],
   };
 
-  runTests(doc);
+  runTests(doc, parseMarkdownForImport);
 });
 
+describe("mdast-util-ofm-tag", async () => {
+  const doc = {
+    markdown: "a #b c",
+    mdast: {
+      type: "root",
+      children: [
+        {
+          type: "paragraph",
+          children: [
+            { type: "text", value: "a " },
+            { type: "ofmTag", value: "b" },
+            { type: "text", value: " c" },
+          ],
+        },
+      ],
+    },
+  };
+
+  runTests(doc, parseMarkdownForImport);
+});
+
 // A place to put behavior that is not yet handled correctly; so I can store test
diff --git a/src/markdown/index.ts b/src/markdown/index.ts
index baf3b7b..1b5af6f 100644
--- a/src/markdown/index.ts
+++ b/src/markdown/index.ts
@@ -8,7 +8,9 @@ import { fromMarkdown } from "mdast-util-from-markdown";
 import { gfmFromMarkdown, gfmToMarkdown } from "mdast-util-gfm";
 import { toMarkdown } from "mdast-util-to-markdown";
 import { gfm } from "micromark-extension-gfm";
+import { ofmTagFromMarkdown } from "./mdast-util-ofm-tag";
 import { ofmWikilinkFromMarkdown } from "./mdast-util-ofm-wikilink";
+import { ofmTag } from "./micromark-extension-ofm-tag";
 import { ofmWikilink } from "./micromark-extension-ofm-wikilink";
 import { mdastToSlate } from "./remark-slate-transformer/transformers/mdast-to-slate.js";
 
@@ -47,10 +49,23 @@ function wrapImages(tree: mdast.Root) {
   return tree;
 }
 
+// The importer has additional support for #tag and [[WikiLink]], but converts them
+// to Chronicles tags and markdown links. Future versions may support these properly.
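For illustration, a minimal sketch (assumed usage, not part of the diff) of how the two parsers defined in this file differ on the same input:

    // as imported by src/preload/client/importer.ts
    import { parseMarkdown, parseMarkdownForImport } from "../../markdown";

    const doc = "a #b and [[Some Note]]";

    // Default parser: "#b" and "[[Some Note]]" remain plain text nodes.
    parseMarkdown(doc);

    // Import parser: emits { type: "ofmTag", value: "b" } and
    // { type: "ofmWikilink", value: "Some Note", ... } nodes, which the
    // importer later converts to Chronicles tags and markdown links.
    parseMarkdownForImport(doc);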
+export const parseMarkdownForImport = (markdown: string): mdast.Root => { + return fromMarkdown(markdown, { + extensions: [gfm(), ofmTag(), ofmWikilink()], + mdastExtensions: [ + gfmFromMarkdown(), + ofmTagFromMarkdown(), + ofmWikilinkFromMarkdown(), + ], + }); +}; + export const parseMarkdown = (markdown: string): mdast.Root => { return fromMarkdown(markdown, { - extensions: [gfm(), ofmWikilink()], - mdastExtensions: [gfmFromMarkdown(), ofmWikilinkFromMarkdown()], + extensions: [gfm()], + mdastExtensions: [gfmFromMarkdown()], }); }; @@ -62,8 +77,10 @@ export const mdastToString = (tree: mdast.Nodes) => { }); }; -export const stringToSlate = (input: string) => { - return mdastToSlate(unwrapImages(parseMarkdown(input))); +// parser param: support configuring for importer tests, which import and convert +// a few otherwise unsupported markdown features (tags, wikilinks) +export const stringToSlate = (input: string, parse = parseMarkdown) => { + return mdastToSlate(unwrapImages(parse(input))); }; export const slateToString = (nodes: SlateCustom.SlateNode[]) => { diff --git a/src/markdown/mdast-util-ofm-tag/index.ts b/src/markdown/mdast-util-ofm-tag/index.ts new file mode 100644 index 0000000..d06921c --- /dev/null +++ b/src/markdown/mdast-util-ofm-tag/index.ts @@ -0,0 +1,17 @@ +declare module "mdast" { + interface OfmTag extends Literal { + type: "ofmTag"; + value: string; + } + + interface RootContentMap { + ofmTag: OfmTag; + } + + interface PhrasingContentMap { + ofmTag: OfmTag; + } +} + +export { ofmTagFromMarkdown } from "./lib/fromMarkdown.js"; +export { ofmTagToMarkdown } from "./lib/toMarkdown.js"; diff --git a/src/markdown/mdast-util-ofm-tag/lib/fromMarkdown.ts b/src/markdown/mdast-util-ofm-tag/lib/fromMarkdown.ts new file mode 100644 index 0000000..13c7bc4 --- /dev/null +++ b/src/markdown/mdast-util-ofm-tag/lib/fromMarkdown.ts @@ -0,0 +1,26 @@ +import type { Extension } from "mdast-util-from-markdown"; + +/** + * Create an extension for `mdast-util-from-markdown` to enable OFM tags in markdown. + */ +export function ofmTagFromMarkdown(): Extension { + return { + enter: { + ofmTag: function (token) { + this.enter({ type: "ofmTag", value: "" }, token); + }, + ofmTagContent: function (token) { + // note: modified this line to avoid needing es2022 which weirdly breaks + // other stuff. + // const node = this.stack.at(-1); + const node = this.stack[this.stack.length - 1]; + if (node?.type === "ofmTag") node.value = this.sliceSerialize(token); + }, + }, + exit: { + ofmTag: function (token) { + this.exit(token); + }, + }, + }; +} diff --git a/src/markdown/mdast-util-ofm-tag/lib/toMarkdown.ts b/src/markdown/mdast-util-ofm-tag/lib/toMarkdown.ts new file mode 100644 index 0000000..f30c7e3 --- /dev/null +++ b/src/markdown/mdast-util-ofm-tag/lib/toMarkdown.ts @@ -0,0 +1,15 @@ +import type { Options } from "mdast-util-to-markdown"; + +/** + * Create an extension for `mdast-util-to-markdown` to enable OFM tags in markdown. 
+ */ +export function ofmTagToMarkdown(): Options { + return { + handlers: { + ofmTag(node) { + const value = node.value; + return `#${value}`; + }, + }, + }; +} diff --git a/src/markdown/micromark-extension-ofm-tag/index.ts b/src/markdown/micromark-extension-ofm-tag/index.ts new file mode 100644 index 0000000..4033b6b --- /dev/null +++ b/src/markdown/micromark-extension-ofm-tag/index.ts @@ -0,0 +1,10 @@ +declare module "micromark-util-types" { + interface TokenTypeMap { + ofmTag: "ofmTag"; + ofmTagMarker: "ofmTagMarker"; + ofmTagContent: "ofmTagContent"; + } +} + +// export { ofmTagHtml } from "./lib/html.js"; +export { ofmTag } from "./lib/syntax.js"; diff --git a/src/markdown/micromark-extension-ofm-tag/lib/syntax.ts b/src/markdown/micromark-extension-ofm-tag/lib/syntax.ts new file mode 100644 index 0000000..f7927e0 --- /dev/null +++ b/src/markdown/micromark-extension-ofm-tag/lib/syntax.ts @@ -0,0 +1,131 @@ +import type { + Code, + Effects, + Extension, + State, + TokenizeContext, +} from "micromark-util-types"; + +// ASCI Codes +const SPACE = 32; +const NUMBER_SIGN = 35; +const DASH = 45; +const SLASH = 47; +const DIGIT_0 = 48; +const DIGIT_9 = 57; +const LETTER_A = 65; +const LETTER_Z = 90; +const UNDERSCORE = 95; +const LETTER_a = 97; +const LETTER_z = 122; + +/** + * Create an extension for `micromark` to enable OFM tag syntax. + */ +export function ofmTag(): Extension { + return { + text: { + [NUMBER_SIGN]: { + name: "ofmTag", + tokenize: tokenize, + }, + }, + }; +} + +/** + * A tokenizer for Obsidian tag syntax. + * The tag must include at least one non-numerical character. + */ +function tokenize( + this: TokenizeContext, + effects: Effects, + ok: State, + nok: State, +) { + const previous = this.previous; + const events = this.events; + return start; + + /** + * Start of tag + * + * ```markdown + * > | #123/tag + * ^ + * ``` + */ + function start(code: Code) { + // Only tags can be chained directly without space + if ( + previous && + previous > SPACE && + events[events.length - 1][1].type !== "ofmTag" + ) { + return nok(code); + } + + effects.enter("ofmTag"); + effects.enter("ofmTagMarker"); + effects.consume(code); + effects.exit("ofmTagMarker"); + effects.enter("ofmTagContent"); + return inside_tag_candidate; + } + + /** + * Inside a tag without any non-numerical character + * + * ```markdown + * > | #123/tag + * ^^^ + * ``` + */ + function inside_tag_candidate(code: Code) { + if (code && code >= DIGIT_0 && code <= DIGIT_9) { + effects.consume(code); + return inside_tag_candidate; + } + + if ( + code && + ((code >= LETTER_A && code <= LETTER_Z) || + (code >= LETTER_a && code <= LETTER_z) || + code === UNDERSCORE || + code === DASH || + code === SLASH) + ) { + effects.consume(code); + return inside_tag; + } + + return nok(code); + } + + /** + * Inside a tag with at least one non-numerical character + * + * ```markdown + * > | #123/tag + * ^^^^ + * ``` + */ + function inside_tag(code: Code) { + if ( + code && + ((code >= DIGIT_0 && code <= DIGIT_9) || + (code >= LETTER_A && code <= LETTER_Z) || + (code >= LETTER_a && code <= LETTER_z) || + code === UNDERSCORE || + code === DASH || + code === SLASH) + ) { + effects.consume(code); + return inside_tag; + } + + effects.exit("ofmTagContent"); + effects.exit("ofmTag"); + return ok(code); + } +} diff --git a/src/markdown/test-utils.ts b/src/markdown/test-utils.ts index d6dc458..71811bb 100644 --- a/src/markdown/test-utils.ts +++ b/src/markdown/test-utils.ts @@ -1,6 +1,9 @@ import { Root } from "mdast"; -import { parseMarkdown as 
parseMarkdownRaw } from "./index.js";
+import {
+  parseMarkdownForImport as parseMarkdownForImportRaw,
+  parseMarkdown as parseMarkdownRaw,
+} from "./index.js";
 
 // Remove the parsed position information to simplify deep equals comparisons
 // There is a similar function that's an entire npm package; fuck that.
@@ -18,6 +21,9 @@ export function prunePositions(tree: any) {
 export const parseMarkdown = (markdown: string): Root =>
   prunePositions(parseMarkdownRaw(markdown));
 
+export const parseMarkdownForImport = (markdown: string): Root =>
+  prunePositions(parseMarkdownForImportRaw(markdown));
+
 // Like _.get but fail loud, helpful error messages
 // Usage: dig(mdast, 'children.0.children.1.value')
 export function dig(obj: any, path: string) {
diff --git a/src/preload/client/importer.ts b/src/preload/client/importer.ts
index 0336086..ab902dd 100644
--- a/src/preload/client/importer.ts
+++ b/src/preload/client/importer.ts
@@ -1,5 +1,4 @@
 import { Database } from "better-sqlite3";
-import fs from "fs";
 import { Knex } from "knex";
 import path from "path";
 import { Files, PathStatsFile } from "../files";
@@ -18,10 +17,15 @@ import * as mdast from "mdast";
 export type IImporterClient = ImporterClient;
 
 import { uuidv7obj } from "uuidv7";
-import { mdastToString, parseMarkdown as stringToMdast } from "../../markdown";
+import {
+  mdastToString,
+  parseMarkdownForImport as stringToMdast,
+} from "../../markdown";
+import { FilesImportResolver } from "./importer/FilesImportResolver";
+import { SourceType } from "./importer/SourceType";
 import { parseTitleAndFrontMatter } from "./importer/frontmatter";
 
-const SKIPPABLE_FILES = new Set(".DS_Store");
+export const SKIPPABLE_FILES = new Set([".DS_Store"]);
 
 // UUID in Notion notes look like 32 character hex strings; make this somewhat more lenient
 const hexIdRegex = /\b[0-9a-f]{16,}\b/;
@@ -73,7 +77,7 @@ interface StagedNote {
   chroniclesId: string;
   chroniclesPath: string;
   status: string; // 'pending' | 'note_created'
-  error: string | null;
+  error?: string | null;
 }
 
 export class ImporterClient {
@@ -97,7 +101,10 @@ export class ImporterClient {
    *
    * Designed for my own Notion export, and assumes sync will be called afterwards.
    */
-  import = async (importDir: string) => {
+  import = async (
+    importDir: string,
+    sourceType: SourceType = SourceType.Other,
+  ) => {
     await this.clearImportTables();
     const importerId = uuidv7obj().toHex();
     const chroniclesRoot = await this.ensureRoot();
@@ -126,23 +133,65 @@ export class ImporterClient {
     }
 
     console.log("importing directory", importDir);
-    await this.stageNotes(importDir, importerId, chroniclesRoot);
-    await this.processStagedNotes(chroniclesRoot);
+
+    // todo: also create a NotesImportResolver to handle note links rather than mixing
+    // into this importer class
+    const resolver = new FilesImportResolver(this.knex, importerId, this.files);
+
+    // stage all notes and files in the import directory for processing.
+    // we do this in two steps so we can generate mappings of source -> dest
+    // for links and files, and then update them in the second pass.
+    await this.stageNotes(
+      importDir,
+      importerId,
+      chroniclesRoot,
+      sourceType,
+      resolver,
+    );
+
+    await this.processStagedNotes(chroniclesRoot, sourceType, resolver);
   };
 
-  // stage all notes and files in the import directory for processing.
-  // we do this in two steps so we can generate mappings of source -> dest
-  // for links and files, and then update them in the second pass.
+  // pre-process all notes and files in the import directory, tracking in the import tables
+  // for later processing
   private stageNotes = async (
     importDir: string,
     importerId: string,
     chroniclesRoot: string, // absolute path to chronicles root dir
+    sourceType: SourceType,
+    resolver: FilesImportResolver,
   ) => {
     // for processNote; maps the original folder path to the fixed name
     const journalsMapping: Record<string, string> = {};
 
-    for await (const file of Files.walk(importDir, () => true, {})) {
-      await this.stageNote(file, importDir, importerId, journalsMapping);
+    for await (const file of Files.walk(
+      importDir,
+      // todo: Skip some directories (e.g. .git, .vscode, etc.)
+      (filestats) => {
+        // Skip directories, symbolic links, etc.
+        if (!filestats.stats.isFile()) return false;
+
+        const name = path.basename(filestats.path);
+
+        // Skip hidden files and directories
+        if (name.startsWith(".")) return false;
+        if (SKIPPABLE_FILES.has(name)) return false;
+
+        return true;
+      },
+      {},
+    )) {
+      if (file.path.endsWith(".md")) {
+        await this.stageNote(
+          file,
+          importDir,
+          importerId,
+          journalsMapping,
+          sourceType,
+        );
+      } else {
+        resolver.stageFile(file);
+      }
     }
   };
 
@@ -152,47 +201,43 @@
     importDir: string,
     importerId: string,
     journals: Record<string, string>, // mapping of original folder path to journal name
+    sourceType: SourceType,
   ) => {
-    const { ext, name, dir } = path.parse(file.path);
-
-    // Skip hidden files and directories
-    if (name.startsWith(".")) return;
-    if (SKIPPABLE_FILES.has(name)) return;
-
-    // Skip directories, symbolic links, etc.
-    if (!file.stats.isFile()) return;
-
-    // Only process markdown files
-    if (ext !== ".md") return;
-
-    // todo: handle repeat import, specifically if the imported folder / file already exists;
-    // b/c that may happen when importing multiple sources...
+    const { name, dir } = path.parse(file.path);
 
     // todo: sha comparison
     const contents = await Files.read(file.path);
-    const [, notionId] = stripNotionIdFromTitle(name);
 
     try {
       // todo: fallback title to filename - uuid
-      const { frontMatter, body, title } = parseTitleAndFrontMatter(contents);
+      const { frontMatter, body, title } = parseTitleAndFrontMatter(
+        contents,
+        name,
+        sourceType,
+      );
+
       const journalName = this.inferJournalName(
         dir,
         importDir,
         journals,
+        sourceType,
         // See notes in inferOrGenerateJournalName; this is very specific
         // to my Notion export.
         frontMatter.Category,
       );
 
-      // In a directory that was pre-formatted by Chronicles, this should not
-      // be needed. Will leave here as a reminder when I do the more generalized
-      // import routine.
+      // Prefer front-matter supplied create and update times, but fall back to file stats
+      // todo: check updatedAt, "Updated At", "Last Edited", etc. i.e. support more possible
+      // front-matter keys for dates; probably needs to be configurable:
+      // 1. Which key(s) to check
+      // 2. Whether to use birthtime or mtime
+      // 3. Which timezone to use
+      // 4. Whether to use the front-matter date or the file date
      if (!frontMatter.createdAt) {
-        frontMatter.createdAt = file.stats.ctime.toISOString();
+        frontMatter.createdAt =
+          (file.stats.birthtime || file.stats.mtime).toISOString();
      }
 
-      // todo: check updatedAt Updated At, Last Edited, etc.
- // createdAt if (!frontMatter.updatedAt) { frontMatter.updatedAt = file.stats.mtime.toISOString(); } @@ -200,18 +245,12 @@ export class ImporterClient { // todo: handle additional kinds of frontMatter; just add a column for them // and ensure they are not overwritten when editing existing files // https://github.com/cloverich/chronicles/issues/127 - - const mdast = stringToMdast(body); - - await this.stageNoteFiles(importerId, importDir, file.path, mdast); - const chroniclesId = uuidv7obj().toHex(); - const importItem = { + const stagedNote: StagedNote = { importerId, chroniclesId: chroniclesId, // hmm... what am I going to do with this? Should it be absolute to NOTES_DIR? chroniclesPath: `${path.join(journalName, chroniclesId)}.md`, - sourceId: notionId, sourcePath: file.path, title, content: body, @@ -220,7 +259,12 @@ export class ImporterClient { status: "pending", }; - await this.knex("import_notes").insert(importItem); + if (sourceType === SourceType.Notion) { + const [, notionId] = stripNotionIdFromTitle(name); + stagedNote.sourceId = notionId; + } + + await this.knex("import_notes").insert(stagedNote); } catch (e) { // todo: this error handler is far too big, obviously console.error("Error processing note", file.path, e); @@ -228,7 +272,11 @@ export class ImporterClient { }; // Second pass; process all staged notes and files - private processStagedNotes = async (chroniclesRoot: string) => { + private processStagedNotes = async ( + chroniclesRoot: string, + sourceType: SourceType, + resolver: FilesImportResolver, + ) => { await this.ensureRoot(); const { id: importerId, importDir } = await this.knex("imports") .where({ @@ -243,9 +291,8 @@ export class ImporterClient { console.log("Processing import", importerId, importDir); } - await this.moveStagedFiles(chroniclesRoot, importerId, importDir); - const filesMapping = await this.movedFilePaths(importerId); const linkMapping = await this.noteLinksMapping(importerId); + const wikiLinkMapping = await this.noteLinksWikiMapping(importerId); const items = await this.knex("import_notes").where({ importerId, @@ -254,14 +301,21 @@ export class ImporterClient { for await (const item of items) { const frontMatter = JSON.parse(item.frontMatter); - // todo: can I store the mdast in JSON? If so, should I just do this on the first - // pass since I already parsed it to mdast once? const mdast = stringToMdast(item.content) as any as mdast.Root; - this.updateNoteLinks(mdast, item, linkMapping); + await this.updateNoteLinks(mdast, item, linkMapping, wikiLinkMapping); + + // NOTE: A bit hacky: When we update file links, we also mark the file as referenced + // Then, we only move (copy) referenced files, and mark the remainder as orphaned. + // So these two calls must go in order: + await resolver.updateFileLinks(item.sourcePath, mdast); + await resolver.moveStagedFiles(chroniclesRoot, importerId, importDir); - // chris: if item.sourcePath isn't the same sourcePath used to make the file link the first - // time maybe wont be right. I changed a lot of code without testing yet. 
- this.updateFileLinks(item.sourcePath, mdast, filesMapping); + this.convertWikiLinks(mdast); + + // process tags into front matter + frontMatter.tags = Array.from( + new Set(this.processAndConvertTags(mdast, frontMatter.tags || [])), + ); // with updated links we can now save the document try { @@ -270,7 +324,7 @@ export class ImporterClient { id: item.chroniclesId, journal: item.journal, // using name as id content: mdastToString(mdast), - title: item.title, //stripNotionIdFromTitle(name), + title: item.title, tags: frontMatter.tags || [], createdAt: frontMatter.createdAt, updatedAt: frontMatter.updatedAt, @@ -345,6 +399,26 @@ export class ImporterClient { return linkMapping; }; + // Pull all staged notes and generate a mapping of original note title + // to the new file path (chroniclesPath). This is used to update + // wikilinks that point to other notes to chronicles-style markdown links. + private noteLinksWikiMapping = async (importerId: string) => { + let linkMapping: Record = + {}; + + const importedItems = await this.knex("import_notes") + .where({ importerId }) + .select("title", "journal", "chroniclesId"); + + for (const item of importedItems) { + if ("error" in item && item.error) continue; + const { journal, chroniclesId, title } = item; + linkMapping[title] = { journal, chroniclesId }; + } + + return linkMapping; + }; + // check if a markdown link is a link to a (markdown) note private isNoteLink = (url: string) => { // we are only interested in markdown links @@ -359,8 +433,13 @@ export class ImporterClient { private updateNoteLinks = async ( mdast: mdast.Root | mdast.Content, item: StagedNote, + // mapping of sourcePath to new journal and chroniclesId linkMapping: Record, + // mapping of note title to new journal and chroniclesId + linkMappingWiki: Record, ) => { + // todo: update ofmWikilink + // todo: update links that point to local files if (mdast.type === "link" && this.isNoteLink(mdast.url)) { const url = decodeURIComponent(mdast.url); const sourceFolderPath = path.dirname(item.sourcePath); @@ -373,9 +452,20 @@ export class ImporterClient { mdast.url = `../${mapped.journal}/${mapped.chroniclesId}.md`; } + if (mdast.type === "ofmWikilink") { + const title = mdast.value; + const mapped = linkMappingWiki[title]; + + if (!mapped) return; + + // NOTE: This updates the url, but assumes the node type + // will be converted to regular link in later step + mdast.url = `../${mapped.journal}/${mapped.chroniclesId}.md`; + } + if ("children" in mdast) { for (const child of mdast.children as any) { - this.updateNoteLinks(child, item, linkMapping); + this.updateNoteLinks(child, item, linkMapping, linkMappingWiki); } } }; @@ -411,6 +501,7 @@ export class ImporterClient { importDir: string, // cache / unique names checker (for when we have to generate name) journals: Record, + sourceType: SourceType, category?: string, ): string => { // In _my_ Notion usage, most of my notes were in a "Documents" database and I @@ -454,11 +545,15 @@ export class ImporterClient { .split(path.sep) // if leading with path.sep, kick out '' .filter(Boolean) - // Strip notionId from each part - // "Documents abc123eft" -> "Documents" .map((part) => { - const [folderNameWithoutId] = stripNotionIdFromTitle(part); - return folderNameWithoutId; + // Strip notionId from each part + // "Documents abc123eft" -> "Documents" + if (sourceType === SourceType.Notion) { + const [folderNameWithoutId] = stripNotionIdFromTitle(part); + return folderNameWithoutId; + } else { + return part; + } }); } @@ -506,218 +601,50 @@ 
export class ImporterClient { return journalName; }; - // Everything but copy file from validateAndMoveFile, - // return generated ID and dest filelname and whether it was resolved, - // store this in staging table in stageFile. - private fileExists = async ( - resolvedPath: string, - importDir: string, - ): Promise<[null, string] | [string, null]> => { - // Check if file is contained within importDir to prevent path traversal - if (!resolvedPath.startsWith(importDir)) - return [null, "Potential path traversal detected"]; - - // Check if the file exists - if (!fs.existsSync(resolvedPath)) - return [null, "Source file does not exist"]; - - // Check if file has read permissions - try { - await fs.promises.access(resolvedPath, fs.constants.R_OK); - } catch { - return [null, "No read access to the file"]; - } - - return [resolvedPath, null]; - }; - - // Because I keep forgetting extension already has a . in it, etc. - // Returns relative or absolute path based which one attachmentsPath - // Chronicles file references are always ../_attachments/chroniclesId.ext as - // of this writing. - private makeDestinationFilePath = ( - attachmentsPath: string, - chroniclesId: string, - extension: string, - ) => { - return path.join(attachmentsPath, `${chroniclesId}${extension}`); - }; - - // Mdast helper to determine if a node is a file link - isFileLink = ( - mdast: mdast.Content | mdast.Root, - ): mdast is mdast.Image | mdast.Link => { - return ( - (mdast.type === "image" || mdast.type === "link") && - !this.isNoteLink(mdast.url) && - !/^(https?|mailto|#|\/|\.|tel|sms|geo|data):/.test(mdast.url) - ); - }; - - /** - * Resolve a file link to an absolute path, which we use as the primary key - * in the staging table for moving files; can be used to check if file was - * already moved, and to fetch the destination id for the link when updating - * the link in the document. - * - * @param noteSourcePath - absolute path to the note that contains the link - * @param url - mdast.url of the link - */ - private cleanFileUrl = (noteSourcePath: string, url: string): string => { - const urlWithoutQuery = url.split(/\?/)[0] || ""; - return decodeURIComponent( - // todo: should we also normalize here? - path.normalize( - path.resolve(path.dirname(noteSourcePath), urlWithoutQuery), - ), - ); - }; - - // Sanitize the url and stage it into the import_files table - private stageFile = async ( - importerId: string, - url: string, // mdast.url of the link - noteSourcePath: string, // path to the note that contains the link - // Might need this if we validate before staging - importDir: string, // absolute path to import directory - ) => { - const resolvedUrl = this.cleanFileUrl(noteSourcePath, url); - - // todo: sourcePathResolved is the primary key; should be unique; but we don't need to error - // here if it fails to insert; we can skip because we only need to stage and move it once - // IDK what the error signature here is. 
- try { - await this.knex("import_files").insert({ - importerId: importerId, - sourcePathResolved: resolvedUrl, - chroniclesId: uuidv7obj().toHex(), - extension: path.extname(resolvedUrl), - }); - } catch (err: any) { - // file referenced more than once in note, or in more than one notes; if import logic - // is good really dont even need to log this, should just skip - if ("code" in err && err.code === "SQLITE_CONSTRAINT_PRIMARYKEY") { - console.log("skipping file already staged", resolvedUrl); - } else { - throw err; - } - } - }; - - // Move all staged files to _attachments (if pending) - private moveStagedFiles = async ( - chroniclesRoot: string, - importerId: string, - importDir: string, - ) => { - const files = await this.knex("import_files").where({ - importerId, - status: "pending", - }); - - const attachmentsDir = path.join(chroniclesRoot, "_attachments"); - await fs.promises.mkdir(attachmentsDir, { recursive: true }); - - for await (const file of files) { - const { sourcePathResolved, extension, chroniclesId } = file; - - // todo: convert to just err checking - let [_, err] = await this.fileExists(sourcePathResolved, importDir); - - if (err != null) { - console.error("this.fileExists test fails for ", sourcePathResolved); - await this.knex("import_files") - .where({ importerId, sourcePathResolved }) - .update({ error: err }); - continue; - } - - const destinationFile = this.makeDestinationFilePath( - attachmentsDir, - chroniclesId, - extension, - ); - - try { - await this.files.copyFile(sourcePathResolved, destinationFile); - } catch (err) { - await this.knex("import_files") - .where({ importerId, sourcePathResolved }) - .update({ error: (err as Error).message }); - continue; - } - - await this.knex("import_files") - .where({ importerId, sourcePathResolved }) - .update({ status: "complete" }); - } - }; - - // Fetch all files successfully moved to _attachments, and return a mapping - // of the original source path to the new filename so document file links can be updated - private movedFilePaths = async (importerId: string) => { - const files = await this.knex("import_files").where({ - importerId, - status: "complete", - }); - - // todo: Can pass in chronicles root; but since convention is always ../_attachments/file, should - // always be able to re-construct this... - const mapping: Record = {}; - for (const file of files) { - mapping[file.sourcePathResolved] = this.makeDestinationFilePath( - "../_attachments", - file.chroniclesId, - file.extension, - ); - } - - return mapping; - }; - - /** - * For each link in the file that points to a file, move the file to _attachments, - * rename the file based on chronicles conventions, and update the link in the file. - * - * @param importDir - The root import directory\ - * @param sourcePath - The path to the source file that contains the link; used to resolve relative links - * @param mdast - */ - private stageNoteFiles = async ( - importerId: string, - importDir: string, - sourcePath: string, - mdast: mdast.Content | mdast.Root, - ): Promise => { - if (this.isFileLink(mdast)) { - await this.stageFile(importerId, mdast.url, sourcePath, importDir); + // note: This assumes the mdast.url property of wikiembedding / link is already + // updated by the updateFileLinks routine. 
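For illustration, a sketch of the in-place node rewrite this performs (node shapes follow the ofm extensions; the journal and id below are hypothetical):

    // Hypothetical node, after updateFileLinks has already rewritten `url`:
    const before = {
      type: "ofmWikilink",
      value: "Some Note",
      url: "../work/0192abc.md", // hypothetical journal/chroniclesId
    };

    // convertWikiLinks mutates it into a standard markdown link node:
    const after = {
      type: "link",
      url: "../work/0192abc.md",
      children: [{ type: "text", value: "Some Note" }],
    };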
+ private convertWikiLinks = (mdast: mdast.Content | mdast.Root) => { + // todo: also handle ofmWikiLink + if (mdast.type === "ofmWikiembedding") { + // todo: figure out what to do about hash + (mdast as any).type = "image"; + mdast.title = mdast.value; + mdast.alt = mdast.value; + mdast.url = mdast.url; + } else if (mdast.type === "ofmWikilink") { + mdast.children = [{ type: "text", value: mdast.value }]; + (mdast as any).type = "link"; } else { if ("children" in mdast) { - let results = []; - for await (const child of mdast.children as any) { - await this.stageNoteFiles(importerId, importDir, sourcePath, child); + for (const child of mdast.children as any) { + this.convertWikiLinks(child); } } } }; - // use the mapping of moved files to update the file links in the note - private updateFileLinks = ( - noteSourcePath: string, + // 1. Find and collect all ofmTags, so they can be added to front matter + // 2. Convert ofmTags to text nodes otherwise later Slate will choke on them, since + // Chronicles does not (yet) natively support inline tags + // todo(test): Tag with #hash remains in document; tag without hash is stored in db + private processAndConvertTags = ( mdast: mdast.Content | mdast.Root, - filesMapping: Record, - ) => { - if (this.isFileLink(mdast)) { - const url = this.cleanFileUrl(noteSourcePath, mdast.url); - if (url in filesMapping) { - mdast.url = filesMapping[url]; - } + tags: string[] = [], + ): string[] => { + if (mdast.type === "ofmTag") { + (mdast as any).type = "text"; + const tag = mdast.value; // without hash + mdast.value = `#${mdast.value}`; + tags.push(tag); + return tags; } else { if ("children" in mdast) { - for (const child of mdast.children as any) { - this.updateFileLinks(noteSourcePath, child, filesMapping); + for (const child of mdast.children as any[]) { + this.processAndConvertTags(child, tags); } } + + return tags; } }; } diff --git a/src/preload/client/importer/FilesImportResolver.ts b/src/preload/client/importer/FilesImportResolver.ts new file mode 100644 index 0000000..cd90f46 --- /dev/null +++ b/src/preload/client/importer/FilesImportResolver.ts @@ -0,0 +1,283 @@ +import fs from "fs"; +import { Knex } from "knex"; +import mdast from "mdast"; +import path from "path"; +import { uuidv7obj } from "uuidv7"; +import { PathStatsFile } from "../../files"; +import { IFilesClient } from "../files"; + +const ATTACHMENTS_DIR = "_attachments"; + +// Manages the staging and moving of files during the import process, and +// resolves file links in markdown notes to the chronicles path, so they can +// be converted to markdown links. +// The import process is as follows: +// 1. stageFile: Stage all files in the import directory, and store their metadata in the +// import_files table. +// 2. updateFileLinks: Update all file links in the notes to the chronicles path, so they can be +// converted to markdown links. +// 3. moveStagedFiles: Move all staged files to the chronicles directory, and update their status +// in the import_files table. +export class FilesImportResolver { + private knex: Knex; + private importerId: string; + private filesclient: IFilesClient; + + constructor(knex: Knex, importerId: string, filesclient: IFilesClient) { + this.knex = knex; + this.importerId = importerId; + this.filesclient = filesclient; + } + + // Resolve wikilink to markdown link (w/ chronicles id), and mark the staged + // file as used (so it will be moved in the next step). 
+  // [[2024-11-17-20241118102000781.webp]] -> ../_attachments/<chroniclesId>.webp
+  private resolveToChroniclesByName = async (
+    name: string,
+  ): Promise<string | undefined> => {
+    // check db for chronicles id matching name, if any
+    const result = await this.knex("import_files")
+      .where({ filename: name })
+      .select("chroniclesId", "extension")
+      .first()!;
+
+    if (!result) return;
+
+    const { chroniclesId, extension } = result;
+    const updatedPath = this.makeDestinationFilePath(chroniclesId, extension);
+
+    if (updatedPath) {
+      await this.knex("import_files").where({ chroniclesId }).update({
+        status: "referenced",
+      });
+
+      return updatedPath;
+    }
+  };
+
+  // Resolve a file path (from a markdown link) from its original path to the
+  // chronicles path, and mark the staged file as used (so it will be moved in
+  // the next step).
+  // /path/to/file.jpg -> ../_attachments/<chroniclesId>.jpg
+  private resolveToChroniclesByPath = async (
+    path: string,
+  ): Promise<string | undefined> => {
+    const result = await this.knex("import_files")
+      .where({ sourcePathResolved: path })
+      .select("chroniclesId", "extension")
+      .first()!;
+
+    if (!result) return;
+
+    const { chroniclesId, extension } = result;
+    const updatedPath = this.makeDestinationFilePath(chroniclesId, extension);
+
+    if (updatedPath) {
+      await this.knex("import_files").where({ chroniclesId }).update({
+        status: "referenced",
+      });
+
+      return updatedPath;
+    }
+  };
+
+  /**
+   * Resolve a file link to an absolute path, which we use as the primary key
+   * in the staging table for moving files; can be used to check if file was
+   * already moved, and to fetch the destination id for the link when updating
+   * the link in the document.
+   *
+   * @param noteSourcePath - absolute path to the note that contains the link
+   * @param url - mdast.url of the link
+   */
+  private resolveMarkdownFileLinkToAbsPath = (
+    noteSourcePath: string,
+    url: string,
+  ): string => {
+    const urlWithoutQuery = url.split(/\?/)[0] || "";
+    return decodeURIComponent(
+      path.normalize(
+        path.resolve(path.dirname(noteSourcePath), urlWithoutQuery),
+      ),
+    );
+  };
+
+  private resolveMarkdownFileLinkToChroniclesPath = async (
+    noteSourcePath: string,
+    url: string,
+  ): Promise<string | undefined> => {
+    const absPath = this.resolveMarkdownFileLinkToAbsPath(noteSourcePath, url);
+    return await this.resolveToChroniclesByPath(absPath);
+  };
+
+  // Add a file to the import_files table, so it can be moved in the next step;
+  // generate a chronicles id so the future chronicles path can be resolved prior
+  // to moving the file.
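For illustration, a sketch of the resulting path convention (hypothetical values; makeDestinationFilePath below implements it):

    import path from "path";

    const chroniclesId = "0193e7460a9c7099a60cb43bf06d92d4"; // uuidv7 hex, assigned at staging time
    const extension = ".webp"; // path.extname keeps the leading dot

    path.join("..", "_attachments", `${chroniclesId}${extension}`);
    // => "../_attachments/0193e7460a9c7099a60cb43bf06d92d4.webp"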
+  stageFile = async (filestats: PathStatsFile) => {
+    const ext = path.extname(filestats.path);
+
+    try {
+      await this.knex("import_files").insert({
+        importerId: this.importerId,
+        sourcePathResolved: filestats.path, // todo: re-name to pathAbs or pathRelative
+        filename: path.basename(filestats.path, ext),
+        chroniclesId: uuidv7obj().toHex(),
+        extension: ext,
+      });
+    } catch (err: any) {
+      // file referenced more than once in a note, or in more than one note; if import logic
+      // is good we really don't even need to log this, should just skip
+      if ("code" in err && err.code === "SQLITE_CONSTRAINT_PRIMARYKEY") {
+        console.log("skipping file already staged", filestats.path);
+      } else {
+        throw err;
+      }
+    }
+  };
+
+  // todo: Move this back out to importer, just copy pasted to get things working
+  // check if a markdown link is a link to a (markdown) note
+  private isNoteLink = (url: string) => {
+    // we are only interested in markdown links
+    if (!url.endsWith(".md")) return false;
+
+    // ensure it's not a URL with an .md domain
+    if (url.includes("://")) return false;
+
+    return true;
+  };
+
+  // Determine if an mdast node is a file link
+  isFileLink = (
+    mdast: mdast.Content | mdast.Root,
+  ): mdast is mdast.Image | mdast.Link | mdast.OfmWikiEmbedding => {
+    return (
+      (((mdast.type === "image" || mdast.type === "link") &&
+        !this.isNoteLink(mdast.url)) ||
+        mdast.type === "ofmWikiembedding") &&
+      !/^(https?|mailto|#|\/|\.|tel|sms|geo|data):/.test(mdast.url)
+    );
+  };
+
+  //../_attachments/chroniclesId.ext
+  private makeDestinationFilePath = (
+    chroniclesId: string,
+    extension: string,
+  ) => {
+    return path.join("..", ATTACHMENTS_DIR, `${chroniclesId}${extension}`);
+  };
+
+  // use the previously generated list of staged files to update file links in the note,
+  // specifically to resolve ![[WikiLinks]] to the chronicles path, so they can be
+  // converted to markdown links.
+  // NOTE: MUST have called stageFile on ALL files before calling this!!!
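For illustration, the intended call order (a hedged sketch; knex, importerId, and filesClient are hypothetical handles, while the methods are the ones defined on this class):

    const resolver = new FilesImportResolver(knex, importerId, filesClient);

    // 1. While walking the import dir, stage every non-markdown file.
    await resolver.stageFile(fileStats);

    // 2. Rewrite file links in each parsed note; marks matching rows "referenced".
    await resolver.updateFileLinks(noteSourcePath, mdast);

    // 3. Copy only referenced files into _attachments; leftover rows become "orphaned".
    await resolver.moveStagedFiles(chroniclesRoot, importerId, importDir);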
+  updateFileLinks = async (
+    noteSourcePath: string,
+    mdast: mdast.Content | mdast.Root,
+  ) => {
+    if (this.isFileLink(mdast)) {
+      // note: The mdast type will be updated in convertWikiLinks
+      // todo: handle ofmWikiLink
+      if (mdast.type === "ofmWikiembedding") {
+        const updatedUrl = await this.resolveToChroniclesByName(mdast.value);
+        if (updatedUrl) {
+          mdast.url = updatedUrl;
+        }
+      } else {
+        const updatedUrl = await this.resolveMarkdownFileLinkToChroniclesPath(
+          noteSourcePath,
+          mdast.url,
+        );
+        if (updatedUrl) {
+          mdast.url = updatedUrl;
+        }
+      }
+    } else {
+      if ("children" in mdast) {
+        for (const child of mdast.children as any) {
+          await this.updateFileLinks(noteSourcePath, child);
+        }
+      }
+    }
+  };
+
+  // rudimentary check to see if a file exists and is readable
+  private safeAccess = async (
+    resolvedPath: string,
+    importDir: string,
+  ): Promise<[null, string] | [string, null]> => {
+    // Check if file is contained within importDir to prevent path traversal
+    if (!resolvedPath.startsWith(importDir))
+      return [null, "Potential path traversal detected"];
+
+    // Check if the file exists
+    if (!fs.existsSync(resolvedPath))
+      return [null, "Source file does not exist"];
+
+    // Check if file has read permissions
+    try {
+      await fs.promises.access(resolvedPath, fs.constants.R_OK);
+    } catch {
+      return [null, "No read access to the file"];
+    }
+
+    return [resolvedPath, null];
+  };
+
+  // After all files are staged and links updated, move all referenced files into the
+  // attachments directory
+  moveStagedFiles = async (
+    chroniclesRoot: string,
+    importerId: string,
+    importDir: string,
+  ) => {
+    // bug: at this point their status is all pending; someone is not awaiting
+    const files = await this.knex("import_files").where({
+      importerId,
+      status: "referenced",
+    });
+
+    const attachmentsDir = path.join(chroniclesRoot, ATTACHMENTS_DIR);
+    await fs.promises.mkdir(attachmentsDir, { recursive: true });
+
+    for await (const file of files) {
+      const { sourcePathResolved, extension, chroniclesId } = file;
+
+      // todo: convert to just err checking
+      let [_, err] = await this.safeAccess(sourcePathResolved, importDir);
+
+      if (err != null) {
+        console.error("safeAccess check failed for", sourcePathResolved);
+        await this.knex("import_files")
+          .where({ importerId, sourcePathResolved })
+          .update({ error: err });
+        continue;
+      }
+
+      const destinationFile = path.join(
+        chroniclesRoot,
+        ATTACHMENTS_DIR,
+        `${chroniclesId}${extension}`,
+      );
+
+      try {
+        await this.filesclient.copyFile(sourcePathResolved, destinationFile);
+        await this.knex("import_files")
+          .where({ chroniclesId })
+          .update({ status: "complete", error: null });
+      } catch (err) {
+        console.error("error moving file", chroniclesId, err);
+        await this.knex("import_files")
+          .where({ chroniclesId })
+          .update({ error: (err as Error).message });
+        continue;
+      }
+    }
+
+    // Mark all remaining files as orphaned; can be used to debug import issues,
+    // and potentially also be configurable (i.e.
whether to import orphaned files + // or not) + await this.knex("import_files") + .where({ status: "pending", importerId }) + .update({ status: "orphaned" }); + }; +} diff --git a/src/preload/client/importer/SourceType.ts b/src/preload/client/importer/SourceType.ts new file mode 100644 index 0000000..2c803fc --- /dev/null +++ b/src/preload/client/importer/SourceType.ts @@ -0,0 +1,5 @@ +// Included here so non-preload (preference) can import +export enum SourceType { + Notion = "notion", + Other = "other", +} diff --git a/src/preload/client/importer/frontmatter.ts b/src/preload/client/importer/frontmatter.ts index 5af7196..3b3f290 100644 --- a/src/preload/client/importer/frontmatter.ts +++ b/src/preload/client/importer/frontmatter.ts @@ -1,4 +1,5 @@ import yaml from "yaml"; +import { SourceType } from "../importer/SourceType"; interface ParseTitleAndFrontMatterRes { title: string; @@ -12,11 +13,31 @@ interface RawExtractFrontMatterResponse { body: string; } +export const parseTitleAndFrontMatter = ( + contents: string, + filename: string, + sourceType: SourceType, +): ParseTitleAndFrontMatterRes => { + // My Notion files were all in a database and hence exported with + // a kind of "front matter"; can pull title from that. + if (sourceType === "notion") { + return parseTitleAndFrontMatterNotion(contents); + } else { + // Otherwise for other import types, for now, make no attempt at finding + // or parsing front matter. + return { + title: filename, + frontMatter: {}, + body: contents, + }; + } +}; + /** * Parses a string of contents into a title, front matter, and body; strips title / frontmatter * from the body. */ -export function parseTitleAndFrontMatter( +function parseTitleAndFrontMatterNotion( contents: string, ): ParseTitleAndFrontMatterRes { const { title, rawFrontMatter, body } = extractRawFrontMatter(contents); diff --git a/src/preload/client/importer/importer.test.ts b/src/preload/client/importer/importer.test.ts index 0e23b4f..2f9a6bb 100644 --- a/src/preload/client/importer/importer.test.ts +++ b/src/preload/client/importer/importer.test.ts @@ -2,19 +2,22 @@ // While I dev. May keep this around, but its pretty hacky and far // from complete or a real test suite. import { diff } from "deep-object-diff"; -import { parseMarkdown } from "../../../markdown"; import { ImporterClient } from "../importer"; +import { SourceType } from "./SourceType"; import { parseTitleAndFrontMatter } from "./frontmatter"; export function runTests(importer: ImporterClient) { runFrontmatterTests(importer); - testIsFileLink(importer); } // to the console; can convert to real tests at the end. 
function runFrontmatterTests(importer: ImporterClient) { for (const testCase of titleFrontMatterTestCases) { - const result = parseTitleAndFrontMatter(testCase.input); + const result = parseTitleAndFrontMatter( + testCase.input, + "Dont use this title", + SourceType.Notion, + ); if (!result.frontMatter) { console.error("FAILED:", testCase.expected.title); @@ -360,37 +363,3 @@ export const inferOrGenerateJournalNameTestCases = [ output: "TODO_...", // shorter }, ]; - -// todo: hacky AF, but just to get some tests running -function testIsFileLink(importer: ImporterClient) { - function getLinkLike(mdast: any) { - if (mdast.type !== "root" && mdast.url) { - return mdast; - } else if (mdast.children) { - return mdast.children.map(getLinkLike); - } - } - - // parse links out of the markdown string - // maybe better to just manually create mdast objects - const links = getLinkLike( - parseMarkdown(` -# Test case - -![file](sers/cloverich/Documents/chronicles-development/export) -[Google Software Engineer Interview](https://igotanoffer.com/blogs/tech/google-software-engineer-interview) -![2020%20in%20review%204911c57a21aa4a0daa47c2e5f8d9df98/IMG_20200104_112600.jpg](2020%20in%20review%204911c57a21aa4a0daa47c2e5f8d9df98/IMG_20200104_112600.jpg) -![file](file:///Users/cloverich/Documents/chronicles-development/export) - `), - ) - .flat(Infinity) - .filter(Boolean); - - [true, false, true, true].forEach((isFileLink, i) => { - if (isFileLink !== importer.isFileLink(links[i])) { - console.error("FAILED", links[i].url); - } else { - console.info("SUCCESS", links[i].url); - } - }); -} diff --git a/src/preload/client/preferences.ts b/src/preload/client/preferences.ts index ef55fea..ca80a39 100644 --- a/src/preload/client/preferences.ts +++ b/src/preload/client/preferences.ts @@ -78,9 +78,10 @@ export class PreferencesClient { return new Promise<{ error?: string; value?: string }>( (resolve, reject) => { ipcRenderer.once("directory-selected", (event, arg) => { - console.log("directory-selected", arg); if (arg.error) { reject(arg.error); + } else if (!arg.value) { + resolve({ value: undefined }); } else { this.set("NOTES_DIR", arg.value); resolve(arg.value); diff --git a/src/preload/importer/legacy/importChronicles.ts b/src/preload/importer/legacy/importChronicles.ts deleted file mode 100644 index b908f12..0000000 --- a/src/preload/importer/legacy/importChronicles.ts +++ /dev/null @@ -1,164 +0,0 @@ -// This importer is a legacy of the prior file-based format, which IIRC was something like: - -// /my_journal -// 2024/ -// /04 -// /01 -// # etc -// Because I did not use front-matter, and because the ctime / mtime could be changed by bulk file moves / zip / -// cloud sync, I relied on the filename for the date of the note. I think it makes sense to leave this until -// I'm satisfied with the final format of Chronicles data, and especially until I introduce markdown importers. 
-// -// -// import fs from "fs"; -// import { DateTime } from "luxon"; -// import path from "path"; -// import { stringToMdast } from "../../markdown"; -// import { Files } from "../files"; -// import { shouldIndexDay } from "./legacy/indexer"; - -// import { create } from "../client"; -// const client = create(); - -// async function findOrCreate(name: string) { -// const journals = await client.journals.list(); -// // if (journals.includes) -// const existing = journals.find((j) => j.name === name); -// if (existing) return existing; - -// return await client.journals.create({ name }); -// } - -// // Copy pasta from shouldIndex and exported for my importChronicles script... -// function dateFromPrevalidatedFilepath(filepath: string) { -// const { ext, name } = path.parse(filepath); -// if (ext !== ".md") return false; -// if (name.startsWith(".")) return false; - -// // NOTE: This manages to respect the timezone, so if I pull 2020-05-01, -// // turn it into a date, then stringify it, it gives me a 5 hour (CDT) offset. -// // Filename (without extension) must be a valid date -// const parsedDate = DateTime.fromISO(name); -// if (name !== parsedDate.toISODate()) return false; - -// return parsedDate; -// } - -// // Import documents from my old file based system, which used markdown files -// // in a one note per day system: /my-journal/2020/05/01/2020-05-01.md -// export async function importChronicles(notesDir: string) { -// // await new Promise((res) => setTimeout(res, 2000)); -// // list all journals in my notes directory -// const journals = fs -// .readdirSync(notesDir) -// // excludes '.DS_Store' and other misc. directories -// .filter((folder) => !folder.startsWith(".")); - -// // walk journals one by one -// for (const journal of journals) { -// const jourrnalModel = await findOrCreate(journal); - -// for await (const file of Files.walk( -// path.join(notesDir, journal), -// shouldIndexDay, -// )) { -// const parsed = await loadDocument(file.path); -// if (parsed.mdast.type !== "root") throw new Error("oh my"); - -// for await (const document of splitOnTitle(parsed.contents)) { -// // skip empty documents... -// if (!document.content.length) continue; - -// const date = dateFromPrevalidatedFilepath(file.path); -// if (!date) throw new Error(`expected valid date for ${file.path}`); - -// // todo: consider adding a `date` field, and using that as the definitive date -// // then createdAt and updatedAt could maintain "When was this document created" -// // and make back-dating a bit more sensible... -// const doc = await client.documents.save({ -// journal: jourrnalModel!.id, -// createdAt: date.toISO()!, -// updatedAt: date.toISO()!, -// content: document.content, -// title: document.title, -// tags: [], // todo -// }); -// console.log("created", doc.id); -// } -// } -// } -// } - -// async function loadDocument(filepath: string) { -// // date? -// const contents = await Files.read(filepath); -// return { -// contents: contents, -// mdast: stringToMdast(contents), -// }; -// } - -// // Split a document into multiple documents by presence of a top-level -// // markdown heading, i.e. 
"# This is a heading" -// function splitOnTitle( -// content: string, -// ): Array<{ title: string; content: string }> { -// const lines = content.split("\n"); - -// // Clear a few edge cases to simplify the rest of the implementation: -// // Empty -- return empty array -// // One document with only a title -- return empty array -// // One document with only one line -- return one document -// if (lines.length === 0) return []; - -// if (lines.length === 1) { -// // Drop documents that have only a title and no content -// if (lines[0].startsWith("# ")) return []; -// return [{ title: "", content: lines[0] }]; -// } - -// function makeDocument(lines: string[]) { -// const hasTitle = lines[0].startsWith("# "); -// const document = { -// title: hasTitle ? lines[0].slice(2) : "", -// content: hasTitle ? lines.slice(1).join("\n") : lines.join("\n"), -// }; - -// document.content = document.content.trim(); -// return document; -// } - -// let nextDocumentLines: string[] = []; -// const documents: Array<{ title: string; content: string }> = []; - -// for (const line of lines) { -// if (line.startsWith("# ") && nextDocumentLines.length > 0) { -// // append existing lines as document, then create a new one -// documents.push(makeDocument(nextDocumentLines)); -// nextDocumentLines = [line]; -// } else { -// nextDocumentLines.push(line); -// } -// } - -// // clear the remaining buffered lines -// if (nextDocumentLines.length) { -// documents.push(makeDocument(nextDocumentLines)); -// } - -// return documents; -// } - -// // Now that I import stuff that uses electron uhh... -// // this must be called from an electron process... -// // ... but I also use typescript... -// // call from renderer? lmao. -// // importChronicles().then( -// // () => { -// // process.exit(0); -// // }, -// // (err) => { -// // console.error(err); -// // process.exit(1); -// // } -// // ); diff --git a/src/preload/importer/legacy/indexer.ts b/src/preload/importer/legacy/indexer.ts deleted file mode 100644 index 13f3260..0000000 --- a/src/preload/importer/legacy/indexer.ts +++ /dev/null @@ -1,274 +0,0 @@ -// import { Database } from "better-sqlite3"; -// import { DateTime } from "luxon"; -// import path from "path"; -// import { Root, mdastToString, stringToMdast } from "../../../markdown"; -// import { Files, PathStatsFile } from "../../files"; - -// /** -// * NOTE: See comments in importChronicles; this is legacy. Leaving until the importers work is completed at least. -// */ -// export interface IJournal { -// // path to root folder -// url: string; -// // display name -// name: string; - -// /** -// * The duration of a single document in a journal. -// */ -// period: "day" | "week" | "month" | "year"; -// } - -// function isISODate(dateStr: string) { -// const parsedDate = DateTime.fromISO(dateStr); -// return dateStr === parsedDate.toISODate(); -// } - -// const reg = /\d{4}-\d{2}-\d{2}/; - -// interface NodeSchema { -// journal: string; // future: id -// date: string; -// type: string; // node type -// idx: number; -// attributes: string; // jsonb -// } - -// class IndexParsingError extends Error { -// constructor(msg: string) { -// super(msg); -// this.name = "IndexParsingError"; -// } -// } - -// // legacy -// class Indexer { -// private db: Database; -// constructor(db: Database) { -// this.db = db; -// } - -// insert = (journal: string, date: string, node: any) => { -// // NOTE: Lazy work here. 
I want to serialize most node attributes into a JSON column that -// // I could eventually search on, like "depth" for heading nodes. But other properties on the node -// // (like children and and position) I do not need. So, pull them off and discard. -// // I could delete node.position but I may need node.children in subsequent processing steps, like -// // when pulling listItem children off of list nodes to independnetly index.... -// // Basically the structure of MDAST is affecting how I process it. Blargh. -// const { type, children, position, ...atributes } = node; - -// let contents: string; - -// try { -// contents = mdastToString(node); -// } catch (err: any) { -// throw new IndexParsingError(err); -// } - -// // todo: use auto-increment to track parent node -// this.db -// .prepare( -// "INSERT INTO nodes (journal, date, type, contents, attributes) VALUES (:journal, :date, :type, :contents, :attributes)", -// ) -// .run({ -// journal, -// date, -// type, -// contents, -// attributes: JSON.stringify(atributes), -// }); -// }; - -// /** -// * De-index a journals documents -// * -// * @param journal - name of journal to remove from indexed nodes table -// */ -// deindex = async (journal: string) => { -// const stmt = this.db.prepare("DELETE FROM nodes where journal = :journal"); -// stmt.run({ journal }); -// }; - -// /** -// * Re-index a document - e.g. after its been updated -// * @param journal - name of journal -// * @param date -// * @param contents -// */ -// update = async (journal: string, date: string, contents: string) => { -// const parsed = stringToMdast(contents); -// const stmt = this.db.prepare( -// "DELETE FROM nodes where journal = :journal and date = :date", -// ); -// stmt.run({ journal, date }); -// await this.indexNode(journal, date, parsed); -// }; - -// /** -// * Recursively index an mdast document -// * -// * NOTE: This is a naive strategy to make content searchable by node type. -// * Little thought has been applied to the right way to index content, and -// * all the things that go with that. -// * @param journal -// * @param date -// * @param node - TODO: Base node type -// */ -// indexNode = async (journal: string, date: string, node: Root | any) => { -// // Redundant when called by index since Files.walk shouldIndex does this. But -// // I put this here because of a bug so.... hmmm.. -// if (!isISODate(date)) -// throw new Error( -// `[Indexer.indexNode] Expected an ISO formatted date but got ${date}`, -// ); - -// if (node.type !== "root") { -// try { -// await this.insert(journal, date, node); -// } catch (err) { -// // Because I am recursively indexing _all_ nodeTypes, the remark parser -// // I am using to stringify node content may not have a "compiler" for a particular -// // node: Ex - if compiles a table node, but will choke if passed its child tableRow -// // node directly. Ignore these errors and simply don't index those child nodes. -// // Longer term, I'll likely use a different indexing strategy / implementation so -// // not concerned about this right now. 
-// if (err instanceof IndexParsingError) { -// // ignore -// } else { -// console.error( -// "Error indexing node for journal ${journal}: It may not show up correctly", -// ); -// console.error(err); -// } -// } -// } - -// if (!node.children) return; - -// for (const child of node.children) { -// await this.indexNode(journal, date, child); -// } -// }; - -// index = async (journal: IJournal) => { -// const shouldFunc = getShouldFunc(journal.period); - -// for await (const entry of Files.walk(journal.url, shouldFunc)) { -// console.debug("[Indexer.index] processing entry", entry.path); - -// const contents = await Files.read(entry.path); -// // todo: track parsing errors so you understand why your content -// // isn't showing up in your journal view (failed to index). -// try { -// const parsed = stringToMdast(contents); - -// // BUG ALERT: I was passing `entry.path` as second argument, when it wanted the -// // filename, because it wants an ISODate: 2020-05-01, which is how we name files. -// // I added `isISODate` to indexNode. -// const filename = path.parse(entry.path).name; - -// await this.indexNode(journal.name, filename, parsed); -// } catch (err) { -// // Log and continue, so we can index remaining journal documents -// console.error( -// `[Indexer.index] error indexing entry ${entry.path}`, -// err, -// ); -// } -// } -// }; -// } - -// // BELOW: HELPERS FOR DETERMINING IF A FILE SHOULD BE INDEXED, BASED ON FILENAME -// // AND THE JOURNAL'S "period" -- day, month, year. -// // SEE Files.walk usage - -// // To check for filename structure and directory naming convention -// // Has match groups for year, month, and filename parts -// // ex match: /journals/reviews/2020/04/2020-04-01.md -// const fileformat = /\/(\d{4})\/(\d{2})\/(\d{4})-(\d{2})-\d{2}/; - -// function isStartofWeek(d: DateTime) { -// return d.startOf("week").day + 6 === d.day; -// } - -// function isStartOfMonth(d: DateTime) { -// return d.startOf("month").toISODate() === d.toISODate(); -// } - -// function isStartOfYear(d: DateTime) { -// return d.startOf("year").toISODate() === d.toISODate(); -// } - -// // exported for my importChronicles script without much thought -// export const shouldIndexDay = (file: PathStatsFile) => shouldIndex(file, "day"); -// const shouldIndexWeek = (file: PathStatsFile) => shouldIndex(file, "week"); -// const shouldIndexMonth = (file: PathStatsFile) => shouldIndex(file, "month"); -// const shouldIndexYear = (file: PathStatsFile) => shouldIndex(file, "year"); - -// function getShouldFunc(period: IJournal["period"]) { -// switch (period) { -// case "day": -// return shouldIndexDay; -// case "week": -// return shouldIndexWeek; -// case "month": -// return shouldIndexMonth; -// case "year": -// return shouldIndexYear; -// } -// } - -// /** -// * Should we index a given file? 
-// * -// * @param file - A file yielded by our directory walking function -// * @param period - The journal "period" -// */ -// function shouldIndex(file: PathStatsFile, period: IJournal["period"]): boolean { -// if (file.stats.isDirectory()) return false; - -// const { ext, name } = path.parse(file.path); -// if (ext !== ".md") return false; -// if (name.startsWith(".")) return false; - -// // Filename (without extension) must be a valid date -// const parsedDate = DateTime.fromISO(name); -// if (name !== parsedDate.toISODate()) return false; - -// if (period === "week") { -// if (!isStartofWeek(parsedDate)) return false; -// } - -// if (period === "month") { -// if (!isStartOfMonth(parsedDate)) return false; -// } - -// if (period === "year") { -// if (!isStartOfYear(parsedDate)) return false; -// } - -// // const result = fileformat.exec('journals/foo/2020/02/2020-01-15.md') -// // Produces an array-like object: -// // [ -// // '/2020/02/2020-01-15', -// // '2020', -// // '02', -// // '2020', -// // '01', -// // index: 17, -// // input: 'journals/foo//2020/02/2020-01-15.md', -// // groups: undefined -// // ] -// // NOTE: Its only array _like_, and only the matched segments -// const segments = fileformat.exec(file.path); - -// // Is it in the correct directory structure? -// if (!segments) return false; -// if (segments.length !== 5) return false; - -// // File should be in nested directories for its year and month -// if (segments[1] !== segments[3] || segments[2] !== segments[4]) return false; - -// return true; -// } diff --git a/src/views/preferences/index.tsx b/src/views/preferences/index.tsx index 4c12530..fad7785 100644 --- a/src/views/preferences/index.tsx +++ b/src/views/preferences/index.tsx @@ -9,8 +9,10 @@ import { observable } from "mobx"; import { observer } from "mobx-react-lite"; import React, { PropsWithChildren, useEffect } from "react"; import { useNavigate } from "react-router-dom"; +import { Select } from "../../components/Select"; import useClient from "../../hooks/useClient"; import { useJournals } from "../../hooks/useJournals"; +import { SourceType } from "../../preload/client/importer/SourceType"; import { Preferences } from "../../preload/client/preferences"; import Titlebar from "../../titlebar/macos"; import * as Base from "../layout"; @@ -21,17 +23,18 @@ const Preferences = observer(() => { observable({ preferences: {} as Preferences, loading: true, + sourceType: SourceType.Other, }), ); const client = useClient(); const navigate = useNavigate(); - async function openDialogNotesDir() { + async function selectNotesRoot() { store.loading = true; try { const result = await client.preferences.openDialogNotesDir(); - if (!result) { + if (!result?.value) { store.loading = false; return; } @@ -45,7 +48,7 @@ const Preferences = observer(() => { } } - async function openDialogImportDir() { + async function importDirectory() { store.loading = true; try { const result = await client.preferences.openDialogImportDir(); @@ -54,7 +57,7 @@ const Preferences = observer(() => { return; } - await client.importer.import(result); + await client.importer.import(result, store.sourceType); store.loading = false; } catch (e) { console.error("Error importing directory", e); @@ -112,22 +115,6 @@ const Preferences = observer(() => { located at {client.preferences.settingsPath()}

-          {/*
-            Export
-          */}
-
           Chronicles Notes Root

@@ -143,7 +130,7 @@ const Preferences = observer(() => {
@@ -152,11 +139,19 @@ const Preferences = observer(() => {
           Import markdown directory

           Import a directory of markdown files. Experimental.

+
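
Aside from the patch itself: a minimal sketch of the import flow the preferences hunks above wire up, for readers skimming the diff. The shape of `client.importer.import(dir, sourceType)` and `SourceType.Other` come from the hunks; the enum's string values, a `Notion` member, and `openDialogImportDir` resolving to a path string (or a falsy value on cancel) are assumptions, and the client interfaces below are simplified stand-ins, not the app's real types.

```typescript
// Sketch only: simplified stand-ins for the real preload client types.
enum SourceType {
  Notion = "notion", // assumed member/value; only Other appears in this diff
  Other = "other", // non-Notion markdown, e.g. an Obsidian vault
}

interface PreferencesClient {
  // Assumed to resolve to the chosen directory, or null when the user cancels.
  openDialogImportDir(): Promise<string | null>;
}

interface ImporterClient {
  import(dir: string, sourceType: SourceType): Promise<void>;
}

// Mirrors importDirectory() in the hunk above: pick a directory, bail on
// cancel, then hand the directory plus the user-selected source type to
// the importer, which branches on Notion vs. non-Notion handling.
async function importDirectory(
  preferences: PreferencesClient,
  importer: ImporterClient,
  sourceType: SourceType = SourceType.Other,
): Promise<void> {
  const dir = await preferences.openDialogImportDir();
  if (!dir) return; // user clicked cancel

  await importer.import(dir, sourceType);
}
```

Passing the source type as an explicit second argument (rather than inferring it from directory contents) matches the diff's approach: the user chooses it in the preferences UI, and the importer conditionally applies Notion-specific steps such as stripping the Notion id from note titles.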