From fe301bd7d68ebcc3807a90667f27339bab7bf2cf Mon Sep 17 00:00:00 2001
From: chris <1010084+cloverich@users.noreply.github.com>
Date: Tue, 3 Dec 2024 08:42:12 -0800
Subject: [PATCH] Import non-Notion markdown directory (#273)
add experimental markdown / Obsidian importer
- add source type to importer to support notion vs non-notion
- conditionally strip and track Notion id from note title
- resolve and convert [[Wikilinks]]
- track and convert inline #tags
- skip title parsing from front matter; fall back to file name for non-Notion imports
- use birthtime and mtime, not ctime, for the default note creation date when front matter is not present
- refactor file moving and resolving to walk all files, then move if referenced by a note
- (bugfix) check for an empty / null value when selecting a new chronicles root; it implies the user clicked cancel
- drop legacy importer code (was unused and kept for reference)
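
A minimal sketch of the new entry point (assuming an importer client
instance; SourceType is exported from src/preload/client/importer/SourceType):

    // Notion export: strips ids from titles, parses Notion front matter
    await importer.import("/path/to/notion-export", SourceType.Notion);
    // non-Notion markdown (e.g. an Obsidian vault) is the default
    await importer.import("/path/to/vault", SourceType.Other);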
---
src/electron/migrations/20211005142122.sql | 4 +-
src/markdown/README.md | 2 +
src/markdown/index.test.ts | 31 +-
src/markdown/index.ts | 25 +-
src/markdown/mdast-util-ofm-tag/index.ts | 17 +
.../mdast-util-ofm-tag/lib/fromMarkdown.ts | 26 ++
.../mdast-util-ofm-tag/lib/toMarkdown.ts | 15 +
.../micromark-extension-ofm-tag/index.ts | 10 +
.../micromark-extension-ofm-tag/lib/syntax.ts | 131 ++++++
src/markdown/test-utils.ts | 8 +-
src/preload/client/importer.ts | 441 ++++++++----------
.../client/importer/FilesImportResolver.ts | 283 +++++++++++
src/preload/client/importer/SourceType.ts | 5 +
src/preload/client/importer/frontmatter.ts | 23 +-
src/preload/client/importer/importer.test.ts | 43 +-
src/preload/client/preferences.ts | 3 +-
.../importer/legacy/importChronicles.ts | 164 -------
src/preload/importer/legacy/indexer.ts | 274 -----------
src/views/preferences/index.tsx | 39 +-
19 files changed, 777 insertions(+), 767 deletions(-)
create mode 100644 src/markdown/mdast-util-ofm-tag/index.ts
create mode 100644 src/markdown/mdast-util-ofm-tag/lib/fromMarkdown.ts
create mode 100644 src/markdown/mdast-util-ofm-tag/lib/toMarkdown.ts
create mode 100644 src/markdown/micromark-extension-ofm-tag/index.ts
create mode 100644 src/markdown/micromark-extension-ofm-tag/lib/syntax.ts
create mode 100644 src/preload/client/importer/FilesImportResolver.ts
create mode 100644 src/preload/client/importer/SourceType.ts
delete mode 100644 src/preload/importer/legacy/importChronicles.ts
delete mode 100644 src/preload/importer/legacy/indexer.ts
diff --git a/src/electron/migrations/20211005142122.sql b/src/electron/migrations/20211005142122.sql
index e6e010e..2fd4a5f 100644
--- a/src/electron/migrations/20211005142122.sql
+++ b/src/electron/migrations/20211005142122.sql
@@ -58,9 +58,10 @@ CREATE TABLE IF NOT EXISTS "imports" (
CREATE TABLE IF NOT EXISTS "import_files" (
"importerId" TEXT NOT NULL,
- "sourcePathResolved" TEXT NOT NULL PRIMARY KEY,
"status" TEXT NOT NULL DEFAULT "pending",
"chroniclesId" TEXT NOT NULL,
+ "sourcePathResolved" TEXT NOT NULL PRIMARY KEY,
+ "filename" TEXT NOT NULL, -- filename without extension
"extension" TEXT NOT NULL,
"error" TEXT
);
@@ -71,6 +72,7 @@ CREATE TABLE IF NOT EXISTS "import_notes" (
"status" TEXT NOT NULL, -- success, error
"chroniclesId" TEXT NOT NULL,
"chroniclesPath" TEXT NOT NULL,
+ -- todo: sourcePath + hash of content
"sourcePath" TEXT NOT NULL PRIMARY KEY,
"sourceId" TEXT,
"error" BOOLEAN,
diff --git a/src/markdown/README.md b/src/markdown/README.md
index 78db0ba..626f61c 100644
--- a/src/markdown/README.md
+++ b/src/markdown/README.md
@@ -13,3 +13,5 @@ https://github.com/inokawa/remark-slate-transformer/issues/67
# mdast-util-ofm
Partial fork of https://github.com/MoritzRS/obsidian-ext, initially because of ESM import issues. I need no immediate modifications to it; if / once the upstream project completes its ESM updates, it may be preferable to move this back to a dependency.
+
+ofm-\* packages are Copyright Moritz R. Schulz and MIT licensed
diff --git a/src/markdown/index.test.ts b/src/markdown/index.test.ts
index 5464af3..995a77b 100644
--- a/src/markdown/index.test.ts
+++ b/src/markdown/index.test.ts
@@ -4,7 +4,7 @@ import { describe, it } from "mocha";
import path from "path";
import { slateToString, stringToSlate } from "./index.js";
-import { dig, parseMarkdown } from "./test-utils.js";
+import { dig, parseMarkdown, parseMarkdownForImport } from "./test-utils.js";
// Tests can structure the data this way and use runTests to
// test the various conversions.
@@ -41,7 +41,7 @@ function outputMarkdown(markdown: string | { in: string; out: string }) {
// but is sometimes configurable (ex: options -> bullet)
// - markdown (string)->mdast
// - markdown (string)->slate
-function runTests(doc: TestDoc) {
+function runTests(doc: TestDoc, parser = parseMarkdown) {
it("roundtrips", function () {
const result = slateToString(stringToSlate(inputMarkdown(doc.markdown)));
@@ -54,14 +54,14 @@ function runTests(doc: TestDoc) {
// round trip properly if it does not parse at all (ex: wikilinks without a handler)
if (doc.mdast) {
it("markdown->mdast", function () {
- const result = parseMarkdown(inputMarkdown(doc.markdown));
+ const result = parser(inputMarkdown(doc.markdown));
expect(result).to.deep.equal(doc.mdast);
});
}
if (doc.slate) {
it("markdown->slate", function () {
- const result = stringToSlate(outputMarkdown(doc.markdown));
+ const result = stringToSlate(outputMarkdown(doc.markdown), parser);
expect(result).to.deep.equal(doc.slate);
});
}
@@ -477,7 +477,28 @@ describe("[[Wikilinks]]", function () {
],
};
- runTests(doc);
+ runTests(doc, parseMarkdownForImport);
+});
+
+describe("mdast-util-ofm-tag", async () => {
+ const doc = {
+ markdown: "a #b c",
+ mdast: {
+ type: "root",
+ children: [
+ {
+ type: "paragraph",
+ children: [
+ { type: "text", value: "a " },
+ { type: "ofmTag", value: "b" },
+ { type: "text", value: " c" },
+ ],
+ },
+ ],
+ },
+ };
+
+ runTests(doc, parseMarkdownForImport);
});
// A place to put behavior that is not yet handled correctly; so I can store test
diff --git a/src/markdown/index.ts b/src/markdown/index.ts
index baf3b7b..1b5af6f 100644
--- a/src/markdown/index.ts
+++ b/src/markdown/index.ts
@@ -8,7 +8,9 @@ import { fromMarkdown } from "mdast-util-from-markdown";
import { gfmFromMarkdown, gfmToMarkdown } from "mdast-util-gfm";
import { toMarkdown } from "mdast-util-to-markdown";
import { gfm } from "micromark-extension-gfm";
+import { ofmTagFromMarkdown } from "./mdast-util-ofm-tag";
import { ofmWikilinkFromMarkdown } from "./mdast-util-ofm-wikilink";
+import { ofmTag } from "./micromark-extension-ofm-tag";
import { ofmWikilink } from "./micromark-extension-ofm-wikilink";
import { mdastToSlate } from "./remark-slate-transformer/transformers/mdast-to-slate.js";
@@ -47,10 +49,23 @@ function wrapImages(tree: mdast.Root) {
return tree;
}
+// The importer has additional support for #tag and [[WikiLink]], but converts them
+// to Chronicles tags and markdown links. Future versions may support these properly.
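+// For example (a sketch; shapes mirror the tests in index.test.ts):
+//   parseMarkdownForImport("a #b c")
+//   // -> paragraph children: [text "a ", ofmTag "b", text " c"]
+//   parseMarkdown("a #b c")
+//   // -> paragraph children: [text "a #b c"]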
+export const parseMarkdownForImport = (markdown: string): mdast.Root => {
+ return fromMarkdown(markdown, {
+ extensions: [gfm(), ofmTag(), ofmWikilink()],
+ mdastExtensions: [
+ gfmFromMarkdown(),
+ ofmTagFromMarkdown(),
+ ofmWikilinkFromMarkdown(),
+ ],
+ });
+};
+
export const parseMarkdown = (markdown: string): mdast.Root => {
return fromMarkdown(markdown, {
- extensions: [gfm(), ofmWikilink()],
- mdastExtensions: [gfmFromMarkdown(), ofmWikilinkFromMarkdown()],
+ extensions: [gfm()],
+ mdastExtensions: [gfmFromMarkdown()],
});
};
@@ -62,8 +77,10 @@ export const mdastToString = (tree: mdast.Nodes) => {
});
};
-export const stringToSlate = (input: string) => {
- return mdastToSlate(unwrapImages(parseMarkdown(input)));
+// parser param: support configuring for importer tests, which import and convert
+// a few otherwise unsupported markdown features (tags, wikilinks)
+export const stringToSlate = (input: string, parse = parseMarkdown) => {
+ return mdastToSlate(unwrapImages(parse(input)));
};
export const slateToString = (nodes: SlateCustom.SlateNode[]) => {
diff --git a/src/markdown/mdast-util-ofm-tag/index.ts b/src/markdown/mdast-util-ofm-tag/index.ts
new file mode 100644
index 0000000..d06921c
--- /dev/null
+++ b/src/markdown/mdast-util-ofm-tag/index.ts
@@ -0,0 +1,17 @@
+declare module "mdast" {
+ interface OfmTag extends Literal {
+ type: "ofmTag";
+ value: string;
+ }
+
+ interface RootContentMap {
+ ofmTag: OfmTag;
+ }
+
+ interface PhrasingContentMap {
+ ofmTag: OfmTag;
+ }
+}
+
+export { ofmTagFromMarkdown } from "./lib/fromMarkdown.js";
+export { ofmTagToMarkdown } from "./lib/toMarkdown.js";
diff --git a/src/markdown/mdast-util-ofm-tag/lib/fromMarkdown.ts b/src/markdown/mdast-util-ofm-tag/lib/fromMarkdown.ts
new file mode 100644
index 0000000..13c7bc4
--- /dev/null
+++ b/src/markdown/mdast-util-ofm-tag/lib/fromMarkdown.ts
@@ -0,0 +1,26 @@
+import type { Extension } from "mdast-util-from-markdown";
+
+/**
+ * Create an extension for `mdast-util-from-markdown` to enable OFM tags in markdown.
+ */
+export function ofmTagFromMarkdown(): Extension {
+ return {
+ enter: {
+ ofmTag: function (token) {
+ this.enter({ type: "ofmTag", value: "" }, token);
+ },
+ ofmTagContent: function (token) {
+        // note: modified from upstream's `this.stack.at(-1)` to avoid requiring
+        // the es2022 lib target, which breaks other parts of the build.
+ // const node = this.stack.at(-1);
+ const node = this.stack[this.stack.length - 1];
+ if (node?.type === "ofmTag") node.value = this.sliceSerialize(token);
+ },
+ },
+ exit: {
+ ofmTag: function (token) {
+ this.exit(token);
+ },
+ },
+ };
+}
diff --git a/src/markdown/mdast-util-ofm-tag/lib/toMarkdown.ts b/src/markdown/mdast-util-ofm-tag/lib/toMarkdown.ts
new file mode 100644
index 0000000..f30c7e3
--- /dev/null
+++ b/src/markdown/mdast-util-ofm-tag/lib/toMarkdown.ts
@@ -0,0 +1,15 @@
+import type { Options } from "mdast-util-to-markdown";
+
+/**
+ * Create an extension for `mdast-util-to-markdown` to enable OFM tags in markdown.
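+ *
+ * Usage sketch, via mdast-util-to-markdown:
+ *   toMarkdown(tree, { extensions: [ofmTagToMarkdown()] })
+ * serializes ofmTag nodes back to `#tag`.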
+ */
+export function ofmTagToMarkdown(): Options {
+ return {
+ handlers: {
+ ofmTag(node) {
+ const value = node.value;
+ return `#${value}`;
+ },
+ },
+ };
+}
diff --git a/src/markdown/micromark-extension-ofm-tag/index.ts b/src/markdown/micromark-extension-ofm-tag/index.ts
new file mode 100644
index 0000000..4033b6b
--- /dev/null
+++ b/src/markdown/micromark-extension-ofm-tag/index.ts
@@ -0,0 +1,10 @@
+declare module "micromark-util-types" {
+ interface TokenTypeMap {
+ ofmTag: "ofmTag";
+ ofmTagMarker: "ofmTagMarker";
+ ofmTagContent: "ofmTagContent";
+ }
+}
+
+// export { ofmTagHtml } from "./lib/html.js";
+export { ofmTag } from "./lib/syntax.js";
diff --git a/src/markdown/micromark-extension-ofm-tag/lib/syntax.ts b/src/markdown/micromark-extension-ofm-tag/lib/syntax.ts
new file mode 100644
index 0000000..f7927e0
--- /dev/null
+++ b/src/markdown/micromark-extension-ofm-tag/lib/syntax.ts
@@ -0,0 +1,131 @@
+import type {
+ Code,
+ Effects,
+ Extension,
+ State,
+ TokenizeContext,
+} from "micromark-util-types";
+
+// ASCII codes
+const SPACE = 32;
+const NUMBER_SIGN = 35;
+const DASH = 45;
+const SLASH = 47;
+const DIGIT_0 = 48;
+const DIGIT_9 = 57;
+const LETTER_A = 65;
+const LETTER_Z = 90;
+const UNDERSCORE = 95;
+const LETTER_a = 97;
+const LETTER_z = 122;
+
+/**
+ * Create an extension for `micromark` to enable OFM tag syntax.
+ */
+export function ofmTag(): Extension {
+ return {
+ text: {
+ [NUMBER_SIGN]: {
+ name: "ofmTag",
+ tokenize: tokenize,
+ },
+ },
+ };
+}
+
+/**
+ * A tokenizer for Obsidian tag syntax.
+ * The tag must include at least one non-numerical character.
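+ *
+ * For example (per the state functions below):
+ *   "#tag" and "#123/tag" tokenize as ofmTag;
+ *   "#123" does not (digits only);
+ *   "a#b" does not (a tag must follow whitespace or another tag).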
+ */
+function tokenize(
+ this: TokenizeContext,
+ effects: Effects,
+ ok: State,
+ nok: State,
+) {
+ const previous = this.previous;
+ const events = this.events;
+ return start;
+
+ /**
+ * Start of tag
+ *
+ * ```markdown
+ * > | #123/tag
+ * ^
+ * ```
+ */
+ function start(code: Code) {
+ // Only tags can be chained directly without space
+ if (
+ previous &&
+ previous > SPACE &&
+ events[events.length - 1][1].type !== "ofmTag"
+ ) {
+ return nok(code);
+ }
+
+ effects.enter("ofmTag");
+ effects.enter("ofmTagMarker");
+ effects.consume(code);
+ effects.exit("ofmTagMarker");
+ effects.enter("ofmTagContent");
+ return inside_tag_candidate;
+ }
+
+ /**
+ * Inside a tag without any non-numerical character
+ *
+ * ```markdown
+ * > | #123/tag
+ * ^^^
+ * ```
+ */
+ function inside_tag_candidate(code: Code) {
+ if (code && code >= DIGIT_0 && code <= DIGIT_9) {
+ effects.consume(code);
+ return inside_tag_candidate;
+ }
+
+ if (
+ code &&
+ ((code >= LETTER_A && code <= LETTER_Z) ||
+ (code >= LETTER_a && code <= LETTER_z) ||
+ code === UNDERSCORE ||
+ code === DASH ||
+ code === SLASH)
+ ) {
+ effects.consume(code);
+ return inside_tag;
+ }
+
+ return nok(code);
+ }
+
+ /**
+ * Inside a tag with at least one non-numerical character
+ *
+ * ```markdown
+ * > | #123/tag
+ * ^^^^
+ * ```
+ */
+ function inside_tag(code: Code) {
+ if (
+ code &&
+ ((code >= DIGIT_0 && code <= DIGIT_9) ||
+ (code >= LETTER_A && code <= LETTER_Z) ||
+ (code >= LETTER_a && code <= LETTER_z) ||
+ code === UNDERSCORE ||
+ code === DASH ||
+ code === SLASH)
+ ) {
+ effects.consume(code);
+ return inside_tag;
+ }
+
+ effects.exit("ofmTagContent");
+ effects.exit("ofmTag");
+ return ok(code);
+ }
+}
diff --git a/src/markdown/test-utils.ts b/src/markdown/test-utils.ts
index d6dc458..71811bb 100644
--- a/src/markdown/test-utils.ts
+++ b/src/markdown/test-utils.ts
@@ -1,6 +1,9 @@
import { Root } from "mdast";
-import { parseMarkdown as parseMarkdownRaw } from "./index.js";
+import {
+ parseMarkdownForImport as parseMarkdownForImportRaw,
+ parseMarkdown as parseMarkdownRaw,
+} from "./index.js";
// Remove the parsed position information to simplify deep equals comparisons
// There is a similar function that's an entire npm package; fuck that.
@@ -18,6 +21,9 @@ export function prunePositions(tree: any) {
export const parseMarkdown = (markdown: string): Root =>
prunePositions(parseMarkdownRaw(markdown));
+export const parseMarkdownForImport = (markdown: string): Root =>
+ prunePositions(parseMarkdownForImportRaw(markdown));
+
// Like _.get but fail loud, helpful error messages
// Usage: dig(mdast, 'children.0.children.1.value')
export function dig(obj: any, path: string) {
diff --git a/src/preload/client/importer.ts b/src/preload/client/importer.ts
index 0336086..ab902dd 100644
--- a/src/preload/client/importer.ts
+++ b/src/preload/client/importer.ts
@@ -1,5 +1,4 @@
import { Database } from "better-sqlite3";
-import fs from "fs";
import { Knex } from "knex";
import path from "path";
import { Files, PathStatsFile } from "../files";
@@ -18,10 +17,15 @@ import * as mdast from "mdast";
export type IImporterClient = ImporterClient;
import { uuidv7obj } from "uuidv7";
-import { mdastToString, parseMarkdown as stringToMdast } from "../../markdown";
+import {
+ mdastToString,
+ parseMarkdownForImport as stringToMdast,
+} from "../../markdown";
+import { FilesImportResolver } from "./importer/FilesImportResolver";
+import { SourceType } from "./importer/SourceType";
import { parseTitleAndFrontMatter } from "./importer/frontmatter";
-const SKIPPABLE_FILES = new Set(".DS_Store");
+export const SKIPPABLE_FILES = new Set([".DS_Store"]);
// UUIDs in Notion notes look like 32-character hex strings; make this somewhat more lenient
const hexIdRegex = /\b[0-9a-f]{16,}\b/;
@@ -73,7 +77,7 @@ interface StagedNote {
chroniclesId: string;
chroniclesPath: string;
status: string; // 'pending' | 'note_created'
- error: string | null;
+ error?: string | null;
}
export class ImporterClient {
@@ -97,7 +101,10 @@ export class ImporterClient {
*
* Designed for my own Notion export, and assumes sync will be called afterwards.
*/
- import = async (importDir: string) => {
+ import = async (
+ importDir: string,
+ sourceType: SourceType = SourceType.Other,
+ ) => {
await this.clearImportTables();
const importerId = uuidv7obj().toHex();
const chroniclesRoot = await this.ensureRoot();
@@ -126,23 +133,65 @@ export class ImporterClient {
}
console.log("importing directory", importDir);
- await this.stageNotes(importDir, importerId, chroniclesRoot);
- await this.processStagedNotes(chroniclesRoot);
+
+ // todo: also create a NotesImportResolver to handle note links rather than mixing
+ // into this importer class
+ const resolver = new FilesImportResolver(this.knex, importerId, this.files);
+
+ // stage all notes and files in the import directory for processing.
+ // we do this in two steps so we can generate mappings of source -> dest
+ // for links and files, and then update them in the second pass.
+ await this.stageNotes(
+ importDir,
+ importerId,
+ chroniclesRoot,
+ sourceType,
+ resolver,
+ );
+
+ await this.processStagedNotes(chroniclesRoot, sourceType, resolver);
};
- // stage all notes and files in the import directory for processing.
- // we do this in two steps so we can generate mappings of source -> dest
- // for links and files, and then update them in the second pass.
+ // pre-process all notes and files in the import directory, tracking in the import tables
+ // for later processing
private stageNotes = async (
importDir: string,
importerId: string,
chroniclesRoot: string, // absolute path to chronicles root dir
+ sourceType: SourceType,
+ resolver: FilesImportResolver,
) => {
// for processNote; maps the original folder path to the fixed name
const journalsMapping: Record<string, string> = {};
- for await (const file of Files.walk(importDir, () => true, {})) {
- await this.stageNote(file, importDir, importerId, journalsMapping);
+ for await (const file of Files.walk(
+ importDir,
+ // todo: Skip some directories (e.g. .git, .vscode, etc.)
+ (filestats) => {
+ // Skip directories, symbolic links, etc.
+ if (!filestats.stats.isFile()) return false;
+
+ const name = path.basename(filestats.path);
+
+ // Skip hidden files and directories
+ if (name.startsWith(".")) return false;
+ if (SKIPPABLE_FILES.has(name)) return false;
+
+ return true;
+ },
+ {},
+ )) {
+ if (file.path.endsWith(".md")) {
+ await this.stageNote(
+ file,
+ importDir,
+ importerId,
+ journalsMapping,
+ sourceType,
+ );
+ } else {
+ resolver.stageFile(file);
+ }
}
};
@@ -152,47 +201,43 @@ export class ImporterClient {
importDir: string,
importerId: string,
journals: Record<string, string>, // mapping of original folder path to journal name
+ sourceType: SourceType,
) => {
- const { ext, name, dir } = path.parse(file.path);
-
- // Skip hidden files and directories
- if (name.startsWith(".")) return;
- if (SKIPPABLE_FILES.has(name)) return;
-
- // Skip directories, symbolic links, etc.
- if (!file.stats.isFile()) return;
-
- // Only process markdown files
- if (ext !== ".md") return;
-
- // todo: handle repeat import, specifically if the imported folder / file already exists;
- // b/c that may happen when importing multiple sources...
+ const { name, dir } = path.parse(file.path);
// todo: sha comparison
const contents = await Files.read(file.path);
- const [, notionId] = stripNotionIdFromTitle(name);
try {
// todo: fallback title to filename - uuid
- const { frontMatter, body, title } = parseTitleAndFrontMatter(contents);
+ const { frontMatter, body, title } = parseTitleAndFrontMatter(
+ contents,
+ name,
+ sourceType,
+ );
+
const journalName = this.inferJournalName(
dir,
importDir,
journals,
+ sourceType,
// See notes in inferOrGenerateJournalName; this is very specific
// to my Notion export.
frontMatter.Category,
);
- // In a directory that was pre-formatted by Chronicles, this should not
- // be needed. Will leave here as a reminder when I do the more generalized
- // import routine.
+ // Prefer front-matter supplied create and update times, but fallback to file stats
+ // todo: check updatedAt, "Updated At", "Last Edited", etc. i.e. support more possible
+ // front-matter keys for dates; probably needs to be configurable:
+ // 1. Which key(s) to check
+ // 2. Whether to use birthtime or mtime
+ // 3. Which timezone to use
+ // 4. Whether to use the front-matter date or the file date
if (!frontMatter.createdAt) {
- frontMatter.createdAt = file.stats.ctime.toISOString();
+      frontMatter.createdAt = (
+        file.stats.birthtime.getTime() > 0 ? file.stats.birthtime : file.stats.mtime
+      ).toISOString();
}
- // todo: check updatedAt Updated At, Last Edited, etc.
- // createdAt
if (!frontMatter.updatedAt) {
frontMatter.updatedAt = file.stats.mtime.toISOString();
}
@@ -200,18 +245,12 @@ export class ImporterClient {
// todo: handle additional kinds of frontMatter; just add a column for them
// and ensure they are not overwritten when editing existing files
// https://github.com/cloverich/chronicles/issues/127
-
- const mdast = stringToMdast(body);
-
- await this.stageNoteFiles(importerId, importDir, file.path, mdast);
-
const chroniclesId = uuidv7obj().toHex();
- const importItem = {
+ const stagedNote: StagedNote = {
importerId,
chroniclesId: chroniclesId,
// hmm... what am I going to do with this? Should it be absolute to NOTES_DIR?
chroniclesPath: `${path.join(journalName, chroniclesId)}.md`,
- sourceId: notionId,
sourcePath: file.path,
title,
content: body,
@@ -220,7 +259,12 @@ export class ImporterClient {
status: "pending",
};
- await this.knex("import_notes").insert(importItem);
+ if (sourceType === SourceType.Notion) {
+ const [, notionId] = stripNotionIdFromTitle(name);
+ stagedNote.sourceId = notionId;
+ }
+
+ await this.knex("import_notes").insert(stagedNote);
} catch (e) {
// todo: this error handler is far too big, obviously
console.error("Error processing note", file.path, e);
@@ -228,7 +272,11 @@ export class ImporterClient {
};
// Second pass; process all staged notes and files
- private processStagedNotes = async (chroniclesRoot: string) => {
+ private processStagedNotes = async (
+ chroniclesRoot: string,
+ sourceType: SourceType,
+ resolver: FilesImportResolver,
+ ) => {
await this.ensureRoot();
const { id: importerId, importDir } = await this.knex("imports")
.where({
@@ -243,9 +291,8 @@ export class ImporterClient {
console.log("Processing import", importerId, importDir);
}
- await this.moveStagedFiles(chroniclesRoot, importerId, importDir);
- const filesMapping = await this.movedFilePaths(importerId);
const linkMapping = await this.noteLinksMapping(importerId);
+ const wikiLinkMapping = await this.noteLinksWikiMapping(importerId);
const items = await this.knex("import_notes").where({
importerId,
@@ -254,14 +301,21 @@ export class ImporterClient {
for await (const item of items) {
const frontMatter = JSON.parse(item.frontMatter);
- // todo: can I store the mdast in JSON? If so, should I just do this on the first
- // pass since I already parsed it to mdast once?
const mdast = stringToMdast(item.content) as any as mdast.Root;
- this.updateNoteLinks(mdast, item, linkMapping);
+ await this.updateNoteLinks(mdast, item, linkMapping, wikiLinkMapping);
+
+ // NOTE: A bit hacky: When we update file links, we also mark the file as referenced
+ // Then, we only move (copy) referenced files, and mark the remainder as orphaned.
+ // So these two calls must go in order:
+ await resolver.updateFileLinks(item.sourcePath, mdast);
+ await resolver.moveStagedFiles(chroniclesRoot, importerId, importDir);
- // chris: if item.sourcePath isn't the same sourcePath used to make the file link the first
- // time maybe wont be right. I changed a lot of code without testing yet.
- this.updateFileLinks(item.sourcePath, mdast, filesMapping);
+ this.convertWikiLinks(mdast);
+
+ // process tags into front matter
+ frontMatter.tags = Array.from(
+ new Set(this.processAndConvertTags(mdast, frontMatter.tags || [])),
+ );
// with updated links we can now save the document
try {
@@ -270,7 +324,7 @@ export class ImporterClient {
id: item.chroniclesId,
journal: item.journal, // using name as id
content: mdastToString(mdast),
- title: item.title, //stripNotionIdFromTitle(name),
+ title: item.title,
tags: frontMatter.tags || [],
createdAt: frontMatter.createdAt,
updatedAt: frontMatter.updatedAt,
@@ -345,6 +399,26 @@ export class ImporterClient {
return linkMapping;
};
+ // Pull all staged notes and generate a mapping of original note title
+ // to the new file path (chroniclesPath). This is used to update
+ // wikilinks that point to other notes to chronicles-style markdown links.
+ private noteLinksWikiMapping = async (importerId: string) => {
+    let linkMapping: Record<string, { journal: string; chroniclesId: string }> =
+      {};
+
+ const importedItems = await this.knex("import_notes")
+ .where({ importerId })
+ .select("title", "journal", "chroniclesId");
+
+ for (const item of importedItems) {
+ if ("error" in item && item.error) continue;
+ const { journal, chroniclesId, title } = item;
+ linkMapping[title] = { journal, chroniclesId };
+ }
+
+ return linkMapping;
+ };
+
// check if a markdown link is a link to a (markdown) note
private isNoteLink = (url: string) => {
// we are only interested in markdown links
@@ -359,8 +433,13 @@ export class ImporterClient {
private updateNoteLinks = async (
mdast: mdast.Root | mdast.Content,
item: StagedNote,
+ // mapping of sourcePath to new journal and chroniclesId
linkMapping: Record<string, { journal: string; chroniclesId: string }>,
+ // mapping of note title to new journal and chroniclesId
+    linkMappingWiki: Record<string, { journal: string; chroniclesId: string }>,
) => {
+ // todo: update ofmWikilink
+ // todo: update links that point to local files
if (mdast.type === "link" && this.isNoteLink(mdast.url)) {
const url = decodeURIComponent(mdast.url);
const sourceFolderPath = path.dirname(item.sourcePath);
@@ -373,9 +452,20 @@ export class ImporterClient {
mdast.url = `../${mapped.journal}/${mapped.chroniclesId}.md`;
}
+ if (mdast.type === "ofmWikilink") {
+ const title = mdast.value;
+ const mapped = linkMappingWiki[title];
+
+ if (!mapped) return;
+
+ // NOTE: This updates the url, but assumes the node type
+      // will be converted to a regular link in a later step
+ mdast.url = `../${mapped.journal}/${mapped.chroniclesId}.md`;
+ }
+
if ("children" in mdast) {
for (const child of mdast.children as any) {
- this.updateNoteLinks(child, item, linkMapping);
+ this.updateNoteLinks(child, item, linkMapping, linkMappingWiki);
}
}
};
@@ -411,6 +501,7 @@ export class ImporterClient {
importDir: string,
// cache / unique names checker (for when we have to generate name)
journals: Record<string, string>,
+ sourceType: SourceType,
category?: string,
): string => {
// In _my_ Notion usage, most of my notes were in a "Documents" database and I
@@ -454,11 +545,15 @@ export class ImporterClient {
.split(path.sep)
// if leading with path.sep, kick out ''
.filter(Boolean)
- // Strip notionId from each part
- // "Documents abc123eft" -> "Documents"
.map((part) => {
- const [folderNameWithoutId] = stripNotionIdFromTitle(part);
- return folderNameWithoutId;
+ // Strip notionId from each part
+ // "Documents abc123eft" -> "Documents"
+ if (sourceType === SourceType.Notion) {
+ const [folderNameWithoutId] = stripNotionIdFromTitle(part);
+ return folderNameWithoutId;
+ } else {
+ return part;
+ }
});
}
@@ -506,218 +601,50 @@ export class ImporterClient {
return journalName;
};
- // Everything but copy file from validateAndMoveFile,
- // return generated ID and dest filelname and whether it was resolved,
- // store this in staging table in stageFile.
- private fileExists = async (
- resolvedPath: string,
- importDir: string,
- ): Promise<[null, string] | [string, null]> => {
- // Check if file is contained within importDir to prevent path traversal
- if (!resolvedPath.startsWith(importDir))
- return [null, "Potential path traversal detected"];
-
- // Check if the file exists
- if (!fs.existsSync(resolvedPath))
- return [null, "Source file does not exist"];
-
- // Check if file has read permissions
- try {
- await fs.promises.access(resolvedPath, fs.constants.R_OK);
- } catch {
- return [null, "No read access to the file"];
- }
-
- return [resolvedPath, null];
- };
-
- // Because I keep forgetting extension already has a . in it, etc.
- // Returns relative or absolute path based which one attachmentsPath
- // Chronicles file references are always ../_attachments/chroniclesId.ext as
- // of this writing.
- private makeDestinationFilePath = (
- attachmentsPath: string,
- chroniclesId: string,
- extension: string,
- ) => {
- return path.join(attachmentsPath, `${chroniclesId}${extension}`);
- };
-
- // Mdast helper to determine if a node is a file link
- isFileLink = (
- mdast: mdast.Content | mdast.Root,
- ): mdast is mdast.Image | mdast.Link => {
- return (
- (mdast.type === "image" || mdast.type === "link") &&
- !this.isNoteLink(mdast.url) &&
- !/^(https?|mailto|#|\/|\.|tel|sms|geo|data):/.test(mdast.url)
- );
- };
-
- /**
- * Resolve a file link to an absolute path, which we use as the primary key
- * in the staging table for moving files; can be used to check if file was
- * already moved, and to fetch the destination id for the link when updating
- * the link in the document.
- *
- * @param noteSourcePath - absolute path to the note that contains the link
- * @param url - mdast.url of the link
- */
- private cleanFileUrl = (noteSourcePath: string, url: string): string => {
- const urlWithoutQuery = url.split(/\?/)[0] || "";
- return decodeURIComponent(
- // todo: should we also normalize here?
- path.normalize(
- path.resolve(path.dirname(noteSourcePath), urlWithoutQuery),
- ),
- );
- };
-
- // Sanitize the url and stage it into the import_files table
- private stageFile = async (
- importerId: string,
- url: string, // mdast.url of the link
- noteSourcePath: string, // path to the note that contains the link
- // Might need this if we validate before staging
- importDir: string, // absolute path to import directory
- ) => {
- const resolvedUrl = this.cleanFileUrl(noteSourcePath, url);
-
- // todo: sourcePathResolved is the primary key; should be unique; but we don't need to error
- // here if it fails to insert; we can skip because we only need to stage and move it once
- // IDK what the error signature here is.
- try {
- await this.knex("import_files").insert({
- importerId: importerId,
- sourcePathResolved: resolvedUrl,
- chroniclesId: uuidv7obj().toHex(),
- extension: path.extname(resolvedUrl),
- });
- } catch (err: any) {
- // file referenced more than once in note, or in more than one notes; if import logic
- // is good really dont even need to log this, should just skip
- if ("code" in err && err.code === "SQLITE_CONSTRAINT_PRIMARYKEY") {
- console.log("skipping file already staged", resolvedUrl);
- } else {
- throw err;
- }
- }
- };
-
- // Move all staged files to _attachments (if pending)
- private moveStagedFiles = async (
- chroniclesRoot: string,
- importerId: string,
- importDir: string,
- ) => {
- const files = await this.knex("import_files").where({
- importerId,
- status: "pending",
- });
-
- const attachmentsDir = path.join(chroniclesRoot, "_attachments");
- await fs.promises.mkdir(attachmentsDir, { recursive: true });
-
- for await (const file of files) {
- const { sourcePathResolved, extension, chroniclesId } = file;
-
- // todo: convert to just err checking
- let [_, err] = await this.fileExists(sourcePathResolved, importDir);
-
- if (err != null) {
- console.error("this.fileExists test fails for ", sourcePathResolved);
- await this.knex("import_files")
- .where({ importerId, sourcePathResolved })
- .update({ error: err });
- continue;
- }
-
- const destinationFile = this.makeDestinationFilePath(
- attachmentsDir,
- chroniclesId,
- extension,
- );
-
- try {
- await this.files.copyFile(sourcePathResolved, destinationFile);
- } catch (err) {
- await this.knex("import_files")
- .where({ importerId, sourcePathResolved })
- .update({ error: (err as Error).message });
- continue;
- }
-
- await this.knex("import_files")
- .where({ importerId, sourcePathResolved })
- .update({ status: "complete" });
- }
- };
-
- // Fetch all files successfully moved to _attachments, and return a mapping
- // of the original source path to the new filename so document file links can be updated
- private movedFilePaths = async (importerId: string) => {
- const files = await this.knex("import_files").where({
- importerId,
- status: "complete",
- });
-
- // todo: Can pass in chronicles root; but since convention is always ../_attachments/file, should
- // always be able to re-construct this...
-    const mapping: Record<string, string> = {};
- for (const file of files) {
- mapping[file.sourcePathResolved] = this.makeDestinationFilePath(
- "../_attachments",
- file.chroniclesId,
- file.extension,
- );
- }
-
- return mapping;
- };
-
- /**
- * For each link in the file that points to a file, move the file to _attachments,
- * rename the file based on chronicles conventions, and update the link in the file.
- *
- * @param importDir - The root import directory\
- * @param sourcePath - The path to the source file that contains the link; used to resolve relative links
- * @param mdast
- */
- private stageNoteFiles = async (
- importerId: string,
- importDir: string,
- sourcePath: string,
- mdast: mdast.Content | mdast.Root,
-  ): Promise<void> => {
- if (this.isFileLink(mdast)) {
- await this.stageFile(importerId, mdast.url, sourcePath, importDir);
+ // note: This assumes the mdast.url property of wikiembedding / link is already
+ // updated by the updateFileLinks routine.
+ private convertWikiLinks = (mdast: mdast.Content | mdast.Root) => {
+ // todo: also handle ofmWikiLink
+ if (mdast.type === "ofmWikiembedding") {
+ // todo: figure out what to do about hash
+ (mdast as any).type = "image";
+ mdast.title = mdast.value;
+ mdast.alt = mdast.value;
+      // url was already rewritten by updateFileLinks
+ } else if (mdast.type === "ofmWikilink") {
+ mdast.children = [{ type: "text", value: mdast.value }];
+ (mdast as any).type = "link";
} else {
if ("children" in mdast) {
- let results = [];
- for await (const child of mdast.children as any) {
- await this.stageNoteFiles(importerId, importDir, sourcePath, child);
+ for (const child of mdast.children as any) {
+ this.convertWikiLinks(child);
}
}
}
};
- // use the mapping of moved files to update the file links in the note
- private updateFileLinks = (
- noteSourcePath: string,
+ // 1. Find and collect all ofmTags, so they can be added to front matter
+ // 2. Convert ofmTags to text nodes otherwise later Slate will choke on them, since
+ // Chronicles does not (yet) natively support inline tags
+ // todo(test): Tag with #hash remains in document; tag without hash is stored in db
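+  // e.g. (sketch) for a body "a #b c": the ofmTag node becomes the literal text
+  // "#b" in the saved markdown, while "b" (hash stripped) is returned for
+  // merging into frontMatter.tags.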
+ private processAndConvertTags = (
mdast: mdast.Content | mdast.Root,
-    filesMapping: Record<string, string>,
- ) => {
- if (this.isFileLink(mdast)) {
- const url = this.cleanFileUrl(noteSourcePath, mdast.url);
- if (url in filesMapping) {
- mdast.url = filesMapping[url];
- }
+ tags: string[] = [],
+ ): string[] => {
+ if (mdast.type === "ofmTag") {
+ (mdast as any).type = "text";
+ const tag = mdast.value; // without hash
+ mdast.value = `#${mdast.value}`;
+ tags.push(tag);
+ return tags;
} else {
if ("children" in mdast) {
- for (const child of mdast.children as any) {
- this.updateFileLinks(noteSourcePath, child, filesMapping);
+ for (const child of mdast.children as any[]) {
+ this.processAndConvertTags(child, tags);
}
}
+
+ return tags;
}
};
}
diff --git a/src/preload/client/importer/FilesImportResolver.ts b/src/preload/client/importer/FilesImportResolver.ts
new file mode 100644
index 0000000..cd90f46
--- /dev/null
+++ b/src/preload/client/importer/FilesImportResolver.ts
@@ -0,0 +1,283 @@
+import fs from "fs";
+import { Knex } from "knex";
+import mdast from "mdast";
+import path from "path";
+import { uuidv7obj } from "uuidv7";
+import { PathStatsFile } from "../../files";
+import { IFilesClient } from "../files";
+
+const ATTACHMENTS_DIR = "_attachments";
+
+// Manages the staging and moving of files during the import process, and
+// resolves file links in markdown notes to the chronicles path, so they can
+// be converted to markdown links.
+// The import process is as follows:
+// 1. stageFile: Stage all files in the import directory, and store their metadata in the
+// import_files table.
+// 2. updateFileLinks: Update all file links in the notes to the chronicles path, so they can be
+// converted to markdown links.
+// 3. moveStagedFiles: Move all staged files to the chronicles directory, and update their status
+// in the import_files table.
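+//
+// Usage sketch (the importer drives these calls; `walk` stands in for Files.walk):
+//   const resolver = new FilesImportResolver(knex, importerId, filesClient);
+//   for await (const f of walk(importDir)) await resolver.stageFile(f); // non-.md files
+//   await resolver.updateFileLinks(note.sourcePath, mdast); // per staged note
+//   await resolver.moveStagedFiles(chroniclesRoot, importerId, importDir);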
+export class FilesImportResolver {
+ private knex: Knex;
+ private importerId: string;
+ private filesclient: IFilesClient;
+
+ constructor(knex: Knex, importerId: string, filesclient: IFilesClient) {
+ this.knex = knex;
+ this.importerId = importerId;
+ this.filesclient = filesclient;
+ }
+
+ // Resolve wikilink to markdown link (w/ chronicles id), and mark the staged
+ // file as used (so it will be moved in the next step).
+  // [[2024-11-17-20241118102000781.webp]] -> ../_attachments/<chroniclesId>.webp
+ private resolveToChroniclesByName = async (
+ name: string,
+  ): Promise<string | undefined> => {
+ // check db for chronicles id matching name, if any
+ const result = await this.knex("import_files")
+ .where({ filename: name })
+ .select("chroniclesId", "extension")
+ .first()!;
+
+ if (!result) return;
+
+ const { chroniclesId, extension } = result;
+ const updatedPath = this.makeDestinationFilePath(chroniclesId, extension);
+
+ if (updatedPath) {
+ await this.knex("import_files").where({ chroniclesId }).update({
+ status: "referenced",
+ });
+
+ return updatedPath;
+ }
+ };
+
+ // Resolve a file path (from a markdown link) from its original path to the
+ // chronicles path, and mark the staged file as used (so it will be moved in
+ // the next step).
+  // /path/to/file.jpg -> ../_attachments/<chroniclesId>.jpg
+ private resolveToChroniclesByPath = async (
+ path: string,
+  ): Promise<string | undefined> => {
+ const result = await this.knex("import_files")
+ .where({ sourcePathResolved: path })
+ .select("chroniclesId", "extension")
+ .first()!;
+
+ if (!result) return;
+
+ const { chroniclesId, extension } = result;
+ const updatedPath = this.makeDestinationFilePath(chroniclesId, extension);
+
+ if (updatedPath) {
+ await this.knex("import_files").where({ chroniclesId }).update({
+ status: "referenced",
+ });
+
+ return updatedPath;
+ }
+ };
+
+ /**
+ * Resolve a file link to an absolute path, which we use as the primary key
+ * in the staging table for moving files; can be used to check if file was
+ * already moved, and to fetch the destination id for the link when updating
+ * the link in the document.
+ *
+ * @param noteSourcePath - absolute path to the note that contains the link
+ * @param url - mdast.url of the link
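+   *
+   * e.g. (a sketch): a note at /import/notes/a.md linking
+   * "attachments/img%20one.png?raw=1" resolves to
+   * "/import/notes/attachments/img one.png"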
+ */
+ private resolveMarkdownFileLinkToAbsPath = (
+ noteSourcePath: string,
+ url: string,
+ ): string => {
+ const urlWithoutQuery = url.split(/\?/)[0] || "";
+ return decodeURIComponent(
+ path.normalize(
+ path.resolve(path.dirname(noteSourcePath), urlWithoutQuery),
+ ),
+ );
+ };
+
+ private resolveMarkdownFileLinkToChroniclesPath = async (
+ noteSourcePath: string,
+ url: string,
+  ): Promise<string | undefined> => {
+ const absPath = this.resolveMarkdownFileLinkToAbsPath(noteSourcePath, url);
+ return await this.resolveToChroniclesByPath(absPath);
+ };
+
+ // Add a file to the import_files table, so it can be moved in the next step;
+ // generate a chronicles id so the future chronicles path can be resolved prior
+ // to moving the file.
+ stageFile = async (filestats: PathStatsFile) => {
+ const ext = path.extname(filestats.path);
+
+ try {
+ await this.knex("import_files").insert({
+ importerId: this.importerId,
+      sourcePathResolved: filestats.path, // todo: rename to pathAbs or pathRelative
+ filename: path.basename(filestats.path, ext),
+ chroniclesId: uuidv7obj().toHex(),
+ extension: ext,
+ });
+ } catch (err: any) {
+ // file referenced more than once in note, or in more than one notes; if import logic
+ // is good really dont even need to log this, should just skip
+ if ("code" in err && err.code === "SQLITE_CONSTRAINT_PRIMARYKEY") {
+ console.log("skipping file already staged", filestats.path);
+ } else {
+ throw err;
+ }
+ }
+ };
+
+ // todo: Move this back out to importer, just copy pasted to get things working
+ // check if a markdown link is a link to a (markdown) note
+ private isNoteLink = (url: string) => {
+ // we are only interested in markdown links
+ if (!url.endsWith(".md")) return false;
+
+ // ensure its not a url with an .md domain
+ if (url.includes("://")) return false;
+
+ return true;
+ };
+
+ // Determine if an mdast node is a file link
+ isFileLink = (
+ mdast: mdast.Content | mdast.Root,
+ ): mdast is mdast.Image | mdast.Link | mdast.OfmWikiEmbedding => {
+ return (
+ (((mdast.type === "image" || mdast.type === "link") &&
+ !this.isNoteLink(mdast.url)) ||
+ mdast.type === "ofmWikiembedding") &&
+ !/^(https?|mailto|#|\/|\.|tel|sms|geo|data):/.test(mdast.url)
+ );
+ };
+
+  // ../_attachments/<chroniclesId>.ext
+ private makeDestinationFilePath = (
+ chroniclesId: string,
+ extension: string,
+ ) => {
+ return path.join("..", ATTACHMENTS_DIR, `${chroniclesId}${extension}`);
+ };
+
+ // use the previously generated list of staged files to update file links in the note,
+ // specifically to resolve ![[WikiLinks]] to the chronicles path, so they can be
+  // converted to markdown links.
+ // NOTE: MUST have called stageFile on ALL files before calling this!!!
+ updateFileLinks = async (
+ noteSourcePath: string,
+ mdast: mdast.Content | mdast.Root,
+ ) => {
+ if (this.isFileLink(mdast)) {
+ // note: The mdast type will be updated in convertWikiLinks
+ // todo: handle ofmWikiLink
+ if (mdast.type === "ofmWikiembedding") {
+ const updatedUrl = await this.resolveToChroniclesByName(mdast.value);
+ if (updatedUrl) {
+ mdast.url = updatedUrl;
+ }
+ } else {
+ const updatedUrl = await this.resolveMarkdownFileLinkToChroniclesPath(
+ noteSourcePath,
+ mdast.url,
+ );
+ if (updatedUrl) {
+ mdast.url = updatedUrl;
+ }
+ }
+ } else {
+ if ("children" in mdast) {
+ for (const child of mdast.children as any) {
+ await this.updateFileLinks(noteSourcePath, child);
+ }
+ }
+ }
+ };
+
+  // rudimentary check to see if a file exists and is readable
+ private safeAccess = async (
+ resolvedPath: string,
+ importDir: string,
+ ): Promise<[null, string] | [string, null]> => {
+ // Check if file is contained within importDir to prevent path traversal
+ if (!resolvedPath.startsWith(importDir))
+ return [null, "Potential path traversal detected"];
+
+ // Check if the file exists
+ if (!fs.existsSync(resolvedPath))
+ return [null, "Source file does not exist"];
+
+ // Check if file has read permissions
+ try {
+ await fs.promises.access(resolvedPath, fs.constants.R_OK);
+ } catch {
+ return [null, "No read access to the file"];
+ }
+
+ return [resolvedPath, null];
+ };
+
+  // After all files are staged and links updated, move referenced files into the
+  // attachments directory and mark never-referenced files as orphaned.
+ moveStagedFiles = async (
+ chroniclesRoot: string,
+ importerId: string,
+ importDir: string,
+ ) => {
+ // bug: at this point their status is all pending; someone is not awaiting
+ const files = await this.knex("import_files").where({
+ importerId,
+ status: "referenced",
+ });
+
+ const attachmentsDir = path.join(chroniclesRoot, ATTACHMENTS_DIR);
+ await fs.promises.mkdir(attachmentsDir, { recursive: true });
+
+ for await (const file of files) {
+ const { sourcePathResolved, extension, chroniclesId } = file;
+
+ // todo: convert to just err checking
+ let [_, err] = await this.safeAccess(sourcePathResolved, importDir);
+
+ if (err != null) {
+ console.error("this.fileExists test fails for ", sourcePathResolved);
+ await this.knex("import_files")
+ .where({ importerId, sourcePathResolved })
+ .update({ error: err });
+ continue;
+ }
+
+ const destinationFile = path.join(
+ chroniclesRoot,
+ ATTACHMENTS_DIR,
+ `${chroniclesId}${extension}`,
+ );
+
+ try {
+ await this.filesclient.copyFile(sourcePathResolved, destinationFile);
+ await this.knex("import_files")
+ .where({ chroniclesId })
+ .update({ status: "complete", error: null });
+ } catch (err) {
+ console.error("error moving file", chroniclesId, err);
+ await this.knex("import_files")
+ .where({ chroniclesId })
+ .update({ error: (err as Error).message });
+ continue;
+ }
+ }
+
+ // Mark all remaining files as orphaned; can be used to debug import issues,
+ // and potentially also be configurable (i.e. whether to import orphaned files
+ // or not)
+ await this.knex("import_files")
+ .where({ status: "pending", importerId })
+ .update({ status: "orphaned" });
+ };
+}
diff --git a/src/preload/client/importer/SourceType.ts b/src/preload/client/importer/SourceType.ts
new file mode 100644
index 0000000..2c803fc
--- /dev/null
+++ b/src/preload/client/importer/SourceType.ts
@@ -0,0 +1,5 @@
+// Defined in its own module so non-preload code (the preferences view) can import it
+export enum SourceType {
+ Notion = "notion",
+ Other = "other",
+}
diff --git a/src/preload/client/importer/frontmatter.ts b/src/preload/client/importer/frontmatter.ts
index 5af7196..3b3f290 100644
--- a/src/preload/client/importer/frontmatter.ts
+++ b/src/preload/client/importer/frontmatter.ts
@@ -1,4 +1,5 @@
import yaml from "yaml";
+import { SourceType } from "../importer/SourceType";
interface ParseTitleAndFrontMatterRes {
title: string;
@@ -12,11 +13,31 @@ interface RawExtractFrontMatterResponse {
body: string;
}
+export const parseTitleAndFrontMatter = (
+ contents: string,
+ filename: string,
+ sourceType: SourceType,
+): ParseTitleAndFrontMatterRes => {
+ // My Notion files were all in a database and hence exported with
+ // a kind of "front matter"; can pull title from that.
+  if (sourceType === SourceType.Notion) {
+ return parseTitleAndFrontMatterNotion(contents);
+ } else {
+ // Otherwise for other import types, for now, make no attempt at finding
+ // or parsing front matter.
+ return {
+ title: filename,
+ frontMatter: {},
+ body: contents,
+ };
+ }
+};
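+
+// e.g. (sketch): with SourceType.Other, a file "Meeting Notes.md" yields title
+// "Meeting Notes", empty front matter, and an untouched body; only
+// SourceType.Notion attempts the extraction below.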
+
/**
* Parses a string of contents into a title, front matter, and body; strips title / frontmatter
* from the body.
*/
-export function parseTitleAndFrontMatter(
+function parseTitleAndFrontMatterNotion(
contents: string,
): ParseTitleAndFrontMatterRes {
const { title, rawFrontMatter, body } = extractRawFrontMatter(contents);
diff --git a/src/preload/client/importer/importer.test.ts b/src/preload/client/importer/importer.test.ts
index 0e23b4f..2f9a6bb 100644
--- a/src/preload/client/importer/importer.test.ts
+++ b/src/preload/client/importer/importer.test.ts
@@ -2,19 +2,22 @@
// While I dev. May keep this around, but it's pretty hacky and far
// from complete or a real test suite.
import { diff } from "deep-object-diff";
-import { parseMarkdown } from "../../../markdown";
import { ImporterClient } from "../importer";
+import { SourceType } from "./SourceType";
import { parseTitleAndFrontMatter } from "./frontmatter";
export function runTests(importer: ImporterClient) {
runFrontmatterTests(importer);
- testIsFileLink(importer);
}
// to the console; can convert to real tests at the end.
function runFrontmatterTests(importer: ImporterClient) {
for (const testCase of titleFrontMatterTestCases) {
- const result = parseTitleAndFrontMatter(testCase.input);
+ const result = parseTitleAndFrontMatter(
+ testCase.input,
+ "Dont use this title",
+ SourceType.Notion,
+ );
if (!result.frontMatter) {
console.error("FAILED:", testCase.expected.title);
@@ -360,37 +363,3 @@ export const inferOrGenerateJournalNameTestCases = [
output: "TODO_...", // shorter
},
];
-
-// todo: hacky AF, but just to get some tests running
-function testIsFileLink(importer: ImporterClient) {
- function getLinkLike(mdast: any) {
- if (mdast.type !== "root" && mdast.url) {
- return mdast;
- } else if (mdast.children) {
- return mdast.children.map(getLinkLike);
- }
- }
-
- // parse links out of the markdown string
- // maybe better to just manually create mdast objects
- const links = getLinkLike(
- parseMarkdown(`
-# Test case
-
-![file](sers/cloverich/Documents/chronicles-development/export)
-[Google Software Engineer Interview](https://igotanoffer.com/blogs/tech/google-software-engineer-interview)
-![2020%20in%20review%204911c57a21aa4a0daa47c2e5f8d9df98/IMG_20200104_112600.jpg](2020%20in%20review%204911c57a21aa4a0daa47c2e5f8d9df98/IMG_20200104_112600.jpg)
-![file](file:///Users/cloverich/Documents/chronicles-development/export)
- `),
- )
- .flat(Infinity)
- .filter(Boolean);
-
- [true, false, true, true].forEach((isFileLink, i) => {
- if (isFileLink !== importer.isFileLink(links[i])) {
- console.error("FAILED", links[i].url);
- } else {
- console.info("SUCCESS", links[i].url);
- }
- });
-}
diff --git a/src/preload/client/preferences.ts b/src/preload/client/preferences.ts
index ef55fea..ca80a39 100644
--- a/src/preload/client/preferences.ts
+++ b/src/preload/client/preferences.ts
@@ -78,9 +78,10 @@ export class PreferencesClient {
return new Promise<{ error?: string; value?: string }>(
(resolve, reject) => {
ipcRenderer.once("directory-selected", (event, arg) => {
- console.log("directory-selected", arg);
if (arg.error) {
reject(arg.error);
+ } else if (!arg.value) {
+ resolve({ value: undefined });
} else {
this.set("NOTES_DIR", arg.value);
resolve(arg.value);
diff --git a/src/preload/importer/legacy/importChronicles.ts b/src/preload/importer/legacy/importChronicles.ts
deleted file mode 100644
index b908f12..0000000
--- a/src/preload/importer/legacy/importChronicles.ts
+++ /dev/null
@@ -1,164 +0,0 @@
-// This importer is a legacy of the prior file-based format, which IIRC was something like:
-
-// /my_journal
-// 2024/
-// /04
-// /01
-// # etc
-// Because I did not use front-matter, and because the ctime / mtime could be changed by bulk file moves / zip /
-// cloud sync, I relied on the filename for the date of the note. I think it makes sense to leave this until
-// I'm satisfied with the final format of Chronicles data, and especially until I introduce markdown importers.
-//
-//
-// import fs from "fs";
-// import { DateTime } from "luxon";
-// import path from "path";
-// import { stringToMdast } from "../../markdown";
-// import { Files } from "../files";
-// import { shouldIndexDay } from "./legacy/indexer";
-
-// import { create } from "../client";
-// const client = create();
-
-// async function findOrCreate(name: string) {
-// const journals = await client.journals.list();
-// // if (journals.includes)
-// const existing = journals.find((j) => j.name === name);
-// if (existing) return existing;
-
-// return await client.journals.create({ name });
-// }
-
-// // Copy pasta from shouldIndex and exported for my importChronicles script...
-// function dateFromPrevalidatedFilepath(filepath: string) {
-// const { ext, name } = path.parse(filepath);
-// if (ext !== ".md") return false;
-// if (name.startsWith(".")) return false;
-
-// // NOTE: This manages to respect the timezone, so if I pull 2020-05-01,
-// // turn it into a date, then stringify it, it gives me a 5 hour (CDT) offset.
-// // Filename (without extension) must be a valid date
-// const parsedDate = DateTime.fromISO(name);
-// if (name !== parsedDate.toISODate()) return false;
-
-// return parsedDate;
-// }
-
-// // Import documents from my old file based system, which used markdown files
-// // in a one note per day system: /my-journal/2020/05/01/2020-05-01.md
-// export async function importChronicles(notesDir: string) {
-// // await new Promise((res) => setTimeout(res, 2000));
-// // list all journals in my notes directory
-// const journals = fs
-// .readdirSync(notesDir)
-// // excludes '.DS_Store' and other misc. directories
-// .filter((folder) => !folder.startsWith("."));
-
-// // walk journals one by one
-// for (const journal of journals) {
-// const jourrnalModel = await findOrCreate(journal);
-
-// for await (const file of Files.walk(
-// path.join(notesDir, journal),
-// shouldIndexDay,
-// )) {
-// const parsed = await loadDocument(file.path);
-// if (parsed.mdast.type !== "root") throw new Error("oh my");
-
-// for await (const document of splitOnTitle(parsed.contents)) {
-// // skip empty documents...
-// if (!document.content.length) continue;
-
-// const date = dateFromPrevalidatedFilepath(file.path);
-// if (!date) throw new Error(`expected valid date for ${file.path}`);
-
-// // todo: consider adding a `date` field, and using that as the definitive date
-// // then createdAt and updatedAt could maintain "When was this document created"
-// // and make back-dating a bit more sensible...
-// const doc = await client.documents.save({
-// journal: jourrnalModel!.id,
-// createdAt: date.toISO()!,
-// updatedAt: date.toISO()!,
-// content: document.content,
-// title: document.title,
-// tags: [], // todo
-// });
-// console.log("created", doc.id);
-// }
-// }
-// }
-// }
-
-// async function loadDocument(filepath: string) {
-// // date?
-// const contents = await Files.read(filepath);
-// return {
-// contents: contents,
-// mdast: stringToMdast(contents),
-// };
-// }
-
-// // Split a document into multiple documents by presence of a top-level
-// // markdown heading, i.e. "# This is a heading"
-// function splitOnTitle(
-// content: string,
-// ): Array<{ title: string; content: string }> {
-// const lines = content.split("\n");
-
-// // Clear a few edge cases to simplify the rest of the implementation:
-// // Empty -- return empty array
-// // One document with only a title -- return empty array
-// // One document with only one line -- return one document
-// if (lines.length === 0) return [];
-
-// if (lines.length === 1) {
-// // Drop documents that have only a title and no content
-// if (lines[0].startsWith("# ")) return [];
-// return [{ title: "", content: lines[0] }];
-// }
-
-// function makeDocument(lines: string[]) {
-// const hasTitle = lines[0].startsWith("# ");
-// const document = {
-// title: hasTitle ? lines[0].slice(2) : "",
-// content: hasTitle ? lines.slice(1).join("\n") : lines.join("\n"),
-// };
-
-// document.content = document.content.trim();
-// return document;
-// }
-
-// let nextDocumentLines: string[] = [];
-// const documents: Array<{ title: string; content: string }> = [];
-
-// for (const line of lines) {
-// if (line.startsWith("# ") && nextDocumentLines.length > 0) {
-// // append existing lines as document, then create a new one
-// documents.push(makeDocument(nextDocumentLines));
-// nextDocumentLines = [line];
-// } else {
-// nextDocumentLines.push(line);
-// }
-// }
-
-// // clear the remaining buffered lines
-// if (nextDocumentLines.length) {
-// documents.push(makeDocument(nextDocumentLines));
-// }
-
-// return documents;
-// }
-
-// // Now that I import stuff that uses electron uhh...
-// // this must be called from an electron process...
-// // ... but I also use typescript...
-// // call from renderer? lmao.
-// // importChronicles().then(
-// // () => {
-// // process.exit(0);
-// // },
-// // (err) => {
-// // console.error(err);
-// // process.exit(1);
-// // }
-// // );
diff --git a/src/preload/importer/legacy/indexer.ts b/src/preload/importer/legacy/indexer.ts
deleted file mode 100644
index 13f3260..0000000
--- a/src/preload/importer/legacy/indexer.ts
+++ /dev/null
@@ -1,274 +0,0 @@
-// import { Database } from "better-sqlite3";
-// import { DateTime } from "luxon";
-// import path from "path";
-// import { Root, mdastToString, stringToMdast } from "../../../markdown";
-// import { Files, PathStatsFile } from "../../files";
-
-// /**
-// * NOTE: See comments in importChronicles; this is legacy. Leaving until the importers work is completed at least.
-// */
-// export interface IJournal {
-// // path to root folder
-// url: string;
-// // display name
-// name: string;
-
-// /**
-// * The duration of a single document in a journal.
-// */
-// period: "day" | "week" | "month" | "year";
-// }
-
-// function isISODate(dateStr: string) {
-// const parsedDate = DateTime.fromISO(dateStr);
-// return dateStr === parsedDate.toISODate();
-// }
-
-// const reg = /\d{4}-\d{2}-\d{2}/;
-
-// interface NodeSchema {
-// journal: string; // future: id
-// date: string;
-// type: string; // node type
-// idx: number;
-// attributes: string; // jsonb
-// }
-
-// class IndexParsingError extends Error {
-// constructor(msg: string) {
-// super(msg);
-// this.name = "IndexParsingError";
-// }
-// }
-
-// // legacy
-// class Indexer {
-// private db: Database;
-// constructor(db: Database) {
-// this.db = db;
-// }
-
-// insert = (journal: string, date: string, node: any) => {
-// // NOTE: Lazy work here. I want to serialize most node attributes into a JSON column that
-// // I could eventually search on, like "depth" for heading nodes. But other properties on the node
-// // (like children and and position) I do not need. So, pull them off and discard.
-// // I could delete node.position but I may need node.children in subsequent processing steps, like
-// // when pulling listItem children off of list nodes to independnetly index....
-// // Basically the structure of MDAST is affecting how I process it. Blargh.
-// const { type, children, position, ...atributes } = node;
-
-// let contents: string;
-
-// try {
-// contents = mdastToString(node);
-// } catch (err: any) {
-// throw new IndexParsingError(err);
-// }
-
-// // todo: use auto-increment to track parent node
-// this.db
-// .prepare(
-// "INSERT INTO nodes (journal, date, type, contents, attributes) VALUES (:journal, :date, :type, :contents, :attributes)",
-// )
-// .run({
-// journal,
-// date,
-// type,
-// contents,
-//         attributes: JSON.stringify(attributes),
-// });
-// };
-
-// /**
-//    * De-index a journal's documents
-// *
-// * @param journal - name of journal to remove from indexed nodes table
-// */
-// deindex = async (journal: string) => {
-// const stmt = this.db.prepare("DELETE FROM nodes where journal = :journal");
-// stmt.run({ journal });
-// };
-
-// /**
-// * Re-index a document - e.g. after its been updated
-// * @param journal - name of journal
-// * @param date
-// * @param contents
-// */
-// update = async (journal: string, date: string, contents: string) => {
-// const parsed = stringToMdast(contents);
-// const stmt = this.db.prepare(
-// "DELETE FROM nodes where journal = :journal and date = :date",
-// );
-// stmt.run({ journal, date });
-// await this.indexNode(journal, date, parsed);
-// };
-
-// /**
-// * Recursively index an mdast document
-// *
-// * NOTE: This is a naive strategy to make content searchable by node type.
-// * Little thought has been applied to the right way to index content, and
-// * all the things that go with that.
-// * @param journal
-// * @param date
-// * @param node - TODO: Base node type
-// */
-// indexNode = async (journal: string, date: string, node: Root | any) => {
-//     // Redundant when called by index, since Files.walk's shouldIndex already checks this;
-//     // kept as a defensive guard after a past bug.
-// if (!isISODate(date))
-// throw new Error(
-// `[Indexer.indexNode] Expected an ISO formatted date but got ${date}`,
-// );
-
-// if (node.type !== "root") {
-// try {
-// await this.insert(journal, date, node);
-// } catch (err) {
-// // Because I am recursively indexing _all_ nodeTypes, the remark parser
-// // I am using to stringify node content may not have a "compiler" for a particular
-//         // node: Ex - it compiles a table node, but will choke if passed its child tableRow
-// // node directly. Ignore these errors and simply don't index those child nodes.
-// // Longer term, I'll likely use a different indexing strategy / implementation so
-// // not concerned about this right now.
-// if (err instanceof IndexParsingError) {
-// // ignore
-// } else {
-// console.error(
-//             `Error indexing node for journal ${journal}: it may not show up correctly`,
-// );
-// console.error(err);
-// }
-// }
-// }
-
-// if (!node.children) return;
-
-// for (const child of node.children) {
-// await this.indexNode(journal, date, child);
-// }
-// };
-
-// index = async (journal: IJournal) => {
-// const shouldFunc = getShouldFunc(journal.period);
-
-// for await (const entry of Files.walk(journal.url, shouldFunc)) {
-// console.debug("[Indexer.index] processing entry", entry.path);
-
-// const contents = await Files.read(entry.path);
-// // todo: track parsing errors so you understand why your content
-// // isn't showing up in your journal view (failed to index).
-// try {
-// const parsed = stringToMdast(contents);
-
-//       // BUG ALERT: I was passing `entry.path` as the second argument when it wanted the
-//       // filename, which is an ISO date (e.g. 2020-05-01) under our file naming convention.
-//       // I added `isISODate` to indexNode to catch this.
-// const filename = path.parse(entry.path).name;
-
-// await this.indexNode(journal.name, filename, parsed);
-// } catch (err) {
-// // Log and continue, so we can index remaining journal documents
-// console.error(
-// `[Indexer.index] error indexing entry ${entry.path}`,
-// err,
-// );
-// }
-// }
-// };
-// }
-
-// // BELOW: HELPERS FOR DETERMINING IF A FILE SHOULD BE INDEXED, BASED ON FILENAME
-// // AND THE JOURNAL'S "period" -- day, week, month, year.
-// // SEE Files.walk usage
-
-// // To check for filename structure and directory naming convention
-// // Has match groups for year, month, and filename parts
-// // ex match: /journals/reviews/2020/04/2020-04-01.md
-// const fileformat = /\/(\d{4})\/(\d{2})\/(\d{4})-(\d{2})-\d{2}/;
-
-// function isStartOfWeek(d: DateTime) {
-//   // Sunday (Luxon weeks start Monday); the old day-of-month arithmetic
-//   // broke across month boundaries.
-//   return d.weekday === 7;
-// }
-
-// function isStartOfMonth(d: DateTime) {
-// return d.startOf("month").toISODate() === d.toISODate();
-// }
-
-// function isStartOfYear(d: DateTime) {
-// return d.startOf("year").toISODate() === d.toISODate();
-// }
-
-// // exported for my importChronicles script without much thought
-// export const shouldIndexDay = (file: PathStatsFile) => shouldIndex(file, "day");
-// const shouldIndexWeek = (file: PathStatsFile) => shouldIndex(file, "week");
-// const shouldIndexMonth = (file: PathStatsFile) => shouldIndex(file, "month");
-// const shouldIndexYear = (file: PathStatsFile) => shouldIndex(file, "year");
-
-// function getShouldFunc(period: IJournal["period"]) {
-// switch (period) {
-// case "day":
-// return shouldIndexDay;
-// case "week":
-// return shouldIndexWeek;
-// case "month":
-// return shouldIndexMonth;
-// case "year":
-// return shouldIndexYear;
-// }
-// }
-
-// /**
-// * Should we index a given file?
-// *
-// * @param file - A file yielded by our directory walking function
-// * @param period - The journal "period"
-// */
-// function shouldIndex(file: PathStatsFile, period: IJournal["period"]): boolean {
-// if (file.stats.isDirectory()) return false;
-
-// const { ext, name } = path.parse(file.path);
-// if (ext !== ".md") return false;
-// if (name.startsWith(".")) return false;
-
-// // Filename (without extension) must be a valid date
-// const parsedDate = DateTime.fromISO(name);
-// if (name !== parsedDate.toISODate()) return false;
-
-// if (period === "week") {
-//     if (!isStartOfWeek(parsedDate)) return false;
-// }
-
-// if (period === "month") {
-// if (!isStartOfMonth(parsedDate)) return false;
-// }
-
-// if (period === "year") {
-// if (!isStartOfYear(parsedDate)) return false;
-// }
-
-// // const result = fileformat.exec('journals/foo/2020/02/2020-01-15.md')
-// // Produces an array-like object:
-// // [
-// // '/2020/02/2020-01-15',
-// // '2020',
-// // '02',
-// // '2020',
-// // '01',
-// //   index: 12,
-// //   input: 'journals/foo/2020/02/2020-01-15.md',
-// // groups: undefined
-// // ]
-// // NOTE: It's only array-_like_, and contains only the matched segments
-// const segments = fileformat.exec(file.path);
-
-// // Is it in the correct directory structure?
-// if (!segments) return false;
-// if (segments.length !== 5) return false;
-
-// // File should be in nested directories for its year and month
-// if (segments[1] !== segments[3] || segments[2] !== segments[4]) return false;
-
-// return true;
-// }
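The gating logic deleted with indexer.ts reduces to one question: is the filename an ISO date that falls on the boundary of the journal's period? Below is a minimal sketch under that reading, using Luxon as the original did; `shouldIndexSketch` is a hypothetical name, and the week case uses Luxon's `weekday` in place of the removed day-of-month arithmetic.

```ts
import { DateTime } from "luxon";
import path from "path";

type Period = "day" | "week" | "month" | "year";

// A file is indexed only when its name (sans extension) is a valid ISO date
// falling on the boundary of the journal's period.
function shouldIndexSketch(filePath: string, period: Period): boolean {
  const { ext, name } = path.parse(filePath);
  if (ext !== ".md" || name.startsWith(".")) return false;

  const date = DateTime.fromISO(name);
  if (name !== date.toISODate()) return false; // not a valid ISO date

  switch (period) {
    case "day":
      return true;
    case "week":
      // Sunday; Luxon weeks start on Monday, so Sunday is weekday 7.
      return date.weekday === 7;
    case "month":
      return date.startOf("month").hasSame(date, "day");
    case "year":
      return date.startOf("year").hasSame(date, "day");
  }
}
```

The directory-structure check (year and month folders matching the filename, via the `fileformat` regex) layers on top of this, exactly as in the removed `shouldIndex`.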
diff --git a/src/views/preferences/index.tsx b/src/views/preferences/index.tsx
index 4c12530..fad7785 100644
--- a/src/views/preferences/index.tsx
+++ b/src/views/preferences/index.tsx
@@ -9,8 +9,10 @@ import { observable } from "mobx";
import { observer } from "mobx-react-lite";
import React, { PropsWithChildren, useEffect } from "react";
import { useNavigate } from "react-router-dom";
+import { Select } from "../../components/Select";
import useClient from "../../hooks/useClient";
import { useJournals } from "../../hooks/useJournals";
+import { SourceType } from "../../preload/client/importer/SourceType";
import { Preferences } from "../../preload/client/preferences";
import Titlebar from "../../titlebar/macos";
import * as Base from "../layout";
@@ -21,17 +23,18 @@ const Preferences = observer(() => {
observable({
preferences: {} as Preferences,
loading: true,
+ sourceType: SourceType.Other,
}),
);
const client = useClient();
const navigate = useNavigate();
- async function openDialogNotesDir() {
+ async function selectNotesRoot() {
store.loading = true;
try {
const result = await client.preferences.openDialogNotesDir();
- if (!result) {
+ if (!result?.value) {
store.loading = false;
return;
}
@@ -45,7 +48,7 @@ const Preferences = observer(() => {
}
}
- async function openDialogImportDir() {
+ async function importDirectory() {
store.loading = true;
try {
const result = await client.preferences.openDialogImportDir();
@@ -54,7 +57,7 @@ const Preferences = observer(() => {
return;
}
- await client.importer.import(result);
+ await client.importer.import(result, store.sourceType);
store.loading = false;
} catch (e) {
console.error("Error importing directory", e);
@@ -112,22 +115,6 @@ const Preferences = observer(() => {
located at {client.preferences.settingsPath()}
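For context on the `store.sourceType` now threaded into `client.importer.import`: the enum lives in the new five-line `SourceType.ts`. Only `SourceType.Other` is visible in these hunks, so the following shape is an assumption based on the commit message's notion/non-notion distinction:

```ts
// Assumed contents of src/preload/client/importer/SourceType.ts; the Notion
// member and the string values are inferred, not shown in this patch.
export enum SourceType {
  Notion = "notion",
  Other = "other",
}
```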