Import non-Notion markdown directory (#273)

add experimental markdown / Obsidian importer - add source type to importer to support notion vs non-notion - conditionally strip and track notion Id from note title - resolve and convert [[Wikilinks]] - track and convert inline #tags - skip title parsing from front matter; fallback to file name when non-Notion import - use birthtime and mtime, not ctime, for default note creation date when front-matter not present - refactor file moving and resolving to walk all files, then move if referenced by a note - (bugfix) check for empty / null value when selecting new chronicles root - it implies the user clicked cancel - drop legacy importer code (was unused and kept for reference)
cloverich · Dec 3, 2024 · fe301bd · fe301bd
1 parent 0c21b89
commit fe301bd
Show file tree

Hide file tree

Showing 19 changed files with 777 additions and 767 deletions.
diff --git a/src/electron/migrations/20211005142122.sql b/src/electron/migrations/20211005142122.sql
@@ -58,9 +58,10 @@ CREATE TABLE IF NOT EXISTS "imports" (
 
 CREATE TABLE IF NOT EXISTS "import_files" (
     "importerId" TEXT NOT NULL,
-    "sourcePathResolved" TEXT NOT NULL PRIMARY KEY,
     "status" TEXT NOT NULL DEFAULT "pending",
     "chroniclesId" TEXT NOT NULL,
+    "sourcePathResolved" TEXT NOT NULL PRIMARY KEY,
+    "filename" TEXT NOT NULL, -- filename without extension
     "extension" TEXT NOT NULL,
     "error" TEXT
 );
@@ -71,6 +72,7 @@ CREATE TABLE IF NOT EXISTS "import_notes" (
     "status" TEXT NOT NULL, -- success, error
     "chroniclesId" TEXT NOT NULL,
     "chroniclesPath" TEXT NOT NULL,
+    -- todo: sourcePath + hash of content
     "sourcePath" TEXT NOT NULL PRIMARY KEY,
     "sourceId" TEXT,
     "error" BOOLEAN,

diff --git a/src/markdown/README.md b/src/markdown/README.md
@@ -13,3 +13,5 @@ https://github.com/inokawa/remark-slate-transformer/issues/67
 # mdast-util-ofm
 
 Partial fork of https://github.com/MoritzRS/obsidian-ext, initially made because of ESM import issues. No modifications to that project are needed at the moment; if / once it completes its ESM updates, it may be preferable to move this back to a dependency.
+
+ofm-\* packages are Copyright Moritz R. Schulz and MIT licensed
diff --git a/src/markdown/index.test.ts b/src/markdown/index.test.ts
@@ -4,7 +4,7 @@ import { describe, it } from "mocha";
 import path from "path";
 
 import { slateToString, stringToSlate } from "./index.js";
-import { dig, parseMarkdown } from "./test-utils.js";
+import { dig, parseMarkdown, parseMarkdownForImport } from "./test-utils.js";
 
 // Tests can structure the data this way and use runTests to
 // test the various conversions.
@@ -41,7 +41,7 @@ function outputMarkdown(markdown: string | { in: string; out: string }) {
 //    but is sometimes configurable (ex: options -> bullet)
 // - markdown (string)->mdast
 // - markdown (string)->slate
-function runTests(doc: TestDoc) {
+function runTests(doc: TestDoc, parser = parseMarkdown) {
   it("roundtrips", function () {
     const result = slateToString(stringToSlate(inputMarkdown(doc.markdown)));
 
@@ -54,14 +54,14 @@ function runTests(doc: TestDoc) {
   // round trip properly if it does not parse at all (ex: wikilinks without a handler)
   if (doc.mdast) {
     it("markdown->mdast", function () {
-      const result = parseMarkdown(inputMarkdown(doc.markdown));
+      const result = parser(inputMarkdown(doc.markdown));
       expect(result).to.deep.equal(doc.mdast);
     });
   }
 
   if (doc.slate) {
     it("markdown->slate", function () {
-      const result = stringToSlate(outputMarkdown(doc.markdown));
+      const result = stringToSlate(outputMarkdown(doc.markdown), parser);
       expect(result).to.deep.equal(doc.slate);
     });
   }
@@ -477,7 +477,28 @@ describe("[[Wikilinks]]", function () {
     ],
   };
 
-  runTests(doc);
+  runTests(doc, parseMarkdownForImport);
+});
+
+// Inline #tags: the import parser is expected to turn "#b" into a dedicated
+// `ofmTag` node whose value excludes the leading "#".
+// The suite callback is a plain synchronous function, matching the other
+// suites in this file; mocha ignores a suite callback's return value, so an
+// async callback adds nothing and can hide mistakes.
+describe("mdast-util-ofm-tag", function () {
+  const doc = {
+    markdown: "a #b c",
+    mdast: {
+      type: "root",
+      children: [
+        {
+          type: "paragraph",
+          children: [
+            { type: "text", value: "a " },
+            { type: "ofmTag", value: "b" },
+            { type: "text", value: " c" },
+          ],
+        },
+      ],
+    },
+  };
+
+  // Use the import-flavored parser; the default parser does not enable tags.
+  runTests(doc, parseMarkdownForImport);
 });
 
 // A place to put behavior that is not yet handled correctly; so I can store test

diff --git a/src/markdown/index.ts b/src/markdown/index.ts
@@ -8,7 +8,9 @@ import { fromMarkdown } from "mdast-util-from-markdown";
 import { gfmFromMarkdown, gfmToMarkdown } from "mdast-util-gfm";
 import { toMarkdown } from "mdast-util-to-markdown";
 import { gfm } from "micromark-extension-gfm";
+import { ofmTagFromMarkdown } from "./mdast-util-ofm-tag";
 import { ofmWikilinkFromMarkdown } from "./mdast-util-ofm-wikilink";
+import { ofmTag } from "./micromark-extension-ofm-tag";
 import { ofmWikilink } from "./micromark-extension-ofm-wikilink";
 import { mdastToSlate } from "./remark-slate-transformer/transformers/mdast-to-slate.js";
 
@@ -47,10 +49,23 @@ function wrapImages(tree: mdast.Root) {
   return tree;
 }
 
+// Parse markdown for the importer. On top of GFM this enables the #tag and
+// [[WikiLink]] syntax extensions, so tags come back as `ofmTag` nodes and
+// wikilinks are handled by the wikilink extension. The importer converts them
+// to Chronicles tags and markdown links (that conversion does not happen in
+// this function). Future versions may support these properly.
+export const parseMarkdownForImport = (markdown: string): mdast.Root => {
+  return fromMarkdown(markdown, {
+    // syntax (tokenizer) extensions
+    extensions: [gfm(), ofmTag(), ofmWikilink()],
+    // matching mdast extensions, listed in the same order as the syntax ones
+    mdastExtensions: [
+      gfmFromMarkdown(),
+      ofmTagFromMarkdown(),
+      ofmWikilinkFromMarkdown(),
+    ],
+  });
+};
+
 export const parseMarkdown = (markdown: string): mdast.Root => {
   return fromMarkdown(markdown, {
-    extensions: [gfm(), ofmWikilink()],
-    mdastExtensions: [gfmFromMarkdown(), ofmWikilinkFromMarkdown()],
+    extensions: [gfm()],
+    mdastExtensions: [gfmFromMarkdown()],
   });
 };
 
@@ -62,8 +77,10 @@ export const mdastToString = (tree: mdast.Nodes) => {
   });
 };
 
-export const stringToSlate = (input: string) => {
-  return mdastToSlate(unwrapImages(parseMarkdown(input)));
+// parser param: support configuring for importer tests, which import and convert
+// a few otherwise unsupported markdown features (tags, wikilinks)
+export const stringToSlate = (input: string, parse = parseMarkdown) => {
+  return mdastToSlate(unwrapImages(parse(input)));
 };
 
 export const slateToString = (nodes: SlateCustom.SlateNode[]) => {

diff --git a/src/markdown/mdast-util-ofm-tag/index.ts b/src/markdown/mdast-util-ofm-tag/index.ts
@@ -0,0 +1,17 @@
+// Augment the "mdast" type definitions with the `ofmTag` node so it can be
+// used in typed trees alongside the built-in node types.
+declare module "mdast" {
+  // A tag node; `value` is filled from the tag's content token by
+  // ofmTagFromMarkdown (see ./lib/fromMarkdown.js).
+  interface OfmTag extends Literal {
+    type: "ofmTag";
+    value: string;
+  }
+
+  // Register the node in mdast's root content map.
+  interface RootContentMap {
+    ofmTag: OfmTag;
+  }
+
+  // Register the node in mdast's phrasing (inline) content map.
+  interface PhrasingContentMap {
+    ofmTag: OfmTag;
+  }
+}
+
+// Public entry points: markdown -> mdast and mdast -> markdown extensions.
+export { ofmTagFromMarkdown } from "./lib/fromMarkdown.js";
+export { ofmTagToMarkdown } from "./lib/toMarkdown.js";
diff --git a/src/markdown/mdast-util-ofm-tag/lib/fromMarkdown.ts b/src/markdown/mdast-util-ofm-tag/lib/fromMarkdown.ts
@@ -0,0 +1,26 @@
+import type { Extension } from "mdast-util-from-markdown";
+
+/**
+ * Create an extension for `mdast-util-from-markdown` to enable OFM tags in markdown.
+ *
+ * Handles the `ofmTag` and `ofmTagContent` tokens: an `ofmTag` node is opened
+ * when the tag token is entered, its `value` is filled from the content token,
+ * and the node is closed when the tag token exits.
+ *
+ * @returns The extension object holding the `enter` / `exit` handlers.
+ */
+export function ofmTagFromMarkdown(): Extension {
+  return {
+    enter: {
+      ofmTag: function (token) {
+        // Open the node with an empty value; ofmTagContent fills it in.
+        this.enter({ type: "ofmTag", value: "" }, token);
+      },
+      ofmTagContent: function (token) {
+        // note: modified this line to avoid needing es2022 which weirdly breaks
+        // other stuff.
+        // const node = this.stack.at(-1);
+        const node = this.stack[this.stack.length - 1];
+        // Only write the value when the node on top of the stack is an ofmTag.
+        if (node?.type === "ofmTag") node.value = this.sliceSerialize(token);
+      },
+    },
+    exit: {
+      ofmTag: function (token) {
+        // Close the node opened in enter.ofmTag.
+        this.exit(token);
+      },
+    },
+  };
+}
diff --git a/src/markdown/mdast-util-ofm-tag/lib/toMarkdown.ts b/src/markdown/mdast-util-ofm-tag/lib/toMarkdown.ts
@@ -0,0 +1,15 @@
+import type { Options } from "mdast-util-to-markdown";
+
+/**
+ * Create an extension for `mdast-util-to-markdown` to enable OFM tags in markdown.
+ *
+ * Registers a handler for `ofmTag` nodes that serializes the node as `#`
+ * followed by the node's `value`.
+ *
+ * @returns Options containing the `ofmTag` handler.
+ */
+export function ofmTagToMarkdown(): Options {
+  return {
+    handlers: {
+      ofmTag(node) {
+        // The stored value has no marker, so re-add the leading "#".
+        const value = node.value;
+        return `#${value}`;
+      },
+    },
+  };
+}
diff --git a/src/markdown/micromark-extension-ofm-tag/index.ts b/src/markdown/micromark-extension-ofm-tag/index.ts
@@ -0,0 +1,10 @@
+// Register the token types emitted by the tag tokenizer (./lib/syntax.js) with
+// micromark's token type map so `effects.enter` / `effects.exit` accept them.
+declare module "micromark-util-types" {
+  interface TokenTypeMap {
+    // the whole tag, marker plus content
+    ofmTag: "ofmTag";
+    // the leading "#"
+    ofmTagMarker: "ofmTagMarker";
+    // the characters following the marker
+    ofmTagContent: "ofmTagContent";
+  }
+}
+
+// The HTML compiler export is intentionally left disabled here.
+// export { ofmTagHtml } from "./lib/html.js";
+export { ofmTag } from "./lib/syntax.js";
diff --git a/src/markdown/micromark-extension-ofm-tag/lib/syntax.ts b/src/markdown/micromark-extension-ofm-tag/lib/syntax.ts
@@ -0,0 +1,131 @@
+import type {
+  Code,
+  Effects,
+  Extension,
+  State,
+  TokenizeContext,
+} from "micromark-util-types";
+
+// ASCII character codes used by the tag tokenizer
+const SPACE = 32; // " "
+const NUMBER_SIGN = 35; // "#"
+const DASH = 45; // "-"
+const SLASH = 47; // "/"
+const DIGIT_0 = 48; // "0"
+const DIGIT_9 = 57; // "9"
+const LETTER_A = 65; // "A"
+const LETTER_Z = 90; // "Z"
+const UNDERSCORE = 95; // "_"
+const LETTER_a = 97; // "a"
+const LETTER_z = 122; // "z"
+
+/**
+ * Create an extension for `micromark` to enable OFM tag syntax.
+ *
+ * Registers the tag tokenizer as a `text` construct keyed on the `#`
+ * character code, under the construct name "ofmTag".
+ *
+ * @returns The micromark syntax extension.
+ */
+export function ofmTag(): Extension {
+  return {
+    text: {
+      [NUMBER_SIGN]: {
+        name: "ofmTag",
+        tokenize: tokenize,
+      },
+    },
+  };
+}
+
+/**
+ * A tokenizer for Obsidian tag syntax.
+ * The tag must include at least one non-numerical character.
+ *
+ * Implemented as a three-state machine: `start` consumes the marker,
+ * `inside_tag_candidate` consumes leading digits until a non-digit tag
+ * character is seen, and `inside_tag` consumes the rest of the tag.
+ * Only ASCII letters, digits, `_`, `-` and `/` are accepted as tag characters.
+ * NOTE(review): non-ASCII characters therefore end (or reject) a tag; confirm
+ * this is intended.
+ *
+ * @param effects Used to enter / exit the tag tokens and to consume codes.
+ * @param ok State continued with once a complete tag has been tokenized.
+ * @param nok State continued with when the input is not a tag.
+ * @returns The initial state function (`start`).
+ */
+function tokenize(
+  this: TokenizeContext,
+  effects: Effects,
+  ok: State,
+  nok: State,
+) {
+  // Captured once, when the tokenizer is set up, and read in `start`.
+  const previous = this.previous;
+  const events = this.events;
+  return start;
+
+  /**
+   * Start of tag
+   *
+   * Rejects the tag when the preceding code is greater than SPACE, unless the
+   * most recent event belongs to another tag. Otherwise opens the tag tokens
+   * and consumes the marker.
+   *
+   * ```markdown
+   * > | #123/tag
+   *     ^
+   * ```
+   */
+  function start(code: Code) {
+    // Only tags can be chained directly without space
+    // NOTE(review): assumes `events` is non-empty whenever `previous` is set;
+    // the index access below is unguarded. Confirm.
+    if (
+      previous &&
+      previous > SPACE &&
+      events[events.length - 1][1].type !== "ofmTag"
+    ) {
+      return nok(code);
+    }
+
+    effects.enter("ofmTag");
+    effects.enter("ofmTagMarker");
+    effects.consume(code);
+    effects.exit("ofmTagMarker");
+    effects.enter("ofmTagContent");
+    return inside_tag_candidate;
+  }
+
+  /**
+   * Inside a tag without any non-numerical character
+   *
+   * Digits keep the machine in this state. The first letter, `_`, `-` or `/`
+   * moves it to `inside_tag`. Anything else (including end of input) rejects
+   * the tag, so a digits-only tag such as `#123` is not a tag.
+   *
+   * ```markdown
+   * > | #123/tag
+   *      ^^^
+   * ```
+   */
+  function inside_tag_candidate(code: Code) {
+    if (code && code >= DIGIT_0 && code <= DIGIT_9) {
+      effects.consume(code);
+      return inside_tag_candidate;
+    }
+
+    if (
+      code &&
+      ((code >= LETTER_A && code <= LETTER_Z) ||
+        (code >= LETTER_a && code <= LETTER_z) ||
+        code === UNDERSCORE ||
+        code === DASH ||
+        code === SLASH)
+    ) {
+      effects.consume(code);
+      return inside_tag;
+    }
+
+    return nok(code);
+  }
+
+  /**
+   * Inside a tag with at least one non-numerical character
+   *
+   * Consumes further tag characters. The first code that is not a tag
+   * character closes the content and tag tokens and is passed on to `ok`
+   * without being consumed.
+   *
+   * ```markdown
+   * > | #123/tag
+   *         ^^^^
+   * ```
+   */
+  function inside_tag(code: Code) {
+    if (
+      code &&
+      ((code >= DIGIT_0 && code <= DIGIT_9) ||
+        (code >= LETTER_A && code <= LETTER_Z) ||
+        (code >= LETTER_a && code <= LETTER_z) ||
+        code === UNDERSCORE ||
+        code === DASH ||
+        code === SLASH)
+    ) {
+      effects.consume(code);
+      return inside_tag;
+    }
+
+    effects.exit("ofmTagContent");
+    effects.exit("ofmTag");
+    return ok(code);
+  }
+}
diff --git a/src/markdown/test-utils.ts b/src/markdown/test-utils.ts
@@ -1,6 +1,9 @@
 import { Root } from "mdast";
 
-import { parseMarkdown as parseMarkdownRaw } from "./index.js";
+import {
+  parseMarkdownForImport as parseMarkdownForImportRaw,
+  parseMarkdown as parseMarkdownRaw,
+} from "./index.js";
 
 // Remove the parsed position information to simplify deep equals comparisons
 // A similar helper exists as a standalone npm package, but it is not worth adding a dependency for.
@@ -18,6 +21,9 @@ export function prunePositions(tree: any) {
 export const parseMarkdown = (markdown: string): Root =>
   prunePositions(parseMarkdownRaw(markdown));
 
+// Import-flavored counterpart of parseMarkdown: parses with the importer's
+// parser, then prunes position information from the resulting tree.
+export const parseMarkdownForImport = (markdown: string): Root =>
+  prunePositions(parseMarkdownForImportRaw(markdown));
+
 // Like _.get but fail loud, helpful error messages
 // Usage: dig(mdast, 'children.0.children.1.value')
 export function dig(obj: any, path: string) {
Original file line number	Diff line number	Diff line change
Expand Up		@@ -13,3 +13,5 @@ https://github.com/inokawa/remark-slate-transformer/issues/67
		# mdast-util-ofm

		Partial fork of https://github.com/MoritzRS/obsidian-ext, initially made because of ESM import issues. No modifications to that project are needed at the moment; if / once it completes its ESM updates, it may be preferable to move this back to a dependency.

		ofm-\* packages are Copyright Moritz R. Schulz and MIT licensed