Skip to content

Commit

Permalink
Import non-Notion markdown directory (#273)
Browse files Browse the repository at this point in the history
add experimental markdown / Obsidian importer 

- add source type to importer to support notion vs non-notion
- conditionally strip and track notion Id from note title
- resolve and convert [[Wikilinks]]
- track and convert inline #tags
- skip title parsing from front matter; fallback to file name when non-Notion import
- use birthtime and mtime, not ctime, for default note creation date when front-matter not present
- refactor file moving and resolving to walk all files, then move if referenced by a note
- (bugfix) check for empty / null value when selecting new chronicles root - it implies the user clicked cancel
- drop legacy importer code (was unused and kept for reference)
  • Loading branch information
cloverich authored Dec 3, 2024
1 parent 0c21b89 commit fe301bd
Show file tree
Hide file tree
Showing 19 changed files with 777 additions and 767 deletions.
4 changes: 3 additions & 1 deletion src/electron/migrations/20211005142122.sql
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,10 @@ CREATE TABLE IF NOT EXISTS "imports" (

CREATE TABLE IF NOT EXISTS "import_files" (
"importerId" TEXT NOT NULL,
"sourcePathResolved" TEXT NOT NULL PRIMARY KEY,
"status" TEXT NOT NULL DEFAULT "pending",
"chroniclesId" TEXT NOT NULL,
"sourcePathResolved" TEXT NOT NULL PRIMARY KEY,
"filename" TEXT NOT NULL, -- filename without extension
"extension" TEXT NOT NULL,
"error" TEXT
);
Expand All @@ -71,6 +72,7 @@ CREATE TABLE IF NOT EXISTS "import_notes" (
"status" TEXT NOT NULL, -- success, error
"chroniclesId" TEXT NOT NULL,
"chroniclesPath" TEXT NOT NULL,
-- todo: sourcePath + hash of content
"sourcePath" TEXT NOT NULL PRIMARY KEY,
"sourceId" TEXT,
"error" BOOLEAN,
Expand Down
2 changes: 2 additions & 0 deletions src/markdown/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ https://github.com/inokawa/remark-slate-transformer/issues/67
# mdast-util-ofm

Partial fork of https://github.com/MoritzRS/obsidian-ext, initially because of ESM import issues. I need no immediate modifications to this project; if / once it completes its ESM updates, it may be preferable to move this back to a dependency.

ofm-\* packages are Copyright Moritz R. Schulz and MIT licensed
31 changes: 26 additions & 5 deletions src/markdown/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { describe, it } from "mocha";
import path from "path";

import { slateToString, stringToSlate } from "./index.js";
import { dig, parseMarkdown } from "./test-utils.js";
import { dig, parseMarkdown, parseMarkdownForImport } from "./test-utils.js";

// Tests can structure the data this way and use runTests to
// test the various conversions.
Expand Down Expand Up @@ -41,7 +41,7 @@ function outputMarkdown(markdown: string | { in: string; out: string }) {
// but is sometimes configurable (ex: options -> bullet)
// - markdown (string)->mdast
// - markdown (string)->slate
function runTests(doc: TestDoc) {
function runTests(doc: TestDoc, parser = parseMarkdown) {
it("roundtrips", function () {
const result = slateToString(stringToSlate(inputMarkdown(doc.markdown)));

Expand All @@ -54,14 +54,14 @@ function runTests(doc: TestDoc) {
// round trip properly if it does not parse at all (ex: wikilinks without a handler)
if (doc.mdast) {
it("markdown->mdast", function () {
const result = parseMarkdown(inputMarkdown(doc.markdown));
const result = parser(inputMarkdown(doc.markdown));
expect(result).to.deep.equal(doc.mdast);
});
}

if (doc.slate) {
it("markdown->slate", function () {
const result = stringToSlate(outputMarkdown(doc.markdown));
const result = stringToSlate(outputMarkdown(doc.markdown), parser);
expect(result).to.deep.equal(doc.slate);
});
}
Expand Down Expand Up @@ -477,7 +477,28 @@ describe("[[Wikilinks]]", function () {
],
};

runTests(doc);
runTests(doc, parseMarkdownForImport);
});

// Inline #tag parsing, exercised through the import parser.
// The suite callback is a plain synchronous function: Mocha collects tests
// while the callback runs and does not wait on a promise returned from it.
describe("mdast-util-ofm-tag", function () {
  const doc = {
    markdown: "a #b c",
    // The tag is split out of the surrounding text; its value omits the "#".
    mdast: {
      type: "root",
      children: [
        {
          type: "paragraph",
          children: [
            { type: "text", value: "a " },
            { type: "ofmTag", value: "b" },
            { type: "text", value: " c" },
          ],
        },
      ],
    },
  };

  runTests(doc, parseMarkdownForImport);
});

// A place to put behavior that is not yet handled correctly; so I can store test
Expand Down
25 changes: 21 additions & 4 deletions src/markdown/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ import { fromMarkdown } from "mdast-util-from-markdown";
import { gfmFromMarkdown, gfmToMarkdown } from "mdast-util-gfm";
import { toMarkdown } from "mdast-util-to-markdown";
import { gfm } from "micromark-extension-gfm";
import { ofmTagFromMarkdown } from "./mdast-util-ofm-tag";
import { ofmWikilinkFromMarkdown } from "./mdast-util-ofm-wikilink";
import { ofmTag } from "./micromark-extension-ofm-tag";
import { ofmWikilink } from "./micromark-extension-ofm-wikilink";
import { mdastToSlate } from "./remark-slate-transformer/transformers/mdast-to-slate.js";

Expand Down Expand Up @@ -47,10 +49,23 @@ function wrapImages(tree: mdast.Root) {
return tree;
}

/**
 * Parse markdown on behalf of the importer.
 *
 * On top of GFM this enables the #tag and [[WikiLink]] syntax extensions.
 * The importer converts those nodes to Chronicles tags and markdown links;
 * future versions may support them properly.
 */
export const parseMarkdownForImport = (markdown: string): mdast.Root => {
  const extensions = [gfm(), ofmTag(), ofmWikilink()];
  const mdastExtensions = [
    gfmFromMarkdown(),
    ofmTagFromMarkdown(),
    ofmWikilinkFromMarkdown(),
  ];

  return fromMarkdown(markdown, { extensions, mdastExtensions });
};

export const parseMarkdown = (markdown: string): mdast.Root => {
return fromMarkdown(markdown, {
extensions: [gfm(), ofmWikilink()],
mdastExtensions: [gfmFromMarkdown(), ofmWikilinkFromMarkdown()],
extensions: [gfm()],
mdastExtensions: [gfmFromMarkdown()],
});
};

Expand All @@ -62,8 +77,10 @@ export const mdastToString = (tree: mdast.Nodes) => {
});
};

export const stringToSlate = (input: string) => {
return mdastToSlate(unwrapImages(parseMarkdown(input)));
// parser param: support configuring for importer tests, which import and convert
// a few otherwise unsupported markdown features (tags, wikilinks)
export const stringToSlate = (input: string, parse = parseMarkdown) => {
return mdastToSlate(unwrapImages(parse(input)));
};

export const slateToString = (nodes: SlateCustom.SlateNode[]) => {
Expand Down
17 changes: 17 additions & 0 deletions src/markdown/mdast-util-ofm-tag/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Module augmentation: adds the custom `ofmTag` node to the "mdast" typings.
declare module "mdast" {
// Literal node for an inline tag; `value` holds the tag text as a string.
interface OfmTag extends Literal {
type: "ofmTag";
value: string;
}

// Register `ofmTag` in the root content map.
interface RootContentMap {
ofmTag: OfmTag;
}

// Register `ofmTag` in the phrasing (inline) content map.
interface PhrasingContentMap {
ofmTag: OfmTag;
}
}

export { ofmTagFromMarkdown } from "./lib/fromMarkdown.js";
export { ofmTagToMarkdown } from "./lib/toMarkdown.js";
26 changes: 26 additions & 0 deletions src/markdown/mdast-util-ofm-tag/lib/fromMarkdown.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import type { Extension } from "mdast-util-from-markdown";

/**
 * Build the `mdast-util-from-markdown` extension that turns OFM tag tokens
 * into `ofmTag` nodes.
 */
export function ofmTagFromMarkdown(): Extension {
  const extension: Extension = {
    enter: {
      // Open the node with an empty value; the content handler fills it in.
      ofmTag(token) {
        this.enter({ type: "ofmTag", value: "" }, token);
      },
      ofmTagContent(token) {
        // Plain index lookup rather than `this.stack.at(-1)`: `.at` needs
        // es2022, which (per the original note) weirdly breaks other stuff.
        const current = this.stack[this.stack.length - 1];
        if (current?.type !== "ofmTag") return;
        current.value = this.sliceSerialize(token);
      },
    },
    exit: {
      ofmTag(token) {
        this.exit(token);
      },
    },
  };

  return extension;
}
15 changes: 15 additions & 0 deletions src/markdown/mdast-util-ofm-tag/lib/toMarkdown.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import type { Options } from "mdast-util-to-markdown";

/**
 * Build the `mdast-util-to-markdown` extension that serializes `ofmTag` nodes.
 */
export function ofmTagToMarkdown(): Options {
  const handlers: Options["handlers"] = {
    // A tag is written back as "#" followed by its stored value.
    ofmTag: (node) => `#${node.value}`,
  };

  return { handlers };
}
10 changes: 10 additions & 0 deletions src/markdown/micromark-extension-ofm-tag/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Module augmentation: registers the token type names used by the OFM tag
// tokenizer with micromark's TokenTypeMap.
declare module "micromark-util-types" {
interface TokenTypeMap {
// The whole tag.
ofmTag: "ofmTag";
// The leading "#".
ofmTagMarker: "ofmTagMarker";
// Everything after the "#".
ofmTagContent: "ofmTagContent";
}
}

// export { ofmTagHtml } from "./lib/html.js";
export { ofmTag } from "./lib/syntax.js";
131 changes: 131 additions & 0 deletions src/markdown/micromark-extension-ofm-tag/lib/syntax.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import type {
Code,
Effects,
Extension,
State,
TokenizeContext,
} from "micromark-util-types";

// ASCII codes
// Character codes the tag tokenizer compares incoming codes against.
const SPACE = 32; // " "
const NUMBER_SIGN = 35; // "#"
const DASH = 45; // "-"
const SLASH = 47; // "/"
const DIGIT_0 = 48; // "0"
const DIGIT_9 = 57; // "9"
const LETTER_A = 65; // "A"
const LETTER_Z = 90; // "Z"
const UNDERSCORE = 95; // "_"
const LETTER_a = 97; // "a"
const LETTER_z = 122; // "z"

/**
 * Build the `micromark` syntax extension that recognizes OFM tags.
 *
 * Registers the tag tokenizer as a text construct keyed on the `#` character code.
 */
export function ofmTag(): Extension {
  const tagConstruct = { name: "ofmTag", tokenize };
  return { text: { [NUMBER_SIGN]: tagConstruct } };
}

/**
 * A tokenizer for Obsidian tag syntax.
 * The tag must include at least one non-numerical character.
 *
 * Emits an `ofmTag` token wrapping an `ofmTagMarker` (the `#`) and an
 * `ofmTagContent` (everything after the marker).
 *
 * @param effects - Used to enter/exit tokens and consume codes.
 * @param ok - State to continue with once a tag has been tokenized.
 * @param nok - State to continue with when the input is not a tag.
 * @returns The initial state function.
 */
function tokenize(
  this: TokenizeContext,
  effects: Effects,
  ok: State,
  nok: State,
) {
  const previous = this.previous;
  const events = this.events;
  return start;

  /** Whether `code` is an ASCII digit. */
  function isDigit(code: Code): boolean {
    return code !== null && code >= DIGIT_0 && code <= DIGIT_9;
  }

  /** Whether `code` is a non-digit tag character: ASCII letter, `_`, `-` or `/`. */
  function isNonDigitTagChar(code: Code): boolean {
    return (
      code !== null &&
      ((code >= LETTER_A && code <= LETTER_Z) ||
        (code >= LETTER_a && code <= LETTER_z) ||
        code === UNDERSCORE ||
        code === DASH ||
        code === SLASH)
    );
  }

  /**
   * Start of tag
   *
   * ```markdown
   * > | #123/tag
   *     ^
   * ```
   */
  function start(code: Code): State | undefined {
    // Only tags can be chained directly without space.
    // The last event is read with `?.` so an empty event list is treated as
    // "not preceded by a tag" instead of throwing on an undefined entry.
    const lastEventType = events[events.length - 1]?.[1].type;
    if (previous !== null && previous > SPACE && lastEventType !== "ofmTag") {
      return nok(code);
    }

    effects.enter("ofmTag");
    effects.enter("ofmTagMarker");
    effects.consume(code);
    effects.exit("ofmTagMarker");
    effects.enter("ofmTagContent");
    return insideTagCandidate;
  }

  /**
   * Inside a tag without any non-numerical character
   *
   * ```markdown
   * > | #123/tag
   *      ^^^
   * ```
   */
  function insideTagCandidate(code: Code): State | undefined {
    if (isDigit(code)) {
      effects.consume(code);
      return insideTagCandidate;
    }

    // First non-digit tag character: from here on the tag is valid.
    if (isNonDigitTagChar(code)) {
      effects.consume(code);
      return insideTag;
    }

    // Digits only (or nothing at all) after the marker: not a tag.
    return nok(code);
  }

  /**
   * Inside a tag with at least one non-numerical character
   *
   * ```markdown
   * > | #123/tag
   *         ^^^^
   * ```
   */
  function insideTag(code: Code): State | undefined {
    if (isDigit(code) || isNonDigitTagChar(code)) {
      effects.consume(code);
      return insideTag;
    }

    // Any other code ends the tag; it is handed on to `ok` unconsumed.
    effects.exit("ofmTagContent");
    effects.exit("ofmTag");
    return ok(code);
  }
}
8 changes: 7 additions & 1 deletion src/markdown/test-utils.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import { Root } from "mdast";

import { parseMarkdown as parseMarkdownRaw } from "./index.js";
import {
parseMarkdownForImport as parseMarkdownForImportRaw,
parseMarkdown as parseMarkdownRaw,
} from "./index.js";

// Remove the parsed position information to simplify deep equals comparisons
// There is a similar function that's an entire npm package; fuck that.
Expand All @@ -18,6 +21,9 @@ export function prunePositions(tree: any) {
/** Parse with the default parser, then prune position info from the tree. */
export const parseMarkdown = (markdown: string): Root => {
  const tree = parseMarkdownRaw(markdown);
  return prunePositions(tree);
};

/** Parse with the importer's parser, then prune position info from the tree. */
export const parseMarkdownForImport = (markdown: string): Root => {
  const tree = parseMarkdownForImportRaw(markdown);
  return prunePositions(tree);
};

// Like _.get but fail loud, helpful error messages
// Usage: dig(mdast, 'children.0.children.1.value')
export function dig(obj: any, path: string) {
Expand Down
Loading

0 comments on commit fe301bd

Please sign in to comment.