maintain import tables; document clearing; fix import bugs #284

Merged · 1 commit · Dec 26, 2024
1 change: 0 additions & 1 deletion src/electron/migrations/20211005142122.sql
@@ -97,7 +97,6 @@ CREATE TABLE IF NOT EXISTS "import_notes" (
   "sourcePath" TEXT NOT NULL PRIMARY KEY,
   "sourceId" TEXT,
   "error" BOOLEAN,
-  "title" TEXT NOT NULL,
   "journal" TEXT NOT NULL,
   "frontMatter" TEXT,
   "content" TEXT
56 changes: 38 additions & 18 deletions src/markdown/index.test.ts
@@ -5,7 +5,12 @@ import path from "path";
 import yaml from "yaml";
 
 import { slateToString, stringToSlate } from "./index.js";
-import { dig, parseMarkdown, parseMarkdownForImport } from "./test-utils.js";
+import {
+  dedent,
+  dig,
+  parseMarkdown,
+  parseMarkdownForImport,
+} from "./test-utils.js";
 
 // Tests can structure the data this way and use runTests to
 // test the various conversions.
@@ -619,26 +624,26 @@ describe("Whacky shit", function () {
 });
 
 describe("front matter parsing", function () {
-  const content = `---
-title: 2024-09-29
-tags: weekly-todo
-createdAt: 2024-09-30T17:50:22.000Z
-updatedAt: 2024-11-04T16:24:11.000Z
----
-#weekly-todo
-Last week: [2024-09-22](../work/0193acd4fa3574698c36c4514b907c70.md)
-**I am on call this week** [On call week of 2024-09-30](../persona/0193acd4fa45731f81350d4443c1ed16.md)
-## Monday
-`;
-
   // A very basic "it works" test
   // todo: End to end test with a real document, asserting against the database values
   it("parses front matter as an mdast node, and can be parsed with yaml.parse", function () {
+    const content = dedent(`---
+      title: 2024-09-29
+      tags: weekly-todo
+      createdAt: 2024-09-30T17:50:22.000Z
+      updatedAt: 2024-11-04T16:24:11.000Z
+      ---
+      #weekly-todo
+      Last week: [2024-09-22](../work/0193acd4fa3574698c36c4514b907c70.md)
+      **I am on call this week** [On call week of 2024-09-30](../persona/0193acd4fa45731f81350d4443c1ed16.md)
+      ## Monday
+    `);
 
     const parsed = parseMarkdown(content);
     expect(parsed.children[0].type).to.equal("yaml");
     expect(parsed.children[0].value).to.equal(
@@ -656,4 +661,19 @@
       updatedAt: "2024-11-04T16:24:11.000Z",
     });
   });
+
+  it("handles colons in front matter titles", function () {
+    const content = dedent(`---
+      title: "2024-09-29: A day to remember"
+      ---
+      Last week I...
+    `);
+
+    const parsed = parseMarkdown(content);
+    const frontMatter = yaml.parse(parsed.children[0].value as string);
+    expect(frontMatter).to.deep.equal({
+      title: "2024-09-29: A day to remember",
+    });
+  });
 });
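
The new test above guards a real YAML pitfall: an unquoted colon-space inside a scalar value is a parse error, so titles like "2024-09-29: A day to remember" must be serialized quoted. A minimal sketch (not part of the diff) using the same yaml package as the tests:

```ts
import yaml from "yaml";

// Quoted scalars may contain ": " freely.
const ok = yaml.parse('title: "2024-09-29: A day to remember"');
console.log(ok.title); // "2024-09-29: A day to remember"

// Unquoted, the second colon reads as an illegal nested mapping and throws.
try {
  yaml.parse("title: 2024-09-29: A day to remember");
} catch (err) {
  console.log("parse failed as expected:", (err as Error).message);
}
```
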
7 changes: 4 additions & 3 deletions src/markdown/index.ts
@@ -54,17 +54,18 @@ function wrapImages(tree: mdast.Root) {
   return tree;
 }
 
-// The importer has additional support for #tag and [[WikiLink]], but converts them
+// During import (processing), parse #tag and [[WikiLink]]; the importer converts them
 // to Chronicles tags and markdown links. Future versions may support these properly.
-export const parseMarkdownForImport = (markdown: string): mdast.Root => {
+export const parseMarkdownForImportProcessing = (
+  markdown: string,
+): mdast.Root => {
   return fromMarkdown(markdown, {
     extensions: [gfm(), ofmTag(), ofmWikilink(), frontmatter(["yaml"])],
     mdastExtensions: [
       gfmFromMarkdown(),
       ofmTagFromMarkdown(),
       ofmWikilinkFromMarkdown(),
       // https://github.com/micromark/micromark-extension-frontmatter?tab=readme-ov-file#preset
       // todo: support toml (need toml parser)
       frontmatterFromMarkdown(["yaml"]),
     ],
   });
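
A rough usage sketch of the renamed parser. The node type names ("ofmTag", "ofmWikilink") are assumptions based on the ofm extensions wired in above, not something this diff asserts:

```ts
import { parseMarkdownForImportProcessing } from "./index.js";

const tree = parseMarkdownForImportProcessing("#weekly-todo and [[2024-09-22]]");

// Obsidian-flavored constructs come back as dedicated mdast nodes, which the
// importer later rewrites into Chronicles tags and plain markdown links.
const paragraph = tree.children[0] as any;
console.log(paragraph.children.map((n: any) => n.type));
// expected (per the ofm extensions): ["ofmTag", "text", "ofmWikilink"]
```
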
52 changes: 51 additions & 1 deletion src/markdown/test-utils.ts
@@ -1,7 +1,7 @@
 import { Root } from "mdast";
 
 import {
-  parseMarkdownForImport as parseMarkdownForImportRaw,
+  parseMarkdownForImportProcessing as parseMarkdownForImportRaw,
   parseMarkdown as parseMarkdownRaw,
 } from "./index.js";

@@ -43,3 +43,53 @@ export function dig(obj: any, path: string) {
     }
   }, obj);
 }

+// Adapted from https://github.com/MartinKolarik/dedent-js
+// Copyright (c) 2015 Martin Kolárik. Released under the MIT license.
+export function dedent(
+  templateStrings: TemplateStringsArray | string,
+  ...values: any[]
+) {
+  let matches = [];
+  let strings =
+    typeof templateStrings === "string"
+      ? [templateStrings]
+      : templateStrings.slice();
+
+  // 1. Remove trailing whitespace.
+  strings[strings.length - 1] = strings[strings.length - 1].replace(
+    /\r?\n([\t ]*)$/,
+    "",
+  );
+
+  // 2. Find all line breaks to determine the highest common indentation level.
+  for (let i = 0; i < strings.length; i++) {
+    let match;
+
+    if ((match = strings[i].match(/\n[\t ]+/g))) {
+      matches.push(...match);
+    }
+  }
+
+  // 3. Remove the common indentation from all strings.
+  if (matches.length) {
+    let size = Math.min(...matches.map((value) => value.length - 1));
+    let pattern = new RegExp(`\n[\t ]{${size}}`, "g");
+
+    for (let i = 0; i < strings.length; i++) {
+      strings[i] = strings[i].replace(pattern, "\n");
+    }
+  }
+
+  // 4. Remove leading whitespace.
+  strings[0] = strings[0].replace(/^\r?\n/, "");
+
+  // 5. Perform interpolation.
+  let string = strings[0];
+
+  for (let i = 0; i < values.length; i++) {
+    string += values[i] + strings[i + 1];
+  }
+
+  return string;
+}
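
A quick sketch of what dedent buys the tests above: the indentation shared by all lines after a newline is stripped, so fixtures can be indented naturally inside a test body without corrupting YAML front matter. This follows directly from the implementation shown; the fixture itself is made up:

```ts
const fixture = dedent(`---
  title: example
  ---
  ## Monday
`);

console.log(fixture);
// ---
// title: example
// ---
// ## Monday
```
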
108 changes: 85 additions & 23 deletions src/preload/client/importer.ts
@@ -21,11 +21,11 @@ import { uuidv7obj } from "uuidv7";
 import {
   isNoteLink,
   mdastToString,
-  parseMarkdownForImport as stringToMdast,
+  parseMarkdownForImportProcessing,
 } from "../../markdown";
 import { FilesImportResolver } from "./importer/FilesImportResolver";
 import { SourceType } from "./importer/SourceType";
-import { parseTitleAndFrontMatter } from "./importer/frontmatter";
+import { parseTitleAndFrontMatterForImport } from "./importer/frontmatter";
 
 // UUID in Notion notes look like 32 character hex strings; make this somewhat more lenient
 const hexIdRegex = /\b[0-9a-f]{16,}\b/;
@@ -90,6 +90,20 @@ export class ImporterClient {
     private syncs: ISyncClient, // sync is keyword?
   ) {}
 
+  processPending = async () => {
+    const pendingImports = await this.knex("imports").where({
+      status: "pending",
+    });
+
+    for (const pendingImport of pendingImports) {
+      await this.processStagedNotes(
+        await this.ensureRoot(),
+        SourceType.Other,
+        new FilesImportResolver(this.knex, pendingImport.id, this.files),
+      );
+    }
+  };
+
   /**
    * Imports importDir into the chronicles root directory, grabbing all markdown
    * and linked files; makes the following changes:
@@ -104,7 +118,8 @@
     importDir: string,
     sourceType: SourceType = SourceType.Other,
   ) => {
-    await this.clearImportTables();
+    // await this.clearImportTables();
+    await this.clearIncomplete();
     const importerId = uuidv7obj().toHex();
     const chroniclesRoot = await this.ensureRoot();
 
@@ -165,7 +180,7 @@
 
     for await (const file of Files.walk(
       importDir,
-      30, // avoid infinite loops, random guess at reasonable depth
+      10, // random guess at reasonable max depth
 
       (dirent) => {
         // Skip hidden files and directories
@@ -210,7 +225,7 @@
 
     try {
       // todo: fallback title to filename - uuid
-      const { frontMatter, body } = parseTitleAndFrontMatter(
+      const { frontMatter, body } = parseTitleAndFrontMatterForImport(
         contents,
         name,
         sourceType,
@@ -270,8 +285,32 @@
 
       await this.knex("import_notes").insert(stagedNote);
     } catch (e) {
-      // todo: this error handler is far too big, obviously
-      console.error("Error processing note", file.path, e);
+      // todo: this error handler is too big
+      if ((e as any).code === "SQLITE_CONSTRAINT_PRIMARYKEY") {
+        console.log("Skipping re-import of note", file.path);
+      } else {
+        // track staging errors for review. For example, if a note has a title
+        // that is too long, or a front-matter key that is not supported, etc.,
+        // the user can use the table logs to fix and re-run the import
+        try {
+          const noteId = uuidv7obj().toHex();
+          await this.knex("import_notes").insert({
+            importerId,
+            sourcePath: file.path,
+            content: contents,
+            error: (e as any).message,
+
+            // note: these all have non-null / unique constraints:
+            chroniclesId: noteId,
+            chroniclesPath: "staging_error",
+            journal: "staging_error",
+            frontMatter: {},
+            status: "staging_error",
+          });
+        } catch (err) {
+          console.error("Error tracking staging import error", file.path, err);
+        }
+      }
     }
   };
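
The staging_error rows double as a review log. A sketch (not part of this PR) of how they could be queried before re-running an import; "knex" stands in for however the instance is obtained, and the column names match the insert above:

```ts
async function listStagingErrors(knex: any) {
  const failed = await knex("import_notes")
    .where({ status: "staging_error" })
    .select("sourcePath", "error");

  for (const row of failed) {
    console.log(`${row.sourcePath}: ${row.error}`);
  }
}
```
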

@@ -300,12 +339,18 @@
 
     const items = await this.knex<StagedNote>("import_notes").where({
       importerId,
+      status: "pending",
     });
 
     for await (const item of items) {
       const frontMatter: FrontMatter = JSON.parse(item.frontMatter);
 
-      const mdast = stringToMdast(item.content) as any as mdast.Root;
+      // note: at this stage we parse ofmTags and ofmWikilinks in order to convert
+      // them to Chronicles tags and markdown links; they are not natively
+      // supported in Chronicles as of now.
+      const mdast = parseMarkdownForImportProcessing(
+        item.content,
+      ) as any as mdast.Root;
       await this.updateNoteLinks(mdast, item, linkMapping, wikiLinkMapping);
 
       // NOTE: A bit hacky: when we update file links, we also mark the file as referenced
@@ -374,30 +419,46 @@
   // 1. Delete notes directory
   // 2. Run this command
   // 3. Re-run import
-  private clearImportTables = async () => {
+  clearImportTables = async () => {
     await this.db.exec("DELETE FROM import_notes");
     await this.db.exec("DELETE FROM import_files");
     await this.db.exec("DELETE FROM imports");
   };
 
+  // todo: optionally allow re-importing from a specific import directory by clearing
+  // all imports
+
+  // Clear errored or stuck notes so re-import can be attempted; all notes that
+  // are not in the 'note_created' state are deleted.
+  clearIncomplete = async () => {
+    await this.knex("import_notes").not.where({ status: "note_created" }).del();
+  };
+
   // Pull all staged notes and generate a mapping of original file path
   // (sourcePath) to the new file path (chroniclesPath). This is used to update
   // links in the notes after they are moved.
   private noteLinksMapping = async (importerId: string) => {
-    let linkMapping: Record<string, { journal: string; chroniclesId: string }> =
-      {};
-
-    const importedItems = await this.knex("import_notes")
-      .where({ importerId })
-      .select("sourcePath", "journal", "chroniclesId");
-
-    for (const item of importedItems) {
-      if ("error" in item && item.error) continue;
-      const { journal, chroniclesId, sourcePath } = item;
-      linkMapping[sourcePath] = { journal, chroniclesId };
-    }
-
-    return linkMapping;
+    try {
+      let linkMapping: Record<
+        string,
+        { journal: string; chroniclesId: string }
+      > = {};
+
+      const importedItems = await this.knex("import_notes")
+        .where({ importerId })
+        .select("sourcePath", "journal", "chroniclesId");
+
+      for (const item of importedItems) {
+        if ("error" in item && item.error) continue;
+        const { journal, chroniclesId, sourcePath } = item;
+        linkMapping[sourcePath] = { journal, chroniclesId };
+      }
+
+      return linkMapping;
+    } catch (err) {
+      console.error("Error generating link mappings", err);
+      throw err;
+    }
   };

   // Pull all staged notes and generate a mapping of original note title
@@ -409,11 +470,12 @@
 
     const importedItems = await this.knex("import_notes")
      .where({ importerId })
-      .select("title", "journal", "chroniclesId");
+      .select("frontMatter", "journal", "chroniclesId", "error");
 
     for (const item of importedItems) {
       if ("error" in item && item.error) continue;
-      const { journal, chroniclesId, title } = item;
+      const { journal, chroniclesId } = item;
+      const title = JSON.parse(item.frontMatter).title;
       linkMapping[title] = { journal, chroniclesId };
     }
 
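
This last hunk pairs with the migration at the top of the PR: with the dedicated title column dropped from import_notes, the title mapping derives titles from the stored front matter JSON instead. An illustration with made-up values:

```ts
// A staged row as it might look after this change.
const row = {
  journal: "work",
  chroniclesId: "0193acd4fa3574698c36c4514b907c70", // hypothetical id
  frontMatter: JSON.stringify({ title: "2024-09-29: A day to remember" }),
};

// The same extraction the mapping loop performs.
const title = JSON.parse(row.frontMatter).title;
console.log(title); // "2024-09-29: A day to remember"
```
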
2 changes: 1 addition & 1 deletion src/preload/client/importer/FilesImportResolver.ts
@@ -38,7 +38,7 @@ export class FilesImportResolver {
   ): Promise<string | undefined> => {
     // check db for chronicles id matching name, if any
     const result = await this.knex("import_files")
-      .where({ filename: name })
+      .where({ filename: name, importerId: this.importerId })
       .select("chroniclesId", "extension")
       .first()!;

Review comment from the author on the .where() change: without the importerId filter, the lookup may grab files from different imports, specifically from different import directories. Since the current wikilinks do not handle duplicate file names, this could (and did) result in, e.g., a different image or video being displayed than the one in the original note (if they had the same name).
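
The failure mode from the review comment, sketched with plain data (the row shape is inferred from the query above): filenames are not unique across imports, so an unscoped lookup can return a file staged by a different import.

```ts
type ImportFileRow = {
  filename: string;
  importerId: string;
  chroniclesId: string;
  extension: string;
};

// Two separate imports each staged a file named "image.png".
const rows: ImportFileRow[] = [
  { filename: "image.png", importerId: "import-a", chroniclesId: "aaa1", extension: "png" },
  { filename: "image.png", importerId: "import-b", chroniclesId: "bbb2", extension: "png" },
];

// Old behavior: first match wins, regardless of which import it came from.
const unscoped = rows.find((r) => r.filename === "image.png");

// Fixed behavior: scope the lookup to the current importer.
const scoped = rows.find(
  (r) => r.filename === "image.png" && r.importerId === "import-b",
);

console.log(unscoped?.chroniclesId, scoped?.chroniclesId); // "aaa1" "bbb2"
```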