maintain import tables; document clearing; fix import bugs (#284)
- re-work the import process to maintain data in the import tables by default (see the sketch below)
- fix staging error tracking so staging errors are kept, but cleared on re-run
- expose a "clear import tables" button in settings, and document it
- fix bug where tags or wikilinks in a document would cause the import to fail at the staging phase
- fix bug where a file with the same name from a different import would be preferentially used
- fix bug where an empty document would fail to import (potentially leaving a valid title / front matter behind)
- add a success message when clearing the import tables succeeds
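
A minimal sketch of the new re-run behavior, assuming a knex instance and the `import_notes` table from the migrations; the wrapper function is illustrative, and the real logic lives in `clearIncomplete` in src/preload/client/importer.ts below:

```ts
import { Knex } from "knex";

// Completed rows (status "note_created") are kept so finished notes are
// not re-imported; errored or stuck rows are deleted so they can be retried.
async function prepareReimport(knex: Knex): Promise<void> {
  await knex("import_notes").not.where({ status: "note_created" }).del();
}
```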
cloverich authored Dec 26, 2024
1 parent 7b7f772 commit 228e3ba
Showing 10 changed files with 359 additions and 136 deletions.
1 change: 0 additions & 1 deletion src/electron/migrations/20211005142122.sql
@@ -97,7 +97,6 @@ CREATE TABLE IF NOT EXISTS "import_notes" (
"sourcePath" TEXT NOT NULL PRIMARY KEY,
"sourceId" TEXT,
"error" BOOLEAN,
"title" TEXT NOT NULL,
"journal" TEXT NOT NULL,
"frontMatter" TEXT,
"content" TEXT
56 changes: 38 additions & 18 deletions src/markdown/index.test.ts
@@ -5,7 +5,12 @@ import path from "path";
import yaml from "yaml";

import { slateToString, stringToSlate } from "./index.js";
import { dig, parseMarkdown, parseMarkdownForImport } from "./test-utils.js";
import {
dedent,
dig,
parseMarkdown,
parseMarkdownForImport,
} from "./test-utils.js";

// Tests can structure the data this way and use runTests to
// test the various conversions.
@@ -619,26 +624,26 @@ describe("Whacky shit", function () {
});

describe("front matter parsing", function () {
const content = `---
title: 2024-09-29
tags: weekly-todo
createdAt: 2024-09-30T17:50:22.000Z
updatedAt: 2024-11-04T16:24:11.000Z
---
#weekly-todo
Last week: [2024-09-22](../work/0193acd4fa3574698c36c4514b907c70.md)
**I am on call this week** [On call week of 2024-09-30](../persona/0193acd4fa45731f81350d4443c1ed16.md)
## Monday
`;

// A very basic "it works" test
// todo: End to end test with a real document, asserting against the database values
it("parses front matter as an mdast node, and can be parsed with yaml.parse", function () {
const content = dedent(`---
title: 2024-09-29
tags: weekly-todo
createdAt: 2024-09-30T17:50:22.000Z
updatedAt: 2024-11-04T16:24:11.000Z
---
#weekly-todo
Last week: [2024-09-22](../work/0193acd4fa3574698c36c4514b907c70.md)
**I am on call this week** [On call week of 2024-09-30](../persona/0193acd4fa45731f81350d4443c1ed16.md)
## Monday
`);

const parsed = parseMarkdown(content);
expect(parsed.children[0].type).to.equal("yaml");
expect(parsed.children[0].value).to.equal(
@@ -656,4 +661,19 @@ Last week: [2024-09-22](../work/0193acd4fa3574698c36c4514b907c70.md)
updatedAt: "2024-11-04T16:24:11.000Z",
});
});

it("handles colons in front matter titles", function () {
const content = dedent(`---
title: "2024-09-29: A day to remember"
---
Last week I...
`);

const parsed = parseMarkdown(content);
const frontMatter = yaml.parse(parsed.children[0].value as string);
expect(frontMatter).to.deep.equal({
title: "2024-09-29: A day to remember",
});
});
});
7 changes: 4 additions & 3 deletions src/markdown/index.ts
@@ -54,17 +54,18 @@ function wrapImages(tree: mdast.Root) {
return tree;
}

// The importer has additional support for #tag and [[WikiLink]], but converts them
// During import (processing), parse #tag and [[WikiLink]]; the importer converts them
// to Chronicles tags and markdown links. Future versions may support these properly.
export const parseMarkdownForImport = (markdown: string): mdast.Root => {
export const parseMarkdownForImportProcessing = (
markdown: string,
): mdast.Root => {
return fromMarkdown(markdown, {
extensions: [gfm(), ofmTag(), ofmWikilink(), frontmatter(["yaml"])],
mdastExtensions: [
gfmFromMarkdown(),
ofmTagFromMarkdown(),
ofmWikilinkFromMarkdown(),
// https://github.com/micromark/micromark-extension-frontmatter?tab=readme-ov-file#preset
// todo: support toml (need toml parser)
frontmatterFromMarkdown(["yaml"]),
],
});
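For context, a hedged usage sketch of the renamed parser; the ofm node type names ("ofmTag", "ofmWikilink") are assumptions based on the mdast-util-ofm-* packages, not verified output:

```ts
import { parseMarkdownForImportProcessing } from "./index.js";

// Obsidian-flavored syntax that previously caused imports to fail at the
// staging phase now parses into dedicated mdast nodes, which the importer
// later rewrites into Chronicles tags and markdown links.
const tree = parseMarkdownForImportProcessing("#weekly-todo [[Some Note]]");
console.log(JSON.stringify(tree.children[0], null, 2));
```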
52 changes: 51 additions & 1 deletion src/markdown/test-utils.ts
@@ -1,7 +1,7 @@
import { Root } from "mdast";

import {
parseMarkdownForImport as parseMarkdownForImportRaw,
parseMarkdownForImportProcessing as parseMarkdownForImportRaw,
parseMarkdown as parseMarkdownRaw,
} from "./index.js";

@@ -43,3 +43,53 @@ export function dig(obj: any, path: string) {
}
}, obj);
}

// Adapted from https://github.com/MartinKolarik/dedent-js
// Copyright (c) 2015 Martin Kolárik. Released under the MIT license.
export function dedent(
templateStrings: TemplateStringsArray | string,
...values: any[]
) {
let matches = [];
let strings =
typeof templateStrings === "string"
? [templateStrings]
: templateStrings.slice();

// 1. Remove trailing whitespace.
strings[strings.length - 1] = strings[strings.length - 1].replace(
/\r?\n([\t ]*)$/,
"",
);

// 2. Find all line breaks to determine the highest common indentation level.
for (let i = 0; i < strings.length; i++) {
let match;

if ((match = strings[i].match(/\n[\t ]+/g))) {
matches.push(...match);
}
}

// 3. Remove the common indentation from all strings.
if (matches.length) {
let size = Math.min(...matches.map((value) => value.length - 1));
let pattern = new RegExp(`\n[\t ]{${size}}`, "g");

for (let i = 0; i < strings.length; i++) {
strings[i] = strings[i].replace(pattern, "\n");
}
}

// 4. Remove leading whitespace.
strings[0] = strings[0].replace(/^\r?\n/, "");

// 5. Perform interpolation.
let string = strings[0];

for (let i = 0; i < values.length; i++) {
string += values[i] + strings[i + 1];
}

return string;
}
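
A usage sketch of dedent; the expected output in the comment is inferred from the steps above:

```ts
// Common indentation and the trailing newline are stripped, so tests can
// embed front matter that must begin at column zero.
const md = dedent(`---
  title: example
  ---
  body text
`);
// md === '---\ntitle: example\n---\nbody text'
```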
108 changes: 85 additions & 23 deletions src/preload/client/importer.ts
@@ -21,11 +21,11 @@ import { uuidv7obj } from "uuidv7";
import {
isNoteLink,
mdastToString,
parseMarkdownForImport as stringToMdast,
parseMarkdownForImportProcessing,
} from "../../markdown";
import { FilesImportResolver } from "./importer/FilesImportResolver";
import { SourceType } from "./importer/SourceType";
import { parseTitleAndFrontMatter } from "./importer/frontmatter";
import { parseTitleAndFrontMatterForImport } from "./importer/frontmatter";

// UUIDs in Notion notes look like 32-character hex strings; make this somewhat more lenient
const hexIdRegex = /\b[0-9a-f]{16,}\b/;
@@ -90,6 +90,20 @@ export class ImporterClient {
private syncs: ISyncClient, // sync is keyword?
) {}

processPending = async () => {
const pendingImports = await this.knex("imports").where({
status: "pending",
});

for (const pendingImport of pendingImports) {
await this.processStagedNotes(
await this.ensureRoot(),
SourceType.Other,
new FilesImportResolver(this.knex, pendingImport.id, this.files),
);
}
};

/**
* Imports importDir into the chronicles root directory, grabbing all markdown
* and linked files; makes the following changes:
@@ -104,7 +118,8 @@
importDir: string,
sourceType: SourceType = SourceType.Other,
) => {
await this.clearImportTables();
// await this.clearImportTables();
await this.clearIncomplete();
const importerId = uuidv7obj().toHex();
const chroniclesRoot = await this.ensureRoot();

@@ -165,7 +180,7 @@

for await (const file of Files.walk(
importDir,
30, // avoid infinite loops, random guess at reasonable depth
10, // random guess at reasonable max depth

(dirent) => {
// Skip hidden files and directories
@@ -210,7 +225,7 @@

try {
// todo: fallback title to filename - uuid
const { frontMatter, body } = parseTitleAndFrontMatter(
const { frontMatter, body } = parseTitleAndFrontMatterForImport(
contents,
name,
sourceType,
@@ -270,8 +285,32 @@

await this.knex("import_notes").insert(stagedNote);
} catch (e) {
// todo: this error handler is far too big, obviously
console.error("Error processing note", file.path, e);
// todo: this error handler is too big
if ((e as any).code === "SQLITE_CONSTRAINT_PRIMARYKEY") {
console.log("Skipping re-import of note", file.path);
} else {
// track staging errors for review. For example, if a note has a title
// that is too long, or a front-matter key that is not supported, the
// user can use the table logs to fix the issue and re-run the import
try {
const noteId = uuidv7obj().toHex();
await this.knex("import_notes").insert({
importerId,
sourcePath: file.path,
content: contents,
error: (e as any).message,

// note: these all have non-null / unique constraints:
chroniclesId: noteId,
chroniclesPath: "staging_error",
journal: "staging_error",
frontMatter: "{}", // serialized JSON; the column is TEXT
status: "staging_error",
});
} catch (err) {
console.error("Error tracking staging import error", file.path, err);
}
}
}
};

@@ -300,12 +339,18 @@

const items = await this.knex<StagedNote>("import_notes").where({
importerId,
status: "pending",
});

for await (const item of items) {
const frontMatter: FrontMatter = JSON.parse(item.frontMatter);

const mdast = stringToMdast(item.content) as any as mdast.Root;
// note: At this stage, we parse ofmTags and ofmWikilinks to convert them
// to Chronicles tags and markdown links; they are not yet supported
// natively in Chronicles.
const mdast = parseMarkdownForImportProcessing(
item.content,
) as any as mdast.Root;
await this.updateNoteLinks(mdast, item, linkMapping, wikiLinkMapping);

// NOTE: A bit hacky: When we update file links, we also mark the file as referenced
@@ -374,30 +419,46 @@
// 1. Delete notes directory
// 2. Run this command
// 3. Re-run import
private clearImportTables = async () => {
clearImportTables = async () => {
await this.db.exec("DELETE FROM import_notes");
await this.db.exec("DELETE FROM import_files");
await this.db.exec("DELETE FROM imports");
};

// todo: optionally allow re-importing from a specific import directory by clearing
// all imports

// Clear errored or stuck notes so re-import can be attempted; all notes that
// are not in the 'note_created' state are deleted.
clearIncomplete = async () => {
await this.knex("import_notes").not.where({ status: "note_created" }).del();
};

// Pull all staged notes and generate a mapping of original file path
// (sourcePath) to the new file path (chroniclesPath). This is used to update
// links in the notes after they are moved.
private noteLinksMapping = async (importerId: string) => {
let linkMapping: Record<string, { journal: string; chroniclesId: string }> =
{};

const importedItems = await this.knex("import_notes")
.where({ importerId })
.select("sourcePath", "journal", "chroniclesId");
try {
let linkMapping: Record<
string,
{ journal: string; chroniclesId: string }
> = {};

const importedItems = await this.knex("import_notes")
.where({ importerId })
.select("sourcePath", "journal", "chroniclesId");

for (const item of importedItems) {
if ("error" in item && item.error) continue;
const { journal, chroniclesId, sourcePath } = item;
linkMapping[sourcePath] = { journal, chroniclesId };
}

for (const item of importedItems) {
if ("error" in item && item.error) continue;
const { journal, chroniclesId, sourcePath } = item;
linkMapping[sourcePath] = { journal, chroniclesId };
return linkMapping;
} catch (err) {
console.error("Error generating link mappings", err);
throw err;
}

return linkMapping;
};

// Pull all staged notes and generate a mapping of original note title
@@ -409,11 +470,12 @@

const importedItems = await this.knex("import_notes")
.where({ importerId })
.select("title", "journal", "chroniclesId");
.select("frontMatter", "journal", "chroniclesId", "error");

for (const item of importedItems) {
if ("error" in item && item.error) continue;
const { journal, chroniclesId, title } = item;
const { journal, chroniclesId } = item;
const title = JSON.parse(item.frontMatter).title;
linkMapping[title] = { journal, chroniclesId };
}

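This ties back to the migration above, which drops the import_notes "title" column: titles are now read out of the serialized front matter. A hypothetical row illustrates the lookup:

```ts
// Hypothetical staged row after this change; values are illustrative.
const row = {
  frontMatter: '{"title":"2024-09-29"}',
  journal: "work",
  chroniclesId: "0193acd4fa3574698c36c4514b907c70",
};
const title: string = JSON.parse(row.frontMatter).title; // "2024-09-29"
```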
2 changes: 1 addition & 1 deletion src/preload/client/importer/FilesImportResolver.ts
@@ -38,7 +38,7 @@ export class FilesImportResolver {
): Promise<string | undefined> => {
// check db for chronicles id matching name, if any
const result = await this.knex("import_files")
.where({ filename: name })
.where({ filename: name, importerId: this.importerId })
.select("chroniclesId", "extension")
.first()!;
