maintain import tables; document clearing; fix import bugs #284

Merged · 1 commit · Dec 26, 2024
1 change: 0 additions & 1 deletion src/electron/migrations/20211005142122.sql
@@ -97,7 +97,6 @@ CREATE TABLE IF NOT EXISTS "import_notes" (
   "sourcePath" TEXT NOT NULL PRIMARY KEY,
   "sourceId" TEXT,
   "error" BOOLEAN,
-  "title" TEXT NOT NULL,
   "journal" TEXT NOT NULL,
   "frontMatter" TEXT,
   "content" TEXT
56 changes: 38 additions & 18 deletions src/markdown/index.test.ts
@@ -5,7 +5,12 @@ import path from "path";
 import yaml from "yaml";
 
 import { slateToString, stringToSlate } from "./index.js";
-import { dig, parseMarkdown, parseMarkdownForImport } from "./test-utils.js";
+import {
+  dedent,
+  dig,
+  parseMarkdown,
+  parseMarkdownForImport,
+} from "./test-utils.js";
 
 // Tests can structure the data this way and use runTests to
 // test the various conversions.
@@ -619,26 +624,26 @@ describe("Whacky shit", function () {
 });
 
 describe("front matter parsing", function () {
-  const content = `---
-title: 2024-09-29
-tags: weekly-todo
-createdAt: 2024-09-30T17:50:22.000Z
-updatedAt: 2024-11-04T16:24:11.000Z
----
-#weekly-todo
-Last week: [2024-09-22](../work/0193acd4fa3574698c36c4514b907c70.md)
-**I am on call this week** [On call week of 2024-09-30](../persona/0193acd4fa45731f81350d4443c1ed16.md)
-## Monday
-`;
-
   // A very basic "it works" test
   // todo: End to end test with a real document, asserting against the database values
   it("parses front matter as an mdast node, and can be parsed with yaml.parse", function () {
+    const content = dedent(`---
+      title: 2024-09-29
+      tags: weekly-todo
+      createdAt: 2024-09-30T17:50:22.000Z
+      updatedAt: 2024-11-04T16:24:11.000Z
+      ---
+      #weekly-todo
+      Last week: [2024-09-22](../work/0193acd4fa3574698c36c4514b907c70.md)
+      **I am on call this week** [On call week of 2024-09-30](../persona/0193acd4fa45731f81350d4443c1ed16.md)
+      ## Monday
+    `);
 
     const parsed = parseMarkdown(content);
     expect(parsed.children[0].type).to.equal("yaml");
     expect(parsed.children[0].value).to.equal(
@@ -656,4 +661,19 @@
       updatedAt: "2024-11-04T16:24:11.000Z",
     });
   });
+
+  it("handles colons in front matter titles", function () {
+    const content = dedent(`---
+      title: "2024-09-29: A day to remember"
+      ---
+      Last week I...
+    `);
+
+    const parsed = parseMarkdown(content);
+    const frontMatter = yaml.parse(parsed.children[0].value as string);
+    expect(frontMatter).to.deep.equal({
+      title: "2024-09-29: A day to remember",
+    });
+  });
 });
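
The new test above guards a real YAML pitfall: an unquoted colon-space inside a scalar value is a parse error, so titles like "2024-09-29: A day to remember" must be serialized quoted. A minimal sketch (not part of the diff) using the same yaml package as the tests:

```ts
import yaml from "yaml";

// Quoted scalars may contain ": " freely.
const ok = yaml.parse('title: "2024-09-29: A day to remember"');
console.log(ok.title); // "2024-09-29: A day to remember"

// Unquoted, the second colon reads as an illegal nested mapping and throws.
try {
  yaml.parse("title: 2024-09-29: A day to remember");
} catch (err) {
  console.log("parse failed as expected:", (err as Error).message);
}
```
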
7 changes: 4 additions & 3 deletions src/markdown/index.ts
@@ -54,17 +54,18 @@ function wrapImages(tree: mdast.Root) {
   return tree;
 }
 
-// The importer has additional support for #tag and [[WikiLink]], but converts them
+// During import (processing), parse #tag and [[WikiLink]]; the importer converts them
 // to Chronicles tags and markdown links. Future versions may support these properly.
-export const parseMarkdownForImport = (markdown: string): mdast.Root => {
+export const parseMarkdownForImportProcessing = (
+  markdown: string,
+): mdast.Root => {
   return fromMarkdown(markdown, {
     extensions: [gfm(), ofmTag(), ofmWikilink(), frontmatter(["yaml"])],
     mdastExtensions: [
       gfmFromMarkdown(),
       ofmTagFromMarkdown(),
       ofmWikilinkFromMarkdown(),
       // https://github.com/micromark/micromark-extension-frontmatter?tab=readme-ov-file#preset
       // todo: support toml (need toml parser)
       frontmatterFromMarkdown(["yaml"]),
     ],
   });
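
A rough usage sketch of the renamed parser. The node type names ("ofmTag", "ofmWikilink") are assumptions based on the ofm extensions wired in above, not something this diff asserts:

```ts
import { parseMarkdownForImportProcessing } from "./index.js";

const tree = parseMarkdownForImportProcessing("#weekly-todo and [[2024-09-22]]");

// Obsidian-flavored constructs come back as dedicated mdast nodes, which the
// importer later rewrites into Chronicles tags and plain markdown links.
const paragraph = tree.children[0] as any;
console.log(paragraph.children.map((n: any) => n.type));
// expected (per the ofm extensions): ["ofmTag", "text", "ofmWikilink"]
```
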
52 changes: 51 additions & 1 deletion src/markdown/test-utils.ts
@@ -1,7 +1,7 @@
 import { Root } from "mdast";
 
 import {
-  parseMarkdownForImport as parseMarkdownForImportRaw,
+  parseMarkdownForImportProcessing as parseMarkdownForImportRaw,
   parseMarkdown as parseMarkdownRaw,
 } from "./index.js";

@@ -43,3 +43,53 @@ export function dig(obj: any, path: string) {
     }
   }, obj);
 }

+// Adapted from https://github.com/MartinKolarik/dedent-js
+// Copyright (c) 2015 Martin Kolárik. Released under the MIT license.
+export function dedent(
+  templateStrings: TemplateStringsArray | string,
+  ...values: any[]
+) {
+  let matches = [];
+  let strings =
+    typeof templateStrings === "string"
+      ? [templateStrings]
+      : templateStrings.slice();
+
+  // 1. Remove trailing whitespace.
+  strings[strings.length - 1] = strings[strings.length - 1].replace(
+    /\r?\n([\t ]*)$/,
+    "",
+  );
+
+  // 2. Find all line breaks to determine the highest common indentation level.
+  for (let i = 0; i < strings.length; i++) {
+    let match;
+
+    if ((match = strings[i].match(/\n[\t ]+/g))) {
+      matches.push(...match);
+    }
+  }
+
+  // 3. Remove the common indentation from all strings.
+  if (matches.length) {
+    let size = Math.min(...matches.map((value) => value.length - 1));
+    let pattern = new RegExp(`\n[\t ]{${size}}`, "g");
+
+    for (let i = 0; i < strings.length; i++) {
+      strings[i] = strings[i].replace(pattern, "\n");
+    }
+  }
+
+  // 4. Remove leading whitespace.
+  strings[0] = strings[0].replace(/^\r?\n/, "");
+
+  // 5. Perform interpolation.
+  let string = strings[0];
+
+  for (let i = 0; i < values.length; i++) {
+    string += values[i] + strings[i + 1];
+  }
+
+  return string;
+}
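
A quick sketch of what dedent buys the tests above: the indentation shared by all lines after a newline is stripped, so fixtures can be indented naturally inside a test body without corrupting YAML front matter. This follows directly from the implementation shown; the fixture itself is made up:

```ts
const fixture = dedent(`---
  title: example
  ---
  ## Monday
`);

console.log(fixture);
// ---
// title: example
// ---
// ## Monday
```
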
108 changes: 85 additions & 23 deletions src/preload/client/importer.ts
@@ -21,11 +21,11 @@ import { uuidv7obj } from "uuidv7";
 import {
   isNoteLink,
   mdastToString,
-  parseMarkdownForImport as stringToMdast,
+  parseMarkdownForImportProcessing,
 } from "../../markdown";
 import { FilesImportResolver } from "./importer/FilesImportResolver";
 import { SourceType } from "./importer/SourceType";
-import { parseTitleAndFrontMatter } from "./importer/frontmatter";
+import { parseTitleAndFrontMatterForImport } from "./importer/frontmatter";
 
 // UUID in Notion notes look like 32 character hex strings; make this somewhat more lenient
 const hexIdRegex = /\b[0-9a-f]{16,}\b/;
@@ -90,6 +90,20 @@ export class ImporterClient {
     private syncs: ISyncClient, // sync is keyword?
   ) {}
 
+  processPending = async () => {
+    const pendingImports = await this.knex("imports").where({
+      status: "pending",
+    });
+
+    for (const pendingImport of pendingImports) {
+      await this.processStagedNotes(
+        await this.ensureRoot(),
+        SourceType.Other,
+        new FilesImportResolver(this.knex, pendingImport.id, this.files),
+      );
+    }
+  };
+
   /**
    * Imports importDir into the chronicles root directory, grabbing all markdown
    * and linked files; makes the following changes:
@@ -104,7 +118,8 @@
     importDir: string,
     sourceType: SourceType = SourceType.Other,
   ) => {
-    await this.clearImportTables();
+    // await this.clearImportTables();
+    await this.clearIncomplete();
     const importerId = uuidv7obj().toHex();
     const chroniclesRoot = await this.ensureRoot();
 
@@ -165,7 +180,7 @@
 
     for await (const file of Files.walk(
       importDir,
-      30, // avoid infinite loops, random guess at reasonable depth
+      10, // random guess at reasonable max depth
 
       (dirent) => {
         // Skip hidden files and directories
@@ -210,7 +225,7 @@
 
     try {
       // todo: fallback title to filename - uuid
-      const { frontMatter, body } = parseTitleAndFrontMatter(
+      const { frontMatter, body } = parseTitleAndFrontMatterForImport(
         contents,
         name,
         sourceType,
@@ -270,8 +285,32 @@
 
       await this.knex("import_notes").insert(stagedNote);
     } catch (e) {
-      // todo: this error handler is far too big, obviously
-      console.error("Error processing note", file.path, e);
+      // todo: this error handler is too big
+      if ((e as any).code === "SQLITE_CONSTRAINT_PRIMARYKEY") {
+        console.log("Skipping re-import of note", file.path);
+      } else {
+        // track staging errors for review. For example, if a note has a title
+        // that is too long, or a front-matter key that is not supported, etc.,
+        // the user can use the table logs to fix and re-run the import
+        try {
+          const noteId = uuidv7obj().toHex();
+          await this.knex("import_notes").insert({
+            importerId,
+            sourcePath: file.path,
+            content: contents,
+            error: (e as any).message,
+
+            // note: these all have non-null / unique constraints:
+            chroniclesId: noteId,
+            chroniclesPath: "staging_error",
+            journal: "staging_error",
+            frontMatter: {},
+            status: "staging_error",
+          });
+        } catch (err) {
+          console.error("Error tracking staging import error", file.path, err);
+        }
+      }
     }
   };
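
The staging_error rows double as a review log. A sketch (not part of this PR) of how they could be queried before re-running an import; "knex" stands in for however the instance is obtained, and the column names match the insert above:

```ts
async function listStagingErrors(knex: any) {
  const failed = await knex("import_notes")
    .where({ status: "staging_error" })
    .select("sourcePath", "error");

  for (const row of failed) {
    console.log(`${row.sourcePath}: ${row.error}`);
  }
}
```
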

@@ -300,12 +339,18 @@
 
     const items = await this.knex<StagedNote>("import_notes").where({
       importerId,
+      status: "pending",
     });
 
     for await (const item of items) {
       const frontMatter: FrontMatter = JSON.parse(item.frontMatter);
 
-      const mdast = stringToMdast(item.content) as any as mdast.Root;
+      // note: at this stage we parse ofmTags and ofmWikilinks in order to convert
+      // them to Chronicles tags and markdown links; they are not natively
+      // supported in Chronicles as of now.
+      const mdast = parseMarkdownForImportProcessing(
+        item.content,
+      ) as any as mdast.Root;
       await this.updateNoteLinks(mdast, item, linkMapping, wikiLinkMapping);
 
       // NOTE: A bit hacky: when we update file links, we also mark the file as referenced
@@ -374,30 +419,46 @@
   // 1. Delete notes directory
   // 2. Run this command
   // 3. Re-run import
-  private clearImportTables = async () => {
+  clearImportTables = async () => {
     await this.db.exec("DELETE FROM import_notes");
     await this.db.exec("DELETE FROM import_files");
     await this.db.exec("DELETE FROM imports");
   };
 
+  // todo: optionally allow re-importing from a specific import directory by clearing
+  // all imports
+
+  // Clear errored or stuck notes so re-import can be attempted; all notes that
+  // are not in the 'note_created' state are deleted.
+  clearIncomplete = async () => {
+    await this.knex("import_notes").not.where({ status: "note_created" }).del();
+  };
+
   // Pull all staged notes and generate a mapping of original file path
   // (sourcePath) to the new file path (chroniclesPath). This is used to update
   // links in the notes after they are moved.
   private noteLinksMapping = async (importerId: string) => {
-    let linkMapping: Record<string, { journal: string; chroniclesId: string }> =
-      {};
-
-    const importedItems = await this.knex("import_notes")
-      .where({ importerId })
-      .select("sourcePath", "journal", "chroniclesId");
-
-    for (const item of importedItems) {
-      if ("error" in item && item.error) continue;
-      const { journal, chroniclesId, sourcePath } = item;
-      linkMapping[sourcePath] = { journal, chroniclesId };
-    }
-
-    return linkMapping;
+    try {
+      let linkMapping: Record<
+        string,
+        { journal: string; chroniclesId: string }
+      > = {};
+
+      const importedItems = await this.knex("import_notes")
+        .where({ importerId })
+        .select("sourcePath", "journal", "chroniclesId");
+
+      for (const item of importedItems) {
+        if ("error" in item && item.error) continue;
+        const { journal, chroniclesId, sourcePath } = item;
+        linkMapping[sourcePath] = { journal, chroniclesId };
+      }
+
+      return linkMapping;
+    } catch (err) {
+      console.error("Error generating link mappings", err);
+      throw err;
+    }
   };

   // Pull all staged notes and generate a mapping of original note title
@@ -409,11 +470,12 @@
 
     const importedItems = await this.knex("import_notes")
      .where({ importerId })
-      .select("title", "journal", "chroniclesId");
+      .select("frontMatter", "journal", "chroniclesId", "error");
 
     for (const item of importedItems) {
       if ("error" in item && item.error) continue;
-      const { journal, chroniclesId, title } = item;
+      const { journal, chroniclesId } = item;
+      const title = JSON.parse(item.frontMatter).title;
       linkMapping[title] = { journal, chroniclesId };
     }
 
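
This last hunk pairs with the migration at the top of the PR: with the dedicated title column dropped from import_notes, the title mapping derives titles from the stored front matter JSON instead. An illustration with made-up values:

```ts
// A staged row as it might look after this change.
const row = {
  journal: "work",
  chroniclesId: "0193acd4fa3574698c36c4514b907c70", // hypothetical id
  frontMatter: JSON.stringify({ title: "2024-09-29: A day to remember" }),
};

// The same extraction the mapping loop performs.
const title = JSON.parse(row.frontMatter).title;
console.log(title); // "2024-09-29: A day to remember"
```
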
2 changes: 1 addition & 1 deletion src/preload/client/importer/FilesImportResolver.ts
@@ -38,7 +38,7 @@ export class FilesImportResolver {
   ): Promise<string | undefined> => {
     // check db for chronicles id matching name, if any
     const result = await this.knex("import_files")
-      .where({ filename: name })
+      .where({ filename: name, importerId: this.importerId })
       .select("chroniclesId", "extension")
       .first()!;

Review comment from the author on the .where() change: without the importerId filter, the lookup may grab files from different imports, specifically from different import directories. Since the current wikilinks do not handle duplicate file names, this could (and did) result in, e.g., a different image or video being displayed than the one in the original note (if they had the same name).
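
The failure mode from the review comment, sketched with plain data (the row shape is inferred from the query above): filenames are not unique across imports, so an unscoped lookup can return a file staged by a different import.

```ts
type ImportFileRow = {
  filename: string;
  importerId: string;
  chroniclesId: string;
  extension: string;
};

// Two separate imports each staged a file named "image.png".
const rows: ImportFileRow[] = [
  { filename: "image.png", importerId: "import-a", chroniclesId: "aaa1", extension: "png" },
  { filename: "image.png", importerId: "import-b", chroniclesId: "bbb2", extension: "png" },
];

// Old behavior: first match wins, regardless of which import it came from.
const unscoped = rows.find((r) => r.filename === "image.png");

// Fixed behavior: scope the lookup to the current importer.
const scoped = rows.find(
  (r) => r.filename === "image.png" && r.importerId === "import-b",
);

console.log(unscoped?.chroniclesId, scoped?.chroniclesId); // "aaa1" "bbb2"
```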