Skip to content

Commit

Permalink
add markdown/notion importer (#258)
Browse files Browse the repository at this point in the history
* add markdown/notion importer

- add importer for my notion export
- re-name prior importer client to sync client
- add diff dependency for temporary hacky testing helper
- allow creating documents without indexing, to support import-then-sync approach
- types to es2020 for a misc lib function I needed

* track import items; resolve note links

- add table to track import items and import links to support importing in steps
- resolve notion note links while importing; if good update them to chronicles format so they work as note links

very hacky / messy

* also import nested folders; imported name validation / fixing

* clean-up tests; properly implement them

- re-work tests to separate known issues with my current Notion export, vs hypothetical ones and non-Notion import issues
- handle remaining cases
- clean-up parsing-related logging

* better journal name generation

- improve name-generation by using just folder name, and allowing root folder to be a journal name; slicing to handle length. Tons of edge cases but pretty decent overall

* silence note link errors, rely on import items instead

* parse Category as journal name

* move (copy) file references, track import status

- now referenced files are imported to the _attachments directory
- import_item status is now updated to help with debugging

next steps are to debug some lingering failed imports (~5/200 notes), then track all links to confirm everything is imported (or not) as expected, then move on to clean-up

* fix race conditions / missing await

- fix some missing awaits that resulted in race conditions; all notes now import with note and file references
- add basic status tracking on import items; very messy

needs refactoring and cleanup

* fix a few file import edge cases

- if file link points to a url, don't try to import it (as a file); ignore instead
- if file link has query params (e.g. ?size=800), strip them

Fixes about 25 file import errors (all valid files now import)

* isolate test and front matter functions

- move faux tests into own routine, add button for it
- move front-matter code into isolated module
- light clean-up re-naming part 2 / ?

* use staging table for file references

- use a staging table for file references when importing; it's slower but easier to debug and validate
- remove import links table / tracking, unused at this point
- some misc. clean-up (still v. messy, but improving)

* more cleanup

- move all db code to knex
- simplify note links updating
- organize note links / file links logic so it's grouped in importer
- clean-up many comments
- drop error tracking on import items except for final step

* minor cleanup; hacky test

* re-name import_notes
  • Loading branch information
cloverich authored Nov 7, 2024
1 parent d1e27f7 commit 4fc3ae7
Show file tree
Hide file tree
Showing 21 changed files with 1,665 additions and 85 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
"chai": "^5.0.3",
"class-variance-authority": "^0.7.0",
"date-fns": "^3.3.1",
"deep-object-diff": "^1.1.9",
"electron": "^28.2.0",
"esbuild": "^0.20.0",
"evergreen-ui": "^7.1.9",
Expand Down
17 changes: 6 additions & 11 deletions src/electron/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,9 @@ app.on("activate", () => {
// event.reply("preferences-updated");
// });

// Preferences in UI allows user to specify user files directory
ipcMain.on("select-chronicles-root", async (event, arg) => {
// Preferences in UI allows user to specify chronicles root
// and imports directories
ipcMain.on("select-directory", async (event, arg) => {
if (!mainWindow) {
console.error(
"received request to open file picker but mainWindow is undefined",
Expand All @@ -325,35 +326,29 @@ ipcMain.on("select-chronicles-root", async (event, arg) => {

// user selected cancel
if (!filepath) {
event.reply("preferences-updated", {
name: "NOTES_DIR",
event.reply("directory-selected", {
value: null,
error: null,
});
return;
}

// todo: feedback to user if error
// https://github.com/cloverich/chronicles/issues/52
try {
ensureDir(filepath);
settings.set("NOTES_DIR", filepath);
} catch (err) {
console.error(
`Error accessing directory ${filepath}; canceling update to NOTES_DIR`,
err,
);
event.reply("preferences-updated", {
name: "NOTES_DIR",
event.reply("directory-selected", {
value: null,
error: `Error accessing directory ${filepath}; canceling update to NOTES_DIR`,
});
return;
}

// NOTE: Do not change this name without updating UI handlers
event.reply("preferences-updated", {
name: "NOTES_DIR",
event.reply("directory-selected", {
value: filepath,
error: null,
});
Expand Down
39 changes: 38 additions & 1 deletion src/electron/migrations/20211005142122.sql
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,41 @@ CREATE TABLE IF NOT EXISTS "document_tags" (

CREATE INDEX IF NOT EXISTS "documents_title_idx" ON "documents"("title");
CREATE INDEX IF NOT EXISTS "documents_createdat_idx" ON "documents"("createdAt");
CREATE INDEX IF NOT EXISTS "tags_name_idx" ON "document_tags"("tag");
CREATE INDEX IF NOT EXISTS "tags_name_idx" ON "document_tags"("tag");


-- DROP TABLE IF EXISTS "imports";
-- DROP TABLE IF EXISTS "import_notes";
-- DROP TABLE IF EXISTS "import_files";

-- Tracks import runs: one row per import, keyed by "id".
-- NOTE(review): "importDir" is presumably the source directory the import
-- reads from; confirm against the importer client.
CREATE TABLE IF NOT EXISTS "imports" (
"id" TEXT NOT NULL PRIMARY KEY,
"importDir" TEXT NOT NULL,
"createdAt" TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
"status" TEXT NOT NULL -- active, inactive
);

-- Staging table for files referenced by imported notes. Rows are keyed by the
-- resolved source path, so each referenced file is staged at most once.
-- NOTE(review): "importerId" presumably references "imports"."id"; no foreign
-- key is declared here, so confirm against the importer client.
CREATE TABLE IF NOT EXISTS "import_files" (
"importerId" TEXT NOT NULL,
"sourcePathResolved" TEXT NOT NULL PRIMARY KEY,
-- Single quotes: in SQL a double-quoted token is an identifier; SQLite only
-- accepts it as a string literal through a legacy compatibility quirk.
"status" TEXT NOT NULL DEFAULT 'pending',
"chroniclesId" TEXT NOT NULL,
"extension" TEXT NOT NULL,
"error" TEXT
);

-- Import notes table: one row per source note being imported, keyed by the
-- note's source path.
-- NOTE(review): "importerId" presumably references "imports"."id"; no foreign
-- key is declared here, so confirm against the importer client.
CREATE TABLE IF NOT EXISTS "import_notes" (
"importerId" TEXT NOT NULL,
"status" TEXT NOT NULL, -- success, error
"chroniclesId" TEXT NOT NULL,
"chroniclesPath" TEXT NOT NULL,
"sourcePath" TEXT NOT NULL PRIMARY KEY,
"sourceId" TEXT,
"error" BOOLEAN,
"title" TEXT NOT NULL,
"journal" TEXT NOT NULL,
"frontMatter" TEXT, -- NOTE(review): presumably serialized front matter; confirm format
"content" TEXT
);
2 changes: 2 additions & 0 deletions src/hooks/stores/journals.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ export class JournalsStore {
}
};

// todo: client.journals has its own validation that is more robust than this
// and is now exported; use that
validateName = (name: string) => {
name = name.trim();
if (!name) return ["Journal name cannot be empty", name];
Expand Down
14 changes: 7 additions & 7 deletions src/markdown/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { Node as SNode } from "slate";
import { unified } from "unified";
import { remarkToSlate, slateToRemark } from "./remark-slate-transformer";

import * as Mdast from "ts-mdast";
export * from "ts-mdast";

// I usually forget how unified works, so just leaving some notes for reference
Expand Down Expand Up @@ -34,12 +35,11 @@ const stringToSlateProcessor = parser
.use(remarkToSlate);

export function mdastToString(mdast: any): string {
// todo: types
return stringifier.stringify(mdast) as any;
return stringifier.stringify(mdast) as any as string;
}

export function stringToMdast(text: string) {
return parser.parse(text);
export function stringToMdast(text: string): Mdast.Root {
return parser.parse(text) as any as Mdast.Root;
}

export function stringToSlate(text: string) {
Expand All @@ -62,11 +62,11 @@ export function stringToSlate(text: string) {
* debug helper function to see the slate to mdast conversion
* before stringifying
*/
export function slateToMdast(nodes: SNode[]): any {
export function slateToMdast(nodes: SNode[]): Mdast.Root {
return slateToStringProcessor.runSync({
type: "root",
children: nodes,
});
}) as any as Mdast.Root;
}

export function slateToString(nodes: SNode[]): string {
Expand All @@ -78,5 +78,5 @@ export function slateToString(nodes: SNode[]): string {
children: nodes,
});

return slateToStringProcessor.stringify(ast) as any;
return slateToStringProcessor.stringify(ast) as any as string;
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import {
ELEMENT_UL,
} from "@udecode/plate"; // todo: sub-package which has only elements?

import { toSlateLink } from "../../../views/edit/editor/features/note-linking/toMdast";
import { toSlateNoteLink } from "../../../views/edit/editor/features/note-linking/toMdast";

export type Decoration = {
[key in (
Expand Down Expand Up @@ -378,7 +378,7 @@ export type Link = ReturnType<typeof createLink>;
function createLink(node: mdast.Link, deco: Decoration) {
const { children, url, title } = node;

const res = toSlateLink({ url, children, deco, convertNodes });
const res = toSlateNoteLink({ url, children, deco, convertNodes });

if (res) return res;

Expand Down
26 changes: 21 additions & 5 deletions src/preload/client/documents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ export class DocumentsClient {
} else {
args.createdAt = new Date().toISOString();
args.updatedAt = new Date().toISOString();
id = await this.createDocument(args);
[id] = await this.createDocument(args);
}

return this.findById({ id });
Expand All @@ -224,11 +224,27 @@ updatedAt: ${document.updatedAt}
return `${fm}\n\n${document.content}`;
};

private createDocument = async (args: SaveRequest): Promise<string> => {
const id = uuidv7();
/**
 * Create (upload) a new document and, optionally, index it.
 *
 * @param args - The document to create; `args.id` is reused when supplied,
 *   otherwise a new uuidv7 is generated
 * @param index - Whether to index the document - set to false when importing
 *   (we import, then call `sync` instead)
 * @returns A tuple `[id, docPath]`: the document id (the value produced by
 *   `createIndex` when indexing) and the path returned by `files.uploadDocument`
 */
createDocument = async (
  args: SaveRequest,
  index: boolean = true,
): Promise<[string, string]> => {
  const id = args.id || uuidv7();
  const content = this.contentsWithFrontMatter(args);
  const docPath = await this.files.uploadDocument(
    { id, content },
    args.journal,
  );

  if (!index) {
    return [id, docPath];
  }

  // Await before building the tuple: if createIndex returns a promise, the
  // caller would otherwise destructure a Promise instead of the id. Awaiting
  // a plain (non-promise) value is a no-op, so this is safe either way.
  const indexedId = await this.createIndex({ id, ...args });
  return [indexedId, docPath];
};

private updateDocument = async (args: SaveRequest): Promise<void> => {
Expand Down
56 changes: 51 additions & 5 deletions src/preload/client/files.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import Store from "electron-store";

import fs from "fs";
import mkdirp from "mkdirp";
import path from "path";
import { uuidv7 } from "uuidv7";
const { readFile, writeFile, access, stat } = fs.promises;
Expand Down Expand Up @@ -139,6 +138,7 @@ export class FilesClient {
* @param baseDir - Chronicles root directory
* @param document
* @param journalName
* @returns - The path to the saved document
*/
uploadDocument = async (
document: { id: string; content: string },
Expand All @@ -149,8 +149,9 @@ export class FilesClient {
document.id,
);

await mkdirp(journalPath);
await fs.promises.mkdir(journalPath, { recursive: true });
await fs.promises.writeFile(docPath, document.content);
return docPath;
};

deleteDocument = async (documentId: string, journal: string) => {
Expand Down Expand Up @@ -197,14 +198,46 @@ export class FilesClient {
return { docPath: resolvedDocPath, journalPath: resolvedJournalPath };
};

/**
 * Check if a filepath exists, is a regular file, and can be read and written;
 * useful for confirming imported / updated links are valid.
 *
 * @param filepath - Path to check
 * @param propagateErr - When true (default), any failure other than the path
 *   not existing (ENOENT) is re-thrown; when false, every failure yields `false`
 * @returns `true` when the path is a regular file with read + write access,
 *   `false` when it does not exist (or on any failure when `propagateErr` is false)
 * @throws When `propagateErr` is true and the path is not a regular file, or
 *   the stat / access check fails with a code other than ENOENT
 */
validFile = async (
  filepath: string,
  propagateErr: boolean = true,
): Promise<boolean> => {
  try {
    const file = await stat(filepath);
    if (!file.isFile()) {
      // Thrown inside the try on purpose: it has no `code`, so the handler
      // below re-throws it or maps it to `false` depending on propagateErr.
      throw new Error(
        `validFile called but ${filepath} exists and is not a regular file`,
      );
    }

    // access() rejects when any of the requested permissions is missing.
    await access(filepath, fs.constants.R_OK | fs.constants.W_OK);
    return true;
  } catch (err: any) {
    if (err.code !== "ENOENT" && propagateErr) throw err;
    return false;
  }
};

/**
* Ensure directory exists and can be accessed
*
* WARN: Logic to handle errors when writing / reading files from directory
* are still needed as access check may be inaccurate or could change while
* app is running.
*/
async ensureDir(directory: string): Promise<void> {
ensureDir = async (directory: string): Promise<void> => {
try {
const dir = await stat(directory);
if (!dir.isDirectory()) {
Expand All @@ -214,12 +247,25 @@ export class FilesClient {
}
} catch (err: any) {
if (err.code !== "ENOENT") throw err;
await mkdirp(directory);
await fs.promises.mkdir(directory, { recursive: true });
}

// NOTE: Documentation suggests Windows may report ok here, but then choke
// when actually writing. Better to move this logic to the actual file
// upload handlers.
await access(directory, fs.constants.R_OK | fs.constants.W_OK);
}
};

/**
 * Copy a file from `src` to `dest`, overwriting `dest` if it already exists.
 *
 * Uses `fs.promises.copyFile` rather than piping streams by hand: `pipe()`
 * does not close the destination stream when the source stream errors, which
 * leaked the write handle and left a created / truncated `dest` behind (for
 * example when `src` does not exist).
 *
 * @param src - Path of the file to copy
 * @param dest - Path to write the copy to
 * @returns The destination path, once the copy has completed
 * @throws Rejects with the underlying filesystem error if the copy fails
 */
copyFile = async (src: string, dest: string): Promise<string> => {
  await fs.promises.copyFile(src, dest);
  return dest;
};
}
Loading

0 comments on commit 4fc3ae7

Please sign in to comment.