From f494a5d032e67aa6a3dfa9ecef7ea6eb2cd943d6 Mon Sep 17 00:00:00 2001 From: chris <1010084+cloverich@users.noreply.github.com> Date: Mon, 9 Dec 2024 11:23:22 -0800 Subject: [PATCH] When syncing ignore more files and directories, refactor walk (#281) - refactor: drop klaw, replace with nodes fs.opendir - do not descend into directories we don't intend to sync (hidden folders like .git, node_modules, _attachments in sync specifically) - Add more ignored files / folders and indicate which are skipped in preferences (todo: make configurable) - refactor: drop mkdirp (after discovering it mostly exists in fs now) --- package.json | 3 - src/electron/ensureDir.js | 9 ++- src/electron/userFilesInit.js | 1 - src/preload/client/files.ts | 18 +---- src/preload/client/importer.ts | 22 +++--- .../client/importer/FilesImportResolver.ts | 4 +- src/preload/client/sync.ts | 57 +++++++-------- src/preload/client/types.ts | 13 ++++ src/preload/files.ts | 73 +++++++++++++------ src/views/preferences/index.tsx | 19 +++++ yarn.lock | 16 +--- 11 files changed, 132 insertions(+), 103 deletions(-) diff --git a/package.json b/package.json index 8a0d7bc..218e6ac 100644 --- a/package.json +++ b/package.json @@ -22,7 +22,6 @@ "better-sqlite3": "^9.2.2", "electron-store": "^8.0.1", "knex": "^2.5.0", - "mkdirp": "^1.0.4", "uuidv7": "^0.6.3" }, "devDependencies": { @@ -41,7 +40,6 @@ "@radix-ui/react-visually-hidden": "^1.1.0", "@types/better-sqlite3": "^5.4.0", "@types/chai": "^4.3.11", - "@types/klaw": "^3.0.1", "@types/luxon": "^2.4.0", "@types/mkdirp": "^1.0.1", "@types/mocha": "^7.0.2", @@ -56,7 +54,6 @@ "electron": "^28.2.0", "esbuild": "^0.20.0", "evergreen-ui": "^7.1.9", - "klaw": "^3.0.0", "lodash": "^4.17.21", "lucide-react": "^0.314.0", "luxon": "^2.4.0", diff --git a/src/electron/ensureDir.js b/src/electron/ensureDir.js index e23c109..55f1b28 100644 --- a/src/electron/ensureDir.js +++ b/src/electron/ensureDir.js @@ -1,11 +1,10 @@ const fs = require("fs"); -const mkdirp = 
require("mkdirp"); /** * Borrowed from api files, since its typescript and this is not * Reconcile that later */ -exports.ensureDir = function ensureDir(directory) { +exports.ensureDir = function ensureDir(directory, create = true) { if (!directory) { throw new Error("ensureDir called with no directory path"); } @@ -19,7 +18,11 @@ exports.ensureDir = function ensureDir(directory) { } } catch (err) { if (err.code !== "ENOENT") throw err; - mkdirp.sync(directory); + try { + fs.mkdirSync(directory, { recursive: true }); + } catch (err) { + if (err.code !== "EEXIST") throw err; + } } // NOTE: Documentation suggests Windows may report ok here, but then choke diff --git a/src/electron/userFilesInit.js b/src/electron/userFilesInit.js index 4204fb6..38f5bd4 100644 --- a/src/electron/userFilesInit.js +++ b/src/electron/userFilesInit.js @@ -1,6 +1,5 @@ const path = require("path"); const fs = require("fs"); -const mkdirp = require("mkdirp"); const settings = require("./settings"); const { ensureDir } = require("./ensureDir"); diff --git a/src/preload/client/files.ts b/src/preload/client/files.ts index cd44a79..eea781b 100644 --- a/src/preload/client/files.ts +++ b/src/preload/client/files.ts @@ -3,6 +3,7 @@ import Store from "electron-store"; import fs from "fs"; import path from "path"; import { uuidv7obj } from "uuidv7"; +import { Files } from "../files"; const { readFile, writeFile, access, stat } = fs.promises; interface UploadResponse { @@ -149,7 +150,7 @@ export class FilesClient { document.id, ); - await fs.promises.mkdir(journalPath, { recursive: true }); + await Files.mkdirp(journalPath); await fs.promises.writeFile(docPath, document.content); return docPath; }; @@ -171,18 +172,7 @@ export class FilesClient { createFolder = async (name: string) => { const baseDir = this.settings.get("NOTES_DIR") as string; const newPath = path.join(baseDir, name); - - try { - await fs.promises.mkdir(newPath, { recursive: true }); - } catch (err) { - // If it already exists, good to 
go - // note: ts can't find this type: instanceof ErrnoException - if ((err as any).code === "EEXIST") { - return newPath; - } else { - throw err; - } - } + await Files.mkdirp(newPath); }; removeFolder = async (name: string) => { @@ -257,7 +247,7 @@ export class FilesClient { } } catch (err: any) { if (err.code !== "ENOENT") throw err; - await fs.promises.mkdir(directory, { recursive: true }); + await Files.mkdirp(directory); } // NOTE: Documentation suggests Windows may report ok here, but then choke diff --git a/src/preload/client/importer.ts b/src/preload/client/importer.ts index 5d22fe4..b5cf66d 100644 --- a/src/preload/client/importer.ts +++ b/src/preload/client/importer.ts @@ -11,6 +11,7 @@ import { } from "./journals"; import { IPreferencesClient } from "./preferences"; import { ISyncClient } from "./sync"; +import { SKIPPABLE_FILES, SKIPPABLE_PREFIXES } from "./types"; import * as mdast from "mdast"; @@ -26,8 +27,6 @@ import { FilesImportResolver } from "./importer/FilesImportResolver"; import { SourceType } from "./importer/SourceType"; import { parseTitleAndFrontMatter } from "./importer/frontmatter"; -export const SKIPPABLE_FILES = new Set(".DS_Store"); - // UUID in Notion notes look like 32 character hex strings; make this somewhat more lenient const hexIdRegex = /\b[0-9a-f]{16,}\b/; @@ -167,20 +166,21 @@ export class ImporterClient { for await (const file of Files.walk( importDir, - // todo: Skip some directories (e.g. .git, .vscode, etc.) - (filestats) => { - // Skip directories, symbolic links, etc. 
- if (!filestats.stats.isFile()) return false; - - const name = path.basename(filestats.path); + 30, // avoid infinite loops, random guess at reasonable depth + (dirent) => { // Skip hidden files and directories - if (name.startsWith(".")) return false; - if (SKIPPABLE_FILES.has(name)) return false; + if (dirent.name.startsWith(".")) return false; + if (SKIPPABLE_FILES.has(dirent.name)) return false; + + // Skip prefixes including _, unless it's _attachments + if (dirent.name === "_attachments") return true; + for (const prefix of SKIPPABLE_PREFIXES) { + if (dirent.name.startsWith(prefix)) return false; + } return true; }, - {}, )) { if (file.path.endsWith(".md")) { await this.stageNote( diff --git a/src/preload/client/importer/FilesImportResolver.ts b/src/preload/client/importer/FilesImportResolver.ts index 843c82d..bfccc83 100644 --- a/src/preload/client/importer/FilesImportResolver.ts +++ b/src/preload/client/importer/FilesImportResolver.ts @@ -4,7 +4,7 @@ import mdast from "mdast"; import path from "path"; import { uuidv7obj } from "uuidv7"; import { isNoteLink } from "../../../markdown"; -import { PathStatsFile } from "../../files"; +import { Files, PathStatsFile } from "../../files"; import { IFilesClient } from "../files"; const ATTACHMENTS_DIR = "_attachments"; @@ -226,7 +226,7 @@ export class FilesImportResolver { }); const attachmentsDir = path.join(chroniclesRoot, ATTACHMENTS_DIR); - await fs.promises.mkdir(attachmentsDir, { recursive: true }); + await Files.mkdirp(attachmentsDir); for await (const file of files) { const { sourcePathResolved, extension, chroniclesId } = file; diff --git a/src/preload/client/sync.ts b/src/preload/client/sync.ts index 2462390..d50dff9 100644 --- a/src/preload/client/sync.ts +++ b/src/preload/client/sync.ts @@ -1,4 +1,5 @@ import { Database } from "better-sqlite3"; +import fs from "fs"; import { Knex } from "knex"; import path from "path"; import { UUID } from "uuidv7"; @@ -7,11 +8,31 @@ import { IDocumentsClient } from
"./documents"; import { IFilesClient } from "./files"; import { IJournalsClient } from "./journals"; import { IPreferencesClient } from "./preferences"; -import { GetDocumentResponse } from "./types"; +import { + GetDocumentResponse, + SKIPPABLE_FILES, + SKIPPABLE_PREFIXES, +} from "./types"; export type ISyncClient = SyncClient; -const SKIPPABLE_FILES = new Set(".DS_Store"); +// Indicates which files to index when syncing +const shouldIndex = (dirent: fs.Dirent) => { + for (const prefix of SKIPPABLE_PREFIXES) { + if (dirent.name.startsWith(prefix)) return false; + } + + if (SKIPPABLE_FILES.has(dirent.name)) return false; + + if (dirent.isFile()) { + // for files, only index markdown files, unlike importer + // which will import markdown and other files (if referenced) + return dirent.name.endsWith(".md"); + } else { + // at this point assume its a directory that likely has markdown files + return true; + } +}; export class SyncClient { constructor( @@ -80,31 +101,8 @@ updatedAt: ${document.updatedAt} let syncedCount = 0; - for await (const file of Files.walk(rootDir, () => true, { - // depth: dont go into subdirectories - depthLimit: 1, - })) { - // For some reason it yields the root folder first, what is the point of that shrug - if (file.path == rootDir) continue; - - const { ext, name, dir } = path.parse(file.path); - if (name.startsWith(".")) continue; - if (SKIPPABLE_FILES.has(name)) continue; - - if (file.stats.isDirectory()) { - const dirname = name; - if (dirname === "_attachments") { - continue; - } - - // Defer creating journals until we find a markdown file - // in the directory - continue; - } - - // Only process markdown files - if (ext !== ".md") continue; - + for await (const file of Files.walk(rootDir, 1, shouldIndex)) { + const { name, dir } = path.parse(file.path); // filename is id; ensure it is formatted as a uuidv7 const documentId = name; @@ -124,11 +122,6 @@ updatedAt: ${document.updatedAt} // NOTE: This directory check only works because we 
limit depth to 1 const dirname = path.basename(dir); - // _attachments is for images (etc), not notes - if (dirname === "_attachments") { - continue; - } - // Once we find at least one markdown file, we treat this directory // as a journal if (!(dirname in journals)) { diff --git a/src/preload/client/types.ts b/src/preload/client/types.ts index 5281ba8..bc3f15e 100644 --- a/src/preload/client/types.ts +++ b/src/preload/client/types.ts @@ -127,3 +127,16 @@ export interface SaveRequest { createdAt?: string; updatedAt?: string; } + +// Nobody would put node_modules in their note directory... right? +// todo: Make this configurable +export const SKIPPABLE_FILES = new Set([ + "node_modules", + "dist", + "build", + "out", +]); + +// Skip hidden folders and files, especially .git, .DS_Store, .Thumbs.db, etc +// NOTE: This also skips _attachments, so add exclusion in importer routine +export const SKIPPABLE_PREFIXES = new Set([".", "_", "*", "~"]); diff --git a/src/preload/files.ts b/src/preload/files.ts index a8f72e5..e53e4c2 100644 --- a/src/preload/files.ts +++ b/src/preload/files.ts @@ -1,8 +1,6 @@ import fs, { Stats } from "fs"; -import mkdirp from "mkdirp"; import path from "path"; import { NotFoundError, ValidationError } from "./errors"; -import walk = require("klaw"); const { readFile, writeFile, access, stat } = fs.promises; const readFileStr = (path: string) => readFile(path, "utf8"); @@ -13,7 +11,7 @@ export interface PathStatsFile { stats: Stats; } -type ShouldIndex = (file: PathStatsFile) => boolean; +type ShouldIndex = (file: fs.Dirent) => boolean; // for matching exact (ex: 2020-05-01) const reg = /^\d{4}-\d{2}-\d{2}$/; @@ -40,6 +38,26 @@ export class Files { ); } + static async mkdirp(dir: string) { + try { + await fs.promises.mkdir(dir, { recursive: true }); + } catch (err) { + // note: ts can't find this type: instanceof ErrnoException + if ((err as any).code === "EEXIST") { + // confirm it's a directory + const stats = await fs.promises.stat(dir); + if 
(!stats.isDirectory()) { + throw new Error(`[Files.mkdirp] ${dir} already exists as a file`); + } + + // already exists, good to go + return dir; + } else { + throw err; + } + } + } + static async read(fp: string) { try { return await readFileStr(fp); @@ -70,7 +88,7 @@ export class Files { const fp = Files.pathForEntry(journalPath, date); const dir = path.parse(fp).dir; - await mkdirp(dir); + await Files.mkdirp(dir); return await writeFile(fp, contents); } @@ -102,27 +120,38 @@ export class Files { } /** + * Walk directory, for index and sync routines + * @param dir - Where to start walking + * @param depthLimit - A limit on how deep to walk + * @param shouldIndex - A function that determines whether to index a file / directory * - * @param directory - The folder to walk - * @param shouldIndex - A function that determines whether to index a file - * @param opts - Klaw options https://github.com/jprichardson/node-klaw - * - * todo: If bored, implement a more efficient and easier to work with API: - * - Implement walk with w/ node APIs - * - Filter on filename -- avoid non-journal directories and calling fs.stat needlessly + * usage: + * ``` + * for await (const file of Files.walk(rootDir, 1, shouldIndex)) { ...
} + * ``` */ static async *walk( - directory: string, + dir: string, + depthLimit = Infinity, shouldIndex: ShouldIndex, - opts: walk.Options = {}, - ) { - // todo: statistics - const walking = walk(directory, opts); - - // NOTE: Docs say walk is lexicographical but if I log out statements, its not walking in order - for await (const entry of walking) { - if (shouldIndex(entry)) { - yield entry as PathStatsFile; + currentDepth = 0, + ): AsyncGenerator { + if (currentDepth > depthLimit) return; + + const dirHandle = await fs.promises.opendir(dir); + for await (const entry of dirHandle) { + const fullPath = path.join(dir, entry.name); + + // Skip hidden files/directories or other excluded names + if (entry.isSymbolicLink()) continue; // Skip symlinks entirely + if (!shouldIndex(entry)) continue; + + if (entry.isDirectory()) { + // we don't yield directories, just contents + yield* Files.walk(fullPath, depthLimit, shouldIndex, currentDepth + 1); + } else { + const stats = await fs.promises.lstat(fullPath); // Use lstat to check for symlinks + yield { path: fullPath, stats }; // Yield file path and stats } } } @@ -144,7 +173,7 @@ export class Files { } } catch (err: any) { if (err.code !== "ENOENT") throw err; - await mkdirp(directory); + await Files.mkdirp(directory); } // NOTE: Documentation suggests Windows may report ok here, but then choke diff --git a/src/views/preferences/index.tsx b/src/views/preferences/index.tsx index f5b8c84..a4a4763 100644 --- a/src/views/preferences/index.tsx +++ b/src/views/preferences/index.tsx @@ -14,6 +14,10 @@ import useClient from "../../hooks/useClient"; import { useJournals } from "../../hooks/useJournals"; import { SourceType } from "../../preload/client/importer/SourceType"; import { Preferences } from "../../preload/client/preferences"; +import { + SKIPPABLE_FILES, + SKIPPABLE_PREFIXES, +} from "../../preload/client/types"; import Titlebar from "../../titlebar/macos"; import * as Base from "../layout"; @@ -140,6 +144,21 @@ const 
Preferences = observer(() => {

Import markdown directory

Import a directory of markdown files. Experimental.

+

The following file / directory names will be skipped:

+ +

+ Other than _attachments, the following prefixes will cause a file or + directory to be skipped: +

+