From 42ef2fc5dab1766ad97ae9049e26650af8030f72 Mon Sep 17 00:00:00 2001 From: Christopher Loverich <1010084+cloverich@users.noreply.github.com> Date: Mon, 9 Dec 2024 10:55:59 -0800 Subject: [PATCH 1/4] refactor directory traversal - refactor: drop klaw and replace with nodes opendir in Files.walk - skip descending into non-syncable directories and add many more excluded folders and prefixes --- package.json | 2 -- src/preload/client/importer.ts | 23 ++++++------ src/preload/client/sync.ts | 66 +++++++++++++++++----------------- src/preload/files.ts | 48 +++++++++++++++---------- yarn.lock | 16 +-------- 5 files changed, 75 insertions(+), 80 deletions(-) diff --git a/package.json b/package.json index 8a0d7bc..bd55c01 100644 --- a/package.json +++ b/package.json @@ -41,7 +41,6 @@ "@radix-ui/react-visually-hidden": "^1.1.0", "@types/better-sqlite3": "^5.4.0", "@types/chai": "^4.3.11", - "@types/klaw": "^3.0.1", "@types/luxon": "^2.4.0", "@types/mkdirp": "^1.0.1", "@types/mocha": "^7.0.2", @@ -56,7 +55,6 @@ "electron": "^28.2.0", "esbuild": "^0.20.0", "evergreen-ui": "^7.1.9", - "klaw": "^3.0.0", "lodash": "^4.17.21", "lucide-react": "^0.314.0", "luxon": "^2.4.0", diff --git a/src/preload/client/importer.ts b/src/preload/client/importer.ts index 5d22fe4..aadf995 100644 --- a/src/preload/client/importer.ts +++ b/src/preload/client/importer.ts @@ -10,7 +10,7 @@ import { validateJournalName, } from "./journals"; import { IPreferencesClient } from "./preferences"; -import { ISyncClient } from "./sync"; +import { ISyncClient, SKIPPABLE_FILES, SKIPPABLE_PREFIXES } from "./sync"; import * as mdast from "mdast"; @@ -26,8 +26,6 @@ import { FilesImportResolver } from "./importer/FilesImportResolver"; import { SourceType } from "./importer/SourceType"; import { parseTitleAndFrontMatter } from "./importer/frontmatter"; -export const SKIPPABLE_FILES = new Set(".DS_Store"); - // UUID in Notion notes look like 32 character hex strings; make this somewhat more lenient const hexIdRegex 
= /\b[0-9a-f]{16,}\b/; @@ -167,20 +165,21 @@ export class ImporterClient { for await (const file of Files.walk( importDir, - // todo: Skip some directories (e.g. .git, .vscode, etc.) - (filestats) => { - // Skip directories, symbolic links, etc. - if (!filestats.stats.isFile()) return false; - - const name = path.basename(filestats.path); + 30, // avoid infinite loops, random guess at reasonable depth + (dirent) => { // Skip hidden files and directories - if (name.startsWith(".")) return false; - if (SKIPPABLE_FILES.has(name)) return false; + if (dirent.name.startsWith(".")) return false; + if (SKIPPABLE_FILES.has(dirent.name)) return false; + + // Skip prefixes including _, unless it's _attachments + if (dirent.name === "_attachments") return true; + for (const prefix of SKIPPABLE_PREFIXES) { + if (dirent.name.startsWith(prefix)) return false; + } return true; }, - {}, )) { if (file.path.endsWith(".md")) { await this.stageNote( diff --git a/src/preload/client/sync.ts b/src/preload/client/sync.ts index 2462390..eebc9b1 100644 --- a/src/preload/client/sync.ts +++ b/src/preload/client/sync.ts @@ -1,4 +1,5 @@ import { Database } from "better-sqlite3"; +import fs from "fs"; import { Knex } from "knex"; import path from "path"; import { UUID } from "uuidv7"; @@ -11,7 +12,36 @@ import { GetDocumentResponse } from "./types"; export type ISyncClient = SyncClient; -const SKIPPABLE_FILES = new Set(".DS_Store"); +// Nobody would put node_modules in their note directory... right? 
+// todo: Make this configurable +export const SKIPPABLE_FILES = new Set([ + "node_modules", + "dist", + "build", + "out", +]); + +// Skip hidden folders and files, especially .git, .DS_Store, .Thumbs.db, etc +// NOTE: This also skips _attachments, so add exclusion in importer routine +export const SKIPPABLE_PREFIXES = new Set([".", "_", "*", "~"]); + +// Indicates which files to index when syncing +const shouldIndex = (dirent: fs.Dirent) => { + for (const prefix of SKIPPABLE_PREFIXES) { + if (dirent.name.startsWith(prefix)) return false; + } + + if (SKIPPABLE_FILES.has(dirent.name)) return false; + + if (dirent.isFile()) { + // for files, only index markdown files, unlike importer + // which will import markdown and other files (if referenced) + return dirent.name.endsWith(".md"); + } else { + // at this point assume its a directory that likely has markdown files + return true; + } +}; export class SyncClient { constructor( @@ -40,7 +70,7 @@ updatedAt: ${document.updatedAt} /** * Sync the notes directory with the database */ - sync = async (force = false) => { + sync = async (force = true) => { // Skip sync if completed recently; not much thought put into this const lastSync = await this.knex("sync").orderBy("id", "desc").first(); if (lastSync?.completedAt && !force) { @@ -80,31 +110,8 @@ updatedAt: ${document.updatedAt} let syncedCount = 0; - for await (const file of Files.walk(rootDir, () => true, { - // depth: dont go into subdirectories - depthLimit: 1, - })) { - // For some reason it yields the root folder first, what is the point of that shrug - if (file.path == rootDir) continue; - - const { ext, name, dir } = path.parse(file.path); - if (name.startsWith(".")) continue; - if (SKIPPABLE_FILES.has(name)) continue; - - if (file.stats.isDirectory()) { - const dirname = name; - if (dirname === "_attachments") { - continue; - } - - // Defer creating journals until we find a markdown file - // in the directory - continue; - } - - // Only process markdown files - 
if (ext !== ".md") continue; - + for await (const file of Files.walk(rootDir, 1, shouldIndex)) { + const { name, dir } = path.parse(file.path); // filename is id; ensure it is formatted as a uuidv7 const documentId = name; @@ -124,11 +131,6 @@ updatedAt: ${document.updatedAt} // NOTE: This directory check only works because we limit depth to 1 const dirname = path.basename(dir); - // _attachments is for images (etc), not notes - if (dirname === "_attachments") { - continue; - } - // Once we find at least one markdown file, we treat this directory // as a journal if (!(dirname in journals)) { diff --git a/src/preload/files.ts b/src/preload/files.ts index a8f72e5..1695e90 100644 --- a/src/preload/files.ts +++ b/src/preload/files.ts @@ -2,7 +2,6 @@ import fs, { Stats } from "fs"; import mkdirp from "mkdirp"; import path from "path"; import { NotFoundError, ValidationError } from "./errors"; -import walk = require("klaw"); const { readFile, writeFile, access, stat } = fs.promises; const readFileStr = (path: string) => readFile(path, "utf8"); @@ -13,7 +12,7 @@ export interface PathStatsFile { stats: Stats; } -type ShouldIndex = (file: PathStatsFile) => boolean; +type ShouldIndex = (file: fs.Dirent) => boolean; // for matching exact (ex: 2020-05-01) const reg = /^\d{4}-\d{2}-\d{2}$/; @@ -102,27 +101,38 @@ export class Files { } /** + * Walk directory, for index and sync routines + * @param dir - Where to start walking + * @param depthLimit - A limit on how deep to walk + * @param shouldIndex - A function that determines whether to index a file / directory * - * @param directory - The folder to walk - * @param shouldIndex - A function that determines whether to index a file - * @param opts - Klaw options https://github.com/jprichardson/node-klaw - * - * todo: If bored, implement a more efficient and easier to work with API: - * - Implement walk with w/ node APIs - * - Filter on filename -- avoid non-journal directories and calling fs.stat needlessly + * usage: + * ``` + * 
for await (const file of Files.walk(rootDir, 1, shouldIndex)) { ... } + * ``` */ static async *walk( - directory: string, + dir: string, + depthLimit = Infinity, shouldIndex: ShouldIndex, - opts: walk.Options = {}, - ) { - // todo: statistics - const walking = walk(directory, opts); - - // NOTE: Docs say walk is lexicographical but if I log out statements, its not walking in order - for await (const entry of walking) { - if (shouldIndex(entry)) { - yield entry as PathStatsFile; + currentDepth = 0, + ): AsyncGenerator { + if (currentDepth > depthLimit) return; + + const dirHandle = await fs.promises.opendir(dir); + for await (const entry of dirHandle) { + const fullPath = path.join(dir, entry.name); + + // Skip hidden files/directories or other excluded names + if (entry.isSymbolicLink()) continue; // Skip symlinks entirely + if (!shouldIndex(entry)) continue; + + if (entry.isDirectory()) { + // we don't yield directories, just contents + yield* Files.walk(fullPath, depthLimit, shouldIndex, currentDepth + 1); + } else { + const stats = await fs.promises.lstat(fullPath); // Use lstat to check for symlinks + yield { path: fullPath, stats }; // Yield file path and stats } } } diff --git a/yarn.lock b/yarn.lock index 1e9de7e..10618bb 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1321,13 +1321,6 @@ dependencies: "@types/node" "*" -"@types/klaw@^3.0.1": - version "3.0.1" - resolved "https://registry.yarnpkg.com/@types/klaw/-/klaw-3.0.1.tgz#29f90021c0234976aa4eb97efced9cb6db9fa8b3" - integrity sha512-acnF3n9mYOr1aFJKFyvfNX0am9EtPUsYPq22QUCGdJE+MVt6UyAN1jwo+PmOPqXD4K7ZS9MtxDEp/un0lxFccA== - dependencies: - "@types/node" "*" - "@types/lodash@^4.14.200": version "4.14.202" resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.14.202.tgz#f09dbd2fb082d507178b2f2a5c7e74bd72ff98f8" @@ -3232,7 +3225,7 @@ graceful-fs@^4.1.2: resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.11.tgz#4183e4e8bf08bb6e05bbb2f7d2e0c8f712ca40e3" integrity 
sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ== -graceful-fs@^4.1.6, graceful-fs@^4.1.9, graceful-fs@^4.2.0: +graceful-fs@^4.1.6, graceful-fs@^4.2.0: version "4.2.4" resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.4.tgz#2256bde14d3632958c465ebc96dc467ca07a29fb" integrity sha512-WjKPNJF79dtJAVniUlGGWHYGz2jWxT6VhN/4m1NdkbZ2nOsEF+cI1Edgql5zCRhs/VsQYRvrXctxktVXZUkixw== @@ -3665,13 +3658,6 @@ keyv@^4.0.0: dependencies: json-buffer "3.0.1" -klaw@^3.0.0: - version "3.0.0" - resolved "https://registry.yarnpkg.com/klaw/-/klaw-3.0.0.tgz#b11bec9cf2492f06756d6e809ab73a2910259146" - integrity sha512-0Fo5oir+O9jnXu5EefYbVK+mHMBeEVEy2cmctR1O1NECcCkPRreJKrS6Qt/j3KC2C148Dfo9i3pCmCMsdqGr0g== - dependencies: - graceful-fs "^4.1.9" - knex@^2.5.0: version "2.5.1" resolved "https://registry.yarnpkg.com/knex/-/knex-2.5.1.tgz#a6c6b449866cf4229f070c17411f23871ba52ef9" From 52fa08c0d9f6fd088c8dd1872448be3dada42ed2 Mon Sep 17 00:00:00 2001 From: Christopher Loverich <1010084+cloverich@users.noreply.github.com> Date: Mon, 9 Dec 2024 11:00:43 -0800 Subject: [PATCH 2/4] list skipped items in preferences --- src/preload/client/importer.ts | 3 ++- src/preload/client/sync.ts | 21 ++++++--------------- src/preload/client/types.ts | 13 +++++++++++++ src/views/preferences/index.tsx | 19 +++++++++++++++++++ 4 files changed, 40 insertions(+), 16 deletions(-) diff --git a/src/preload/client/importer.ts b/src/preload/client/importer.ts index aadf995..b5cf66d 100644 --- a/src/preload/client/importer.ts +++ b/src/preload/client/importer.ts @@ -10,7 +10,8 @@ import { validateJournalName, } from "./journals"; import { IPreferencesClient } from "./preferences"; -import { ISyncClient, SKIPPABLE_FILES, SKIPPABLE_PREFIXES } from "./sync"; +import { ISyncClient } from "./sync"; +import { SKIPPABLE_FILES, SKIPPABLE_PREFIXES } from "./types"; import * as mdast from "mdast"; diff --git a/src/preload/client/sync.ts b/src/preload/client/sync.ts index 
eebc9b1..d50dff9 100644 --- a/src/preload/client/sync.ts +++ b/src/preload/client/sync.ts @@ -8,23 +8,14 @@ import { IDocumentsClient } from "./documents"; import { IFilesClient } from "./files"; import { IJournalsClient } from "./journals"; import { IPreferencesClient } from "./preferences"; -import { GetDocumentResponse } from "./types"; +import { + GetDocumentResponse, + SKIPPABLE_FILES, + SKIPPABLE_PREFIXES, +} from "./types"; export type ISyncClient = SyncClient; -// Nobody would put node_modules in their note directory... right? -// todo: Make this configurable -export const SKIPPABLE_FILES = new Set([ - "node_modules", - "dist", - "build", - "out", -]); - -// Skip hidden folders and files, especially .git, .DS_Store, .Thumbs.db, etc -// NOTE: This also skips _attachments, so add exclusion in importer routine -export const SKIPPABLE_PREFIXES = new Set([".", "_", "*", "~"]); - // Indicates which files to index when syncing const shouldIndex = (dirent: fs.Dirent) => { for (const prefix of SKIPPABLE_PREFIXES) { @@ -70,7 +61,7 @@ updatedAt: ${document.updatedAt} /** * Sync the notes directory with the database */ - sync = async (force = true) => { + sync = async (force = false) => { // Skip sync if completed recently; not much thought put into this const lastSync = await this.knex("sync").orderBy("id", "desc").first(); if (lastSync?.completedAt && !force) { diff --git a/src/preload/client/types.ts b/src/preload/client/types.ts index 5281ba8..20300bc 100644 --- a/src/preload/client/types.ts +++ b/src/preload/client/types.ts @@ -127,3 +127,16 @@ export interface SaveRequest { createdAt?: string; updatedAt?: string; } +// Nobody would put node_modules in their note directory... right? 
+// todo: Make this configurable + +export const SKIPPABLE_FILES = new Set([ + "node_modules", + "dist", + "build", + "out", +]); +// Skip hidden folders and files, especially .git, .DS_Store, .Thumbs.db, etc +// NOTE: This also skips _attachments, so add exclusion in importer routine + +export const SKIPPABLE_PREFIXES = new Set([".", "_", "*", "~"]); diff --git a/src/views/preferences/index.tsx b/src/views/preferences/index.tsx index f5b8c84..a4a4763 100644 --- a/src/views/preferences/index.tsx +++ b/src/views/preferences/index.tsx @@ -14,6 +14,10 @@ import useClient from "../../hooks/useClient"; import { useJournals } from "../../hooks/useJournals"; import { SourceType } from "../../preload/client/importer/SourceType"; import { Preferences } from "../../preload/client/preferences"; +import { + SKIPPABLE_FILES, + SKIPPABLE_PREFIXES, +} from "../../preload/client/types"; import Titlebar from "../../titlebar/macos"; import * as Base from "../layout"; @@ -140,6 +144,21 @@ const Preferences = observer(() => {

Import markdown directory

Import a directory of markdown files. Experimental.

+

The following file / directory names will be skipped:

+ +

+ Other than _attachments, the following prefixes will cause a file or + directory to be skipped: +

+