Skip to content

Commit

Permalink
When syncing ignore more files and directories, refactor walk (#281)
Browse files Browse the repository at this point in the history
- refactor: drop klaw, replace with Node's fs.opendir
- do not descend into directories we don't intend to sync (hidden folders like .git, node_modules, _attachments in sync specifically)
- Add more ignored files / folders and indicate which are skipped in preferences (todo: make configurable)
- refactor: drop mkdirp (after discovering it mostly exists in fs now)
  • Loading branch information
cloverich authored Dec 9, 2024
1 parent 344bf13 commit f494a5d
Show file tree
Hide file tree
Showing 11 changed files with 132 additions and 103 deletions.
3 changes: 0 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
"better-sqlite3": "^9.2.2",
"electron-store": "^8.0.1",
"knex": "^2.5.0",
"mkdirp": "^1.0.4",
"uuidv7": "^0.6.3"
},
"devDependencies": {
Expand All @@ -41,7 +40,6 @@
"@radix-ui/react-visually-hidden": "^1.1.0",
"@types/better-sqlite3": "^5.4.0",
"@types/chai": "^4.3.11",
"@types/klaw": "^3.0.1",
"@types/luxon": "^2.4.0",
"@types/mkdirp": "^1.0.1",
"@types/mocha": "^7.0.2",
Expand All @@ -56,7 +54,6 @@
"electron": "^28.2.0",
"esbuild": "^0.20.0",
"evergreen-ui": "^7.1.9",
"klaw": "^3.0.0",
"lodash": "^4.17.21",
"lucide-react": "^0.314.0",
"luxon": "^2.4.0",
Expand Down
9 changes: 6 additions & 3 deletions src/electron/ensureDir.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
const fs = require("fs");
const mkdirp = require("mkdirp");

/**
* Borrowed from api files, since its typescript and this is not
* Reconcile that later
*/
exports.ensureDir = function ensureDir(directory) {
exports.ensureDir = function ensureDir(directory, create = true) {
if (!directory) {
throw new Error("ensureDir called with no directory path");
}
Expand All @@ -19,7 +18,11 @@ exports.ensureDir = function ensureDir(directory) {
}
} catch (err) {
if (err.code !== "ENOENT") throw err;
mkdirp.sync(directory);
try {
fs.mkdirSync(directory, { recursive: true });
} catch (err) {
if (err.code !== "EEXIST") throw err;
}
}

// NOTE: Documentation suggests Windows may report ok here, but then choke
Expand Down
1 change: 0 additions & 1 deletion src/electron/userFilesInit.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
const path = require("path");
const fs = require("fs");
const mkdirp = require("mkdirp");
const settings = require("./settings");
const { ensureDir } = require("./ensureDir");

Expand Down
18 changes: 4 additions & 14 deletions src/preload/client/files.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import Store from "electron-store";
import fs from "fs";
import path from "path";
import { uuidv7obj } from "uuidv7";
import { Files } from "../files";
const { readFile, writeFile, access, stat } = fs.promises;

interface UploadResponse {
Expand Down Expand Up @@ -149,7 +150,7 @@ export class FilesClient {
document.id,
);

await fs.promises.mkdir(journalPath, { recursive: true });
await Files.mkdirp(journalPath);
await fs.promises.writeFile(docPath, document.content);
return docPath;
};
Expand All @@ -171,18 +172,7 @@ export class FilesClient {
createFolder = async (name: string) => {
const baseDir = this.settings.get("NOTES_DIR") as string;
const newPath = path.join(baseDir, name);

try {
await fs.promises.mkdir(newPath, { recursive: true });
} catch (err) {
// If it already exists, good to go
// note: ts can't find this type: instanceof ErrnoException
if ((err as any).code === "EEXIST") {
return newPath;
} else {
throw err;
}
}
await Files.mkdirp(newPath);
};

removeFolder = async (name: string) => {
Expand Down Expand Up @@ -257,7 +247,7 @@ export class FilesClient {
}
} catch (err: any) {
if (err.code !== "ENOENT") throw err;
await fs.promises.mkdir(directory, { recursive: true });
await Files.mkdirp(directory);
}

// NOTE: Documentation suggests Windows may report ok here, but then choke
Expand Down
22 changes: 11 additions & 11 deletions src/preload/client/importer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
} from "./journals";
import { IPreferencesClient } from "./preferences";
import { ISyncClient } from "./sync";
import { SKIPPABLE_FILES, SKIPPABLE_PREFIXES } from "./types";

import * as mdast from "mdast";

Expand All @@ -26,8 +27,6 @@ import { FilesImportResolver } from "./importer/FilesImportResolver";
import { SourceType } from "./importer/SourceType";
import { parseTitleAndFrontMatter } from "./importer/frontmatter";

export const SKIPPABLE_FILES = new Set(".DS_Store");

// UUID in Notion notes look like 32 character hex strings; make this somewhat more lenient
const hexIdRegex = /\b[0-9a-f]{16,}\b/;

Expand Down Expand Up @@ -167,20 +166,21 @@ export class ImporterClient {

for await (const file of Files.walk(
importDir,
// todo: Skip some directories (e.g. .git, .vscode, etc.)
(filestats) => {
// Skip directories, symbolic links, etc.
if (!filestats.stats.isFile()) return false;

const name = path.basename(filestats.path);
30, // avoid infinite loops; an arbitrary guess at a reasonable depth

(dirent) => {
// Skip hidden files and directories
if (name.startsWith(".")) return false;
if (SKIPPABLE_FILES.has(name)) return false;
if (dirent.name.startsWith(".")) return false;
if (SKIPPABLE_FILES.has(dirent.name)) return false;

// Skip prefixes including _, unless it's _attachments
if (dirent.name === "_attachments") return true;
for (const prefix of SKIPPABLE_PREFIXES) {
if (dirent.name.startsWith(prefix)) return false;
}

return true;
},
{},
)) {
if (file.path.endsWith(".md")) {
await this.stageNote(
Expand Down
4 changes: 2 additions & 2 deletions src/preload/client/importer/FilesImportResolver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import mdast from "mdast";
import path from "path";
import { uuidv7obj } from "uuidv7";
import { isNoteLink } from "../../../markdown";
import { PathStatsFile } from "../../files";
import { Files, PathStatsFile } from "../../files";
import { IFilesClient } from "../files";

const ATTACHMENTS_DIR = "_attachments";
Expand Down Expand Up @@ -226,7 +226,7 @@ export class FilesImportResolver {
});

const attachmentsDir = path.join(chroniclesRoot, ATTACHMENTS_DIR);
await fs.promises.mkdir(attachmentsDir, { recursive: true });
await Files.mkdirp(attachmentsDir);

for await (const file of files) {
const { sourcePathResolved, extension, chroniclesId } = file;
Expand Down
57 changes: 25 additions & 32 deletions src/preload/client/sync.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Database } from "better-sqlite3";
import fs from "fs";
import { Knex } from "knex";
import path from "path";
import { UUID } from "uuidv7";
Expand All @@ -7,11 +8,31 @@ import { IDocumentsClient } from "./documents";
import { IFilesClient } from "./files";
import { IJournalsClient } from "./journals";
import { IPreferencesClient } from "./preferences";
import { GetDocumentResponse } from "./types";
import {
GetDocumentResponse,
SKIPPABLE_FILES,
SKIPPABLE_PREFIXES,
} from "./types";

export type ISyncClient = SyncClient;

const SKIPPABLE_FILES = new Set(".DS_Store");
/**
 * Filter used by the sync walk to decide which directory entries to visit.
 *
 * - Any entry whose name starts with one of SKIPPABLE_PREFIXES, or whose name
 *   is listed in SKIPPABLE_FILES, is rejected.
 * - Regular files are accepted only when their name ends in ".md"; unlike the
 *   importer, sync does not index other file types.
 * - Directories are accepted so the walker can descend into them.
 * - Everything else (FIFOs, sockets, devices, ...) is rejected instead of
 *   being assumed to be a directory, so it is never handed to the indexer.
 *
 * @param dirent - Directory entry to test; only its name and type are read
 * @returns true when the entry should be indexed (file) or descended into
 *   (directory), false when it should be skipped
 */
const shouldIndex = (dirent: fs.Dirent): boolean => {
  for (const prefix of SKIPPABLE_PREFIXES) {
    if (dirent.name.startsWith(prefix)) return false;
  }

  if (SKIPPABLE_FILES.has(dirent.name)) return false;

  if (dirent.isFile()) {
    // for files, only index markdown files, unlike importer
    // which will import markdown and other files (if referenced)
    return dirent.name.endsWith(".md");
  }

  // Only real directories are worth descending into; any other entry type
  // cannot contain markdown documents.
  return dirent.isDirectory();
};

export class SyncClient {
constructor(
Expand Down Expand Up @@ -80,31 +101,8 @@ updatedAt: ${document.updatedAt}

let syncedCount = 0;

for await (const file of Files.walk(rootDir, () => true, {
// depth: dont go into subdirectories
depthLimit: 1,
})) {
// For some reason it yields the root folder first, what is the point of that shrug
if (file.path == rootDir) continue;

const { ext, name, dir } = path.parse(file.path);
if (name.startsWith(".")) continue;
if (SKIPPABLE_FILES.has(name)) continue;

if (file.stats.isDirectory()) {
const dirname = name;
if (dirname === "_attachments") {
continue;
}

// Defer creating journals until we find a markdown file
// in the directory
continue;
}

// Only process markdown files
if (ext !== ".md") continue;

for await (const file of Files.walk(rootDir, 1, shouldIndex)) {
const { name, dir } = path.parse(file.path);
// filename is id; ensure it is formatted as a uuidv7
const documentId = name;

Expand All @@ -124,11 +122,6 @@ updatedAt: ${document.updatedAt}
// NOTE: This directory check only works because we limit depth to 1
const dirname = path.basename(dir);

// _attachments is for images (etc), not notes
if (dirname === "_attachments") {
continue;
}

// Once we find at least one markdown file, we treat this directory
// as a journal
if (!(dirname in journals)) {
Expand Down
13 changes: 13 additions & 0 deletions src/preload/client/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,16 @@ export interface SaveRequest {
createdAt?: string;
updatedAt?: string;
}

// Exact entry names that the walk filters reject outright. They are compared
// against the directory entry's name, so they apply to files and directories
// alike.
// Nobody would put node_modules in their note directory... right?
// todo: Make this configurable
export const SKIPPABLE_FILES = new Set([
"node_modules",
"dist",
"build",
"out",
]);

// Name prefixes that the walk filters reject: hidden files and folders such as
// .git and .DS_Store, plus any name starting with "_", "*" or "~".
// NOTE(review): the Windows thumbnail cache is normally named "Thumbs.db"
// (no leading dot), which none of these prefixes match -- confirm whether it
// should be listed in SKIPPABLE_FILES instead.
// NOTE: "_" also matches _attachments, so a routine that needs that directory
// (the importer) has to allow it explicitly before applying these prefixes.
export const SKIPPABLE_PREFIXES = new Set([".", "_", "*", "~"]);
73 changes: 51 additions & 22 deletions src/preload/files.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import fs, { Stats } from "fs";
import mkdirp from "mkdirp";
import path from "path";
import { NotFoundError, ValidationError } from "./errors";
import walk = require("klaw");
const { readFile, writeFile, access, stat } = fs.promises;
const readFileStr = (path: string) => readFile(path, "utf8");

Expand All @@ -13,7 +11,7 @@ export interface PathStatsFile {
stats: Stats;
}

type ShouldIndex = (file: PathStatsFile) => boolean;
type ShouldIndex = (file: fs.Dirent) => boolean;

// for matching exact (ex: 2020-05-01)
const reg = /^\d{4}-\d{2}-\d{2}$/;
Expand All @@ -40,6 +38,26 @@ export class Files {
);
}

/**
 * Create `dir`, including any missing parent directories.
 *
 * Succeeds when the directory already exists. If the path exists but is not
 * a directory, an error is thrown rather than silently continuing.
 *
 * @param dir - Path of the directory to create
 * @returns The same `dir` that was passed in, on every successful path
 * @throws Error when `dir` already exists as a non-directory
 * @throws Any other error raised by `fs.promises.mkdir` / `fs.promises.stat`
 */
static async mkdirp(dir: string): Promise<string> {
  try {
    await fs.promises.mkdir(dir, { recursive: true });
  } catch (err: unknown) {
    // ErrnoException is an interface, not a class, so `instanceof` cannot be
    // used; read the `code` property structurally instead.
    const code = (err as NodeJS.ErrnoException | null)?.code;
    if (code !== "EEXIST") throw err;

    // Something already occupies the path; confirm it's a directory
    const stats = await fs.promises.stat(dir);
    if (!stats.isDirectory()) {
      throw new Error(`[Files.mkdirp] ${dir} already exists as a file`);
    }
  }

  // Created just now or already present: either way the directory is usable
  return dir;
}

static async read(fp: string) {
try {
return await readFileStr(fp);
Expand Down Expand Up @@ -70,7 +88,7 @@ export class Files {
const fp = Files.pathForEntry(journalPath, date);
const dir = path.parse(fp).dir;

await mkdirp(dir);
await Files.mkdirp(dir);
return await writeFile(fp, contents);
}

Expand Down Expand Up @@ -102,27 +120,38 @@ export class Files {
}

/**
* Walk directory, for index and sync routines
* @param dir - Where to start walking
* @param depthLimit - A limit on how deep to walk
* @param shouldIndex - A function that determines whether to index a file / directory
*
* @param directory - The folder to walk
* @param shouldIndex - A function that determines whether to index a file
* @param opts - Klaw options https://github.com/jprichardson/node-klaw
*
* todo: If bored, implement a more efficient and easier to work with API:
* - Implement walk with w/ node APIs
* - Filter on filename -- avoid non-journal directories and calling fs.stat needlessly
* usage:
* ```
* for await (const file of Files.walk(rootDir, 1, shouldIndex)) { ... }
* ```
*/
static async *walk(
directory: string,
dir: string,
depthLimit = Infinity,
shouldIndex: ShouldIndex,
opts: walk.Options = {},
) {
// todo: statistics
const walking = walk(directory, opts);

// NOTE: Docs say walk is lexicographical but if I log out statements, its not walking in order
for await (const entry of walking) {
if (shouldIndex(entry)) {
yield entry as PathStatsFile;
currentDepth = 0,
): AsyncGenerator<PathStatsFile> {
if (currentDepth > depthLimit) return;

const dirHandle = await fs.promises.opendir(dir);
for await (const entry of dirHandle) {
const fullPath = path.join(dir, entry.name);

// Skip hidden files/directories or other excluded names
if (entry.isSymbolicLink()) continue; // Skip symlinks entirely
if (!shouldIndex(entry)) continue;

if (entry.isDirectory()) {
// we don't yield directories, just contents
yield* Files.walk(fullPath, depthLimit, shouldIndex, currentDepth + 1);
} else {
const stats = await fs.promises.lstat(fullPath); // Use lstat to check for symlinks
yield { path: fullPath, stats }; // Yield file path and stats
}
}
}
Expand All @@ -144,7 +173,7 @@ export class Files {
}
} catch (err: any) {
if (err.code !== "ENOENT") throw err;
await mkdirp(directory);
await Files.mkdirp(directory);
}

// NOTE: Documentation suggests Windows may report ok here, but then choke
Expand Down
Loading

0 comments on commit f494a5d

Please sign in to comment.