perf: incremental rebuild (--fastRebuild v2 but default) (#1841)

* checkpoint

* incremental all the things

* properly splice changes array

* smol doc update

* update docs

* make fancy logger dumb in ci
Jacky Zhao
2025-03-16 14:17:31 -07:00
committed by GitHub
parent a72b1a4224
commit a737207981
36 changed files with 767 additions and 1151 deletions

quartz/build.ts

@@ -9,7 +9,7 @@ import { parseMarkdown } from "./processors/parse"
import { filterContent } from "./processors/filter"
import { emitContent } from "./processors/emit"
import cfg from "../quartz.config"
import { FilePath, FullSlug, joinSegments, slugifyFilePath } from "./util/path"
import { FilePath, joinSegments, slugifyFilePath } from "./util/path"
import chokidar from "chokidar"
import { ProcessedContent } from "./plugins/vfile"
import { Argv, BuildCtx } from "./util/ctx"
@@ -17,34 +17,39 @@ import { glob, toPosixPath } from "./util/glob"
import { trace } from "./util/trace"
import { options } from "./util/sourcemap"
import { Mutex } from "async-mutex"
import DepGraph from "./depgraph"
import { getStaticResourcesFromPlugins } from "./plugins"
import { randomIdNonSecure } from "./util/random"
import { ChangeEvent } from "./plugins/types"
import { minimatch } from "minimatch"
type Dependencies = Record<string, DepGraph<FilePath> | null>
type ContentMap = Map<
FilePath,
| {
type: "markdown"
content: ProcessedContent
}
| {
type: "other"
}
>
type BuildData = {
ctx: BuildCtx
ignored: GlobbyFilterFunction
mut: Mutex
initialSlugs: FullSlug[]
// TODO merge contentMap and trackedAssets
contentMap: Map<FilePath, ProcessedContent>
trackedAssets: Set<FilePath>
toRebuild: Set<FilePath>
toRemove: Set<FilePath>
contentMap: ContentMap
changesSinceLastBuild: Record<FilePath, ChangeEvent["type"]>
lastBuildMs: number
dependencies: Dependencies
}
type FileEvent = "add" | "change" | "delete"
async function buildQuartz(argv: Argv, mut: Mutex, clientRefresh: () => void) {
const ctx: BuildCtx = {
buildId: randomIdNonSecure(),
argv,
cfg,
allSlugs: [],
allFiles: [],
incremental: false,
}
const perf = new PerfTimer()
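
Note on the new ContentMap above: it is a discriminated union keyed on the `type` tag, so non-markdown assets can share the map with parse results without callers ever reaching for a missing `content`. A minimal sketch of that narrowing, assuming the types defined in this hunk (the helper getMarkdownContent is hypothetical, not part of this commit):

// Hypothetical helper: "markdown" is the only variant carrying content,
// so narrowing on the tag is enough to safely reach the ProcessedContent.
function getMarkdownContent(map: ContentMap, fp: FilePath): ProcessedContent | undefined {
  const entry = map.get(fp)
  if (entry?.type !== "markdown") return undefined
  return entry.content
}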
@@ -67,64 +72,70 @@ async function buildQuartz(argv: Argv, mut: Mutex, clientRefresh: () => void) {
perf.addEvent("glob")
const allFiles = await glob("**/*.*", argv.directory, cfg.configuration.ignorePatterns)
const fps = allFiles.filter((fp) => fp.endsWith(".md")).sort()
const markdownPaths = allFiles.filter((fp) => fp.endsWith(".md")).sort()
console.log(
`Found ${fps.length} input files from \`${argv.directory}\` in ${perf.timeSince("glob")}`,
`Found ${markdownPaths.length} input files from \`${argv.directory}\` in ${perf.timeSince("glob")}`,
)
const filePaths = fps.map((fp) => joinSegments(argv.directory, fp) as FilePath)
const filePaths = markdownPaths.map((fp) => joinSegments(argv.directory, fp) as FilePath)
ctx.allFiles = allFiles
ctx.allSlugs = allFiles.map((fp) => slugifyFilePath(fp as FilePath))
const parsedFiles = await parseMarkdown(ctx, filePaths)
const filteredContent = filterContent(ctx, parsedFiles)
const dependencies: Record<string, DepGraph<FilePath> | null> = {}
// Only build dependency graphs if we're doing a fast rebuild
if (argv.fastRebuild) {
const staticResources = getStaticResourcesFromPlugins(ctx)
for (const emitter of cfg.plugins.emitters) {
dependencies[emitter.name] =
(await emitter.getDependencyGraph?.(ctx, filteredContent, staticResources)) ?? null
}
}
await emitContent(ctx, filteredContent)
console.log(chalk.green(`Done processing ${fps.length} files in ${perf.timeSince()}`))
console.log(chalk.green(`Done processing ${markdownPaths.length} files in ${perf.timeSince()}`))
release()
if (argv.serve) {
return startServing(ctx, mut, parsedFiles, clientRefresh, dependencies)
if (argv.watch) {
ctx.incremental = true
return startWatching(ctx, mut, parsedFiles, clientRefresh)
}
}
// setup watcher for rebuilds
async function startServing(
async function startWatching(
ctx: BuildCtx,
mut: Mutex,
initialContent: ProcessedContent[],
clientRefresh: () => void,
dependencies: Dependencies, // emitter name: dep graph
) {
const { argv } = ctx
const { argv, allFiles } = ctx
// cache file parse results
const contentMap = new Map<FilePath, ProcessedContent>()
for (const content of initialContent) {
const [_tree, vfile] = content
contentMap.set(vfile.data.filePath!, content)
const contentMap: ContentMap = new Map()
for (const filePath of allFiles) {
contentMap.set(filePath, {
type: "other",
})
}
for (const content of initialContent) {
const [_tree, vfile] = content
contentMap.set(vfile.data.relativePath!, {
type: "markdown",
content,
})
}
const gitIgnoredMatcher = await isGitIgnored()
const buildData: BuildData = {
ctx,
mut,
dependencies,
contentMap,
ignored: await isGitIgnored(),
initialSlugs: ctx.allSlugs,
toRebuild: new Set<FilePath>(),
toRemove: new Set<FilePath>(),
trackedAssets: new Set<FilePath>(),
ignored: (path) => {
if (gitIgnoredMatcher(path)) return true
const pathStr = path.toString()
for (const pattern of cfg.configuration.ignorePatterns) {
if (minimatch(pathStr, pattern)) {
return true
}
}
return false
},
changesSinceLastBuild: {},
lastBuildMs: 0,
}
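
The rewritten `ignored` predicate layers the configured ignorePatterns on top of git's ignore rules, where the old code used the bare gitignore matcher. A standalone sketch of the same composition, assuming `isGitIgnored` from globby and `minimatch` from minimatch as used above (the factory name makeIgnorePredicate is hypothetical):

import { isGitIgnored } from "globby"
import { minimatch } from "minimatch"

// Hypothetical factory mirroring buildData.ignored: a path is skipped if git
// ignores it or if it matches any user-configured glob pattern.
async function makeIgnorePredicate(patterns: string[]): Promise<(path: string) => boolean> {
  const gitIgnored = await isGitIgnored()
  return (path) => gitIgnored(path) || patterns.some((pattern) => minimatch(path, pattern))
}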
@@ -134,34 +145,37 @@ async function startServing(
ignoreInitial: true,
})
const buildFromEntry = argv.fastRebuild ? partialRebuildFromEntrypoint : rebuildFromEntrypoint
const changes: ChangeEvent[] = []
watcher
.on("add", (fp) => buildFromEntry(fp as string, "add", clientRefresh, buildData))
.on("change", (fp) => buildFromEntry(fp as string, "change", clientRefresh, buildData))
.on("unlink", (fp) => buildFromEntry(fp as string, "delete", clientRefresh, buildData))
.on("add", (fp) => {
if (buildData.ignored(fp)) return
changes.push({ path: fp as FilePath, type: "add" })
void rebuild(changes, clientRefresh, buildData)
})
.on("change", (fp) => {
if (buildData.ignored(fp)) return
changes.push({ path: fp as FilePath, type: "change" })
void rebuild(changes, clientRefresh, buildData)
})
.on("unlink", (fp) => {
if (buildData.ignored(fp)) return
changes.push({ path: fp as FilePath, type: "delete" })
void rebuild(changes, clientRefresh, buildData)
})
return async () => {
await watcher.close()
}
}
async function partialRebuildFromEntrypoint(
filepath: string,
action: FileEvent,
clientRefresh: () => void,
buildData: BuildData, // note: this function mutates buildData
) {
const { ctx, ignored, dependencies, contentMap, mut, toRemove } = buildData
async function rebuild(changes: ChangeEvent[], clientRefresh: () => void, buildData: BuildData) {
const { ctx, contentMap, mut, changesSinceLastBuild } = buildData
const { argv, cfg } = ctx
// don't do anything for gitignored files
if (ignored(filepath)) {
return
}
const buildId = randomIdNonSecure()
ctx.buildId = buildId
buildData.lastBuildMs = new Date().getTime()
const numChangesInBuild = changes.length
const release = await mut.acquire()
// if there's another build after us, release and let them do it
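
The watcher handlers above all funnel into one shared `changes` queue, and `rebuild` serializes on the mutex; the "properly splice changes array" fix in this commit is that a finishing build removes only the events it saw at entry, so anything pushed mid-build survives for the next run. A reduced model of that queue discipline, assuming the same async-mutex Mutex (drain is a hypothetical name; the real logic lives inline in rebuild):

// Reduced model: capture the queue length before taking the lock, then splice
// off exactly that many events on exit. Events appended while this build ran
// stay queued and trigger the next rebuild.
async function drain(
  queue: ChangeEvent[],
  mut: Mutex,
  processBatch: (batch: ChangeEvent[]) => Promise<void>,
) {
  const numChangesInBuild = queue.length
  const release = await mut.acquire()
  try {
    await processBatch(queue.slice(0, numChangesInBuild))
  } finally {
    queue.splice(0, numChangesInBuild)
    release()
  }
}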
@@ -171,261 +185,105 @@ async function partialRebuildFromEntrypoint(
}
const perf = new PerfTimer()
perf.addEvent("rebuild")
console.log(chalk.yellow("Detected change, rebuilding..."))
// UPDATE DEP GRAPH
const fp = joinSegments(argv.directory, toPosixPath(filepath)) as FilePath
// update changesSinceLastBuild
for (const change of changes) {
changesSinceLastBuild[change.path] = change.type
}
const staticResources = getStaticResourcesFromPlugins(ctx)
let processedFiles: ProcessedContent[] = []
switch (action) {
case "add":
// add to cache when new file is added
processedFiles = await parseMarkdown(ctx, [fp])
processedFiles.forEach(([tree, vfile]) => contentMap.set(vfile.data.filePath!, [tree, vfile]))
// update the dep graph by asking all emitters whether they depend on this file
for (const emitter of cfg.plugins.emitters) {
const emitterGraph =
(await emitter.getDependencyGraph?.(ctx, processedFiles, staticResources)) ?? null
if (emitterGraph) {
const existingGraph = dependencies[emitter.name]
if (existingGraph !== null) {
existingGraph.mergeGraph(emitterGraph)
} else {
// might be the first time we're adding a markdown file
dependencies[emitter.name] = emitterGraph
}
}
}
break
case "change":
// invalidate cache when file is changed
processedFiles = await parseMarkdown(ctx, [fp])
processedFiles.forEach(([tree, vfile]) => contentMap.set(vfile.data.filePath!, [tree, vfile]))
// only content files can have added/removed dependencies because of transclusions
if (path.extname(fp) === ".md") {
for (const emitter of cfg.plugins.emitters) {
// get new dependencies from all emitters for this file
const emitterGraph =
(await emitter.getDependencyGraph?.(ctx, processedFiles, staticResources)) ?? null
// only update the graph if the emitter plugin uses the changed file
// eg. Assets plugin ignores md files, so we skip updating the graph
if (emitterGraph?.hasNode(fp)) {
// merge the new dependencies into the dep graph
dependencies[emitter.name]?.updateIncomingEdgesForNode(emitterGraph, fp)
}
}
}
break
case "delete":
toRemove.add(fp)
break
const pathsToParse: FilePath[] = []
for (const [fp, type] of Object.entries(changesSinceLastBuild)) {
if (type === "delete" || path.extname(fp) !== ".md") continue
const fullPath = joinSegments(argv.directory, toPosixPath(fp)) as FilePath
pathsToParse.push(fullPath)
}
if (argv.verbose) {
console.log(`Updated dependency graphs in ${perf.timeSince()}`)
const parsed = await parseMarkdown(ctx, pathsToParse)
for (const content of parsed) {
contentMap.set(content[1].data.relativePath!, {
type: "markdown",
content,
})
}
// EMIT
perf.addEvent("rebuild")
// update state using changesSinceLastBuild
// we do this weird play of add => compute change events => remove
// so that partialEmitters can do appropriate cleanup based on the content of deleted files
for (const [file, change] of Object.entries(changesSinceLastBuild)) {
if (change === "delete") {
// universal delete case
contentMap.delete(file as FilePath)
}
// manually track non-markdown files, since processedFiles only
// contains markdown files
if (change === "add" && path.extname(file) !== ".md") {
contentMap.set(file as FilePath, {
type: "other",
})
}
}
const changeEvents: ChangeEvent[] = Object.entries(changesSinceLastBuild).map(([fp, type]) => {
const path = fp as FilePath
const processedContent = contentMap.get(path)
if (processedContent?.type === "markdown") {
const [_tree, file] = processedContent.content
return {
type,
path,
file,
}
}
return {
type,
path,
}
})
// update allFiles and then allSlugs with the consistent view of content map
ctx.allFiles = Array.from(contentMap.keys())
ctx.allSlugs = ctx.allFiles.map((fp) => slugifyFilePath(fp as FilePath))
const processedFiles = Array.from(contentMap.values())
.filter((file) => file.type === "markdown")
.map((file) => file.content)
let emittedFiles = 0
for (const emitter of cfg.plugins.emitters) {
const depGraph = dependencies[emitter.name]
// emitter hasn't defined a dependency graph. call it with all processed files
if (depGraph === null) {
if (argv.verbose) {
console.log(
`Emitter ${emitter.name} doesn't define a dependency graph. Calling it with all files...`,
)
}
const files = [...contentMap.values()].filter(
([_node, vfile]) => !toRemove.has(vfile.data.filePath!),
)
const emitted = await emitter.emit(ctx, files, staticResources)
if (Symbol.asyncIterator in emitted) {
// Async generator case
for await (const file of emitted) {
emittedFiles++
if (ctx.argv.verbose) {
console.log(`[emit:${emitter.name}] ${file}`)
}
}
} else {
// Array case
emittedFiles += emitted.length
if (ctx.argv.verbose) {
for (const file of emitted) {
console.log(`[emit:${emitter.name}] ${file}`)
}
}
}
// Try to use partialEmit if available, otherwise assume the output is static
const emitFn = emitter.partialEmit ?? emitter.emit
const emitted = await emitFn(ctx, processedFiles, staticResources, changeEvents)
if (emitted === null) {
continue
}
// only call the emitter if it uses this file
if (depGraph.hasNode(fp)) {
// re-emit using all files that are needed for the downstream of this file
// eg. for ContentIndex, the dep graph could be:
// a.md --> contentIndex.json
// b.md ------^
//
// if a.md changes, we need to re-emit contentIndex.json,
// and supply [a.md, b.md] to the emitter
const upstreams = [...depGraph.getLeafNodeAncestors(fp)] as FilePath[]
const upstreamContent = upstreams
// filter out non-markdown files
.filter((file) => contentMap.has(file))
// if file was deleted, don't give it to the emitter
.filter((file) => !toRemove.has(file))
.map((file) => contentMap.get(file)!)
const emitted = await emitter.emit(ctx, upstreamContent, staticResources)
if (Symbol.asyncIterator in emitted) {
// Async generator case
for await (const file of emitted) {
emittedFiles++
if (ctx.argv.verbose) {
console.log(`[emit:${emitter.name}] ${file}`)
}
}
} else {
// Array case
emittedFiles += emitted.length
if (ctx.argv.verbose) {
for (const file of emitted) {
console.log(`[emit:${emitter.name}] ${file}`)
}
}
}
}
console.log(`Emitted ${emittedFiles} files to \`${argv.output}\` in ${perf.timeSince("rebuild")}`)
// CLEANUP
const destinationsToDelete = new Set<FilePath>()
for (const file of toRemove) {
// remove from cache
contentMap.delete(file)
Object.values(dependencies).forEach((depGraph) => {
// remove the node from dependency graphs
depGraph?.removeNode(file)
// remove any orphan nodes. eg if a.md is deleted, a.html is orphaned and should be removed
const orphanNodes = depGraph?.removeOrphanNodes()
orphanNodes?.forEach((node) => {
// only delete files that are in the output directory
if (node.startsWith(argv.output)) {
destinationsToDelete.add(node)
}
})
})
}
await rimraf([...destinationsToDelete])
console.log(chalk.green(`Done rebuilding in ${perf.timeSince()}`))
toRemove.clear()
release()
changes.splice(0, numChangesInBuild)
clientRefresh()
}
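
Worth calling out in the function above: the buildId stamp plus the mutex form a last-writer-wins debounce, so a burst of file events collapses into a single build. A reduced sketch of just that guard, using the same randomIdNonSecure and async-mutex primitives (runLatestOnly is a hypothetical name):

// Reduced model: every caller stamps a fresh id, then waits for the lock.
// If a later caller re-stamped the id in the meantime, this build is stale
// and returns without doing any work.
async function runLatestOnly(ctx: { buildId: string }, mut: Mutex, build: () => Promise<void>) {
  const buildId = randomIdNonSecure()
  ctx.buildId = buildId
  const release = await mut.acquire()
  if (ctx.buildId !== buildId) {
    release()
    return
  }
  try {
    await build()
  } finally {
    release()
  }
}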
async function rebuildFromEntrypoint(
fp: string,
action: FileEvent,
clientRefresh: () => void,
buildData: BuildData, // note: this function mutates buildData
) {
const { ctx, ignored, mut, initialSlugs, contentMap, toRebuild, toRemove, trackedAssets } =
buildData
const { argv } = ctx
// don't do anything for gitignored files
if (ignored(fp)) {
return
}
// don't bother rebuilding for non-content files, just track and refresh
fp = toPosixPath(fp)
const filePath = joinSegments(argv.directory, fp) as FilePath
if (path.extname(fp) !== ".md") {
if (action === "add" || action === "change") {
trackedAssets.add(filePath)
} else if (action === "delete") {
trackedAssets.delete(filePath)
}
clientRefresh()
return
}
if (action === "add" || action === "change") {
toRebuild.add(filePath)
} else if (action === "delete") {
toRemove.add(filePath)
}
const buildId = randomIdNonSecure()
ctx.buildId = buildId
buildData.lastBuildMs = new Date().getTime()
const release = await mut.acquire()
// there's another build after us, release and let them do it
if (ctx.buildId !== buildId) {
release()
return
}
const perf = new PerfTimer()
console.log(chalk.yellow("Detected change, rebuilding..."))
try {
const filesToRebuild = [...toRebuild].filter((fp) => !toRemove.has(fp))
const parsedContent = await parseMarkdown(ctx, filesToRebuild)
for (const content of parsedContent) {
const [_tree, vfile] = content
contentMap.set(vfile.data.filePath!, content)
}
for (const fp of toRemove) {
contentMap.delete(fp)
}
const parsedFiles = [...contentMap.values()]
const filteredContent = filterContent(ctx, parsedFiles)
// re-update slugs
const trackedSlugs = [...new Set([...contentMap.keys(), ...toRebuild, ...trackedAssets])]
.filter((fp) => !toRemove.has(fp))
.map((fp) => slugifyFilePath(path.posix.relative(argv.directory, fp) as FilePath))
ctx.allSlugs = [...new Set([...initialSlugs, ...trackedSlugs])]
// TODO: we can probably traverse the link graph to figure out what's safe to delete here
// instead of just deleting everything
await rimraf(path.join(argv.output, ".*"), { glob: true })
await emitContent(ctx, filteredContent)
console.log(chalk.green(`Done rebuilding in ${perf.timeSince()}`))
} catch (err) {
console.log(chalk.yellow(`Rebuild failed. Waiting on a change to fix the error...`))
if (argv.verbose) {
console.log(chalk.red(err))
}
}
clientRefresh()
toRebuild.clear()
toRemove.clear()
release()
}
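
With the dependency graphs deleted, incremental behavior now belongs to the emitters: the loop in rebuild prefers an optional partialEmit hook and falls back to a full emit. A hedged sketch of what such an emitter could look like, matching the (ctx, content, resources, changeEvents) call shape above; emitOne and everything in the body are illustrative, not an emitter from this commit:

// Stand-in for whatever output this emitter writes for one file.
declare function emitOne(ctx: BuildCtx, file: ProcessedContent): Promise<FilePath>

// Illustrative emitter: full builds emit everything; partial builds re-emit
// only files whose path shows up in the change events.
const ExampleEmitter = {
  name: "ExampleEmitter",
  async emit(ctx: BuildCtx, content: ProcessedContent[], _resources: unknown): Promise<FilePath[]> {
    return Promise.all(content.map((file) => emitOne(ctx, file)))
  },
  async partialEmit(
    ctx: BuildCtx,
    content: ProcessedContent[],
    _resources: unknown,
    changeEvents: ChangeEvent[],
  ): Promise<FilePath[]> {
    const changed = new Set<string>(changeEvents.map((ev) => ev.path))
    const touched = content.filter(([_tree, file]) => changed.has(file.data.relativePath!))
    return Promise.all(touched.map((file) => emitOne(ctx, file)))
  },
}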