From a5748bd37bb0411b0ddd507120c72fa0335d5c39 Mon Sep 17 00:00:00 2001
From: Dawid Rycerz
Date: Thu, 29 Jan 2026 16:46:22 +0100
Subject: feat(pleroma): add incremental caching for post fetching

Use Astro 5 MetaStore to persist sync state (newestStatusId, lastSyncTime,
accountId) between builds. On subsequent builds, only fetch new posts via the
Mastodon API since_id parameter instead of re-fetching all ~344 statuses.

Includes force-refresh support via PLEROMA_FORCE_REFRESH env var or config
option, cache TTL, and automatic fallback to full fetch on errors.

Co-Authored-By: Claude Opus 4.5
---
 SPRINT.md              |   6 +--
 src/loaders/pleroma.ts | 103 +++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 94 insertions(+), 15 deletions(-)

diff --git a/SPRINT.md b/SPRINT.md
index 541cdab..ba4c150 100644
--- a/SPRINT.md
+++ b/SPRINT.md
@@ -7,9 +7,6 @@ Goal: Initialize project tooling for Claude Code
 ## In Progress
 
 ## Backlog (Prioritized)
-- [ ] **[FEATURE-001]** Add caching for Pleroma post fetching
-  - Implement local cache to avoid re-fetching all posts from social.craftknight.com on every build
-  - Cache should store fetched posts and only pull new/updated ones
 - [ ] **[FEATURE-002]** Import posts from old Mastodon instance
   - Fetch posts from https://mastodon.com.pl/@knightdave using the same approach as the Pleroma loader
   - Integrate into the existing content collection alongside Pleroma posts
@@ -21,6 +18,9 @@ Goal: Initialize project tooling for Claude Code
   - Let users see total page count and jump to specific pages
 
 ## Completed This Sprint
+- [x] **[FEATURE-001]** Add caching for Pleroma post fetching
+  - Completed: 2026-01-29
+  - Notes: Incremental fetching via Astro 5 MetaStore + Mastodon since_id API. Supports force refresh via env var, config option, and cache TTL.
 - [x] **[CHORE-001]** Initial Claude Code setup
   - Completed: 2026-01-29
   - Notes: Added CLAUDE.md, SPRINT.md, .claude/settings.local.json
diff --git a/src/loaders/pleroma.ts b/src/loaders/pleroma.ts
index 766e9ee..1d11747 100644
--- a/src/loaders/pleroma.ts
+++ b/src/loaders/pleroma.ts
@@ -14,6 +14,8 @@ interface PleromaFeedConfig {
   accountId?: string; // Optional: if provided, skips account lookup
   allowedTags?: string[]; // Optional: if provided, only posts with these tags are included
   mergeThreads?: boolean; // Optional: if true, merges thread posts into single entry (default: true)
+  forceRefresh?: boolean; // Optional: if true, always do a full fetch ignoring cache
+  cacheTtlSeconds?: number; // Optional: if set, do a full fetch when cache is older than this
 }
 
 interface PleromaAccount {
@@ -350,6 +352,7 @@ async function fetchAccountStatuses(
   accountId: string,
   maxPosts: number,
   logger: Logger,
+  sinceId?: string,
 ): Promise<PleromaStatus[]> {
   const allStatuses: PleromaStatus[] = [];
   let maxId: string | null = null;
@@ -376,6 +379,10 @@ async function fetchAccountStatuses(
       params.set("max_id", maxId);
     }
 
+    if (sinceId && pageCount === 1) {
+      params.set("since_id", sinceId);
+    }
+
     const statusesUrl = `${instanceUrl}/api/v1/accounts/${accountId}/statuses?${params.toString()}`;
 
     // Add retry logic for network issues
@@ -663,14 +670,12 @@ function extractTitle(content: string): string {
 export function pleromaLoader(config: PleromaFeedConfig): Loader {
   return {
     name: "pleroma-loader",
-    load: async ({ store, logger }) => {
+    load: async ({ store, logger, meta }) => {
       try {
         const { instanceUrl, username, maxPosts = 20, accountId: configAccountId } = config;
 
-        logger.info(`Fetching Pleroma posts via API for user: ${username}`);
-
-        // Get account ID (use provided one or lookup by username)
-        let accountId: string | undefined = configAccountId;
+        // Resolve account ID (use cached, provided, or lookup)
+        let accountId: string | undefined = configAccountId || meta.get("accountId") || undefined;
         if (!accountId) {
           const lookedUpAccountId = await getAccountId(instanceUrl, username, logger);
           if (!lookedUpAccountId) {
@@ -679,10 +684,64 @@ export function pleromaLoader(config: PleromaFeedConfig): Loader {
             return;
           }
           accountId = lookedUpAccountId;
+          meta.set("accountId", accountId);
+        }
+
+        // Determine if we should do a full fetch or incremental
+        const cachedNewestId = meta.get("newestStatusId");
+        const lastSyncTime = meta.get("lastSyncTime");
+        const storeHasEntries = store.keys().length > 0;
+
+        const forceRefresh =
+          config.forceRefresh ||
+          process.env.PLEROMA_FORCE_REFRESH === "true" ||
+          (config.cacheTtlSeconds != null &&
+            lastSyncTime != null &&
+            Date.now() - Number(lastSyncTime) > config.cacheTtlSeconds * 1000);
+
+        const canDoIncremental = !forceRefresh && cachedNewestId != null && storeHasEntries;
+
+        if (forceRefresh) {
+          logger.info("Force refresh: clearing cache and re-fetching all posts.");
+          meta.delete("newestStatusId");
+          meta.delete("lastSyncTime");
+        }
+
+        let statuses: PleromaStatus[];
+
+        if (canDoIncremental) {
+          logger.info(
+            `Incremental mode: fetching posts newer than ${cachedNewestId} (last sync: ${lastSyncTime ? new Date(Number(lastSyncTime)).toISOString() : "unknown"})`,
+          );
+
+          try {
+            statuses = await fetchAccountStatuses(
+              instanceUrl,
+              accountId,
+              maxPosts,
+              logger,
+              cachedNewestId,
+            );
+          } catch (error) {
+            logger.warn(`Incremental fetch failed: ${error}. Falling back to full fetch.`);
+            meta.delete("newestStatusId");
+            meta.delete("lastSyncTime");
+            statuses = await fetchAccountStatuses(instanceUrl, accountId, maxPosts, logger);
+            store.clear();
+          }
+
+          if (statuses.length === 0) {
+            logger.info(
+              `Cache hit: no new posts since last sync. ${store.keys().length} posts in store.`,
+            );
+            return;
+          }
+        } else {
+          logger.info("Full fetch: no cached data or refresh requested. Fetching all posts.");
+          statuses = await fetchAccountStatuses(instanceUrl, accountId, maxPosts, logger);
+          store.clear();
         }
 
-        // Fetch statuses from API
-        const statuses = await fetchAccountStatuses(instanceUrl, accountId, maxPosts, logger);
         logger.info(`Fetched ${statuses.length} statuses from API`);
 
         // Filter statuses
@@ -693,11 +752,11 @@ export function pleromaLoader(config: PleromaFeedConfig): Loader {
         });
         logger.info(`After filtering: ${validStatuses.length} valid posts`);
 
-        // Collect all post IDs for link replacement
+        // Collect all post IDs for link replacement (existing store + new statuses)
         const allPostIds = new Set(validStatuses.map((status) => status.id));
-
-        // Clear existing entries
-        store.clear();
+        for (const key of store.keys()) {
+          allPostIds.add(key.replace(/^pleroma-/, ""));
+        }
 
         // Process each status
         for (const status of validStatuses) {
@@ -786,12 +845,32 @@ export function pleromaLoader(config: PleromaFeedConfig): Loader {
           }
         }
 
-        logger.info(`Successfully loaded ${validStatuses.length} Pleroma posts`);
+        // Update sync metadata
+        // Statuses are returned newest-first from the API
+        const newestId =
+          validStatuses.length > 0 ? validStatuses[0]?.id : (cachedNewestId ?? undefined);
+        if (newestId) {
+          meta.set("newestStatusId", newestId);
+        }
+        meta.set("lastSyncTime", String(Date.now()));
+
+        const totalInStore = store.keys().length;
+        if (canDoIncremental) {
+          logger.info(
+            `Incremental update: ${validStatuses.length} new posts fetched, ${totalInStore} total in store.`,
+          );
+        } else {
+          logger.info(
+            `Full fetch complete: ${validStatuses.length} posts processed, ${totalInStore} total in store.`,
+          );
+        }
       } catch (error) {
         logger.warn(`Pleroma loader failed: ${error}`);
         logger.info("Continuing build without Pleroma posts...");
         // Don't throw error to prevent build failure
         store.clear();
+        meta.delete("newestStatusId");
+        meta.delete("lastSyncTime");
       }
     },
   };
--
cgit v1.2.3
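
Usage sketch (not part of the patch): the snippet below shows how the new caching
options might be wired into an Astro 5 content collection. The file path, collection
name, username, maxPosts value, and TTL are illustrative assumptions; only
forceRefresh, cacheTtlSeconds, and the PLEROMA_FORCE_REFRESH environment variable
come from the patch above.

// src/content.config.ts (hypothetical location and values)
import { defineCollection } from "astro:content";
import { pleromaLoader } from "./loaders/pleroma";

const social = defineCollection({
  loader: pleromaLoader({
    instanceUrl: "https://social.craftknight.com", // Pleroma instance named in SPRINT.md
    username: "knightdave",        // assumed account handle
    maxPosts: 400,                 // assumed; the commit mentions ~344 statuses
    cacheTtlSeconds: 60 * 60 * 24, // full re-fetch when the cache is older than a day
    // forceRefresh: true,         // or run the build with PLEROMA_FORCE_REFRESH=true
  }),
});

export const collections = { social };

With a config along these lines, the first build does a full fetch and records
newestStatusId and lastSyncTime in the loader's MetaStore; subsequent builds pass
that id as since_id and only pull newer statuses, falling back to a full fetch when
the incremental request fails, the TTL has expired, or a force refresh is requested.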