import type { Loader } from "astro/loaders";
import { marked } from "marked";
import TurndownService from "turndown";

/** Options accepted by `pleromaLoader`. */
interface PleromaFeedConfig {
  /** Base URL of the instance; a trailing slash is tolerated. */
  instanceUrl: string;
  /** Username used for the account search when `accountId` is not given. */
  username: string;
  /** Sent as the `limit` query parameter of the statuses request (default 20). */
  maxPosts?: number;
  /** Optional: if provided, skips account lookup. */
  accountId?: string;
  /** Optional: if provided, only posts with at least one of these tags are included. */
  allowedTags?: string[];
}

/** Subset of the account entity that this module reads. */
interface PleromaAccount {
  id: string;
  username: string;
  acct: string;
  display_name: string;
  url: string;
}

/** Subset of the media attachment entity that this module reads. */
interface PleromaMediaAttachment {
  id: string;
  type: "image" | "video" | "gifv" | "audio" | "unknown";
  url: string;
  preview_url: string;
  description?: string;
}

/** Subset of the status entity that this module reads. */
interface PleromaStatus {
  id: string;
  created_at: string;
  content: string;
  url: string;
  reblog: PleromaStatus | null;
  in_reply_to_id: string | null;
  sensitive: boolean;
  media_attachments: PleromaMediaAttachment[];
  visibility: string;
}

/** Minimal logger surface used here; only `info` and `warn` are ever called. */
interface LoaderLogger {
  info(message: string): void;
  warn(message: string): void;
}

const USER_AGENT = "Astro Blog (pleroma-loader)";
const REQUEST_TIMEOUT_MS = 10000;
const MAX_FETCH_ATTEMPTS = 3;
const RETRY_DELAY_MS = 2000;
const FALLBACK_TITLE = "Micro post";

// Spans whose *content* is dropped.
// NOTE(review): the class names are reconstructed from the original inline
// comments ("Remove ellipsis", "Remove invisible text") — confirm against the
// HTML your instance actually emits. If shortened links end up with an empty
// label, unwrap "ellipsis" instead of removing it.
const HIDDEN_SPAN_PATTERN =
  /<span\b[^>]*\bclass=(["'])[^"']*\b(?:invisible|ellipsis)\b[^"']*\1[^>]*>[\s\S]*?<\/span>/gi;

// Any opening or closing <span> tag (the wrapper only, not its content).
const SPAN_TAG_PATTERN = /<\/?span\b[^>]*>/gi;

// "#tag" that is not part of an entity ("&#39;") or glued to a preceding word
// character (URL fragments, "C#sharp").
const HASHTAG_PATTERN = /(?<![&\p{L}\p{N}_])#([\p{L}\p{N}_]+)/gu;

/**
 * GET `url` with the loader's User-Agent, aborting if no response arrives
 * within `REQUEST_TIMEOUT_MS`.
 *
 * The timer is cleared in `finally` so a failed request does not leave a
 * pending timeout behind.
 */
async function fetchWithTimeout(url: string): Promise<Response> {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
  try {
    return await fetch(url, {
      headers: { "User-Agent": USER_AGENT },
      signal: controller.signal,
    });
  } finally {
    clearTimeout(timeoutId);
  }
}

/**
 * Resolves a username to an account ID via the account search endpoint.
 *
 * @returns The ID of the first search hit, or `null` when the request fails,
 *   returns a non-OK status, or yields no account. Never throws.
 */
async function getAccountId(
  instanceUrl: string,
  username: string,
  logger: LoaderLogger,
): Promise<string | null> {
  try {
    const searchUrl = `${instanceUrl}/api/v1/accounts/search?q=${encodeURIComponent(username)}&limit=1`;
    logger.info(`Looking up account ID for username: ${username}`);

    const response = await fetchWithTimeout(searchUrl);
    if (!response.ok) {
      logger.warn(`Failed to search for account: HTTP ${response.status}`);
      return null;
    }

    const payload: unknown = await response.json();
    const account = Array.isArray(payload)
      ? (payload[0] as PleromaAccount | undefined)
      : undefined;
    if (!account) {
      logger.warn(`No account found for username: ${username}`);
      return null;
    }

    // The search is not guaranteed to return an exact match; surface a
    // mismatch instead of silently loading another account's posts.
    const wanted = username.replace(/^@/, "").toLowerCase();
    const isExactMatch = [account.username, account.acct].some(
      (name) => typeof name === "string" && name.toLowerCase() === wanted,
    );
    if (!isExactMatch) {
      logger.warn(
        `Account search returned @${account.acct}, which does not exactly match "${username}"; set accountId in the loader config if this is the wrong account`,
      );
    }

    logger.info(`Found account ID: ${account.id} for @${account.acct}`);
    return account.id;
  } catch (error) {
    logger.warn(`Failed to lookup account ID: ${error}`);
    return null;
  }
}

/**
 * Fetches up to `maxPosts` statuses for an account, excluding replies and
 * reblogs at the API level.
 *
 * Each attempt (request, status check and JSON decoding) is retried up to
 * `MAX_FETCH_ATTEMPTS` times with a fixed pause in between.
 *
 * @throws Error when every attempt fails; the message includes the last error.
 */
async function fetchAccountStatuses(
  instanceUrl: string,
  accountId: string,
  maxPosts: number,
  logger: LoaderLogger,
): Promise<PleromaStatus[]> {
  const statusesUrl = `${instanceUrl}/api/v1/accounts/${encodeURIComponent(accountId)}/statuses?limit=${maxPosts}&exclude_replies=true&exclude_reblogs=true`;
  let lastError: unknown;

  for (let attempt = 1; attempt <= MAX_FETCH_ATTEMPTS; attempt++) {
    try {
      logger.info(`Attempt ${attempt} to fetch statuses...`);

      const response = await fetchWithTimeout(statusesUrl);
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }

      const payload: unknown = await response.json();
      if (!Array.isArray(payload)) {
        throw new Error("Unexpected response body: expected an array of statuses");
      }
      return payload as PleromaStatus[];
    } catch (error) {
      lastError = error;
      logger.warn(`Attempt ${attempt} failed: ${error}`);

      if (attempt < MAX_FETCH_ATTEMPTS) {
        logger.info("Retrying in 2 seconds...");
        await new Promise((resolve) => setTimeout(resolve, RETRY_DELAY_MS));
      }
    }
  }

  throw new Error(
    `Failed to fetch statuses after ${MAX_FETCH_ATTEMPTS} attempts. Last error: ${lastError}`,
  );
}

/**
 * Returns `true` for statuses that must not be published: reblogs, replies
 * (both already excluded by the API parameters, re-checked here) and posts
 * flagged as sensitive.
 */
function isFilteredStatus(status: PleromaStatus): boolean {
  return Boolean(status.reblog) || Boolean(status.in_reply_to_id) || Boolean(status.sensitive);
}

/**
 * Extracts the distinct hashtags from a status' HTML content.
 *
 * Markup is removed before matching so that `href` fragments and numeric
 * entities such as `&#39;` are not mistaken for hashtags.
 *
 * @returns Lower-cased tags, each including its leading `#`, in order of
 *   first appearance.
 */
function extractHashtags(htmlContent: string): string[] {
  const text = htmlContent
    .replace(SPAN_TAG_PATTERN, "") // inline wrappers: "#<span>tag</span>" -> "#tag"
    .replace(/<[^>]*>/g, " "); // every other tag acts as a separator

  const tags = new Set<string>();
  for (const match of text.matchAll(HASHTAG_PATTERN)) {
    const name = match[1];
    if (name) {
      tags.add(`#${name.toLowerCase()}`);
    }
  }
  return [...tags];
}

/**
 * Returns `true` when the status carries at least one of `allowedTags`
 * (case-insensitive, leading `#` optional), or when no tags are configured.
 */
function hasAllowedTag(status: PleromaStatus, allowedTags: string[]): boolean {
  if (!allowedTags || allowedTags.length === 0) {
    return true; // No filtering if no tags specified
  }

  const normalize = (tag: string): string => tag.toLowerCase().replace(/^#/, "");
  const allowed = new Set(allowedTags.map(normalize));

  return extractHashtags(status.content || "").some((tag) => allowed.has(normalize(tag)));
}

/**
 * Converts status HTML to Markdown.
 *
 * Hidden/ellipsis spans are removed together with their text, all remaining
 * `<span>` wrappers (mentions, hashtags) are unwrapped, and the result is run
 * through Turndown. Runs of blank lines are collapsed to a single blank line.
 */
function cleanContent(htmlContent: string): string {
  const turndownService = new TurndownService({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
  });

  // Order matters: content-bearing spans must be removed while their closing
  // tags are still present, before the generic unwrapping strips every </span>.
  const cleanedContent = htmlContent
    .replace(HIDDEN_SPAN_PATTERN, "")
    .replace(SPAN_TAG_PATTERN, "");

  const markdown = turndownService.turndown(cleanedContent);

  return markdown.trim().replace(/\n\s*\n\s*\n/g, "\n\n");
}

/**
 * Renders Markdown to HTML with GitHub-flavoured Markdown enabled and single
 * line breaks converted to `<br>`.
 *
 * Options are passed per call so the shared `marked` defaults stay untouched.
 * NOTE(review): the output is not sanitized here — confirm that is acceptable
 * for the content being rendered.
 *
 * @returns The HTML string, or `""` if `marked.parse` yields a Promise (its
 *   declared return type is `string | Promise<string>`).
 */
function markdownToHtml(markdown: string): string {
  const html = marked.parse(markdown, { breaks: true, gfm: true });
  return typeof html === "string" ? html : "";
}

/**
 * Derives a short title from Markdown content: the first sentence of the
 * first line, truncated to 60 characters, with `#`, `*`, `_` and backticks
 * removed. Falls back to "Micro post" when nothing usable remains.
 */
function extractTitle(content: string): string {
  const firstLine = content.split("\n")[0];
  if (!firstLine) return FALLBACK_TITLE;

  const firstSentence = firstLine.split(/[.!?]/)[0];
  if (!firstSentence) return FALLBACK_TITLE;

  const truncated =
    firstSentence.length > 60 ? `${firstSentence.substring(0, 57)}...` : firstSentence;
  const title = truncated.replace(/[#*_`]/g, "").trim();

  return title || FALLBACK_TITLE;
}

/**
 * Guesses an image MIME type from the file extension in the URL's path
 * (query string and fragment are ignored). Falls back to `image/jpeg` when
 * the path has no extension.
 */
function imageMimeType(url: string): string {
  let path: string;
  try {
    path = new URL(url).pathname;
  } catch {
    // Not an absolute URL: fall back to the raw string minus query/fragment.
    path = url.split(/[?#]/)[0] ?? url;
  }

  const extension = /\.([a-z0-9]+)$/i.exec(path)?.[1]?.toLowerCase();
  if (!extension) return "image/jpeg";
  if (extension === "jpg") return "image/jpeg";
  if (extension === "svg") return "image/svg+xml";
  return `image/${extension}`;
}

/**
 * Creates an Astro content loader that imports a Pleroma account's public
 * posts as entries with the ID `pleroma-<status id>`.
 *
 * The loader never throws: on any failure it logs a warning, clears the
 * store and lets the build continue without Pleroma posts.
 */
export function pleromaLoader(config: PleromaFeedConfig): Loader {
  return {
    name: "pleroma-loader",
    load: async ({ store, logger }) => {
      try {
        const { username, maxPosts = 20, accountId: configAccountId, allowedTags } = config;
        // Avoid "//api/..." when the configured URL ends with a slash.
        const instanceUrl = config.instanceUrl.replace(/\/+$/, "");

        logger.info(`Fetching Pleroma posts via API for user: ${username}`);

        // Get account ID (use provided one or lookup by username)
        let accountId: string | undefined = configAccountId;
        if (!accountId) {
          const lookedUpAccountId = await getAccountId(instanceUrl, username, logger);
          if (!lookedUpAccountId) {
            logger.warn("Failed to get account ID. Continuing without Pleroma posts...");
            store.clear();
            return;
          }
          accountId = lookedUpAccountId;
        }

        const statuses = await fetchAccountStatuses(instanceUrl, accountId, maxPosts, logger);
        logger.info(`Fetched ${statuses.length} statuses from API`);

        const validStatuses = statuses.filter(
          (status) =>
            !isFilteredStatus(status) && (!allowedTags || hasAllowedTag(status, allowedTags)),
        );
        logger.info(`After filtering: ${validStatuses.length} valid posts`);

        // Replace whatever a previous run stored.
        store.clear();

        let processedCount = 0;
        for (const status of validStatuses) {
          // A single malformed status is skipped rather than failing the load.
          try {
            const cleanedContent = cleanContent(status.content || "");
            const title = extractTitle(cleanedContent);

            // Image attachments only
            const attachments = status.media_attachments
              .filter((attachment) => attachment.type === "image")
              .map((attachment) => ({
                url: attachment.url,
                type: imageMimeType(attachment.url),
              }));

            store.set({
              id: `pleroma-${status.id}`,
              data: {
                title,
                description:
                  cleanedContent.substring(0, 160) + (cleanedContent.length > 160 ? "..." : ""),
                publishDate: new Date(status.created_at),
                sourceUrl: status.url,
                attachments,
              },
              body: cleanedContent,
              rendered: {
                html: markdownToHtml(cleanedContent),
              },
            });
            processedCount++;

            logger.info(`Processed post: ${title.substring(0, 50)}...`);
          } catch (error) {
            logger.warn(`Failed to process status ${status.id}: ${error}`);
          }
        }

        logger.info(`Successfully loaded ${processedCount} Pleroma posts`);
      } catch (error) {
        logger.warn(`Pleroma loader failed: ${error}`);
        logger.info("Continuing build without Pleroma posts...");
        // Don't throw error to prevent build failure
        store.clear();
      }
    },
  };
}