import type { Loader } from "astro/loaders";
import { XMLParser } from "fast-xml-parser";
import TurndownService from "turndown";

/**
 * Options accepted by {@link pleromaLoader}.
 *
 * `feedType` is accepted for compatibility but is not read anywhere in this
 * module: the feed format is detected from the downloaded document instead.
 */
interface PleromaFeedConfig {
  instanceUrl: string;
  username: string;
  maxPosts?: number;
  feedType?: "rss" | "atom";
}

/**
 * Shape fast-xml-parser produces for a text-bearing element: a bare string
 * when the element has no attributes, an object carrying `#text` plus
 * `@_`-prefixed attributes when it has some.
 */
type XmlText = string | { "#text"?: string; [attribute: string]: unknown };

interface RssItem {
  guid: XmlText;
  title: string;
  description: string;
  pubDate: string;
  link: string;
  category?: XmlText | XmlText[];
  "activity:object-type"?: string;
  "activity:verb"?: string;
  "thr:in-reply-to"?: {
    "@_ref": string;
  };
}

interface RssFeed {
  rss: {
    channel: {
      title: string;
      description: string;
      link: string;
      item?: RssItem | RssItem[];
    };
  };
}

interface AtomCategory {
  "@_term": string;
}

interface AtomEntry {
  id: string;
  title: string;
  content: XmlText;
  published: string;
  updated: string;
  link: {
    "@_href": string;
    "@_rel": string;
    "@_type": string;
  }[];
  author: {
    name: string;
    uri: string;
  };
  category?: AtomCategory | AtomCategory[];
  "activity:object-type"?: string;
  "activity:verb"?: string;
  "thr:in-reply-to"?: {
    "@_ref": string;
  };
}

interface AtomFeed {
  feed: {
    title: string;
    id: string;
    updated: string;
    entry?: AtomEntry | AtomEntry[];
  };
}

/** Feed-format-independent view of one post, produced from either feed type. */
interface FeedPost {
  /** Human-readable reference used in warning messages. */
  label: string;
  /** Identifier taken from the feed; may be empty when the feed has none. */
  sourceId: string;
  /** Post body as HTML. */
  html: string;
  /** Publication timestamp exactly as it appears in the feed. */
  published: string;
}

/** Entry handed to the content store. */
interface NoteEntry {
  id: string;
  data: { title: string; description: string; publishDate: Date };
  body: string;
  rendered: { html: string };
}

/** The two logger methods this module uses. */
interface FeedLogger {
  info(message: string): void;
  warn(message: string): void;
}

const SHARE_VERB = "http://activitystrea.ms/schema/1.0/share";
const SENSITIVE_MARKERS = ["nsfw", "sensitive"];
const DEFAULT_TITLE = "Micro post";
const FETCH_ATTEMPTS = 3;
const FETCH_TIMEOUT_MS = 10000;
const RETRY_DELAY_MS = 2000;

// Value parsing is disabled so that IDs, categories and attribute values stay
// strings. With it enabled, a numeric-looking category or guid is returned as
// a number and string methods called on it throw.
const xmlParser = new XMLParser({
  ignoreAttributes: false,
  attributeNamePrefix: "@_",
  parseAttributeValue: false,
  parseTagValue: false,
});

// One converter is enough for the whole module; it was previously rebuilt for
// every post.
const turndownService = new TurndownService({
  headingStyle: "atx",
  codeBlockStyle: "fenced",
});

function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Wraps a lone value in an array; `null`/`undefined` become an empty array. */
function toArray<T>(value: T | T[] | null | undefined): T[] {
  if (value == null) return [];
  return Array.isArray(value) ? value : [value];
}

/**
 * Extracts the text of a parsed XML node, whichever shape the parser chose:
 * a bare value, an object with `#text`, or an array of repeats (first wins).
 * Returns an empty string when there is no text.
 */
function textOf(value: unknown): string {
  if (value == null) return "";
  if (Array.isArray(value)) return textOf(value[0]);
  if (isRecord(value)) return textOf(value["#text"]);
  return String(value);
}

/** Reads a category label from an Atom `term` attribute or from element text. */
function categoryLabel(category: unknown): string {
  if (isRecord(category) && "@_term" in category) {
    return textOf(category["@_term"]);
  }
  return textOf(category);
}

function hasSensitiveCategory(categories: unknown): boolean {
  return toArray(categories).some((category) => {
    const label = categoryLabel(category).toLowerCase();
    return SENSITIVE_MARKERS.some((marker) => label.includes(marker));
  });
}

/** Returns the last non-empty `/`-separated segment, or `""` if there is none. */
function lastPathSegment(value: string): string {
  return value.split("/").filter(Boolean).pop() ?? "";
}

/** Deterministic 32-bit FNV-1a hash, rendered in base 36. */
function stableHash(text: string): string {
  let hash = 0x811c9dc5;
  for (let i = 0; i < text.length; i++) {
    hash ^= text.charCodeAt(i);
    hash = Math.imul(hash, 0x01000193);
  }
  return (hash >>> 0).toString(36);
}

/**
 * Shortens `text` to `keep` code points followed by "..." when it is longer
 * than `maxLength` code points. Counting code points rather than UTF-16 units
 * keeps emoji and other astral characters intact.
 */
function truncate(text: string, maxLength: number, keep: number = maxLength): string {
  const codePoints = Array.from(text);
  if (codePoints.length <= maxLength) return text;
  return `${codePoints.slice(0, keep).join("")}...`;
}

function escapeHtml(text: string): string {
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

/**
 * Parses an Atom document and returns its entries (empty when there are none).
 * Parser exceptions are not caught here; the loader's outer handler deals
 * with them.
 */
function parseAtomFeed(xmlContent: string): AtomEntry[] {
  const result: AtomFeed = xmlParser.parse(xmlContent);
  return toArray(result.feed?.entry);
}

/**
 * Parses an RSS document and returns its items. Returns an empty array, after
 * printing diagnostics to the console, when parsing throws or the document
 * has no `rss.channel.item`.
 */
function parseRssFeed(xmlContent: string): RssItem[] {
  try {
    const result: RssFeed = xmlParser.parse(xmlContent);
    const items = result.rss?.channel?.item;

    if (!items) {
      console.log("RSS structure:", JSON.stringify(result, null, 2));
      return [];
    }

    return toArray(items);
  } catch (error) {
    console.error("Failed to parse RSS feed:", error);
    console.log("XML content length:", xmlContent.length);
    console.log("XML preview:", xmlContent.substring(0, 1000));
    return [];
  }
}

/** True for Atom entries that are boosts, replies, or tagged NSFW/sensitive. */
function isFilteredPostAtom(entry: AtomEntry): boolean {
  if (textOf(entry["activity:verb"]) === SHARE_VERB) return true;
  if (entry["thr:in-reply-to"]) return true;
  return hasSensitiveCategory(entry.category);
}

/** True for RSS items that are boosts, replies, or tagged NSFW/sensitive. */
function isFilteredPostRss(item: RssItem): boolean {
  if (textOf(item["activity:verb"]) === SHARE_VERB) return true;
  if (item["thr:in-reply-to"]) return true;
  return hasSensitiveCategory(item.category);
}

/**
 * Converts post HTML to Markdown.
 *
 * `<span>` wrappers (mentions, hashtags, link fragments) are removed while
 * their text is kept, then runs of blank lines are collapsed to one.
 *
 * NOTE(review): the previous implementation stripped every `</span>` before
 * running its "remove ellipsis / invisible text" patterns, which require a
 * `</span>` and so could never match. The net effect was "drop span tags,
 * keep their text", which is what this does. Confirm that hiding those
 * fragments is not actually wanted.
 */
function cleanContent(htmlContent: string): string {
  const withoutSpans = htmlContent.replace(/<\/?span\b[^>]*>/gi, "");
  const markdown = turndownService.turndown(withoutSpans);
  return markdown.trim().replace(/\n\s*\n\s*\n/g, "\n\n");
}

/**
 * Derives a short plain-text title from the first sentence of the first line
 * of `content`, falling back to "Micro post" when nothing usable remains.
 *
 * Markdown links are reduced to their text and formatting characters are
 * removed before the 60-character limit is applied, so the limit reflects
 * what is displayed. A sentence ends at `.`, `!` or `?` only when followed by
 * whitespace or the end of the line, so URLs and version numbers survive.
 */
function extractTitle(content: string): string {
  const firstLine = content.split("\n")[0] ?? "";
  const withoutLinks = firstLine.replace(/!?\[([^\]]*)\]\([^)]*\)/g, "$1");
  const firstSentence = withoutLinks.split(/[.!?]+(?:\s|$)/)[0] ?? "";
  // Backslashes are removed too: they are escape characters added by the
  // Markdown conversion and would otherwise be left dangling.
  const plain = firstSentence.replace(/[#*_`\\]/g, "").trim();
  return truncate(plain, 60, 57) || DEFAULT_TITLE;
}

/**
 * Renders Markdown text as paragraphs, one per blank-line-separated block.
 * The text is HTML-escaped first because it originates from a remote feed.
 */
function renderHtml(markdown: string): string {
  return `<p>${escapeHtml(markdown).replace(/\n\n/g, "</p><p>")}</p>`;
}

function atomToPost(entry: AtomEntry): FeedPost {
  const id = textOf(entry.id);
  return {
    label: `entry ${id}`,
    sourceId: lastPathSegment(id) || id,
    html: textOf(entry.content),
    published: textOf(entry.published),
  };
}

function rssToPost(item: RssItem): FeedPost {
  const guid = textOf(item.guid);
  return {
    label: `RSS item ${guid}`,
    sourceId: lastPathSegment(guid) || lastPathSegment(textOf(item.link)),
    html: textOf(item.description),
    published: textOf(item.pubDate),
  };
}

/**
 * Builds the store entry for one post.
 *
 * @throws Error when the publication date cannot be parsed; the caller logs
 * this and skips the post.
 */
function toNote(post: FeedPost): NoteEntry {
  const publishDate = new Date(post.published);
  if (Number.isNaN(publishDate.getTime())) {
    throw new Error(`invalid publication date "${post.published}"`);
  }

  const body = cleanContent(post.html);
  // Posts without an identifier get one derived from their content so the
  // entry ID is the same on every build.
  const sourceId = post.sourceId || stableHash(`${post.published}\n${post.html}`);

  return {
    id: `pleroma-${sourceId}`,
    data: {
      title: extractTitle(body),
      description: truncate(body, 160),
      publishDate,
    },
    body,
    rendered: { html: renderHtml(body) },
  };
}

/**
 * Parses the downloaded document as Atom or RSS, drops filtered posts and
 * returns at most `limit` of the remainder in feed order.
 *
 * NOTE(review): the format is chosen from the name of the first element in
 * the document (`feed` means Atom, anything else is treated as RSS). Confirm
 * this matches what the instance actually serves.
 */
function collectPosts(xmlContent: string, limit: number, logger: FeedLogger): FeedPost[] {
  const rootElement = /<([A-Za-z_][\w:.-]*)/.exec(xmlContent)?.[1];

  if (rootElement === "feed") {
    const entries = parseAtomFeed(xmlContent);
    logger.info(`Parsed ${entries.length} entries from Atom feed`);
    return entries
      .filter((entry) => !isFilteredPostAtom(entry))
      .slice(0, limit)
      .map(atomToPost);
  }

  const items = parseRssFeed(xmlContent);
  logger.info(`Parsed ${items.length} items from RSS feed`);
  return items
    .filter((item) => !isFilteredPostRss(item))
    .slice(0, limit)
    .map(rssToPost);
}

/**
 * Downloads the feed, making up to three attempts with a ten-second timeout
 * each and a two-second pause between attempts.
 *
 * @returns the successful response, or the error from the final attempt.
 */
async function fetchFeedWithRetry(
  feedUrl: string,
  logger: FeedLogger,
): Promise<{ response?: Response; lastError?: unknown }> {
  let lastError: unknown;

  for (let attempt = 1; attempt <= FETCH_ATTEMPTS; attempt++) {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

    try {
      logger.info(`Attempt ${attempt} to fetch feed...`);
      const response = await fetch(feedUrl, {
        headers: {
          "User-Agent": "Astro Blog (pleroma-loader)",
        },
        redirect: "follow",
        signal: controller.signal,
      });

      if (response.ok) {
        return { response };
      }
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    } catch (error) {
      lastError = error;
      logger.warn(`Attempt ${attempt} failed: ${error}`);
    } finally {
      // Also runs when fetch rejects, so a failed attempt does not leave a
      // pending timer behind.
      clearTimeout(timeoutId);
    }

    if (attempt < FETCH_ATTEMPTS) {
      logger.info(`Retrying in ${RETRY_DELAY_MS / 1000} seconds...`);
      await new Promise<void>((resolve) => setTimeout(resolve, RETRY_DELAY_MS));
    }
  }

  return { lastError };
}

/**
 * Creates an Astro content loader that imports a user's public Pleroma posts.
 *
 * Boosts, replies and posts tagged NSFW/sensitive are skipped. Each remaining
 * post is converted to Markdown and stored under the ID `pleroma-<post id>`
 * with `title`, `description` and `publishDate` data fields.
 *
 * The loader never throws. If the feed cannot be fetched or processed it logs
 * a warning and clears the store, so the build continues without these posts.
 *
 * @param config instance URL, username and optional post limit (default 20).
 */
export function pleromaLoader(config: PleromaFeedConfig): Loader {
  return {
    name: "pleroma-loader",
    load: async ({ store, logger }) => {
      try {
        const { instanceUrl, username, maxPosts = 20 } = config;
        // A negative limit would make slice() count from the end of the list.
        const limit = Math.max(0, Math.floor(maxPosts));

        // Use RSS URL that redirects to Atom - this bypasses some access restrictions
        const baseUrl = instanceUrl.replace(/\/+$/, "");
        const feedUrl = `${baseUrl}/users/${encodeURIComponent(username)}.rss`;
        logger.info(`Fetching Pleroma feed from: ${feedUrl}`);

        const { response, lastError } = await fetchFeedWithRetry(feedUrl, logger);
        if (!response) {
          logger.warn(
            `Failed to fetch Pleroma feed after ${FETCH_ATTEMPTS} attempts. Last error: ${lastError}`,
          );
          logger.info("Continuing without Pleroma posts...");
          store.clear();
          return;
        }

        const xmlContent = await response.text();
        logger.info(`Received XML content length: ${xmlContent.length}`);

        const posts = collectPosts(xmlContent, limit, logger);
        logger.info(`After filtering: ${posts.length} valid posts`);

        // Clear existing entries
        store.clear();

        let loadedCount = 0;
        for (const post of posts) {
          try {
            const note = toNote(post);
            store.set(note);
            loadedCount++;
            logger.info(`Processed post: ${note.data.title.substring(0, 50)}...`);
          } catch (error) {
            logger.warn(`Failed to process ${post.label}: ${error}`);
          }
        }

        logger.info(`Successfully loaded ${loadedCount} Pleroma posts`);
      } catch (error) {
        logger.warn(`Pleroma loader failed: ${error}`);
        logger.info("Continuing build without Pleroma posts...");
        // Don't throw error to prevent build failure
        store.clear();
      }
    },
  };
}