Replace XML-based posts with native Pleroma API

author: Dawid Rycerz <dawid@rycerz.xyz> 2026-01-12 18:03:49 +0100
committer: Dawid Rycerz <dawid@rycerz.xyz> 2026-01-12 18:04:32 +0100
commit: 9f50f425bda5e56f4b73d8ce975eb6dafdbb46af (patch)
tree: 701682a15b4e3325a67b3d544452f9e88348f63d /src
parent: eb7920efa3dd6889fe30249de1f0f0f121f1715f (diff)
2 files changed, 166 insertions, 293 deletions
diff --git a/src/content.config.ts b/src/content.config.ts
index c85d88e..5dff41a 100644
--- a/src/content.config.ts
+++ b/src/content.config.ts
@@ -42,7 +42,6 @@ const micro = defineCollection({
 		instanceUrl: "https://social.craftknight.com",
 		username: "dawid",
 		maxPosts: 50,
-		feedType: "atom",
 	}),
 	schema: baseSchema.extend({
 		description: z.string().optional(),
diff --git a/src/loaders/pleroma.ts b/src/loaders/pleroma.ts
index 952e491..a833248 100644
--- a/src/loaders/pleroma.ts
+++ b/src/loaders/pleroma.ts
@@ -1,5 +1,4 @@
 import type { Loader } from "astro/loaders";
-import { XMLParser } from "fast-xml-parser";
 import { marked } from "marked";
 import TurndownService from "turndown";
 
@@ -7,166 +6,145 @@ interface PleromaFeedConfig {
 	instanceUrl: string;
 	username: string;
 	maxPosts?: number;
-	feedType?: "rss" | "atom";
+	accountId?: string; // Optional: if provided, skips account lookup
 }
 
-interface RssItem {
-	guid: string;
-	title: string;
-	description: string;
-	pubDate: string;
-	link: string;
-	category?: string | string[];
-	"activity:object-type"?: string;
-	"activity:verb"?: string;
-	"thr:in-reply-to"?: {
-		"@_ref": string;
-	};
-}
-
-interface RssFeed {
-	rss: {
-		channel: {
-			title: string;
-			description: string;
-			link: string;
-			item?: RssItem | RssItem[];
-		};
-	};
+interface PleromaAccount {
+	id: string;
+	username: string;
+	acct: string;
+	display_name: string;
+	url: string;
 }
 
-interface AtomEntry {
+interface PleromaMediaAttachment {
 	id: string;
-	title: string;
-	content: {
-		"#text": string;
-		"@_type": string;
-	};
-	published: string;
-	updated: string;
-	link: {
-		"@_href": string;
-		"@_rel": string;
-		"@_type": string;
-	}[];
-	author: {
-		name: string;
-		uri: string;
-	};
-	category?: {
-		"@_term": string;
-	}[];
-	"activity:object-type"?: string;
-	"activity:verb"?: string;
-	"thr:in-reply-to"?: {
-		"@_ref": string;
-	};
+	type: "image" | "video" | "gifv" | "audio" | "unknown";
+	url: string;
+	preview_url: string;
+	description?: string;
 }
 
-interface AtomFeed {
-	feed: {
-		title: string;
-		id: string;
-		updated: string;
-		entry?: AtomEntry | AtomEntry[];
-	};
+interface PleromaStatus {
+	id: string;
+	created_at: string;
+	content: string;
+	url: string;
+	reblog: PleromaStatus | null;
+	in_reply_to_id: string | null;
+	sensitive: boolean;
+	media_attachments: PleromaMediaAttachment[];
+	visibility: string;
 }
 
-function parseAtomFeed(xmlContent: string): AtomEntry[] {
-	const parser = new XMLParser({
-		ignoreAttributes: false,
-		attributeNamePrefix: "@_",
-		parseAttributeValue: true,
-	});
-
-	const result: AtomFeed = parser.parse(xmlContent);
+async function getAccountId(
+	instanceUrl: string,
+	username: string,
+	logger: any,
+): Promise<string | null> {
+	try {
+		const searchUrl = `${instanceUrl}/api/v1/accounts/search?q=${encodeURIComponent(username)}&limit=1`;
+		logger.info(`Looking up account ID for username: ${username}`);
 
-	if (!result.feed?.entry) {
-		return [];
-	}
+		const controller = new AbortController();
+		const timeoutId = setTimeout(() => controller.abort(), 10000);
 
-	// Handle both single entry and array of entries
-	const entries = Array.isArray(result.feed.entry) ? result.feed.entry : [result.feed.entry];
+		const response = await fetch(searchUrl, {
+			headers: {
+				"User-Agent": "Astro Blog (pleroma-loader)",
+			},
+			signal: controller.signal,
+		});
 
-	return entries;
-}
+		clearTimeout(timeoutId);
 
-function parseRssFeed(xmlContent: string): RssItem[] {
-	const parser = new XMLParser({
-		ignoreAttributes: false,
-		attributeNamePrefix: "@_",
-		parseAttributeValue: true,
-	});
+		if (!response.ok) {
+			logger.warn(`Failed to search for account: HTTP ${response.status}`);
+			return null;
+		}
 
-	try {
-		const result: RssFeed = parser.parse(xmlContent);
+		const accounts: PleromaAccount[] = await response.json();
 
-		if (!result.rss?.channel?.item) {
-			console.log("RSS structure:", JSON.stringify(result, null, 2));
-			return [];
+		if (accounts.length === 0 || !accounts[0]) {
+			logger.warn(`No account found for username: ${username}`);
+			return null;
 		}
 
-		// Handle both single item and array of items
-		const items = Array.isArray(result.rss.channel.item)
-			? result.rss.channel.item
-			: [result.rss.channel.item];
-
-		return items;
+		const account = accounts[0];
+		logger.info(`Found account ID: ${account.id} for @${account.acct}`);
+		return account.id;
 	} catch (error) {
-		console.error("Failed to parse RSS feed:", error);
-		console.log("XML content length:", xmlContent.length);
-		console.log("XML preview:", xmlContent.substring(0, 1000));
-		return [];
+		logger.warn(`Failed to lookup account ID: ${error}`);
+		return null;
 	}
 }
 
-function isFilteredPostAtom(entry: AtomEntry): boolean {
-	// Filter out boosts/reblogs
-	if (entry["activity:verb"] === "http://activitystrea.ms/schema/1.0/share") {
-		return true;
-	}
-
-	// Filter out replies
-	if (entry["thr:in-reply-to"]) {
-		return true;
+async function fetchAccountStatuses(
+	instanceUrl: string,
+	accountId: string,
+	maxPosts: number,
+	logger: any,
+): Promise<PleromaStatus[]> {
+	let response: Response | undefined;
+	let lastError: unknown;
+
+	// Add retry logic for network issues
+	for (let attempt = 1; attempt <= 3; attempt++) {
+		try {
+			logger.info(`Attempt ${attempt} to fetch statuses...`);
+
+			const statusesUrl = `${instanceUrl}/api/v1/accounts/${accountId}/statuses?limit=${maxPosts}&exclude_replies=true&exclude_reblogs=true`;
+
+			// Create timeout controller
+			const controller = new AbortController();
+			const timeoutId = setTimeout(() => controller.abort(), 10000);
+
+			response = await fetch(statusesUrl, {
+				headers: {
+					"User-Agent": "Astro Blog (pleroma-loader)",
+				},
+				signal: controller.signal,
+			});
+
+			clearTimeout(timeoutId);
+
+			if (response.ok) {
+				break; // Success, exit retry loop
+			}
+			throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+		} catch (error) {
+			lastError = error;
+			logger.warn(`Attempt ${attempt} failed: ${error}`);
+
+			if (attempt < 3) {
+				logger.info("Retrying in 2 seconds...");
+				await new Promise((resolve) => setTimeout(resolve, 2000));
+			}
+		}
 	}
 
-	// Filter out NSFW/sensitive content
-	if (entry.category) {
-		const categories = Array.isArray(entry.category) ? entry.category : [entry.category];
-		const hasNsfwTag = categories.some(
-			(cat) =>
-				cat["@_term"]?.toLowerCase().includes("nsfw") ||
-				cat["@_term"]?.toLowerCase().includes("sensitive"),
-		);
-		if (hasNsfwTag) {
-			return true;
-		}
+	if (!response || !response.ok) {
+		throw new Error(`Failed to fetch statuses after 3 attempts. Last error: ${lastError}`);
 	}
 
-	return false;
+	const statuses: PleromaStatus[] = await response.json();
+	return statuses;
 }
 
-function isFilteredPostRss(item: RssItem): boolean {
-	// Filter out boosts/reblogs
-	if (item["activity:verb"] === "http://activitystrea.ms/schema/1.0/share") {
+function isFilteredStatus(status: PleromaStatus): boolean {
+	// Filter out boosts/reblogs (already handled by API parameter, but double-check)
+	if (status.reblog) {
 		return true;
 	}
 
-	// Filter out replies
-	if (item["thr:in-reply-to"]) {
+	// Filter out replies (already handled by API parameter, but double-check)
+	if (status.in_reply_to_id) {
 		return true;
 	}
 
 	// Filter out NSFW/sensitive content
-	if (item.category) {
-		const categories = Array.isArray(item.category) ? item.category : [item.category];
-		const hasNsfwTag = categories.some(
-			(cat) => cat?.toLowerCase().includes("nsfw") || cat?.toLowerCase().includes("sensitive"),
-		);
-		if (hasNsfwTag) {
-			return true;
-		}
+	if (status.sensitive) {
+		return true;
 	}
 
 	return false;
@@ -228,182 +206,78 @@ export function pleromaLoader(config: PleromaFeedConfig): Loader {
 		name: "pleroma-loader",
 		load: async ({ store, logger }) => {
 			try {
-				const { instanceUrl, username, maxPosts = 20 } = config;
-				// Use RSS URL that redirects to Atom - this bypasses some access restrictions
-				const feedUrl = `${instanceUrl}/users/${username}.rss`;
-
-				logger.info(`Fetching Pleroma feed from: ${feedUrl}`);
+				const { instanceUrl, username, maxPosts = 20, accountId: configAccountId } = config;
+
+				logger.info(`Fetching Pleroma posts via API for user: ${username}`);
+
+				// Get account ID (use provided one or lookup by username)
+				let accountId: string | undefined = configAccountId;
+				if (!accountId) {
+					const lookedUpAccountId = await getAccountId(instanceUrl, username, logger);
+					if (!lookedUpAccountId) {
+						logger.warn("Failed to get account ID. Continuing without Pleroma posts...");
+						store.clear();
+						return;
+					}
+					accountId = lookedUpAccountId;
+				}
 
-				// Add retry logic for network issues
-				let response: Response | undefined;
-				let lastError: unknown;
+				// Fetch statuses from API
+				const statuses = await fetchAccountStatuses(instanceUrl, accountId, maxPosts, logger);
+				logger.info(`Fetched ${statuses.length} statuses from API`);
 
-				for (let attempt = 1; attempt <= 3; attempt++) {
-					try {
-						logger.info(`Attempt ${attempt} to fetch feed...`);
+				// Filter statuses
+				const validStatuses = statuses.filter((status) => !isFilteredStatus(status));
+				logger.info(`After filtering: ${validStatuses.length} valid posts`);
 
-						// Create timeout controller
-						const controller = new AbortController();
-						const timeoutId = setTimeout(() => controller.abort(), 10000);
+				// Clear existing entries
+				store.clear();
 
-						response = await fetch(feedUrl, {
-							headers: {
-								"User-Agent": "Astro Blog (pleroma-loader)",
+				// Process each status
+				for (const status of validStatuses) {
+					try {
+						const content = status.content || "";
+						const cleanedContent = cleanContent(content);
+						const title = extractTitle(cleanedContent);
+
+						// Extract post ID from status
+						const postId = status.id;
+
+						// Use status URL as source
+						const sourceUrl = status.url;
+
+						// Extract image attachments only
+						const attachments = status.media_attachments
+							.filter((attachment) => attachment.type === "image")
+							.map((attachment) => ({
+								url: attachment.url,
+								type: `image/${attachment.url.split(".").pop() || "jpeg"}`,
+							}));
+
+						// Create note entry
+						store.set({
+							id: `pleroma-${postId}`,
+							data: {
+								title,
+								description:
+									cleanedContent.substring(0, 160) + (cleanedContent.length > 160 ? "..." : ""),
+								publishDate: new Date(status.created_at),
+								sourceUrl,
+								attachments,
+							},
+							body: cleanedContent,
+							rendered: {
+								html: markdownToHtml(cleanedContent),
 							},
-							redirect: "follow", // Follow redirects
-							signal: controller.signal,
 						});
 
-						clearTimeout(timeoutId);
-
-						if (response.ok) {
-							break; // Success, exit retry loop
-						}
-						throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+						logger.info(`Processed post: ${title.substring(0, 50)}...`);
 					} catch (error) {
-						lastError = error;
-						logger.warn(`Attempt ${attempt} failed: ${error}`);
-
-						if (attempt < 3) {
-							logger.info("Retrying in 2 seconds...");
-							await new Promise((resolve) => setTimeout(resolve, 2000));
-						}
-					}
-				}
-
-				if (!response || !response.ok) {
-					logger.warn(`Failed to fetch Pleroma feed after 3 attempts. Last error: ${lastError}`);
-					logger.info("Continuing without Pleroma posts...");
-					store.clear();
-					return;
-				}
-
-				const xmlContent = await response.text();
-				logger.info(`Received XML content length: ${xmlContent.length}`);
-
-				// Auto-detect if it's Atom or RSS based on content
-				const isAtomFeed =
-					xmlContent.includes("<feed") ||
-					xmlContent.includes('xmlns="http://www.w3.org/2005/Atom"');
-				logger.info(`Detected feed type: ${isAtomFeed ? "Atom" : "RSS"}`);
-
-				let validEntries: AtomEntry[] = [];
-
-				if (isAtomFeed) {
-					// Process as Atom feed
-					const entries = parseAtomFeed(xmlContent);
-					logger.info(`Parsed ${entries.length} entries from Atom feed`);
-
-					validEntries = entries.filter((entry) => !isFilteredPostAtom(entry)).slice(0, maxPosts);
-
-					logger.info(`After filtering: ${validEntries.length} valid posts`);
-
-					// Clear existing entries
-					store.clear();
-
-					// Process each Atom entry
-					for (const entry of validEntries) {
-						try {
-							const content = entry.content?.["#text"] || "";
-							const cleanedContent = cleanContent(content);
-							const title = extractTitle(cleanedContent);
-
-							// Extract post ID from the entry ID
-							const postId = entry.id.split("/").pop() || entry.id;
-
-							// Extract source URL from the entry
-							const sourceUrl =
-								entry.link?.find((link) => link["@_rel"] === "alternate")?.["@_href"] || entry.id;
-
-							// Extract image attachments
-							const attachments =
-								entry.link
-									?.filter(
-										(link) => link["@_rel"] === "enclosure" && link["@_type"]?.startsWith("image/"),
-									)
-									.map((link) => ({
-										url: link["@_href"],
-										type: link["@_type"],
-									})) || [];
-
-							// Create note entry
-							store.set({
-								id: `pleroma-${postId}`,
-								data: {
-									title,
-									description:
-										cleanedContent.substring(0, 160) + (cleanedContent.length > 160 ? "..." : ""),
-									publishDate: new Date(entry.published),
-									sourceUrl,
-									attachments,
-								},
-								body: cleanedContent,
-								rendered: {
-									html: markdownToHtml(cleanedContent),
-								},
-							});
-
-							logger.info(`Processed post: ${title.substring(0, 50)}...`);
-						} catch (error) {
-							logger.warn(`Failed to process entry ${entry.id}: ${error}`);
-						}
-					}
-				} else {
-					// Process as RSS feed
-					const items = parseRssFeed(xmlContent);
-					logger.info(`Parsed ${items.length} items from RSS feed`);
-
-					const validRssItems = items.filter((item) => !isFilteredPostRss(item)).slice(0, maxPosts);
-
-					logger.info(`After filtering: ${validRssItems.length} valid posts`);
-
-					// Clear existing entries
-					store.clear();
-
-					// Process each RSS item
-					for (const item of validRssItems) {
-						try {
-							const content = item.description || "";
-							const cleanedContent = cleanContent(content);
-							const title = extractTitle(cleanedContent);
-
-							// Extract post ID from the GUID or link
-							const postId =
-								item.guid?.split("/").pop() ||
-								(typeof item.link === "string" ? item.link.split("/").pop() : null) ||
-								Math.random().toString(36);
-
-							// Use the link as source URL
-							const sourceUrl = typeof item.link === "string" ? item.link : item.guid || "";
-
-							// For RSS, attachments would be empty since we're actually getting Atom feeds
-							const attachments: { url: string; type: string }[] = [];
-
-							// Create note entry
-							store.set({
-								id: `pleroma-${postId}`,
-								data: {
-									title,
-									description:
-										cleanedContent.substring(0, 160) + (cleanedContent.length > 160 ? "..." : ""),
-									publishDate: new Date(item.pubDate),
-									sourceUrl,
-									attachments,
-								},
-								body: cleanedContent,
-								rendered: {
-									html: markdownToHtml(cleanedContent),
-								},
-							});
-
-							logger.info(`Processed post: ${title.substring(0, 50)}...`);
-						} catch (error) {
-							logger.warn(`Failed to process RSS item ${item.guid}: ${error}`);
-						}
+						logger.warn(`Failed to process status ${status.id}: ${error}`);
 					}
 				}
 
-				logger.info(`Successfully loaded ${validEntries.length} Pleroma posts`);
+				logger.info(`Successfully loaded ${validStatuses.length} Pleroma posts`);
 			} catch (error) {
 				logger.warn(`Pleroma loader failed: ${error}`);
 				logger.info("Continuing build without Pleroma posts...");
author	Dawid Rycerz <dawid@rycerz.xyz>	2026-01-12 18:03:49 +0100
committer	Dawid Rycerz <dawid@rycerz.xyz>	2026-01-12 18:04:32 +0100
commit	9f50f425bda5e56f4b73d8ce975eb6dafdbb46af (patch)
tree	701682a15b4e3325a67b3d544452f9e88348f63d /src
parent	eb7920efa3dd6889fe30249de1f0f0f121f1715f (diff)