summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Dawid Rycerz <dawid@rycerz.xyz> 2026-01-12 18:03:49 +0100
committer: Dawid Rycerz <dawid@rycerz.xyz> 2026-01-12 18:04:32 +0100
commit: 9f50f425bda5e56f4b73d8ce975eb6dafdbb46af (patch)
tree: 701682a15b4e3325a67b3d544452f9e88348f63d /src
parent: eb7920efa3dd6889fe30249de1f0f0f121f1715f (diff)
Replace xml based posts with native pleroma api
Diffstat (limited to 'src')
-rw-r--r-- src/content.config.ts | 1
-rw-r--r-- src/loaders/pleroma.ts | 458
2 files changed, 166 insertions, 293 deletions
diff --git a/src/content.config.ts b/src/content.config.ts
index c85d88e..5dff41a 100644
--- a/src/content.config.ts
+++ b/src/content.config.ts
@@ -42,7 +42,6 @@ const micro = defineCollection({
instanceUrl: "https://social.craftknight.com",
username: "dawid",
maxPosts: 50,
- feedType: "atom",
}),
schema: baseSchema.extend({
description: z.string().optional(),
diff --git a/src/loaders/pleroma.ts b/src/loaders/pleroma.ts
index 952e491..a833248 100644
--- a/src/loaders/pleroma.ts
+++ b/src/loaders/pleroma.ts
@@ -1,5 +1,4 @@
import type { Loader } from "astro/loaders";
-import { XMLParser } from "fast-xml-parser";
import { marked } from "marked";
import TurndownService from "turndown";
@@ -7,166 +6,145 @@ interface PleromaFeedConfig {
instanceUrl: string;
username: string;
maxPosts?: number;
- feedType?: "rss" | "atom";
+ accountId?: string; // Optional: if provided, skips account lookup
}
-interface RssItem {
- guid: string;
- title: string;
- description: string;
- pubDate: string;
- link: string;
- category?: string | string[];
- "activity:object-type"?: string;
- "activity:verb"?: string;
- "thr:in-reply-to"?: {
- "@_ref": string;
- };
-}
-
-interface RssFeed {
- rss: {
- channel: {
- title: string;
- description: string;
- link: string;
- item?: RssItem | RssItem[];
- };
- };
+interface PleromaAccount {
+ id: string;
+ username: string;
+ acct: string;
+ display_name: string;
+ url: string;
}
-interface AtomEntry {
+interface PleromaMediaAttachment {
id: string;
- title: string;
- content: {
- "#text": string;
- "@_type": string;
- };
- published: string;
- updated: string;
- link: {
- "@_href": string;
- "@_rel": string;
- "@_type": string;
- }[];
- author: {
- name: string;
- uri: string;
- };
- category?: {
- "@_term": string;
- }[];
- "activity:object-type"?: string;
- "activity:verb"?: string;
- "thr:in-reply-to"?: {
- "@_ref": string;
- };
+ type: "image" | "video" | "gifv" | "audio" | "unknown";
+ url: string;
+ preview_url: string;
+ description?: string;
}
-interface AtomFeed {
- feed: {
- title: string;
- id: string;
- updated: string;
- entry?: AtomEntry | AtomEntry[];
- };
+interface PleromaStatus {
+ id: string;
+ created_at: string;
+ content: string;
+ url: string;
+ reblog: PleromaStatus | null;
+ in_reply_to_id: string | null;
+ sensitive: boolean;
+ media_attachments: PleromaMediaAttachment[];
+ visibility: string;
}
-function parseAtomFeed(xmlContent: string): AtomEntry[] {
- const parser = new XMLParser({
- ignoreAttributes: false,
- attributeNamePrefix: "@_",
- parseAttributeValue: true,
- });
-
- const result: AtomFeed = parser.parse(xmlContent);
+async function getAccountId(
+ instanceUrl: string,
+ username: string,
+ logger: any,
+): Promise<string | null> {
+ try {
+ const searchUrl = `${instanceUrl}/api/v1/accounts/search?q=${encodeURIComponent(username)}&limit=1`;
+ logger.info(`Looking up account ID for username: ${username}`);
- if (!result.feed?.entry) {
- return [];
- }
+ const controller = new AbortController();
+ const timeoutId = setTimeout(() => controller.abort(), 10000);
- // Handle both single entry and array of entries
- const entries = Array.isArray(result.feed.entry) ? result.feed.entry : [result.feed.entry];
+ const response = await fetch(searchUrl, {
+ headers: {
+ "User-Agent": "Astro Blog (pleroma-loader)",
+ },
+ signal: controller.signal,
+ });
- return entries;
-}
+ clearTimeout(timeoutId);
-function parseRssFeed(xmlContent: string): RssItem[] {
- const parser = new XMLParser({
- ignoreAttributes: false,
- attributeNamePrefix: "@_",
- parseAttributeValue: true,
- });
+ if (!response.ok) {
+ logger.warn(`Failed to search for account: HTTP ${response.status}`);
+ return null;
+ }
- try {
- const result: RssFeed = parser.parse(xmlContent);
+ const accounts: PleromaAccount[] = await response.json();
- if (!result.rss?.channel?.item) {
- console.log("RSS structure:", JSON.stringify(result, null, 2));
- return [];
+ if (accounts.length === 0 || !accounts[0]) {
+ logger.warn(`No account found for username: ${username}`);
+ return null;
}
- // Handle both single item and array of items
- const items = Array.isArray(result.rss.channel.item)
- ? result.rss.channel.item
- : [result.rss.channel.item];
-
- return items;
+ const account = accounts[0];
+ logger.info(`Found account ID: ${account.id} for @${account.acct}`);
+ return account.id;
} catch (error) {
- console.error("Failed to parse RSS feed:", error);
- console.log("XML content length:", xmlContent.length);
- console.log("XML preview:", xmlContent.substring(0, 1000));
- return [];
+ logger.warn(`Failed to lookup account ID: ${error}`);
+ return null;
}
}
-function isFilteredPostAtom(entry: AtomEntry): boolean {
- // Filter out boosts/reblogs
- if (entry["activity:verb"] === "http://activitystrea.ms/schema/1.0/share") {
- return true;
- }
-
- // Filter out replies
- if (entry["thr:in-reply-to"]) {
- return true;
+async function fetchAccountStatuses(
+ instanceUrl: string,
+ accountId: string,
+ maxPosts: number,
+ logger: any,
+): Promise<PleromaStatus[]> {
+ let response: Response | undefined;
+ let lastError: unknown;
+
+ // Add retry logic for network issues
+ for (let attempt = 1; attempt <= 3; attempt++) {
+ try {
+ logger.info(`Attempt ${attempt} to fetch statuses...`);
+
+ const statusesUrl = `${instanceUrl}/api/v1/accounts/${accountId}/statuses?limit=${maxPosts}&exclude_replies=true&exclude_reblogs=true`;
+
+ // Create timeout controller
+ const controller = new AbortController();
+ const timeoutId = setTimeout(() => controller.abort(), 10000);
+
+ response = await fetch(statusesUrl, {
+ headers: {
+ "User-Agent": "Astro Blog (pleroma-loader)",
+ },
+ signal: controller.signal,
+ });
+
+ clearTimeout(timeoutId);
+
+ if (response.ok) {
+ break; // Success, exit retry loop
+ }
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+ } catch (error) {
+ lastError = error;
+ logger.warn(`Attempt ${attempt} failed: ${error}`);
+
+ if (attempt < 3) {
+ logger.info("Retrying in 2 seconds...");
+ await new Promise((resolve) => setTimeout(resolve, 2000));
+ }
+ }
}
- // Filter out NSFW/sensitive content
- if (entry.category) {
- const categories = Array.isArray(entry.category) ? entry.category : [entry.category];
- const hasNsfwTag = categories.some(
- (cat) =>
- cat["@_term"]?.toLowerCase().includes("nsfw") ||
- cat["@_term"]?.toLowerCase().includes("sensitive"),
- );
- if (hasNsfwTag) {
- return true;
- }
+ if (!response || !response.ok) {
+ throw new Error(`Failed to fetch statuses after 3 attempts. Last error: ${lastError}`);
}
- return false;
+ const statuses: PleromaStatus[] = await response.json();
+ return statuses;
}
-function isFilteredPostRss(item: RssItem): boolean {
- // Filter out boosts/reblogs
- if (item["activity:verb"] === "http://activitystrea.ms/schema/1.0/share") {
+function isFilteredStatus(status: PleromaStatus): boolean {
+ // Filter out boosts/reblogs (already handled by API parameter, but double-check)
+ if (status.reblog) {
return true;
}
- // Filter out replies
- if (item["thr:in-reply-to"]) {
+ // Filter out replies (already handled by API parameter, but double-check)
+ if (status.in_reply_to_id) {
return true;
}
// Filter out NSFW/sensitive content
- if (item.category) {
- const categories = Array.isArray(item.category) ? item.category : [item.category];
- const hasNsfwTag = categories.some(
- (cat) => cat?.toLowerCase().includes("nsfw") || cat?.toLowerCase().includes("sensitive"),
- );
- if (hasNsfwTag) {
- return true;
- }
+ if (status.sensitive) {
+ return true;
}
return false;
@@ -228,182 +206,78 @@ export function pleromaLoader(config: PleromaFeedConfig): Loader {
name: "pleroma-loader",
load: async ({ store, logger }) => {
try {
- const { instanceUrl, username, maxPosts = 20 } = config;
- // Use RSS URL that redirects to Atom - this bypasses some access restrictions
- const feedUrl = `${instanceUrl}/users/${username}.rss`;
-
- logger.info(`Fetching Pleroma feed from: ${feedUrl}`);
+ const { instanceUrl, username, maxPosts = 20, accountId: configAccountId } = config;
+
+ logger.info(`Fetching Pleroma posts via API for user: ${username}`);
+
+ // Get account ID (use provided one or lookup by username)
+ let accountId: string | undefined = configAccountId;
+ if (!accountId) {
+ const lookedUpAccountId = await getAccountId(instanceUrl, username, logger);
+ if (!lookedUpAccountId) {
+ logger.warn("Failed to get account ID. Continuing without Pleroma posts...");
+ store.clear();
+ return;
+ }
+ accountId = lookedUpAccountId;
+ }
- // Add retry logic for network issues
- let response: Response | undefined;
- let lastError: unknown;
+ // Fetch statuses from API
+ const statuses = await fetchAccountStatuses(instanceUrl, accountId, maxPosts, logger);
+ logger.info(`Fetched ${statuses.length} statuses from API`);
- for (let attempt = 1; attempt <= 3; attempt++) {
- try {
- logger.info(`Attempt ${attempt} to fetch feed...`);
+ // Filter statuses
+ const validStatuses = statuses.filter((status) => !isFilteredStatus(status));
+ logger.info(`After filtering: ${validStatuses.length} valid posts`);
- // Create timeout controller
- const controller = new AbortController();
- const timeoutId = setTimeout(() => controller.abort(), 10000);
+ // Clear existing entries
+ store.clear();
- response = await fetch(feedUrl, {
- headers: {
- "User-Agent": "Astro Blog (pleroma-loader)",
+ // Process each status
+ for (const status of validStatuses) {
+ try {
+ const content = status.content || "";
+ const cleanedContent = cleanContent(content);
+ const title = extractTitle(cleanedContent);
+
+ // Extract post ID from status
+ const postId = status.id;
+
+ // Use status URL as source
+ const sourceUrl = status.url;
+
+ // Extract image attachments only
+ const attachments = status.media_attachments
+ .filter((attachment) => attachment.type === "image")
+ .map((attachment) => ({
+ url: attachment.url,
+ type: `image/${attachment.url.split(".").pop() || "jpeg"}`,
+ }));
+
+ // Create note entry
+ store.set({
+ id: `pleroma-${postId}`,
+ data: {
+ title,
+ description:
+ cleanedContent.substring(0, 160) + (cleanedContent.length > 160 ? "..." : ""),
+ publishDate: new Date(status.created_at),
+ sourceUrl,
+ attachments,
+ },
+ body: cleanedContent,
+ rendered: {
+ html: markdownToHtml(cleanedContent),
},
- redirect: "follow", // Follow redirects
- signal: controller.signal,
});
- clearTimeout(timeoutId);
-
- if (response.ok) {
- break; // Success, exit retry loop
- }
- throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+ logger.info(`Processed post: ${title.substring(0, 50)}...`);
} catch (error) {
- lastError = error;
- logger.warn(`Attempt ${attempt} failed: ${error}`);
-
- if (attempt < 3) {
- logger.info("Retrying in 2 seconds...");
- await new Promise((resolve) => setTimeout(resolve, 2000));
- }
- }
- }
-
- if (!response || !response.ok) {
- logger.warn(`Failed to fetch Pleroma feed after 3 attempts. Last error: ${lastError}`);
- logger.info("Continuing without Pleroma posts...");
- store.clear();
- return;
- }
-
- const xmlContent = await response.text();
- logger.info(`Received XML content length: ${xmlContent.length}`);
-
- // Auto-detect if it's Atom or RSS based on content
- const isAtomFeed =
- xmlContent.includes("<feed") ||
- xmlContent.includes('xmlns="http://www.w3.org/2005/Atom"');
- logger.info(`Detected feed type: ${isAtomFeed ? "Atom" : "RSS"}`);
-
- let validEntries: AtomEntry[] = [];
-
- if (isAtomFeed) {
- // Process as Atom feed
- const entries = parseAtomFeed(xmlContent);
- logger.info(`Parsed ${entries.length} entries from Atom feed`);
-
- validEntries = entries.filter((entry) => !isFilteredPostAtom(entry)).slice(0, maxPosts);
-
- logger.info(`After filtering: ${validEntries.length} valid posts`);
-
- // Clear existing entries
- store.clear();
-
- // Process each Atom entry
- for (const entry of validEntries) {
- try {
- const content = entry.content?.["#text"] || "";
- const cleanedContent = cleanContent(content);
- const title = extractTitle(cleanedContent);
-
- // Extract post ID from the entry ID
- const postId = entry.id.split("/").pop() || entry.id;
-
- // Extract source URL from the entry
- const sourceUrl =
- entry.link?.find((link) => link["@_rel"] === "alternate")?.["@_href"] || entry.id;
-
- // Extract image attachments
- const attachments =
- entry.link
- ?.filter(
- (link) => link["@_rel"] === "enclosure" && link["@_type"]?.startsWith("image/"),
- )
- .map((link) => ({
- url: link["@_href"],
- type: link["@_type"],
- })) || [];
-
- // Create note entry
- store.set({
- id: `pleroma-${postId}`,
- data: {
- title,
- description:
- cleanedContent.substring(0, 160) + (cleanedContent.length > 160 ? "..." : ""),
- publishDate: new Date(entry.published),
- sourceUrl,
- attachments,
- },
- body: cleanedContent,
- rendered: {
- html: markdownToHtml(cleanedContent),
- },
- });
-
- logger.info(`Processed post: ${title.substring(0, 50)}...`);
- } catch (error) {
- logger.warn(`Failed to process entry ${entry.id}: ${error}`);
- }
- }
- } else {
- // Process as RSS feed
- const items = parseRssFeed(xmlContent);
- logger.info(`Parsed ${items.length} items from RSS feed`);
-
- const validRssItems = items.filter((item) => !isFilteredPostRss(item)).slice(0, maxPosts);
-
- logger.info(`After filtering: ${validRssItems.length} valid posts`);
-
- // Clear existing entries
- store.clear();
-
- // Process each RSS item
- for (const item of validRssItems) {
- try {
- const content = item.description || "";
- const cleanedContent = cleanContent(content);
- const title = extractTitle(cleanedContent);
-
- // Extract post ID from the GUID or link
- const postId =
- item.guid?.split("/").pop() ||
- (typeof item.link === "string" ? item.link.split("/").pop() : null) ||
- Math.random().toString(36);
-
- // Use the link as source URL
- const sourceUrl = typeof item.link === "string" ? item.link : item.guid || "";
-
- // For RSS, attachments would be empty since we're actually getting Atom feeds
- const attachments: { url: string; type: string }[] = [];
-
- // Create note entry
- store.set({
- id: `pleroma-${postId}`,
- data: {
- title,
- description:
- cleanedContent.substring(0, 160) + (cleanedContent.length > 160 ? "..." : ""),
- publishDate: new Date(item.pubDate),
- sourceUrl,
- attachments,
- },
- body: cleanedContent,
- rendered: {
- html: markdownToHtml(cleanedContent),
- },
- });
-
- logger.info(`Processed post: ${title.substring(0, 50)}...`);
- } catch (error) {
- logger.warn(`Failed to process RSS item ${item.guid}: ${error}`);
- }
+ logger.warn(`Failed to process status ${status.id}: ${error}`);
}
}
- logger.info(`Successfully loaded ${validEntries.length} Pleroma posts`);
+ logger.info(`Successfully loaded ${validStatuses.length} Pleroma posts`);
} catch (error) {
logger.warn(`Pleroma loader failed: ${error}`);
logger.info("Continuing build without Pleroma posts...");