diff options
Diffstat (limited to 'src/loaders/pleroma.ts')
| -rw-r--r-- | src/loaders/pleroma.ts | 458 |
1 files changed, 166 insertions, 292 deletions
diff --git a/src/loaders/pleroma.ts b/src/loaders/pleroma.ts index 952e491..a833248 100644 --- a/src/loaders/pleroma.ts +++ b/src/loaders/pleroma.ts @@ -1,5 +1,4 @@ import type { Loader } from "astro/loaders"; -import { XMLParser } from "fast-xml-parser"; import { marked } from "marked"; import TurndownService from "turndown"; @@ -7,166 +6,145 @@ interface PleromaFeedConfig { instanceUrl: string; username: string; maxPosts?: number; - feedType?: "rss" | "atom"; + accountId?: string; // Optional: if provided, skips account lookup } -interface RssItem { - guid: string; - title: string; - description: string; - pubDate: string; - link: string; - category?: string | string[]; - "activity:object-type"?: string; - "activity:verb"?: string; - "thr:in-reply-to"?: { - "@_ref": string; - }; -} - -interface RssFeed { - rss: { - channel: { - title: string; - description: string; - link: string; - item?: RssItem | RssItem[]; - }; - }; +interface PleromaAccount { + id: string; + username: string; + acct: string; + display_name: string; + url: string; } -interface AtomEntry { +interface PleromaMediaAttachment { id: string; - title: string; - content: { - "#text": string; - "@_type": string; - }; - published: string; - updated: string; - link: { - "@_href": string; - "@_rel": string; - "@_type": string; - }[]; - author: { - name: string; - uri: string; - }; - category?: { - "@_term": string; - }[]; - "activity:object-type"?: string; - "activity:verb"?: string; - "thr:in-reply-to"?: { - "@_ref": string; - }; + type: "image" | "video" | "gifv" | "audio" | "unknown"; + url: string; + preview_url: string; + description?: string; } -interface AtomFeed { - feed: { - title: string; - id: string; - updated: string; - entry?: AtomEntry | AtomEntry[]; - }; +interface PleromaStatus { + id: string; + created_at: string; + content: string; + url: string; + reblog: PleromaStatus | null; + in_reply_to_id: string | null; + sensitive: boolean; + media_attachments: PleromaMediaAttachment[]; + visibility: string; } -function parseAtomFeed(xmlContent: string): AtomEntry[] { - const parser = new XMLParser({ - ignoreAttributes: false, - attributeNamePrefix: "@_", - parseAttributeValue: true, - }); - - const result: AtomFeed = parser.parse(xmlContent); +async function getAccountId( + instanceUrl: string, + username: string, + logger: any, +): Promise<string | null> { + try { + const searchUrl = `${instanceUrl}/api/v1/accounts/search?q=${encodeURIComponent(username)}&limit=1`; + logger.info(`Looking up account ID for username: ${username}`); - if (!result.feed?.entry) { - return []; - } + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 10000); - // Handle both single entry and array of entries - const entries = Array.isArray(result.feed.entry) ? result.feed.entry : [result.feed.entry]; + const response = await fetch(searchUrl, { + headers: { + "User-Agent": "Astro Blog (pleroma-loader)", + }, + signal: controller.signal, + }); - return entries; -} + clearTimeout(timeoutId); -function parseRssFeed(xmlContent: string): RssItem[] { - const parser = new XMLParser({ - ignoreAttributes: false, - attributeNamePrefix: "@_", - parseAttributeValue: true, - }); + if (!response.ok) { + logger.warn(`Failed to search for account: HTTP ${response.status}`); + return null; + } - try { - const result: RssFeed = parser.parse(xmlContent); + const accounts: PleromaAccount[] = await response.json(); - if (!result.rss?.channel?.item) { - console.log("RSS structure:", JSON.stringify(result, null, 2)); - return []; + if (accounts.length === 0 || !accounts[0]) { + logger.warn(`No account found for username: ${username}`); + return null; } - // Handle both single item and array of items - const items = Array.isArray(result.rss.channel.item) - ? result.rss.channel.item - : [result.rss.channel.item]; - - return items; + const account = accounts[0]; + logger.info(`Found account ID: ${account.id} for @${account.acct}`); + return account.id; } catch (error) { - console.error("Failed to parse RSS feed:", error); - console.log("XML content length:", xmlContent.length); - console.log("XML preview:", xmlContent.substring(0, 1000)); - return []; + logger.warn(`Failed to lookup account ID: ${error}`); + return null; } } -function isFilteredPostAtom(entry: AtomEntry): boolean { - // Filter out boosts/reblogs - if (entry["activity:verb"] === "http://activitystrea.ms/schema/1.0/share") { - return true; - } - - // Filter out replies - if (entry["thr:in-reply-to"]) { - return true; +async function fetchAccountStatuses( + instanceUrl: string, + accountId: string, + maxPosts: number, + logger: any, +): Promise<PleromaStatus[]> { + let response: Response | undefined; + let lastError: unknown; + + // Add retry logic for network issues + for (let attempt = 1; attempt <= 3; attempt++) { + try { + logger.info(`Attempt ${attempt} to fetch statuses...`); + + const statusesUrl = `${instanceUrl}/api/v1/accounts/${accountId}/statuses?limit=${maxPosts}&exclude_replies=true&exclude_reblogs=true`; + + // Create timeout controller + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 10000); + + response = await fetch(statusesUrl, { + headers: { + "User-Agent": "Astro Blog (pleroma-loader)", + }, + signal: controller.signal, + }); + + clearTimeout(timeoutId); + + if (response.ok) { + break; // Success, exit retry loop + } + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } catch (error) { + lastError = error; + logger.warn(`Attempt ${attempt} failed: ${error}`); + + if (attempt < 3) { + logger.info("Retrying in 2 seconds..."); + await new Promise((resolve) => setTimeout(resolve, 2000)); + } + } } - // Filter out NSFW/sensitive content - if (entry.category) { - const categories = Array.isArray(entry.category) ? entry.category : [entry.category]; - const hasNsfwTag = categories.some( - (cat) => - cat["@_term"]?.toLowerCase().includes("nsfw") || - cat["@_term"]?.toLowerCase().includes("sensitive"), - ); - if (hasNsfwTag) { - return true; - } + if (!response || !response.ok) { + throw new Error(`Failed to fetch statuses after 3 attempts. Last error: ${lastError}`); } - return false; + const statuses: PleromaStatus[] = await response.json(); + return statuses; } -function isFilteredPostRss(item: RssItem): boolean { - // Filter out boosts/reblogs - if (item["activity:verb"] === "http://activitystrea.ms/schema/1.0/share") { +function isFilteredStatus(status: PleromaStatus): boolean { + // Filter out boosts/reblogs (already handled by API parameter, but double-check) + if (status.reblog) { return true; } - // Filter out replies - if (item["thr:in-reply-to"]) { + // Filter out replies (already handled by API parameter, but double-check) + if (status.in_reply_to_id) { return true; } // Filter out NSFW/sensitive content - if (item.category) { - const categories = Array.isArray(item.category) ? item.category : [item.category]; - const hasNsfwTag = categories.some( - (cat) => cat?.toLowerCase().includes("nsfw") || cat?.toLowerCase().includes("sensitive"), - ); - if (hasNsfwTag) { - return true; - } + if (status.sensitive) { + return true; } return false; @@ -228,182 +206,78 @@ export function pleromaLoader(config: PleromaFeedConfig): Loader { name: "pleroma-loader", load: async ({ store, logger }) => { try { - const { instanceUrl, username, maxPosts = 20 } = config; - // Use RSS URL that redirects to Atom - this bypasses some access restrictions - const feedUrl = `${instanceUrl}/users/${username}.rss`; - - logger.info(`Fetching Pleroma feed from: ${feedUrl}`); + const { instanceUrl, username, maxPosts = 20, accountId: configAccountId } = config; + + logger.info(`Fetching Pleroma posts via API for user: ${username}`); + + // Get account ID (use provided one or lookup by username) + let accountId: string | undefined = configAccountId; + if (!accountId) { + const lookedUpAccountId = await getAccountId(instanceUrl, username, logger); + if (!lookedUpAccountId) { + logger.warn("Failed to get account ID. Continuing without Pleroma posts..."); + store.clear(); + return; + } + accountId = lookedUpAccountId; + } - // Add retry logic for network issues - let response: Response | undefined; - let lastError: unknown; + // Fetch statuses from API + const statuses = await fetchAccountStatuses(instanceUrl, accountId, maxPosts, logger); + logger.info(`Fetched ${statuses.length} statuses from API`); - for (let attempt = 1; attempt <= 3; attempt++) { - try { - logger.info(`Attempt ${attempt} to fetch feed...`); + // Filter statuses + const validStatuses = statuses.filter((status) => !isFilteredStatus(status)); + logger.info(`After filtering: ${validStatuses.length} valid posts`); - // Create timeout controller - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), 10000); + // Clear existing entries + store.clear(); - response = await fetch(feedUrl, { - headers: { - "User-Agent": "Astro Blog (pleroma-loader)", + // Process each status + for (const status of validStatuses) { + try { + const content = status.content || ""; + const cleanedContent = cleanContent(content); + const title = extractTitle(cleanedContent); + + // Extract post ID from status + const postId = status.id; + + // Use status URL as source + const sourceUrl = status.url; + + // Extract image attachments only + const attachments = status.media_attachments + .filter((attachment) => attachment.type === "image") + .map((attachment) => ({ + url: attachment.url, + type: `image/${attachment.url.split(".").pop() || "jpeg"}`, + })); + + // Create note entry + store.set({ + id: `pleroma-${postId}`, + data: { + title, + description: + cleanedContent.substring(0, 160) + (cleanedContent.length > 160 ? "..." : ""), + publishDate: new Date(status.created_at), + sourceUrl, + attachments, + }, + body: cleanedContent, + rendered: { + html: markdownToHtml(cleanedContent), }, - redirect: "follow", // Follow redirects - signal: controller.signal, }); - clearTimeout(timeoutId); - - if (response.ok) { - break; // Success, exit retry loop - } - throw new Error(`HTTP ${response.status}: ${response.statusText}`); + logger.info(`Processed post: ${title.substring(0, 50)}...`); } catch (error) { - lastError = error; - logger.warn(`Attempt ${attempt} failed: ${error}`); - - if (attempt < 3) { - logger.info("Retrying in 2 seconds..."); - await new Promise((resolve) => setTimeout(resolve, 2000)); - } - } - } - - if (!response || !response.ok) { - logger.warn(`Failed to fetch Pleroma feed after 3 attempts. Last error: ${lastError}`); - logger.info("Continuing without Pleroma posts..."); - store.clear(); - return; - } - - const xmlContent = await response.text(); - logger.info(`Received XML content length: ${xmlContent.length}`); - - // Auto-detect if it's Atom or RSS based on content - const isAtomFeed = - xmlContent.includes("<feed") || - xmlContent.includes('xmlns="http://www.w3.org/2005/Atom"'); - logger.info(`Detected feed type: ${isAtomFeed ? "Atom" : "RSS"}`); - - let validEntries: AtomEntry[] = []; - - if (isAtomFeed) { - // Process as Atom feed - const entries = parseAtomFeed(xmlContent); - logger.info(`Parsed ${entries.length} entries from Atom feed`); - - validEntries = entries.filter((entry) => !isFilteredPostAtom(entry)).slice(0, maxPosts); - - logger.info(`After filtering: ${validEntries.length} valid posts`); - - // Clear existing entries - store.clear(); - - // Process each Atom entry - for (const entry of validEntries) { - try { - const content = entry.content?.["#text"] || ""; - const cleanedContent = cleanContent(content); - const title = extractTitle(cleanedContent); - - // Extract post ID from the entry ID - const postId = entry.id.split("/").pop() || entry.id; - - // Extract source URL from the entry - const sourceUrl = - entry.link?.find((link) => link["@_rel"] === "alternate")?.["@_href"] || entry.id; - - // Extract image attachments - const attachments = - entry.link - ?.filter( - (link) => link["@_rel"] === "enclosure" && link["@_type"]?.startsWith("image/"), - ) - .map((link) => ({ - url: link["@_href"], - type: link["@_type"], - })) || []; - - // Create note entry - store.set({ - id: `pleroma-${postId}`, - data: { - title, - description: - cleanedContent.substring(0, 160) + (cleanedContent.length > 160 ? "..." : ""), - publishDate: new Date(entry.published), - sourceUrl, - attachments, - }, - body: cleanedContent, - rendered: { - html: markdownToHtml(cleanedContent), - }, - }); - - logger.info(`Processed post: ${title.substring(0, 50)}...`); - } catch (error) { - logger.warn(`Failed to process entry ${entry.id}: ${error}`); - } - } - } else { - // Process as RSS feed - const items = parseRssFeed(xmlContent); - logger.info(`Parsed ${items.length} items from RSS feed`); - - const validRssItems = items.filter((item) => !isFilteredPostRss(item)).slice(0, maxPosts); - - logger.info(`After filtering: ${validRssItems.length} valid posts`); - - // Clear existing entries - store.clear(); - - // Process each RSS item - for (const item of validRssItems) { - try { - const content = item.description || ""; - const cleanedContent = cleanContent(content); - const title = extractTitle(cleanedContent); - - // Extract post ID from the GUID or link - const postId = - item.guid?.split("/").pop() || - (typeof item.link === "string" ? item.link.split("/").pop() : null) || - Math.random().toString(36); - - // Use the link as source URL - const sourceUrl = typeof item.link === "string" ? item.link : item.guid || ""; - - // For RSS, attachments would be empty since we're actually getting Atom feeds - const attachments: { url: string; type: string }[] = []; - - // Create note entry - store.set({ - id: `pleroma-${postId}`, - data: { - title, - description: - cleanedContent.substring(0, 160) + (cleanedContent.length > 160 ? "..." : ""), - publishDate: new Date(item.pubDate), - sourceUrl, - attachments, - }, - body: cleanedContent, - rendered: { - html: markdownToHtml(cleanedContent), - }, - }); - - logger.info(`Processed post: ${title.substring(0, 50)}...`); - } catch (error) { - logger.warn(`Failed to process RSS item ${item.guid}: ${error}`); - } + logger.warn(`Failed to process status ${status.id}: ${error}`); } } - logger.info(`Successfully loaded ${validEntries.length} Pleroma posts`); + logger.info(`Successfully loaded ${validStatuses.length} Pleroma posts`); } catch (error) { logger.warn(`Pleroma loader failed: ${error}`); logger.info("Continuing build without Pleroma posts..."); |
