diff options
| author | Dawid Rycerz <dawid@rycerz.xyz> | 2026-01-12 19:45:47 +0100 |
|---|---|---|
| committer | Dawid Rycerz <dawid@rycerz.xyz> | 2026-01-12 19:46:52 +0100 |
| commit | 5e7ea1523908774c7e2dbfd47f0c6e0a2f503971 (patch) | |
| tree | dbd103336fd92e0d276d13e354218e62adba3f3b /src | |
| parent | 51aa63873681216026d518cde4abeca307818a4b (diff) | |
Add support for pleroma threads
Diffstat (limited to 'src')
| -rw-r--r-- | src/loaders/pleroma.ts | 224 |
1 files changed, 209 insertions, 15 deletions
diff --git a/src/loaders/pleroma.ts b/src/loaders/pleroma.ts index 7e1ccb8..be61329 100644 --- a/src/loaders/pleroma.ts +++ b/src/loaders/pleroma.ts @@ -8,6 +8,7 @@ interface PleromaFeedConfig { maxPosts?: number; accountId?: string; // Optional: if provided, skips account lookup allowedTags?: string[]; // Optional: if provided, only posts with these tags are included + mergeThreads?: boolean; // Optional: if true, merges thread posts into single entry (default: true) } interface PleromaAccount { @@ -36,6 +37,47 @@ interface PleromaStatus { sensitive: boolean; media_attachments: PleromaMediaAttachment[]; visibility: string; + account: PleromaAccount; +} + +/** + * Detect if a post is a thread starter by checking for thread markers + * Matches patterns like: 🧵, ๐, ⬇️, 1/n, (1/n), [1/n], Thread:, etc. + */ +function isThreadStarter(content: string): boolean { + // Check for thread emojis + const threadEmojis = ["🧵", "๐", "⬇️", "๐", "๐", "⤵️", "🔽"]; + if (threadEmojis.some((emoji) => content.includes(emoji))) { + return true; + } + + // Check for numbered thread patterns: + // - 1/n, 1/*, 1/2, 1/10 (plain) + // - (1/n), (1/*), (1/2) (parentheses) + // - [1/n], [1/*], [1/2] (brackets) + const numberedPatterns = [ + /\b1\/([n*]|\d+)\b/i, // 1/n, 1/*, 1/2 + /\(1\/([n*]|\d+)\)/i, // (1/n), (1/*) + /\[1\/([n*]|\d+)\]/i, // [1/n], [1/*] + ]; + + if (numberedPatterns.some((pattern) => pattern.test(content))) { + return true; + } + + // Check for text markers (case insensitive) + const textMarkers = [ + /\bthread:/i, // Thread: + /\[thread\]/i, // [Thread] + /^thread about/i, // Thread about... (start of text) + /^a thread about/i, // A thread about... 
+ ]; + + if (textMarkers.some((pattern) => pattern.test(content))) { + return true; + } + + return false; } /** @@ -69,6 +111,137 @@ function parseNextPageMaxId(linkHeader: string | null): string | null { return null; } +/** + * Fetch the context (ancestors and descendants) for a given status + * Returns only the descendants array for thread building + */ +async function fetchStatusContext( + instanceUrl: string, + statusId: string, + logger: any, +): Promise<PleromaStatus[]> { + try { + const contextUrl = `${instanceUrl}/api/v1/statuses/${statusId}/context`; + logger.info(`Fetching context for status: ${statusId}`); + + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 10000); + + const response = await fetch(contextUrl, { + headers: { + "User-Agent": "Astro Blog (pleroma-loader)", + }, + signal: controller.signal, + }); + + clearTimeout(timeoutId); + + if (!response.ok) { + logger.warn(`Failed to fetch context: HTTP ${response.status}`); + return []; + } + + const context: { ancestors: PleromaStatus[]; descendants: PleromaStatus[] } = + await response.json(); + logger.info(`Fetched ${context.descendants.length} descendants for status ${statusId}`); + return context.descendants; + } catch (error) { + logger.warn(`Failed to fetch status context: ${error}`); + return []; + } +} + +/** + * Build a direct author-to-author reply chain from the thread starter + * Stops when encountering a reply from another user or a missing link + */ +function buildAuthorChain(starter: PleromaStatus, descendants: PleromaStatus[]): PleromaStatus[] { + const chain: PleromaStatus[] = [starter]; + const authorAccountId = starter.account.id; + let currentId = starter.id; + + // Keep following the chain as long as we find direct author replies + while (true) { + // Find the next post in the chain: it must be by the same author and reply to the current post + const nextPost = descendants.find( + (status) => status.in_reply_to_id === currentId 
&& status.account.id === authorAccountId, + ); + + if (!nextPost) { + // No more direct author replies found, chain ends here + break; + } + + chain.push(nextPost); + currentId = nextPost.id; + } + + return chain; +} + +/** + * Strip thread markers from content (1/n, 2/n, 3/4, etc.) + */ +function stripThreadMarkers(content: string): string { + return content + .replace(/\s*\d+\/[n*\d]+\s*/gi, " ") + .replace(/🧵/g, "") + .trim(); +} + +/** + * Merge thread posts into a single content structure with image grids per segment + */ +function mergeThreadContent(chain: PleromaStatus[]): { + content: string; + attachments: Array<{ url: string; type: string }>; +} { + const segments: string[] = []; + const allAttachments: Array<{ url: string; type: string }> = []; + + for (const post of chain) { + // Clean and strip thread markers from content + const cleanedContent = cleanContent(post.content || ""); + const contentWithoutMarkers = stripThreadMarkers(cleanedContent); + + // Build segment with text + let segment = contentWithoutMarkers; + + // Add image attachments as HTML grid after the text + const imageAttachments = post.media_attachments.filter( + (attachment) => attachment.type === "image", + ); + + if (imageAttachments.length > 0) { + // Build HTML grid for images + const imageGrid = ` +<div class="mt-4 mb-4 grid grid-cols-1 gap-4 sm:grid-cols-2"> +${imageAttachments + .map((attachment) => { + const description = attachment.description || "Image"; + allAttachments.push({ + url: attachment.url, + type: `image/${attachment.url.split(".").pop() || "jpeg"}`, + }); + return `<a href="${attachment.url}" target="_blank" rel="noopener noreferrer" class="block overflow-hidden rounded-lg border border-gray-200 transition-colors hover:border-gray-300 dark:border-gray-700 dark:hover:border-gray-600"> +<img src="${attachment.url}" alt="${description}" class="h-48 w-full object-cover" loading="lazy" /> +</a>`; + }) + .join("\n")} +</div>`; + + segment = 
`${segment}\n\n${imageGrid}`; + } + + segments.push(segment); + } + + // Join segments with horizontal rule separator + const content = segments.join("\n\n---\n\n"); + + return { content, attachments: [] }; // Return empty attachments to avoid duplicate grid at end +} + async function getAccountId( instanceUrl: string, username: string, @@ -351,22 +524,43 @@ export function pleromaLoader(config: PleromaFeedConfig): Loader { for (const status of validStatuses) { try { const content = status.content || ""; - const cleanedContent = cleanContent(content); - const title = extractTitle(cleanedContent); - - // Extract post ID from status - const postId = status.id; + let cleanedContent: string; + let attachments: Array<{ url: string; type: string }>; + let postId: string; + let sourceUrl: string; + + // Check if this is a thread starter and thread merging is enabled + if (config.mergeThreads !== false && isThreadStarter(content)) { + logger.info(`Detected thread starter: ${status.id}`); + + // Fetch context and build the author chain + const descendants = await fetchStatusContext(instanceUrl, status.id, logger); + const chain = buildAuthorChain(status, descendants); + + logger.info(`Built chain with ${chain.length} post(s) for thread ${status.id}`); + + // Merge thread content + const merged = mergeThreadContent(chain); + cleanedContent = merged.content; + attachments = merged.attachments; + postId = status.id; + sourceUrl = status.url; + } else { + // Process as single post + cleanedContent = cleanContent(content); + postId = status.id; + sourceUrl = status.url; + + // Extract image attachments only + attachments = status.media_attachments + .filter((attachment) => attachment.type === "image") + .map((attachment) => ({ + url: attachment.url, + type: `image/${attachment.url.split(".").pop() || "jpeg"}`, + })); + } - // Use status URL as source - const sourceUrl = status.url; - - // Extract image attachments only - const attachments = status.media_attachments - 
.filter((attachment) => attachment.type === "image") - .map((attachment) => ({ - url: attachment.url, - type: `image/${attachment.url.split(".").pop() || "jpeg"}`, - })); + const title = extractTitle(cleanedContent); // Create note entry store.set({ |
