Add support for pleroma threads

author: Dawid Rycerz <dawid@rycerz.xyz> 2026-01-12 19:45:47 +0100
committer: Dawid Rycerz <dawid@rycerz.xyz> 2026-01-12 19:46:52 +0100
commit: 5e7ea1523908774c7e2dbfd47f0c6e0a2f503971 (patch)
tree: dbd103336fd92e0d276d13e354218e62adba3f3b /src
parent: 51aa63873681216026d518cde4abeca307818a4b (diff)
1 files changed, 209 insertions, 15 deletions
diff --git a/src/loaders/pleroma.ts b/src/loaders/pleroma.ts
index 7e1ccb8..be61329 100644
--- a/src/loaders/pleroma.ts
+++ b/src/loaders/pleroma.ts
@@ -8,6 +8,7 @@ interface PleromaFeedConfig {
 	maxPosts?: number;
 	accountId?: string; // Optional: if provided, skips account lookup
 	allowedTags?: string[]; // Optional: if provided, only posts with these tags are included
+	mergeThreads?: boolean; // Optional: if true, merges thread posts into single entry (default: true)
 }
 
 interface PleromaAccount {
@@ -36,6 +37,47 @@ interface PleromaStatus {
 	sensitive: boolean;
 	media_attachments: PleromaMediaAttachment[];
 	visibility: string;
+	account: PleromaAccount;
+}
+
+/**
+ * Detect whether a post opens a thread by looking for common thread markers.
+ *
+ * Recognised markers:
+ * - thread emojis (🧵, 👇, ⬇️, 📝, 📖, ⤵️, 🔽)
+ * - a "1/x" counter where x is `n`, `*` or a number, either bare or wrapped
+ *   in parentheses or brackets: 1/n, (1/5), [1/*]
+ * - text markers: "Thread:", "[Thread]", or text starting with
+ *   "Thread about" / "A thread about"
+ *
+ * HTML tags are removed before matching, so status HTML can be passed in
+ * directly: start-of-text markers still work when the text is wrapped in a
+ * tag such as `<p>`, and link targets inside attributes cannot be mistaken
+ * for a counter.
+ *
+ * @param content - Post content, plain text or HTML.
+ * @returns `true` when at least one thread marker is present.
+ */
+function isThreadStarter(content: string): boolean {
+	// Match against the visible text only, not the markup around it.
+	const text = content.replace(/<[^>]*>/g, " ").trim();
+
+	// Check for thread emojis
+	const threadEmojis = ["🧵", "👇", "⬇️", "📝", "📖", "⤵️", "🔽"];
+	if (threadEmojis.some((emoji) => text.includes(emoji))) {
+		return true;
+	}
+
+	// Numbered counter for the first post: 1/n, 1/*, 1/2, 1/10, (1/n), [1/n].
+	// Look-arounds are used instead of `\b` because `\b` after `*` only matches
+	// when a word character follows, which would miss a bare "1/*". They also
+	// keep dates (1/12/2025) and path segments (/1/2) from counting as a marker.
+	const firstPostCounter = /(?<![\w\/])1\/(?:n|\*|\d+)(?![\w\/])/i;
+	if (firstPostCounter.test(text)) {
+		return true;
+	}
+
+	// Check for text markers (case insensitive)
+	const textMarkers = [
+		/\bthread:/i, // Thread:
+		/\[thread\]/i, // [Thread]
+		/^(?:a )?thread about/i, // Thread about... / A thread about... (start of text)
+	];
+
+	return textMarkers.some((pattern) => pattern.test(text));
 }
 
 /**
@@ -69,6 +111,137 @@ function parseNextPageMaxId(linkHeader: string | null): string | null {
 	return null;
 }
 
+/**
+ * Fetch the reply context of a status and return its descendants.
+ *
+ * Requests `{instanceUrl}/api/v1/statuses/{statusId}/context` (the instance URL
+ * is concatenated as-is) with a 10 second timeout that covers both the request
+ * and reading the response body.
+ *
+ * This function does not throw for request problems: on a non-OK response, a
+ * network error, a timeout or a payload without a `descendants` array it logs
+ * a warning and returns an empty array.
+ *
+ * @param instanceUrl - Base URL of the instance.
+ * @param statusId - ID of the status whose context is requested.
+ * @param logger - Logger; its `info` and `warn` methods are called.
+ * @returns The `descendants` array of the context response, or `[]` on failure.
+ */
+async function fetchStatusContext(
+	instanceUrl: string,
+	statusId: string,
+	logger: any,
+): Promise<PleromaStatus[]> {
+	const controller = new AbortController();
+	const timeoutId = setTimeout(() => controller.abort(), 10000);
+
+	try {
+		const contextUrl = `${instanceUrl}/api/v1/statuses/${encodeURIComponent(statusId)}/context`;
+		logger.info(`Fetching context for status: ${statusId}`);
+
+		const response = await fetch(contextUrl, {
+			headers: {
+				"User-Agent": "Astro Blog (pleroma-loader)",
+			},
+			signal: controller.signal,
+		});
+
+		if (!response.ok) {
+			logger.warn(`Failed to fetch context: HTTP ${response.status}`);
+			return [];
+		}
+
+		// The timer is still armed here so a stalled body read is aborted as well.
+		const context: { ancestors?: PleromaStatus[]; descendants?: PleromaStatus[] } | null =
+			await response.json();
+		const descendants = context?.descendants;
+		if (!Array.isArray(descendants)) {
+			logger.warn("Failed to fetch status context: response has no descendants array");
+			return [];
+		}
+
+		logger.info(`Fetched ${descendants.length} descendants for status ${statusId}`);
+		return descendants;
+	} catch (error) {
+		logger.warn(`Failed to fetch status context: ${error}`);
+		return [];
+	} finally {
+		// Always release the timer, including when fetch rejects.
+		clearTimeout(timeoutId);
+	}
+}
+
+/**
+ * Build the author's own reply chain, starting at the thread starter.
+ *
+ * Each following element is a status written by the starter's account that
+ * replies directly to the previous element. The chain ends at the first post
+ * that has no such reply. When several of the author's posts reply to the same
+ * parent, the one that appears first in `descendants` is followed.
+ *
+ * A status is never added twice, so malformed reply data that forms a cycle
+ * cannot make the loop run forever.
+ *
+ * @param starter - First post of the thread; always the first chain element.
+ * @param descendants - Candidate replies, e.g. the descendants of `starter`.
+ * @returns The chain in reading order, containing at least `starter`.
+ */
+function buildAuthorChain(starter: PleromaStatus, descendants: PleromaStatus[]): PleromaStatus[] {
+	const authorAccountId = starter.account.id;
+
+	// Index the author's replies by parent id. Only the first reply per parent
+	// is kept, which matches a front-to-back search of `descendants`.
+	const authorReplyByParent = new Map<string, PleromaStatus>();
+	for (const status of descendants) {
+		const parentId = status.in_reply_to_id;
+		if (
+			typeof parentId === "string" &&
+			status.account?.id === authorAccountId &&
+			!authorReplyByParent.has(parentId)
+		) {
+			authorReplyByParent.set(parentId, status);
+		}
+	}
+
+	const chain: PleromaStatus[] = [starter];
+	const visitedIds = new Set<string>([starter.id]);
+	let nextPost = authorReplyByParent.get(starter.id);
+
+	// Follow the chain until there is no direct author reply, or a post repeats.
+	while (nextPost && !visitedIds.has(nextPost.id)) {
+		chain.push(nextPost);
+		visitedIds.add(nextPost.id);
+		nextPost = authorReplyByParent.get(nextPost.id);
+	}
+
+	return chain;
+}
+
+/**
+ * Strip thread markers from content: position counters (1/n, 2/*, 3/4, also
+ * wrapped as (3/4) or [3/4]) and the 🧵 emoji.
+ *
+ * A counter is only removed when it stands on its own. Digits that are part of
+ * a longer slash-separated token, such as a date (10/12/2024) or a URL path
+ * (/1/2), are left untouched. Only spaces and tabs around a counter are
+ * consumed, so line breaks and paragraph breaks next to it are preserved.
+ *
+ * @param content - Text of a single post.
+ * @returns The text without thread markers, trimmed at both ends.
+ */
+function stripThreadMarkers(content: string): string {
+	const counter = String.raw`\d+\/(?:n|\*|\d+)`;
+	// Group 1 / group 2: horizontal whitespace before / after the counter.
+	const markerPattern = new RegExp(
+		String.raw`([^\S\r\n]*)(?<![\w/])(?:\(${counter}\)|\[${counter}\]|${counter}(?![\w/]))([^\S\r\n]*)`,
+		"gi",
+	);
+
+	return content
+		.replace(markerPattern, (_match, before: string, after: string) =>
+			// Keep one separating space only when the counter sat between two words.
+			before && after ? " " : "",
+		)
+		.replace(/🧵/g, "")
+		.trim();
+}
+
+/** Escape text for safe use inside a double-quoted HTML attribute value. */
+function escapeHtmlAttribute(value: string): string {
+	return value
+		.replace(/&/g, "&amp;")
+		.replace(/"/g, "&quot;")
+		.replace(/</g, "&lt;")
+		.replace(/>/g, "&gt;");
+}
+
+/** Render the image attachments of one post as an HTML grid of linked thumbnails. */
+function renderThreadImageGrid(images: PleromaMediaAttachment[]): string {
+	const items = images.map((image) => {
+		// URL and description come from the remote server, so escape both.
+		const url = escapeHtmlAttribute(image.url);
+		const alt = escapeHtmlAttribute(image.description || "Image");
+		return [
+			`<a href="${url}" target="_blank" rel="noopener noreferrer" class="block overflow-hidden rounded-lg border border-gray-200 transition-colors hover:border-gray-300 dark:border-gray-700 dark:hover:border-gray-600">`,
+			`<img src="${url}" alt="${alt}" class="h-48 w-full object-cover" loading="lazy" />`,
+			"</a>",
+		].join("\n");
+	});
+
+	// The leading empty entry keeps a newline in front of the opening <div>.
+	return [
+		"",
+		'<div class="mt-4 mb-4 grid grid-cols-1 gap-4 sm:grid-cols-2">',
+		...items,
+		"</div>",
+	].join("\n");
+}
+
+/**
+ * Merge the posts of a thread into a single content string.
+ *
+ * Every post becomes one segment: its cleaned text with thread markers
+ * removed, followed by an HTML image grid when the post has image
+ * attachments. Segments are joined with a horizontal rule (`---`).
+ *
+ * @param chain - Posts of the thread in reading order.
+ * @returns The merged content. `attachments` is always empty because the
+ *   images are already embedded per segment; returning them as well would
+ *   produce a duplicate grid at the end.
+ */
+function mergeThreadContent(chain: PleromaStatus[]): {
+	content: string;
+	attachments: Array<{ url: string; type: string }>;
+} {
+	const segments = chain.map((post) => {
+		// Clean and strip thread markers from content
+		const text = stripThreadMarkers(cleanContent(post.content || ""));
+
+		const images = (post.media_attachments ?? []).filter(
+			(attachment) => attachment.type === "image",
+		);
+
+		// Image grid, if any, goes after the text of the same segment.
+		return images.length > 0 ? `${text}\n\n${renderThreadImageGrid(images)}` : text;
+	});
+
+	// Join segments with horizontal rule separator
+	const content = segments.join("\n\n---\n\n");
+
+	return { content, attachments: [] }; // Return empty attachments to avoid duplicate grid at end
+}
+
 async function getAccountId(
 	instanceUrl: string,
 	username: string,
@@ -351,22 +524,43 @@ export function pleromaLoader(config: PleromaFeedConfig): Loader {
 				for (const status of validStatuses) {
 					try {
 						const content = status.content || "";
-						const cleanedContent = cleanContent(content);
-						const title = extractTitle(cleanedContent);
-
-						// Extract post ID from status
-						const postId = status.id;
+						let cleanedContent: string;
+						let attachments: Array<{ url: string; type: string }>;
+						let postId: string;
+						let sourceUrl: string;
+
+						// Check if this is a thread starter and thread merging is enabled
+						if (config.mergeThreads !== false && isThreadStarter(content)) {
+							logger.info(`Detected thread starter: ${status.id}`);
+
+							// Fetch context and build the author chain
+							const descendants = await fetchStatusContext(instanceUrl, status.id, logger);
+							const chain = buildAuthorChain(status, descendants);
+
+							logger.info(`Built chain with ${chain.length} post(s) for thread ${status.id}`);
+
+							// Merge thread content
+							const merged = mergeThreadContent(chain);
+							cleanedContent = merged.content;
+							attachments = merged.attachments;
+							postId = status.id;
+							sourceUrl = status.url;
+						} else {
+							// Process as single post
+							cleanedContent = cleanContent(content);
+							postId = status.id;
+							sourceUrl = status.url;
+
+							// Extract image attachments only
+							attachments = status.media_attachments
+								.filter((attachment) => attachment.type === "image")
+								.map((attachment) => ({
+									url: attachment.url,
+									type: `image/${attachment.url.split(".").pop() || "jpeg"}`,
+								}));
+						}
 
-						// Use status URL as source
-						const sourceUrl = status.url;
-
-						// Extract image attachments only
-						const attachments = status.media_attachments
-							.filter((attachment) => attachment.type === "image")
-							.map((attachment) => ({
-								url: attachment.url,
-								type: `image/${attachment.url.split(".").pop() || "jpeg"}`,
-							}));
+						const title = extractTitle(cleanedContent);
 
 						// Create note entry
 						store.set({
author	Dawid Rycerz <dawid@rycerz.xyz>	2026-01-12 19:45:47 +0100
committer	Dawid Rycerz <dawid@rycerz.xyz>	2026-01-12 19:46:52 +0100
commit	5e7ea1523908774c7e2dbfd47f0c6e0a2f503971 (patch)
tree	dbd103336fd92e0d276d13e354218e62adba3f3b /src
parent	51aa63873681216026d518cde4abeca307818a4b (diff)