summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Dawid Rycerz <dawid@rycerz.xyz> 2026-01-12 19:45:47 +0100
committer: Dawid Rycerz <dawid@rycerz.xyz> 2026-01-12 19:46:52 +0100
commit: 5e7ea1523908774c7e2dbfd47f0c6e0a2f503971 (patch)
tree: dbd103336fd92e0d276d13e354218e62adba3f3b /src
parent: 51aa63873681216026d518cde4abeca307818a4b (diff)
Add support for pleroma threads
Diffstat (limited to 'src')
-rw-r--r-- src/loaders/pleroma.ts 224
1 files changed, 209 insertions, 15 deletions
diff --git a/src/loaders/pleroma.ts b/src/loaders/pleroma.ts
index 7e1ccb8..be61329 100644
--- a/src/loaders/pleroma.ts
+++ b/src/loaders/pleroma.ts
@@ -8,6 +8,7 @@ interface PleromaFeedConfig {
maxPosts?: number;
accountId?: string; // Optional: if provided, skips account lookup
allowedTags?: string[]; // Optional: if provided, only posts with these tags are included
+ mergeThreads?: boolean; // Optional: if true, merges thread posts into single entry (default: true)
}
interface PleromaAccount {
@@ -36,6 +37,47 @@ interface PleromaStatus {
sensitive: boolean;
media_attachments: PleromaMediaAttachment[];
visibility: string;
+ account: PleromaAccount;
+}
+
/**
 * Emoji that authors commonly use to announce a thread.
 *
 * Written as Unicode escapes so the list survives the file being re-encoded.
 * The arrow entries hold only the base code point, so they match both the bare
 * arrow and the emoji presentation that appends U+FE0F.
 */
const THREAD_STARTER_EMOJIS: readonly string[] = [
  "\u{1F9F5}", // spool of thread
  "\u{1F447}", // backhand index pointing down
  "\u2B07", // downwards arrow
  "\u{1F4DD}", // memo
  "\u{1F4D6}", // open book
  "\u2935", // arrow pointing rightwards then curving downwards
  "\u{1F53D}", // downwards button
];

/**
 * "First of N" counter: 1/n, 1/*, 1/2, 1/10, optionally wrapped in () or [].
 *
 * Look-arounds are used instead of `\b` because `\b` never matches after `*`
 * (so a plain "1/*" was missed) and because they let us reject counters that
 * are glued to a slash, which is how URL path segments ("/1/2/") and full
 * dates ("1/12/2024") look.
 */
const FIRST_OF_N_PATTERN = /(?<![\w/])1\/(?:[n*]|\d+)(?![\w/])/i;

/** Textual thread announcements, tested against tag-free, trimmed text. */
const THREAD_TEXT_MARKERS: readonly RegExp[] = [
  /\bthread:/i, // Thread:
  /\[thread\]/i, // [Thread]
  /^(?:a )?thread about/i, // "Thread about..." / "A thread about..." at the very start
];

/**
 * Detect if a post is a thread starter by checking for thread markers:
 * thread emoji, a "1/n"-style counter, or phrases such as "Thread:".
 *
 * The caller may hand over the status body as delivered by the API, which can
 * contain HTML markup. Tags are therefore removed before matching so that
 * start-of-text markers still work behind a leading `<p>` and counters are not
 * picked up from inside `href` attributes.
 *
 * Known limitation: a genuine fraction such as "1/2 cup" is indistinguishable
 * from a thread counter and is still reported as a thread starter.
 *
 * @param content - Status text, plain or HTML.
 * @returns `true` when at least one thread marker is present.
 */
function isThreadStarter(content: string): boolean {
  // Replace tags with a space (not "") so adjacent paragraphs do not fuse into one word.
  const text = content.replace(/<[^>]*>/g, " ").trim();
  if (text === "") {
    return false;
  }

  return (
    THREAD_STARTER_EMOJIS.some((emoji) => text.includes(emoji)) ||
    FIRST_OF_N_PATTERN.test(text) ||
    THREAD_TEXT_MARKERS.some((pattern) => pattern.test(text))
  );
}
/**
@@ -69,6 +111,137 @@ function parseNextPageMaxId(linkHeader: string | null): string | null {
return null;
}
/**
 * Fetch the context (ancestors and descendants) for a given status from the
 * `/api/v1/statuses/:id/context` endpoint and return only the descendants,
 * which is all that thread building needs.
 *
 * This is best-effort: any failure (network error, 10 s timeout, non-2xx
 * response, unexpected payload) is logged as a warning and reported as an
 * empty list, so callers can fall back to treating the status as a single post.
 *
 * @param instanceUrl - Base URL of the instance, without a trailing slash.
 * @param statusId - ID of the status whose replies are requested.
 * @param logger - Sink for progress (`info`) and failure (`warn`) messages.
 * @returns The descendants reported by the server, or `[]` on any failure.
 */
async function fetchStatusContext(
  instanceUrl: string,
  statusId: string,
  logger: { info(message: string): void; warn(message: string): void },
): Promise<PleromaStatus[]> {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), 10000);

  try {
    const contextUrl = `${instanceUrl}/api/v1/statuses/${encodeURIComponent(statusId)}/context`;
    logger.info(`Fetching context for status: ${statusId}`);

    const response = await fetch(contextUrl, {
      headers: {
        "User-Agent": "Astro Blog (pleroma-loader)",
      },
      signal: controller.signal,
    });

    if (!response.ok) {
      logger.warn(`Failed to fetch context: HTTP ${response.status}`);
      return [];
    }

    // The timer stays armed while the body is read, so a stalled body is aborted too.
    const context: unknown = await response.json();
    const descendants: unknown =
      typeof context === "object" && context !== null && "descendants" in context
        ? (context as { descendants: unknown }).descendants
        : undefined;

    if (!Array.isArray(descendants)) {
      logger.warn(`Failed to fetch context: response for status ${statusId} has no descendants list`);
      return [];
    }

    logger.info(`Fetched ${descendants.length} descendants for status ${statusId}`);
    return descendants;
  } catch (error) {
    logger.warn(`Failed to fetch status context: ${String(error)}`);
    return [];
  } finally {
    // Always release the timer, including when fetch rejects, so it never fires late.
    clearTimeout(timeoutId);
  }
}
+
/**
 * Build a direct author-to-author reply chain from the thread starter.
 *
 * Starting at `starter`, repeatedly follows the reply written by the same
 * account to the current post. The chain ends at the first post that has no
 * such reply, so replies from other users and missing links both terminate it.
 * When the author replied more than once to the same post, the reply that
 * appears first in `descendants` is followed.
 *
 * @param starter - First post of the thread; always the first chain element.
 * @param descendants - Replies below the starter, in any order.
 * @returns The starter followed by the author's consecutive self-replies.
 */
function buildAuthorChain(starter: PleromaStatus, descendants: PleromaStatus[]): PleromaStatus[] {
  const authorAccountId = starter.account.id;

  // Index the author's replies by parent id once instead of scanning the list per hop.
  // The first entry per parent wins, matching a left-to-right search.
  const authorReplyByParentId = new Map<string, PleromaStatus>();
  for (const status of descendants) {
    const parentId = status.in_reply_to_id;
    if (
      parentId != null &&
      status.account.id === authorAccountId &&
      !authorReplyByParentId.has(parentId)
    ) {
      authorReplyByParentId.set(parentId, status);
    }
  }

  const chain: PleromaStatus[] = [starter];
  // Guards against malformed data (self-replies, reply cycles) that would otherwise loop forever.
  const visitedIds = new Set<string>([starter.id]);

  let nextPost = authorReplyByParentId.get(starter.id);
  while (nextPost !== undefined && !visitedIds.has(nextPost.id)) {
    chain.push(nextPost);
    visitedIds.add(nextPost.id);
    nextPost = authorReplyByParentId.get(nextPost.id);
  }

  return chain;
}
+
/**
 * Thread position counter such as 1/n, 2/*, 3/4, optionally wrapped in () or [],
 * together with the whitespace around it.
 *
 * The look-arounds keep the counter from matching inside longer tokens: a
 * neighbouring word character or slash means the digits belong to a URL path
 * ("/2024/01/") or a full date ("12/01/2026") and must be left alone.
 */
const THREAD_COUNTER_PATTERN =
  /\s*(?<![\w/])(?:\(\d+\/(?:[n*]|\d+)\)|\[\d+\/(?:[n*]|\d+)\]|\d+\/(?:[n*]|\d+))(?![\w/])\s*/gi;

/** Spool-of-thread emoji, written as an escape so it survives re-encoding of the file. */
const THREAD_EMOJI_PATTERN = /\u{1F9F5}/gu;

/**
 * Strip thread markers from content (1/n, 2/n, 3/4, (2/5), [3/*], the thread emoji).
 *
 * Each counter, including any enclosing brackets and surrounding whitespace,
 * collapses to a single space; the thread emoji is removed; the result is trimmed.
 *
 * Known limitation: a standalone fraction or short date such as "3/4" cannot be
 * told apart from a counter and is removed as well.
 *
 * @param content - Post text, already cleaned of markup.
 * @returns The text without thread markers.
 */
function stripThreadMarkers(content: string): string {
  return content
    .replace(THREAD_COUNTER_PATTERN, " ")
    .replace(THREAD_EMOJI_PATTERN, "")
    .trim();
}
+
/**
 * Merge thread posts into a single content string, one segment per post.
 *
 * Every post contributes its cleaned text (thread markers removed) followed by
 * an HTML grid of its image attachments. Segments are joined with a `---`
 * separator. Posts that end up with neither text nor images are skipped so the
 * output never contains consecutive separators.
 *
 * Attachment URLs and descriptions come from remote servers, so they are
 * escaped before being placed into HTML attributes, and attachments whose URL
 * is not http(s) are dropped.
 *
 * @param chain - Posts of the thread in reading order.
 * @returns The merged content. `attachments` is always empty because the
 *   images are already embedded per segment; returning them too would render a
 *   duplicate grid at the end.
 */
function mergeThreadContent(chain: PleromaStatus[]): {
  content: string;
  attachments: Array<{ url: string; type: string }>;
} {
  // Escape a value for use inside a double-quoted HTML attribute.
  const escapeAttribute = (value: string): string =>
    value
      .replace(/&/g, "&amp;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");

  const linkClass =
    "block overflow-hidden rounded-lg border border-gray-200 transition-colors hover:border-gray-300 dark:border-gray-700 dark:hover:border-gray-600";

  const segments: string[] = [];

  for (const post of chain) {
    // Clean and strip thread markers from content
    const text = stripThreadMarkers(cleanContent(post.content || ""));

    // Only images are embedded; anything that is not a plain http(s) link is
    // ignored so a hostile server cannot inject e.g. a `javascript:` href.
    const imageAttachments = post.media_attachments.filter(
      (attachment) => attachment.type === "image" && /^https?:\/\//i.test(attachment.url),
    );

    if (imageAttachments.length === 0) {
      if (text !== "") {
        segments.push(text);
      }
      continue;
    }

    const imageLinks = imageAttachments.map((attachment) => {
      const url = escapeAttribute(attachment.url);
      const description = escapeAttribute(attachment.description || "Image");
      return `<a href="${url}" target="_blank" rel="noopener noreferrer" class="${linkClass}">
<img src="${url}" alt="${description}" class="h-48 w-full object-cover" loading="lazy" />
</a>`;
    });

    const imageGrid = `
<div class="mt-4 mb-4 grid grid-cols-1 gap-4 sm:grid-cols-2">
${imageLinks.join("\n")}
</div>`;

    segments.push(`${text}\n\n${imageGrid}`);
  }

  // Join segments with horizontal rule separator
  const content = segments.join("\n\n---\n\n");

  return { content, attachments: [] };
}
+
async function getAccountId(
instanceUrl: string,
username: string,
@@ -351,22 +524,43 @@ export function pleromaLoader(config: PleromaFeedConfig): Loader {
for (const status of validStatuses) {
try {
const content = status.content || "";
- const cleanedContent = cleanContent(content);
- const title = extractTitle(cleanedContent);
-
- // Extract post ID from status
- const postId = status.id;
+ let cleanedContent: string;
+ let attachments: Array<{ url: string; type: string }>;
+ let postId: string;
+ let sourceUrl: string;
+
+ // Check if this is a thread starter and thread merging is enabled
+ if (config.mergeThreads !== false && isThreadStarter(content)) {
+ logger.info(`Detected thread starter: ${status.id}`);
+
+ // Fetch context and build the author chain
+ const descendants = await fetchStatusContext(instanceUrl, status.id, logger);
+ const chain = buildAuthorChain(status, descendants);
+
+ logger.info(`Built chain with ${chain.length} post(s) for thread ${status.id}`);
+
+ // Merge thread content
+ const merged = mergeThreadContent(chain);
+ cleanedContent = merged.content;
+ attachments = merged.attachments;
+ postId = status.id;
+ sourceUrl = status.url;
+ } else {
+ // Process as single post
+ cleanedContent = cleanContent(content);
+ postId = status.id;
+ sourceUrl = status.url;
+
+ // Extract image attachments only
+ attachments = status.media_attachments
+ .filter((attachment) => attachment.type === "image")
+ .map((attachment) => ({
+ url: attachment.url,
+ type: `image/${attachment.url.split(".").pop() || "jpeg"}`,
+ }));
+ }
- // Use status URL as source
- const sourceUrl = status.url;
-
- // Extract image attachments only
- const attachments = status.media_attachments
- .filter((attachment) => attachment.type === "image")
- .map((attachment) => ({
- url: attachment.url,
- type: `image/${attachment.url.split(".").pop() || "jpeg"}`,
- }));
+ const title = extractTitle(cleanedContent);
// Create note entry
store.set({