diff options
| author | Dawid Rycerz <dawid@rycerz.xyz> | 2026-01-12 20:10:16 +0100 |
|---|---|---|
| committer | Dawid Rycerz <dawid@rycerz.xyz> | 2026-01-12 20:10:16 +0100 |
| commit | 64f47e09f29ea4e240c6ca1cf9fdfd2eb637c77e (patch) | |
| tree | 35e6e4a241f7c69f11c53ac3a5dfebb2a44875de | |
| parent | 6e19444bd32c271b53971eaad7377e340259100c (diff) | |
Fix tags showing
| -rw-r--r-- | src/loaders/pleroma.ts | 105 |
1 files changed, 95 insertions, 10 deletions
diff --git a/src/loaders/pleroma.ts b/src/loaders/pleroma.ts index 0dbb26b..caeeb32 100644 --- a/src/loaders/pleroma.ts +++ b/src/loaders/pleroma.ts @@ -2,6 +2,11 @@ import type { Loader } from "astro/loaders"; import { marked } from "marked"; import TurndownService from "turndown"; +interface Logger { + info: (message: string) => void; + warn: (message: string) => void; +} + interface PleromaFeedConfig { instanceUrl: string; username: string; @@ -118,7 +123,7 @@ function parseNextPageMaxId(linkHeader: string | null): string | null { async function fetchStatusContext( instanceUrl: string, statusId: string, - logger: any, + logger: Logger, ): Promise<PleromaStatus[]> { try { const contextUrl = `${instanceUrl}/api/v1/statuses/${statusId}/context`; @@ -190,6 +195,73 @@ function stripThreadMarkers(content: string): string { } /** + * Extract trailing hashtags from content + * Handles both hashtag-only lines and hashtags at the end of text lines + * Returns the main content without trailing tags and the extracted tags + */ +function extractTrailingHashtags(content: string): { + mainContent: string; + tags: string[]; +} { + const tags: string[] = []; + let modifiedContent = content; + + // Regex patterns + const hashtagOnlyLine = /^\s*((\[#\w+\]\([^)]+\)|#\w+)\s*)+\s*$/; + const trailingHashtags = /((?:\s*(?:\[#\w+\]\([^)]+\)|#\w+))+)\s*$/; + const hashtagExtract = /\[#(\w+)\]\([^)]+\)|#(\w+)/g; + + // First, handle hashtag-only lines at the end + const lines = modifiedContent.split("\n"); + while (lines.length > 0) { + const lastLine = lines[lines.length - 1]?.trim() || ""; + if (!lastLine) { + lines.pop(); // Remove empty trailing lines + continue; + } + if (hashtagOnlyLine.test(lastLine)) { + // Extract tag names from this line + let match: RegExpExecArray | null = hashtagExtract.exec(lastLine); + while (match !== null) { + const tag = match[1] || match[2]; + if (tag) { + tags.push(tag.toLowerCase()); + } + match = hashtagExtract.exec(lastLine); + } + hashtagExtract.lastIndex = 0; + lines.pop(); + } else { + break; + } + } + modifiedContent = lines.join("\n"); + + // Second, handle trailing hashtags at the end of the last line (even if there's other text) + const trailingMatch = modifiedContent.match(trailingHashtags); + if (trailingMatch?.[1]) { + const trailingText = trailingMatch[1]; + // Extract tag names from trailing hashtags + let match: RegExpExecArray | null = hashtagExtract.exec(trailingText); + while (match !== null) { + const tag = match[1] || match[2]; + if (tag) { + tags.push(tag.toLowerCase()); + } + match = hashtagExtract.exec(trailingText); + } + hashtagExtract.lastIndex = 0; + // Remove trailing hashtags from content + modifiedContent = modifiedContent.replace(trailingHashtags, "").trim(); + } + + return { + mainContent: modifiedContent, + tags: [...new Set(tags)], // Deduplicate within this segment + }; +} + +/** * Merge thread posts into a single content structure with image grids per segment */ function mergeThreadContent(chain: PleromaStatus[]): { @@ -198,14 +270,19 @@ function mergeThreadContent(chain: PleromaStatus[]): { } { const segments: string[] = []; const allAttachments: Array<{ url: string; type: string }> = []; + const allTags = new Set<string>(); // Collect all tags from all segments for (const post of chain) { // Clean and strip thread markers from content const cleanedContent = cleanContent(post.content || ""); const contentWithoutMarkers = stripThreadMarkers(cleanedContent); - // Build segment with text - let segment = contentWithoutMarkers; + // Extract trailing hashtags from content + const { mainContent, tags } = extractTrailingHashtags(contentWithoutMarkers); + tags.forEach((tag) => allTags.add(tag)); + + // Build segment with text (without trailing hashtags) + let segment = mainContent; // Add image attachments as HTML grid after the text const imageAttachments = post.media_attachments.filter( @@ -237,7 +314,15 @@ ${imageAttachments } // Join segments with horizontal rule separator - const content = segments.join("\n\n---\n\n"); + let content = segments.join("\n\n---\n\n"); + + // Append consolidated tags at the end as markdown links + if (allTags.size > 0) { + // Get the instance URL from the first post to construct tag URLs + const instanceUrl = chain[0]?.account.url.split("/@")[0] || "https://social.craftknight.com"; + const tagLine = [...allTags].map((t) => `[#${t}](${instanceUrl}/tag/${t})`).join(" "); + content = `${content}\n\n${tagLine}`; + } return { content, attachments: [] }; // Return empty attachments to avoid duplicate grid at end } @@ -245,7 +330,7 @@ ${imageAttachments async function getAccountId( instanceUrl: string, username: string, - logger: any, + logger: Logger, ): Promise<string | null> { try { const searchUrl = `${instanceUrl}/api/v1/accounts/search?q=${encodeURIComponent(username)}&limit=1`; @@ -288,7 +373,7 @@ async function fetchAccountStatuses( instanceUrl: string, accountId: string, maxPosts: number, - logger: any, + logger: Logger, ): Promise<PleromaStatus[]> { const allStatuses: PleromaStatus[] = []; let maxId: string | null = null; @@ -539,10 +624,10 @@ export function pleromaLoader(config: PleromaFeedConfig): Loader { logger.info(`Built chain with ${chain.length} post(s) for thread ${status.id}`); - // Merge thread content - const merged = mergeThreadContent(chain); - cleanedContent = merged.content; - attachments = merged.attachments; + // Merge thread content + const merged = mergeThreadContent(chain); + cleanedContent = merged.content; + attachments = merged.attachments; postId = status.id; sourceUrl = status.url; } else { |
