Add self references replacement

author: Dawid Rycerz <dawid@rycerz.xyz> 2026-01-12 20:22:33 +0100
committer: Dawid Rycerz <dawid@rycerz.xyz> 2026-01-12 20:23:51 +0100
commit: 6574383b53da66473013512762761862d26e686e (patch)
tree: b585540d56d751991838f4baa9f8ff9aaffe51be /src/loaders
parent: 64f47e09f29ea4e240c6ca1cf9fdfd2eb637c77e (diff)
1 files changed, 46 insertions, 4 deletions
diff --git a/src/loaders/pleroma.ts b/src/loaders/pleroma.ts
index caeeb32..b2c9f81 100644
--- a/src/loaders/pleroma.ts
+++ b/src/loaders/pleroma.ts
@@ -539,6 +539,43 @@ function cleanContent(htmlContent: string): string {
 	return markdown.trim().replace(/\n\s*\n\s*\n/g, "\n\n");
 }
 
+/**
+ * Rewrite links of the form `<instanceUrl>/notice/<id>` to the internal path `/micro/pleroma-<id>/` whenever `<id>` is present in `existingPostIds`; links whose ID is not in the set are returned unchanged.
+ * Runs two passes over `content` and returns the result: markdown links `[text](url)` first (link text is kept), then bare notice URLs that are not wrapped in parentheses.
+ */
+function replacePleromaLinks(
+	content: string,
+	instanceUrl: string,
+	existingPostIds: Set<string>,
+): string {
+	// Escape regex metacharacters so instanceUrl is matched literally. NOTE(review): a trailing "/" on instanceUrl would produce "//notice/" and never match - confirm callers pass it without one.
+	const escapedInstanceUrl = instanceUrl.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+
+	// Regex source for a notice URL; its single capture group is the status ID, limited to ASCII letters and digits
+	const noticePattern = `${escapedInstanceUrl}/notice/([A-Za-z0-9]+)`;
+
+	// Pass 1 - markdown links `[text](url)`: group 1 is the link text, group 2 the status ID; only the link target is swapped
+	const markdownLinkRegex = new RegExp(`\\[([^\\]]+)\\]\\(${noticePattern}\\)`, "g");
+	let modifiedContent = content.replace(markdownLinkRegex, (match, linkText, statusId) => {
+		if (existingPostIds.has(statusId)) {
+			return `[${linkText}](/micro/pleroma-${statusId}/)`;
+		}
+		return match; // ID not in the set: leave the markdown link exactly as written
+	});
+
+	// Pass 2 - bare URLs. The lookbehind rejects a URL directly preceded by "(" and the lookahead rejects one directly followed by ")", which skips markdown link targets that pass 1 left unchanged.
+	// NOTE(review): when ")" follows the ID without a preceding "(", the greedy ID group can backtrack to a shorter ID that satisfies the lookahead; only the Set lookup below then guards against rewriting a truncated ID - confirm that is acceptable.
+	const plainUrlRegex = new RegExp(`(?<!\\()${noticePattern}(?!\\))`, "g");
+	modifiedContent = modifiedContent.replace(plainUrlRegex, (match, statusId) => {
+		if (existingPostIds.has(statusId)) {
+			return `/micro/pleroma-${statusId}/`;
+		}
+		return match; // ID not in the set: leave the URL exactly as written
+	});
+
+	return modifiedContent;
+}
+
 function markdownToHtml(markdown: string): string {
 	// Configure marked options for safe rendering
 	marked.setOptions({
@@ -602,6 +639,9 @@ export function pleromaLoader(config: PleromaFeedConfig): Loader {
 				});
 				logger.info(`After filtering: ${validStatuses.length} valid posts`);
 
+				// Collect all post IDs for link replacement
+				const allPostIds = new Set(validStatuses.map((status) => status.id));
+
 				// Clear existing entries
 				store.clear();
 
@@ -624,15 +664,17 @@ export function pleromaLoader(config: PleromaFeedConfig): Loader {
 
 							logger.info(`Built chain with ${chain.length} post(s) for thread ${status.id}`);
 
-						// Merge thread content
-						const merged = mergeThreadContent(chain);
-						cleanedContent = merged.content;
-						attachments = merged.attachments;
+							// Merge thread content
+							const merged = mergeThreadContent(chain);
+							cleanedContent = merged.content;
+							cleanedContent = replacePleromaLinks(cleanedContent, instanceUrl, allPostIds);
+							attachments = merged.attachments;
 							postId = status.id;
 							sourceUrl = status.url;
 						} else {
 							// Process as single post
 							cleanedContent = cleanContent(content);
+							cleanedContent = replacePleromaLinks(cleanedContent, instanceUrl, allPostIds);
 							postId = status.id;
 							sourceUrl = status.url;
author	Dawid Rycerz <dawid@rycerz.xyz>	2026-01-12 20:22:33 +0100
committer	Dawid Rycerz <dawid@rycerz.xyz>	2026-01-12 20:23:51 +0100
commit	6574383b53da66473013512762761862d26e686e (patch)
tree	b585540d56d751991838f4baa9f8ff9aaffe51be /src/loaders
parent	64f47e09f29ea4e240c6ca1cf9fdfd2eb637c77e (diff)