summaryrefslogtreecommitdiff
path: root/src/loaders
diff options
context:
space:
mode:
author Dawid Rycerz <dawid@rycerz.xyz> 2026-01-12 20:22:33 +0100
committer Dawid Rycerz <dawid@rycerz.xyz> 2026-01-12 20:23:51 +0100
commit 6574383b53da66473013512762761862d26e686e (patch)
tree b585540d56d751991838f4baa9f8ff9aaffe51be /src/loaders
parent 64f47e09f29ea4e240c6ca1cf9fdfd2eb637c77e (diff)
Add self references replacement
Diffstat (limited to 'src/loaders')
-rw-r--r-- src/loaders/pleroma.ts 50
1 files changed, 46 insertions, 4 deletions
diff --git a/src/loaders/pleroma.ts b/src/loaders/pleroma.ts
index caeeb32..b2c9f81 100644
--- a/src/loaders/pleroma.ts
+++ b/src/loaders/pleroma.ts
@@ -539,6 +539,43 @@ function cleanContent(htmlContent: string): string {
return markdown.trim().replace(/\n\s*\n\s*\n/g, "\n\n");
}
/**
 * Rewrite links that point at notices on the given Pleroma instance so they
 * target the locally rendered copy of the post instead.
 *
 * Two forms are handled, in this order:
 *  1. Markdown links: `[text](<instanceUrl>/notice/<id>)`
 *  2. Bare URLs:      `<instanceUrl>/notice/<id>`
 *
 * A link is rewritten only when `<id>` is present in `existingPostIds`;
 * every other link is returned untouched.
 *
 * @param content - Markdown content to scan.
 * @param instanceUrl - Base URL of the Pleroma instance; trailing slashes are ignored.
 * @param existingPostIds - IDs of the posts that exist in the local collection.
 * @returns The content with matching notice links replaced by `/micro/pleroma-<id>/`.
 */
function replacePleromaLinks(
  content: string,
  instanceUrl: string,
  existingPostIds: Set<string>,
): string {
  // Nothing can possibly be rewritten, so skip building the regexes.
  if (content === "" || existingPostIds.size === 0) {
    return content;
  }

  // A configured URL such as "https://host/" would otherwise yield
  // "https://host//notice/..." and silently never match.
  const baseUrl = instanceUrl.replace(/\/+$/, "");

  // Escape special regex characters so e.g. "." only matches a literal dot.
  const escapedInstanceUrl = baseUrl.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  // Pattern to match notice URLs - captures the statusId.
  const noticePattern = `${escapedInstanceUrl}/notice/([A-Za-z0-9]+)`;

  // Pass 1 - markdown links: [text](url). The closing ")" must directly
  // follow the id, so the captured id is always complete.
  const markdownLinkRegex = new RegExp(`\\[([^\\]]+)\\]\\(${noticePattern}\\)`, "g");
  const withMarkdownLinks = content.replace(
    markdownLinkRegex,
    (match: string, linkText: string, statusId: string) =>
      existingPostIds.has(statusId) ? `[${linkText}](/micro/pleroma-${statusId}/)` : match,
  );

  // Pass 2 - bare URLs.
  //  * The lookbehind skips URLs that are a markdown link destination, i.e.
  //    links pass 1 deliberately left alone because the post is unknown.
  //  * The lookahead only requires the id to end at a token boundary. It must
  //    NOT forbid a following ")": doing so makes the greedy id group
  //    backtrack by one character for text like "(see <url>)", producing a
  //    truncated id that is either unknown or belongs to a different post.
  const plainUrlRegex = new RegExp(`(?<!\\()${noticePattern}(?![A-Za-z0-9_-])`, "g");
  return withMarkdownLinks.replace(plainUrlRegex, (match: string, statusId: string) =>
    existingPostIds.has(statusId) ? `/micro/pleroma-${statusId}/` : match,
  );
}
+
function markdownToHtml(markdown: string): string {
// Configure marked options for safe rendering
marked.setOptions({
@@ -602,6 +639,9 @@ export function pleromaLoader(config: PleromaFeedConfig): Loader {
});
logger.info(`After filtering: ${validStatuses.length} valid posts`);
+ // Collect all post IDs for link replacement
+ const allPostIds = new Set(validStatuses.map((status) => status.id));
+
// Clear existing entries
store.clear();
@@ -624,15 +664,17 @@ export function pleromaLoader(config: PleromaFeedConfig): Loader {
logger.info(`Built chain with ${chain.length} post(s) for thread ${status.id}`);
- // Merge thread content
- const merged = mergeThreadContent(chain);
- cleanedContent = merged.content;
- attachments = merged.attachments;
+ // Merge thread content
+ const merged = mergeThreadContent(chain);
+ cleanedContent = merged.content;
+ cleanedContent = replacePleromaLinks(cleanedContent, instanceUrl, allPostIds);
+ attachments = merged.attachments;
postId = status.id;
sourceUrl = status.url;
} else {
// Process as single post
cleanedContent = cleanContent(content);
+ cleanedContent = replacePleromaLinks(cleanedContent, instanceUrl, allPostIds);
postId = status.id;
sourceUrl = status.url;