summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/loaders/pleroma.ts 105
1 files changed, 95 insertions, 10 deletions
diff --git a/src/loaders/pleroma.ts b/src/loaders/pleroma.ts
index 0dbb26b..caeeb32 100644
--- a/src/loaders/pleroma.ts
+++ b/src/loaders/pleroma.ts
@@ -2,6 +2,11 @@ import type { Loader } from "astro/loaders";
import { marked } from "marked";
import TurndownService from "turndown";
+/** Signature shared by the logger methods: one message string in, nothing returned. */
+type LogMethod = (message: string) => void;
+
+/**
+ * Minimal logger shape used to type the `logger` parameters in this file
+ * (previously `any`). Only the `info` and `warn` methods are required.
+ */
+interface Logger {
+  info: LogMethod;
+  warn: LogMethod;
+}
+
interface PleromaFeedConfig {
instanceUrl: string;
username: string;
@@ -118,7 +123,7 @@ function parseNextPageMaxId(linkHeader: string | null): string | null {
async function fetchStatusContext(
instanceUrl: string,
statusId: string,
- logger: any,
+ logger: Logger,
): Promise<PleromaStatus[]> {
try {
const contextUrl = `${instanceUrl}/api/v1/statuses/${statusId}/context`;
@@ -190,6 +195,73 @@ function stripThreadMarkers(content: string): string {
}
/**
+ * Split the hashtags that close a piece of content off from its main text.
+ *
+ * A hashtag is either a markdown link of the form `[#tag](url)` or a bare
+ * `#tag`. Two kinds of trailing hashtags are removed:
+ *  1. whole lines at the end of the content that contain nothing but hashtags
+ *     (blank lines at the end are dropped as well), and
+ *  2. a run of hashtags at the very end of the remaining text.
+ *
+ * A bare `#tag` in case 2 only counts when it starts the text or follows
+ * whitespace, so a URL fragment such as `https://example.com/guide#setup` or a
+ * `word#tag` glued to other text is left untouched.
+ *
+ * @param content - Text to scan.
+ * @returns `mainContent`, the text without its trailing hashtags, and `tags`,
+ *   the extracted tag names lower-cased, de-duplicated and in reading order.
+ */
+function extractTrailingHashtags(content: string): {
+  mainContent: string;
+  tags: string[];
+} {
+  // A line made up solely of hashtags (markdown links and/or bare tags).
+  const hashtagOnlyLine = /^\s*((\[#\w+\]\([^)]+\)|#\w+)\s*)+\s*$/;
+  // A run of hashtags at the end of the text. `(?<!\S)` requires a bare tag to
+  // start the text or follow whitespace, which keeps URL fragments intact.
+  const trailingHashtags = /((?:\s*(?:\[#\w+\]\([^)]+\)|(?<!\S)#\w+))+)\s*$/;
+  // Captures the tag name from either hashtag form.
+  const hashtagExtract = /\[#(\w+)\]\([^)]+\)|#(\w+)/g;
+
+  // matchAll works on a copy of the regex, so no lastIndex bookkeeping is needed.
+  const collectTags = (text: string): string[] =>
+    Array.from(text.matchAll(hashtagExtract), (m) => (m[1] ?? m[2] ?? "").toLowerCase()).filter(
+      (tag) => tag !== "",
+    );
+
+  // Step 1: peel hashtag-only (and blank) lines off the end, bottom-up.
+  const lines = content.split("\n");
+  const tagsPerLineBottomUp: string[][] = [];
+  while (lines.length > 0) {
+    const lastLine = (lines[lines.length - 1] ?? "").trim();
+    if (lastLine === "") {
+      lines.pop(); // Remove empty trailing lines
+      continue;
+    }
+    if (!hashtagOnlyLine.test(lastLine)) {
+      break;
+    }
+    tagsPerLineBottomUp.push(collectTags(lastLine));
+    lines.pop();
+  }
+  let mainContent = lines.join("\n");
+
+  // Step 2: strip hashtags that end the remaining text (even after other text).
+  const tags: string[] = [];
+  const trailingMatch = mainContent.match(trailingHashtags);
+  if (trailingMatch?.[1]) {
+    tags.push(...collectTags(trailingMatch[1]));
+    mainContent = mainContent.replace(trailingHashtags, "").trim();
+  }
+
+  // Inline trailing tags precede the hashtag-only lines in the text; those
+  // lines were gathered bottom-up, so reverse them to restore reading order.
+  for (const lineTags of tagsPerLineBottomUp.reverse()) {
+    tags.push(...lineTags);
+  }
+
+  return {
+    mainContent,
+    tags: [...new Set(tags)], // Deduplicate within this segment
+  };
+}
+
+/**
* Merge thread posts into a single content structure with image grids per segment
*/
function mergeThreadContent(chain: PleromaStatus[]): {
@@ -198,14 +270,19 @@ function mergeThreadContent(chain: PleromaStatus[]): {
} {
const segments: string[] = [];
const allAttachments: Array<{ url: string; type: string }> = [];
+ const allTags = new Set<string>(); // Collect all tags from all segments
for (const post of chain) {
// Clean and strip thread markers from content
const cleanedContent = cleanContent(post.content || "");
const contentWithoutMarkers = stripThreadMarkers(cleanedContent);
- // Build segment with text
- let segment = contentWithoutMarkers;
+ // Extract trailing hashtags from content
+ const { mainContent, tags } = extractTrailingHashtags(contentWithoutMarkers);
+ tags.forEach((tag) => allTags.add(tag));
+
+ // Build segment with text (without trailing hashtags)
+ let segment = mainContent;
// Add image attachments as HTML grid after the text
const imageAttachments = post.media_attachments.filter(
@@ -237,7 +314,15 @@ ${imageAttachments
}
// Join segments with horizontal rule separator
- const content = segments.join("\n\n---\n\n");
+ let content = segments.join("\n\n---\n\n");
+
+ // Append consolidated tags at the end as markdown links
+ if (allTags.size > 0) {
+ // Get the instance URL from the first post to construct tag URLs
+ const instanceUrl = chain[0]?.account.url.split("/@")[0] || "https://social.craftknight.com";
+ const tagLine = [...allTags].map((t) => `[#${t}](${instanceUrl}/tag/${t})`).join(" ");
+ content = `${content}\n\n${tagLine}`;
+ }
return { content, attachments: [] }; // Return empty attachments to avoid duplicate grid at end
}
@@ -245,7 +330,7 @@ ${imageAttachments
async function getAccountId(
instanceUrl: string,
username: string,
- logger: any,
+ logger: Logger,
): Promise<string | null> {
try {
const searchUrl = `${instanceUrl}/api/v1/accounts/search?q=${encodeURIComponent(username)}&limit=1`;
@@ -288,7 +373,7 @@ async function fetchAccountStatuses(
instanceUrl: string,
accountId: string,
maxPosts: number,
- logger: any,
+ logger: Logger,
): Promise<PleromaStatus[]> {
const allStatuses: PleromaStatus[] = [];
let maxId: string | null = null;
@@ -539,10 +624,10 @@ export function pleromaLoader(config: PleromaFeedConfig): Loader {
logger.info(`Built chain with ${chain.length} post(s) for thread ${status.id}`);
- // Merge thread content
- const merged = mergeThreadContent(chain);
- cleanedContent = merged.content;
- attachments = merged.attachments;
+ // Merge thread content
+ const merged = mergeThreadContent(chain);
+ cleanedContent = merged.content;
+ attachments = merged.attachments;
postId = status.id;
sourceUrl = status.url;
} else {