| author | Dawid Rycerz <dawid@rycerz.xyz> | 2026-01-12 18:21:12 +0100 |
|---|---|---|
| committer | Dawid Rycerz <dawid@rycerz.xyz> | 2026-01-12 19:11:50 +0100 |
| commit | 51aa63873681216026d518cde4abeca307818a4b (patch) | |
| tree | d57b19a6ed8ce2b7303191a668de6ed7f878e7a8 /src | |
| parent | 4ed3a82a8b8f111bed88559bf8d601bb7d947df9 (diff) | |
Add infinite posts downloads
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/components/BaseHead.astro | 3 |
| -rw-r--r-- | src/content.config.ts | 11 |
| -rw-r--r-- | src/loaders/pleroma.ts | 155 |
| -rw-r--r-- | src/pages/micro/[...page].astro | 6 |
| -rw-r--r-- | src/pages/micro/rss.xml.ts | 38 |
| -rw-r--r-- | src/pages/rss.xml.ts | 6 |
6 files changed, 170 insertions, 49 deletions
```diff
diff --git a/src/components/BaseHead.astro b/src/components/BaseHead.astro
index d65dd9a..cb39d4a 100644
--- a/src/components/BaseHead.astro
+++ b/src/components/BaseHead.astro
@@ -86,6 +86,7 @@ const socialImageURL = new URL(ogImage ? ogImage : "/social-card.png", Astro.url
 {/* Plausible Analytics */}
 <link rel="dns-prefetch" href="//analytics.craftknight.com" />
 <script
+	is:inline
 	type="text/javascript"
 	defer
 	data-domain="rycerz.xyz"
@@ -93,7 +94,7 @@ const socialImageURL = new URL(ogImage ? ogImage : "/social-card.png", Astro.url
 	data-cfasync="false"
 	src="https://analytics.craftknight.com/js/plausible.outbound-links.js?ver=2.1.3"
 	id="plausible"></script>
-<script type="text/javascript" id="plausible-analytics-js-after">
+<script is:inline type="text/javascript" id="plausible-analytics-js-after">
 	/* <![CDATA[ */
 	window.plausible =
 		window.plausible ||
```

```diff
diff --git a/src/content.config.ts b/src/content.config.ts
index 4b7b230..2ae9fbf 100644
--- a/src/content.config.ts
+++ b/src/content.config.ts
@@ -41,12 +41,21 @@ const micro = defineCollection({
 	loader: pleromaLoader({
 		instanceUrl: "https://social.craftknight.com",
 		username: "dawid",
-		maxPosts: 50,
+		maxPosts: -1, // Fetch all posts
 		allowedTags: ["#miniblog", "#vanlife", "#microblog", "#giereczkowo"],
 	}),
 	schema: baseSchema.extend({
 		description: z.string().optional(),
 		publishDate: z.date().or(z.string().transform((val) => new Date(val))),
+		sourceUrl: z.string().optional(),
+		attachments: z
+			.array(
+				z.object({
+					url: z.string(),
+					type: z.string(),
+				}),
+			)
+			.optional(),
 	}),
 });
 
```
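For orientation, a hedged sketch of what a micro entry's data could look like after this schema extension; the field values are invented, and since `sourceUrl` and `attachments` are optional, entries loaded before this change still validate:

```ts
// Invented example values; the shape follows the extended `micro` schema above.
const exampleMicroEntry = {
	title: "Parked above the fjord for the night",
	description: "Short status mirrored from Pleroma",
	publishDate: new Date("2026-01-10T09:30:00+01:00"),
	sourceUrl: "https://social.craftknight.com/notice/AbCdEf123",
	attachments: [{ url: "https://social.craftknight.com/media/example.jpg", type: "image" }],
};
```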
```diff
diff --git a/src/loaders/pleroma.ts b/src/loaders/pleroma.ts
index 73d11da..7e1ccb8 100644
--- a/src/loaders/pleroma.ts
+++ b/src/loaders/pleroma.ts
@@ -38,6 +38,37 @@ interface PleromaStatus {
 	visibility: string;
 }
 
+/**
+ * Parse the Link header to extract the max_id for the next page
+ * Link header format: <url?max_id=123>; rel="next", <url?since_id=456>; rel="prev"
+ */
+function parseNextPageMaxId(linkHeader: string | null): string | null {
+	if (!linkHeader) {
+		return null;
+	}
+
+	// Split by comma to get individual links
+	const links = linkHeader.split(",");
+
+	for (const link of links) {
+		// Check if this is the "next" rel link
+		if (link.includes('rel="next"')) {
+			// Extract URL from angle brackets
+			const urlMatch = link.match(/<([^>]+)>/);
+			if (urlMatch?.[1]) {
+				// Parse the URL to extract max_id parameter
+				try {
+					const url = new URL(urlMatch[1]);
+					const maxId = url.searchParams.get("max_id");
+					return maxId;
+				} catch {}
+			}
+		}
+	}
+
+	return null;
+}
+
 async function getAccountId(
 	instanceUrl: string,
 	username: string,
@@ -86,50 +117,108 @@ async function fetchAccountStatuses(
 	maxPosts: number,
 	logger: any,
 ): Promise<PleromaStatus[]> {
-	let response: Response | undefined;
-	let lastError: unknown;
+	const allStatuses: PleromaStatus[] = [];
+	let maxId: string | null = null;
+	let pageCount = 0;
+	const pageLimit = 40; // Mastodon/Pleroma API max per page
+	const fetchAll = maxPosts === -1;
+
+	// Fetch pages until we have enough posts or no more pages available
+	while (fetchAll || allStatuses.length < maxPosts) {
+		pageCount++;
+		let response: Response | undefined;
+		let lastError: unknown;
+
+		// Build URL with pagination parameters
+		// If fetching all, always use pageLimit; otherwise calculate remaining
+		const requestLimit = fetchAll ? pageLimit : Math.min(pageLimit, maxPosts - allStatuses.length);
+		const params = new URLSearchParams({
+			limit: String(requestLimit),
+			exclude_replies: "true",
+			exclude_reblogs: "true",
+		});
 
-	// Add retry logic for network issues
-	for (let attempt = 1; attempt <= 3; attempt++) {
-		try {
-			logger.info(`Attempt ${attempt} to fetch statuses...`);
+		if (maxId) {
+			params.set("max_id", maxId);
+		}
 
-			const statusesUrl = `${instanceUrl}/api/v1/accounts/${accountId}/statuses?limit=${maxPosts}&exclude_replies=true&exclude_reblogs=true`;
+		const statusesUrl = `${instanceUrl}/api/v1/accounts/${accountId}/statuses?${params.toString()}`;
 
-			// Create timeout controller
-			const controller = new AbortController();
-			const timeoutId = setTimeout(() => controller.abort(), 10000);
+		// Add retry logic for network issues
+		for (let attempt = 1; attempt <= 3; attempt++) {
+			try {
+				const modeMsg = fetchAll ? " [fetching all posts]" : ` [target: ${maxPosts}]`;
+				logger.info(
+					`Attempt ${attempt} to fetch statuses page ${pageCount}${maxId ? ` (max_id: ${maxId})` : ""}${modeMsg}...`,
+				);
+
+				// Create timeout controller
+				const controller = new AbortController();
+				const timeoutId = setTimeout(() => controller.abort(), 10000);
+
+				response = await fetch(statusesUrl, {
+					headers: {
+						"User-Agent": "Astro Blog (pleroma-loader)",
+					},
+					signal: controller.signal,
+				});
 
-			response = await fetch(statusesUrl, {
-				headers: {
-					"User-Agent": "Astro Blog (pleroma-loader)",
-				},
-				signal: controller.signal,
-			});
+				clearTimeout(timeoutId);
 
-			clearTimeout(timeoutId);
+				if (response.ok) {
+					break; // Success, exit retry loop
+				}
+				throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+			} catch (error) {
+				lastError = error;
+				logger.warn(`Attempt ${attempt} failed: ${error}`);
 
-			if (response.ok) {
-				break; // Success, exit retry loop
-			}
-			throw new Error(`HTTP ${response.status}: ${response.statusText}`);
-		} catch (error) {
-			lastError = error;
-			logger.warn(`Attempt ${attempt} failed: ${error}`);
-
-			if (attempt < 3) {
-				logger.info("Retrying in 2 seconds...");
-				await new Promise((resolve) => setTimeout(resolve, 2000));
+				if (attempt < 3) {
+					logger.info("Retrying in 2 seconds...");
+					await new Promise((resolve) => setTimeout(resolve, 2000));
+				}
 			}
 		}
-	}
 
-	if (!response || !response.ok) {
-		throw new Error(`Failed to fetch statuses after 3 attempts. Last error: ${lastError}`);
+		if (!response || !response.ok) {
+			throw new Error(`Failed to fetch statuses after 3 attempts. Last error: ${lastError}`);
+		}
+
+		const statuses: PleromaStatus[] = await response.json();
+		logger.info(`Fetched ${statuses.length} statuses from page ${pageCount}`);
+
+		// If no statuses returned, we've reached the end
+		if (statuses.length === 0) {
+			logger.info("No more statuses available");
+			break;
+		}
+
+		// Add statuses to our accumulated list
+		allStatuses.push(...statuses);
+
+		// Parse Link header to get next page max_id
+		const linkHeader = response.headers.get("link");
+		const nextMaxId = parseNextPageMaxId(linkHeader);
+
+		if (!nextMaxId) {
+			logger.info("No more pages available (no next link in header)");
+			break;
+		}
+
+		// If the max_id hasn't changed, we're stuck in a loop - break
+		if (nextMaxId === maxId) {
+			logger.warn("Pagination returned same max_id, stopping to prevent infinite loop");
+			break;
+		}
+
+		maxId = nextMaxId;
 	}
 
-	const statuses: PleromaStatus[] = await response.json();
-	return statuses;
+	const summaryMsg = fetchAll
+		? `Total fetched: ${allStatuses.length} statuses (all available) across ${pageCount} page(s)`
+		: `Total fetched: ${allStatuses.length} statuses (target: ${maxPosts}) across ${pageCount} page(s)`;
+	logger.info(summaryMsg);
+	return allStatuses;
 }
 
 function isFilteredStatus(status: PleromaStatus): boolean {
```
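For context, the pagination the loader now follows is cursor-based: Mastodon/Pleroma return the next page's `max_id` in an HTTP `Link` header rather than in the response body. A minimal sketch of what the new `parseNextPageMaxId` helper extracts from such a header, assuming it were exported from `src/loaders/pleroma.ts` for a quick test; the host and status IDs below are invented:

```ts
// Assumes the helper were exported for testing; the import path is hypothetical.
import { parseNextPageMaxId } from "./loaders/pleroma";

// Illustrative Link header in the shape documented above the helper.
const linkHeader =
	'<https://social.example/api/v1/accounts/1/statuses?limit=40&max_id=109348>; rel="next", ' +
	'<https://social.example/api/v1/accounts/1/statuses?limit=40&since_id=110212>; rel="prev"';

// Keeps only the rel="next" entry, pulls the URL out of the angle brackets,
// and reads its max_id query parameter for the next request.
console.log(parseNextPageMaxId(linkHeader)); // "109348"

// No header (or no rel="next" entry) means the last page was reached,
// which is what ends the while loop in fetchAccountStatuses.
console.log(parseNextPageMaxId(null)); // null
```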
```diff
diff --git a/src/pages/micro/[...page].astro b/src/pages/micro/[...page].astro
index 93a35de..edfecab 100644
--- a/src/pages/micro/[...page].astro
+++ b/src/pages/micro/[...page].astro
@@ -51,11 +51,7 @@ const paginationProps = {
 <PageLayout meta={meta}>
 	<section>
 		<h1 class="title mb-6 flex items-center gap-3">
-			Micro <a
-				class="text-accent"
-				href="https://social.craftknight.com/users/dawid.rss"
-				target="_blank"
-			>
+			Micro <a class="text-accent" href="/micro/rss.xml" target="_blank">
 				<span class="sr-only">RSS feed</span>
 				<Icon aria-hidden="true" class="h-6 w-6" focusable="false" name="mdi:rss" />
 			</a>
```

```diff
diff --git a/src/pages/micro/rss.xml.ts b/src/pages/micro/rss.xml.ts
index ce25129..1fd6f53 100644
--- a/src/pages/micro/rss.xml.ts
+++ b/src/pages/micro/rss.xml.ts
@@ -1,8 +1,9 @@
 import { getCollection } from "astro:content";
 import rss from "@astrojs/rss";
+import type { APIContext } from "astro";
 import { siteConfig } from "@/site.config";
 
-export const GET = async () => {
+export const GET = async (context: APIContext) => {
 	// Get only Pleroma posts
 	const allMicro = await getCollection("micro").catch(() => []); // Fallback to empty array if micro collection fails
 
@@ -11,15 +12,38 @@
 		(a, b) => b.data.publishDate.getTime() - a.data.publishDate.getTime(),
 	);
 
-	return rss({
-		title: siteConfig.title,
-		description: siteConfig.description,
-		site: import.meta.env.SITE,
-		items: allMicroPosts.map((post) => ({
+	// Generate RSS items with full content and images
+	const items = allMicroPosts.map((post) => {
+		// Get the pre-rendered HTML from the post
+		let fullContent = post.rendered?.html || post.body || "";
+
+		// Append images if available
+		if (post.data.attachments && post.data.attachments.length > 0) {
+			const imagesHtml = post.data.attachments
+				.map(
+					(att: { url: string; type: string }) =>
+						`<p><img src="${att.url}" alt="Attachment" style="max-width: 100%; height: auto;" /></p>`,
+				)
+				.join("");
+			fullContent += imagesHtml;
+		}
+
+		return {
 			title: post.data.title,
 			pubDate: post.data.publishDate,
 			link: `micro/${post.id}/`,
 			description: post.data.description,
-		})),
+			content: fullContent,
+		};
+	});
+
+	const site = context.site || import.meta.env.SITE;
+
+	return rss({
+		title: siteConfig.title,
+		description: siteConfig.description,
+		site,
+		items,
+		customData: `<atom:link href="${site}micro/rss.xml" rel="self" type="application/rss+xml" xmlns:atom="http://www.w3.org/2005/Atom" />`,
 	});
 };
```

```diff
diff --git a/src/pages/rss.xml.ts b/src/pages/rss.xml.ts
index 8a6525d..39f3964 100644
--- a/src/pages/rss.xml.ts
+++ b/src/pages/rss.xml.ts
@@ -1,19 +1,21 @@
 import rss from "@astrojs/rss";
+import type { APIContext } from "astro";
 import { getAllPosts } from "@/data/post";
 import { siteConfig } from "@/site.config";
 
-export const GET = async () => {
+export const GET = async (context: APIContext) => {
 	const posts = await getAllPosts();
 
 	return rss({
 		title: siteConfig.title,
 		description: siteConfig.description,
-		site: import.meta.env.SITE,
+		site: context.site || import.meta.env.SITE,
 		items: posts.map((post) => ({
 			title: post.data.title,
 			description: post.data.description,
 			pubDate: post.data.publishDate,
 			link: `posts/${post.id}/`,
 		})),
+		customData: `<atom:link href="${context.site}rss.xml" rel="self" type="application/rss+xml" xmlns:atom="http://www.w3.org/2005/Atom" />`,
 	});
 };
```
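One assumption both feed endpoints now share: the self-referencing `<atom:link>` in `customData` is built by plain string concatenation, so it only resolves correctly when the site value's string form ends with a slash. A small sketch under that assumption; the site URL here stands in for whatever `astro.config` sets:

```ts
// Assumed site value; in these routes it comes from context.site or import.meta.env.SITE.
const site = new URL("https://rycerz.xyz/");

// Mirrors the customData template used in src/pages/rss.xml.ts above.
const selfLink = `<atom:link href="${site}rss.xml" rel="self" type="application/rss+xml" xmlns:atom="http://www.w3.org/2005/Atom" />`;

console.log(selfLink);
// <atom:link href="https://rycerz.xyz/rss.xml" rel="self" type="application/rss+xml" ... />
// If the site value were a plain string without the trailing slash, the href would
// collapse into "https://rycerz.xyzrss.xml", breaking the feed's self link.
```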
