summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Dawid Rycerz <dawid@rycerz.xyz>, 2026-01-12 18:21:12 +0100
committer: Dawid Rycerz <dawid@rycerz.xyz>, 2026-01-12 19:11:50 +0100
commit: 51aa63873681216026d518cde4abeca307818a4b (patch)
tree: d57b19a6ed8ce2b7303191a668de6ed7f878e7a8
parent: 4ed3a82a8b8f111bed88559bf8d601bb7d947df9 (diff)
Add infinite posts downloads
-rw-r--r-- lychee.toml | 1
-rw-r--r-- src/components/BaseHead.astro | 3
-rw-r--r-- src/content.config.ts | 11
-rw-r--r-- src/loaders/pleroma.ts | 155
-rw-r--r-- src/pages/micro/[...page].astro | 6
-rw-r--r-- src/pages/micro/rss.xml.ts | 38
-rw-r--r-- src/pages/rss.xml.ts | 6
7 files changed, 170 insertions, 50 deletions
diff --git a/lychee.toml b/lychee.toml
index 82493f0..e683dd3 100644
--- a/lychee.toml
+++ b/lychee.toml
@@ -1,2 +1 @@
exclude = ["https://devops.com/using-calms-to-assess-organizations-devops/"]
-
diff --git a/src/components/BaseHead.astro b/src/components/BaseHead.astro
index d65dd9a..cb39d4a 100644
--- a/src/components/BaseHead.astro
+++ b/src/components/BaseHead.astro
@@ -86,6 +86,7 @@ const socialImageURL = new URL(ogImage ? ogImage : "/social-card.png", Astro.url
{/* Plausible Analytics */}
<link rel="dns-prefetch" href="//analytics.craftknight.com" />
<script
+ is:inline
type="text/javascript"
defer
data-domain="rycerz.xyz"
@@ -93,7 +94,7 @@ const socialImageURL = new URL(ogImage ? ogImage : "/social-card.png", Astro.url
data-cfasync="false"
src="https://analytics.craftknight.com/js/plausible.outbound-links.js?ver=2.1.3"
id="plausible"></script>
-<script type="text/javascript" id="plausible-analytics-js-after">
+<script is:inline type="text/javascript" id="plausible-analytics-js-after">
/* <![CDATA[ */
window.plausible =
window.plausible ||
diff --git a/src/content.config.ts b/src/content.config.ts
index 4b7b230..2ae9fbf 100644
--- a/src/content.config.ts
+++ b/src/content.config.ts
@@ -41,12 +41,21 @@ const micro = defineCollection({
loader: pleromaLoader({
instanceUrl: "https://social.craftknight.com",
username: "dawid",
- maxPosts: 50,
+ maxPosts: -1, // Fetch all posts
allowedTags: ["#miniblog", "#vanlife", "#microblog", "#giereczkowo"],
}),
schema: baseSchema.extend({
description: z.string().optional(),
publishDate: z.date().or(z.string().transform((val) => new Date(val))),
+ sourceUrl: z.string().optional(),
+ attachments: z
+ .array(
+ z.object({
+ url: z.string(),
+ type: z.string(),
+ }),
+ )
+ .optional(),
}),
});
diff --git a/src/loaders/pleroma.ts b/src/loaders/pleroma.ts
index 73d11da..7e1ccb8 100644
--- a/src/loaders/pleroma.ts
+++ b/src/loaders/pleroma.ts
@@ -38,6 +38,37 @@ interface PleromaStatus {
visibility: string;
}
+/**
+ * Extract the `max_id` cursor for the next page from an HTTP Link header.
+ * Link header format: <url?max_id=123>; rel="next", <url?since_id=456>; rel="prev"
+ *
+ * Entries are matched as whole `<url>; params` units rather than by splitting on ",",
+ * so a comma inside a URL cannot cut an entry in half. The rel parameter name is
+ * matched case-insensitively and its value may be quoted, unquoted, or a
+ * space-separated list of relation types.
+ *
+ * @param linkHeader - Raw Link header value, or null when the response has none.
+ * @returns The max_id query parameter of the first parseable "next" URL, or null
+ * when the header is missing, has no next link, or that URL carries no max_id.
+ */
+function parseNextPageMaxId(linkHeader: string | null): string | null {
+	if (!linkHeader) {
+		return null;
+	}
+
+	// Group 1: URL between angle brackets. Group 2: its parameters, up to the next "<".
+	const entryPattern = /<([^>]*)>([^<]*)/g;
+	// Group 1: quoted rel value. Group 2: unquoted rel value.
+	const relPattern = /;\s*rel\s*=\s*(?:"([^"]*)"|([^\s;,"]+))/i;
+
+	for (const [, target = "", params = ""] of linkHeader.matchAll(entryPattern)) {
+		const rel = relPattern.exec(params);
+		const relTypes = (rel?.[1] ?? rel?.[2] ?? "").toLowerCase().split(/\s+/);
+		if (!relTypes.includes("next")) {
+			continue;
+		}
+
+		try {
+			return new URL(target).searchParams.get("max_id");
+		} catch {
+			// Not an absolute URL: skip this entry and keep looking for another next link
+		}
+	}
+
+	return null;
+}
+
async function getAccountId(
instanceUrl: string,
username: string,
@@ -86,50 +117,108 @@ async function fetchAccountStatuses(
maxPosts: number,
logger: any,
): Promise<PleromaStatus[]> {
- let response: Response | undefined;
- let lastError: unknown;
+ const allStatuses: PleromaStatus[] = [];
+ let maxId: string | null = null;
+ let pageCount = 0;
+ const pageLimit = 40; // Mastodon/Pleroma API max per page
+ const fetchAll = maxPosts === -1;
+
+ // Fetch pages until we have enough posts or no more pages available
+ while (fetchAll || allStatuses.length < maxPosts) {
+ pageCount++;
+ let response: Response | undefined;
+ let lastError: unknown;
+
+ // Build URL with pagination parameters
+ // If fetching all, always use pageLimit; otherwise calculate remaining
+ const requestLimit = fetchAll ? pageLimit : Math.min(pageLimit, maxPosts - allStatuses.length);
+ const params = new URLSearchParams({
+ limit: String(requestLimit),
+ exclude_replies: "true",
+ exclude_reblogs: "true",
+ });
- // Add retry logic for network issues
- for (let attempt = 1; attempt <= 3; attempt++) {
- try {
- logger.info(`Attempt ${attempt} to fetch statuses...`);
+ if (maxId) {
+ params.set("max_id", maxId);
+ }
- const statusesUrl = `${instanceUrl}/api/v1/accounts/${accountId}/statuses?limit=${maxPosts}&exclude_replies=true&exclude_reblogs=true`;
+ const statusesUrl = `${instanceUrl}/api/v1/accounts/${accountId}/statuses?${params.toString()}`;
- // Create timeout controller
- const controller = new AbortController();
- const timeoutId = setTimeout(() => controller.abort(), 10000);
+ // Add retry logic for network issues
+ for (let attempt = 1; attempt <= 3; attempt++) {
+ try {
+ const modeMsg = fetchAll ? " [fetching all posts]" : ` [target: ${maxPosts}]`;
+ logger.info(
+ `Attempt ${attempt} to fetch statuses page ${pageCount}${maxId ? ` (max_id: ${maxId})` : ""}${modeMsg}...`,
+ );
+
+ // Create timeout controller
+ const controller = new AbortController();
+ const timeoutId = setTimeout(() => controller.abort(), 10000);
+
+ response = await fetch(statusesUrl, {
+ headers: {
+ "User-Agent": "Astro Blog (pleroma-loader)",
+ },
+ signal: controller.signal,
+ });
- response = await fetch(statusesUrl, {
- headers: {
- "User-Agent": "Astro Blog (pleroma-loader)",
- },
- signal: controller.signal,
- });
+ clearTimeout(timeoutId);
- clearTimeout(timeoutId);
+ if (response.ok) {
+ break; // Success, exit retry loop
+ }
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+ } catch (error) {
+ lastError = error;
+ logger.warn(`Attempt ${attempt} failed: ${error}`);
- if (response.ok) {
- break; // Success, exit retry loop
- }
- throw new Error(`HTTP ${response.status}: ${response.statusText}`);
- } catch (error) {
- lastError = error;
- logger.warn(`Attempt ${attempt} failed: ${error}`);
-
- if (attempt < 3) {
- logger.info("Retrying in 2 seconds...");
- await new Promise((resolve) => setTimeout(resolve, 2000));
+ if (attempt < 3) {
+ logger.info("Retrying in 2 seconds...");
+ await new Promise((resolve) => setTimeout(resolve, 2000));
+ }
}
}
- }
- if (!response || !response.ok) {
- throw new Error(`Failed to fetch statuses after 3 attempts. Last error: ${lastError}`);
+ if (!response || !response.ok) {
+ throw new Error(`Failed to fetch statuses after 3 attempts. Last error: ${lastError}`);
+ }
+
+ const statuses: PleromaStatus[] = await response.json();
+ logger.info(`Fetched ${statuses.length} statuses from page ${pageCount}`);
+
+ // If no statuses returned, we've reached the end
+ if (statuses.length === 0) {
+ logger.info("No more statuses available");
+ break;
+ }
+
+ // Add statuses to our accumulated list
+ allStatuses.push(...statuses);
+
+ // Parse Link header to get next page max_id
+ const linkHeader = response.headers.get("link");
+ const nextMaxId = parseNextPageMaxId(linkHeader);
+
+ if (!nextMaxId) {
+ logger.info("No more pages available (no next link in header)");
+ break;
+ }
+
+ // If the max_id hasn't changed, we're stuck in a loop - break
+ if (nextMaxId === maxId) {
+ logger.warn("Pagination returned same max_id, stopping to prevent infinite loop");
+ break;
+ }
+
+ maxId = nextMaxId;
}
- const statuses: PleromaStatus[] = await response.json();
- return statuses;
+ const summaryMsg = fetchAll
+ ? `Total fetched: ${allStatuses.length} statuses (all available) across ${pageCount} page(s)`
+ : `Total fetched: ${allStatuses.length} statuses (target: ${maxPosts}) across ${pageCount} page(s)`;
+ logger.info(summaryMsg);
+ return allStatuses;
}
function isFilteredStatus(status: PleromaStatus): boolean {
diff --git a/src/pages/micro/[...page].astro b/src/pages/micro/[...page].astro
index 93a35de..edfecab 100644
--- a/src/pages/micro/[...page].astro
+++ b/src/pages/micro/[...page].astro
@@ -51,11 +51,7 @@ const paginationProps = {
<PageLayout meta={meta}>
<section>
<h1 class="title mb-6 flex items-center gap-3">
- Micro <a
- class="text-accent"
- href="https://social.craftknight.com/users/dawid.rss"
- target="_blank"
- >
+ Micro <a class="text-accent" href="/micro/rss.xml" target="_blank">
<span class="sr-only">RSS feed</span>
<Icon aria-hidden="true" class="h-6 w-6" focusable="false" name="mdi:rss" />
</a>
diff --git a/src/pages/micro/rss.xml.ts b/src/pages/micro/rss.xml.ts
index ce25129..1fd6f53 100644
--- a/src/pages/micro/rss.xml.ts
+++ b/src/pages/micro/rss.xml.ts
@@ -1,8 +1,9 @@
import { getCollection } from "astro:content";
import rss from "@astrojs/rss";
+import type { APIContext } from "astro";
import { siteConfig } from "@/site.config";
-export const GET = async () => {
+export const GET = async (context: APIContext) => {
// Get only Pleroma posts
const allMicro = await getCollection("micro").catch(() => []); // Fallback to empty array if micro collection fails
@@ -11,15 +12,38 @@ export const GET = async () => {
(a, b) => b.data.publishDate.getTime() - a.data.publishDate.getTime(),
);
- return rss({
- title: siteConfig.title,
- description: siteConfig.description,
- site: import.meta.env.SITE,
- items: allMicroPosts.map((post) => ({
+ // Generate RSS items with full content and images
+ const items = allMicroPosts.map((post) => {
+ // Get the pre-rendered HTML from the post
+ let fullContent = post.rendered?.html || post.body || "";
+
+ // Append images if available
+ if (post.data.attachments && post.data.attachments.length > 0) {
+ const imagesHtml = post.data.attachments
+ .map(
+ (att: { url: string; type: string }) =>
+ `<p><img src="${att.url}" alt="Attachment" style="max-width: 100%; height: auto;" /></p>`,
+ )
+ .join("");
+ fullContent += imagesHtml;
+ }
+
+ return {
title: post.data.title,
pubDate: post.data.publishDate,
link: `micro/${post.id}/`,
description: post.data.description,
- })),
+ content: fullContent,
+ };
+ });
+
+ const site = context.site || import.meta.env.SITE;
+
+ return rss({
+ title: siteConfig.title,
+ description: siteConfig.description,
+ site,
+ items,
+ customData: `<atom:link href="${site}micro/rss.xml" rel="self" type="application/rss+xml" xmlns:atom="http://www.w3.org/2005/Atom" />`,
});
};
diff --git a/src/pages/rss.xml.ts b/src/pages/rss.xml.ts
index 8a6525d..39f3964 100644
--- a/src/pages/rss.xml.ts
+++ b/src/pages/rss.xml.ts
@@ -1,19 +1,21 @@
import rss from "@astrojs/rss";
+import type { APIContext } from "astro";
import { getAllPosts } from "@/data/post";
import { siteConfig } from "@/site.config";
-export const GET = async () => {
+/**
+ * Build the RSS feed for all blog posts.
+ *
+ * The site base is resolved once (request context first, build-time env as
+ * fallback) and reused for both the channel `site` and the atom self link, so
+ * the self link can never be rendered from an undefined `context.site`.
+ *
+ * @param context - Astro API context; only `context.site` is read.
+ * @returns The response produced by `rss()`.
+ * @throws Error when no site URL is available from either source.
+ */
+export const GET = async (context: APIContext) => {
const posts = await getAllPosts();
+	const site = context.site || import.meta.env.SITE;
+	if (!site) {
+		// Fail with a clear message instead of emitting a self link that starts with "undefined"
+		throw new Error("Cannot build rss.xml: no site URL is configured");
+	}
+	// Resolve against the base so a string base without a trailing slash still yields a valid URL
+	const selfLink = new URL("rss.xml", site).href;
return rss({
title: siteConfig.title,
description: siteConfig.description,
- site: import.meta.env.SITE,
+	site,
items: posts.map((post) => ({
title: post.data.title,
description: post.data.description,
pubDate: post.data.publishDate,
link: `posts/${post.id}/`,
})),
+	customData: `<atom:link href="${selfLink}" rel="self" type="application/rss+xml" xmlns:atom="http://www.w3.org/2005/Atom" />`,
});
};