| author | Dawid Rycerz <dawid@rycerz.xyz> | 2026-01-12 18:21:12 +0100 |
|---|---|---|
| committer | Dawid Rycerz <dawid@rycerz.xyz> | 2026-01-12 19:11:50 +0100 |
| commit | 51aa63873681216026d518cde4abeca307818a4b (patch) | |
| tree | d57b19a6ed8ce2b7303191a668de6ed7f878e7a8 /src | |
| parent | 4ed3a82a8b8f111bed88559bf8d601bb7d947df9 (diff) | |
Add infinite posts downloads
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/components/BaseHead.astro | 3 |
| -rw-r--r-- | src/content.config.ts | 11 |
| -rw-r--r-- | src/loaders/pleroma.ts | 155 |
| -rw-r--r-- | src/pages/micro/[...page].astro | 6 |
| -rw-r--r-- | src/pages/micro/rss.xml.ts | 38 |
| -rw-r--r-- | src/pages/rss.xml.ts | 6 |
6 files changed, 170 insertions, 49 deletions
```diff
diff --git a/src/components/BaseHead.astro b/src/components/BaseHead.astro
index d65dd9a..cb39d4a 100644
--- a/src/components/BaseHead.astro
+++ b/src/components/BaseHead.astro
@@ -86,6 +86,7 @@ const socialImageURL = new URL(ogImage ? ogImage : "/social-card.png", Astro.url
 {/* Plausible Analytics */}
 <link rel="dns-prefetch" href="//analytics.craftknight.com" />
 <script
+	is:inline
 	type="text/javascript"
 	defer
 	data-domain="rycerz.xyz"
@@ -93,7 +94,7 @@ const socialImageURL = new URL(ogImage ? ogImage : "/social-card.png", Astro.url
 	data-cfasync="false"
 	src="https://analytics.craftknight.com/js/plausible.outbound-links.js?ver=2.1.3"
 	id="plausible"></script>
-<script type="text/javascript" id="plausible-analytics-js-after">
+<script is:inline type="text/javascript" id="plausible-analytics-js-after">
 	/* <![CDATA[ */
 	window.plausible =
 		window.plausible ||
```

```diff
diff --git a/src/content.config.ts b/src/content.config.ts
index 4b7b230..2ae9fbf 100644
--- a/src/content.config.ts
+++ b/src/content.config.ts
@@ -41,12 +41,21 @@ const micro = defineCollection({
 	loader: pleromaLoader({
 		instanceUrl: "https://social.craftknight.com",
 		username: "dawid",
-		maxPosts: 50,
+		maxPosts: -1, // Fetch all posts
 		allowedTags: ["#miniblog", "#vanlife", "#microblog", "#giereczkowo"],
 	}),
 	schema: baseSchema.extend({
 		description: z.string().optional(),
 		publishDate: z.date().or(z.string().transform((val) => new Date(val))),
+		sourceUrl: z.string().optional(),
+		attachments: z
+			.array(
+				z.object({
+					url: z.string(),
+					type: z.string(),
+				}),
+			)
+			.optional(),
 	}),
 });
 
```
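For orientation, a hedged sketch of what a micro entry's data could look like after this schema extension; the field values are invented, and since `sourceUrl` and `attachments` are optional, entries loaded before this change still validate:

```ts
// Invented example values; the shape follows the extended `micro` schema above.
const exampleMicroEntry = {
	title: "Parked above the fjord for the night",
	description: "Short status mirrored from Pleroma",
	publishDate: new Date("2026-01-10T09:30:00+01:00"),
	sourceUrl: "https://social.craftknight.com/notice/AbCdEf123",
	attachments: [{ url: "https://social.craftknight.com/media/example.jpg", type: "image" }],
};
```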
```diff
diff --git a/src/loaders/pleroma.ts b/src/loaders/pleroma.ts
index 73d11da..7e1ccb8 100644
--- a/src/loaders/pleroma.ts
+++ b/src/loaders/pleroma.ts
@@ -38,6 +38,37 @@ interface PleromaStatus {
 	visibility: string;
 }
 
+/**
+ * Parse the Link header to extract the max_id for the next page
+ * Link header format: <url?max_id=123>; rel="next", <url?since_id=456>; rel="prev"
+ */
+function parseNextPageMaxId(linkHeader: string | null): string | null {
+	if (!linkHeader) {
+		return null;
+	}
+
+	// Split by comma to get individual links
+	const links = linkHeader.split(",");
+
+	for (const link of links) {
+		// Check if this is the "next" rel link
+		if (link.includes('rel="next"')) {
+			// Extract URL from angle brackets
+			const urlMatch = link.match(/<([^>]+)>/);
+			if (urlMatch?.[1]) {
+				// Parse the URL to extract max_id parameter
+				try {
+					const url = new URL(urlMatch[1]);
+					const maxId = url.searchParams.get("max_id");
+					return maxId;
+				} catch {}
+			}
+		}
+	}
+
+	return null;
+}
+
 async function getAccountId(
 	instanceUrl: string,
 	username: string,
@@ -86,50 +117,108 @@ async function fetchAccountStatuses(
 	maxPosts: number,
 	logger: any,
 ): Promise<PleromaStatus[]> {
-	let response: Response | undefined;
-	let lastError: unknown;
+	const allStatuses: PleromaStatus[] = [];
+	let maxId: string | null = null;
+	let pageCount = 0;
+	const pageLimit = 40; // Mastodon/Pleroma API max per page
+	const fetchAll = maxPosts === -1;
+
+	// Fetch pages until we have enough posts or no more pages available
+	while (fetchAll || allStatuses.length < maxPosts) {
+		pageCount++;
+		let response: Response | undefined;
+		let lastError: unknown;
+
+		// Build URL with pagination parameters
+		// If fetching all, always use pageLimit; otherwise calculate remaining
+		const requestLimit = fetchAll ? pageLimit : Math.min(pageLimit, maxPosts - allStatuses.length);
+		const params = new URLSearchParams({
+			limit: String(requestLimit),
+			exclude_replies: "true",
+			exclude_reblogs: "true",
+		});
 
-	// Add retry logic for network issues
-	for (let attempt = 1; attempt <= 3; attempt++) {
-		try {
-			logger.info(`Attempt ${attempt} to fetch statuses...`);
+		if (maxId) {
+			params.set("max_id", maxId);
+		}
 
-			const statusesUrl = `${instanceUrl}/api/v1/accounts/${accountId}/statuses?limit=${maxPosts}&exclude_replies=true&exclude_reblogs=true`;
+		const statusesUrl = `${instanceUrl}/api/v1/accounts/${accountId}/statuses?${params.toString()}`;
 
-			// Create timeout controller
-			const controller = new AbortController();
-			const timeoutId = setTimeout(() => controller.abort(), 10000);
+		// Add retry logic for network issues
+		for (let attempt = 1; attempt <= 3; attempt++) {
+			try {
+				const modeMsg = fetchAll ? " [fetching all posts]" : ` [target: ${maxPosts}]`;
+				logger.info(
+					`Attempt ${attempt} to fetch statuses page ${pageCount}${maxId ? ` (max_id: ${maxId})` : ""}${modeMsg}...`,
+				);
+
+				// Create timeout controller
+				const controller = new AbortController();
+				const timeoutId = setTimeout(() => controller.abort(), 10000);
+
+				response = await fetch(statusesUrl, {
+					headers: {
+						"User-Agent": "Astro Blog (pleroma-loader)",
+					},
+					signal: controller.signal,
+				});
 
-			response = await fetch(statusesUrl, {
-				headers: {
-					"User-Agent": "Astro Blog (pleroma-loader)",
-				},
-				signal: controller.signal,
-			});
+				clearTimeout(timeoutId);
 
-			clearTimeout(timeoutId);
+				if (response.ok) {
+					break; // Success, exit retry loop
+				}
+				throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+			} catch (error) {
+				lastError = error;
+				logger.warn(`Attempt ${attempt} failed: ${error}`);
 
-			if (response.ok) {
-				break; // Success, exit retry loop
-			}
-			throw new Error(`HTTP ${response.status}: ${response.statusText}`);
-		} catch (error) {
-			lastError = error;
-			logger.warn(`Attempt ${attempt} failed: ${error}`);
-
-			if (attempt < 3) {
-				logger.info("Retrying in 2 seconds...");
-				await new Promise((resolve) => setTimeout(resolve, 2000));
+				if (attempt < 3) {
+					logger.info("Retrying in 2 seconds...");
+					await new Promise((resolve) => setTimeout(resolve, 2000));
+				}
 			}
 		}
-	}
 
-	if (!response || !response.ok) {
-		throw new Error(`Failed to fetch statuses after 3 attempts. Last error: ${lastError}`);
+		if (!response || !response.ok) {
+			throw new Error(`Failed to fetch statuses after 3 attempts. Last error: ${lastError}`);
+		}
+
+		const statuses: PleromaStatus[] = await response.json();
+		logger.info(`Fetched ${statuses.length} statuses from page ${pageCount}`);
+
+		// If no statuses returned, we've reached the end
+		if (statuses.length === 0) {
+			logger.info("No more statuses available");
+			break;
+		}
+
+		// Add statuses to our accumulated list
+		allStatuses.push(...statuses);
+
+		// Parse Link header to get next page max_id
+		const linkHeader = response.headers.get("link");
+		const nextMaxId = parseNextPageMaxId(linkHeader);
+
+		if (!nextMaxId) {
+			logger.info("No more pages available (no next link in header)");
+			break;
+		}
+
+		// If the max_id hasn't changed, we're stuck in a loop - break
+		if (nextMaxId === maxId) {
+			logger.warn("Pagination returned same max_id, stopping to prevent infinite loop");
+			break;
+		}
+
+		maxId = nextMaxId;
 	}
 
-	const statuses: PleromaStatus[] = await response.json();
-	return statuses;
+	const summaryMsg = fetchAll
+		? `Total fetched: ${allStatuses.length} statuses (all available) across ${pageCount} page(s)`
+		: `Total fetched: ${allStatuses.length} statuses (target: ${maxPosts}) across ${pageCount} page(s)`;
+	logger.info(summaryMsg);
+	return allStatuses;
 }
 
 function isFilteredStatus(status: PleromaStatus): boolean {
```
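For context, the pagination the loader now follows is cursor-based: Mastodon/Pleroma return the next page's `max_id` in an HTTP `Link` header rather than in the response body. A minimal sketch of what the new `parseNextPageMaxId` helper extracts from such a header, assuming it were exported from `src/loaders/pleroma.ts` for a quick test; the host and status IDs below are invented:

```ts
// Assumes the helper were exported for testing; the import path is hypothetical.
import { parseNextPageMaxId } from "./loaders/pleroma";

// Illustrative Link header in the shape documented above the helper.
const linkHeader =
	'<https://social.example/api/v1/accounts/1/statuses?limit=40&max_id=109348>; rel="next", ' +
	'<https://social.example/api/v1/accounts/1/statuses?limit=40&since_id=110212>; rel="prev"';

// Keeps only the rel="next" entry, pulls the URL out of the angle brackets,
// and reads its max_id query parameter for the next request.
console.log(parseNextPageMaxId(linkHeader)); // "109348"

// No header (or no rel="next" entry) means the last page was reached,
// which is what ends the while loop in fetchAccountStatuses.
console.log(parseNextPageMaxId(null)); // null
```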
```diff
diff --git a/src/pages/micro/[...page].astro b/src/pages/micro/[...page].astro
index 93a35de..edfecab 100644
--- a/src/pages/micro/[...page].astro
+++ b/src/pages/micro/[...page].astro
@@ -51,11 +51,7 @@ const paginationProps = {
 <PageLayout meta={meta}>
 	<section>
 		<h1 class="title mb-6 flex items-center gap-3">
-			Micro <a
-				class="text-accent"
-				href="https://social.craftknight.com/users/dawid.rss"
-				target="_blank"
-			>
+			Micro <a class="text-accent" href="/micro/rss.xml" target="_blank">
 				<span class="sr-only">RSS feed</span>
 				<Icon aria-hidden="true" class="h-6 w-6" focusable="false" name="mdi:rss" />
 			</a>
```

```diff
diff --git a/src/pages/micro/rss.xml.ts b/src/pages/micro/rss.xml.ts
index ce25129..1fd6f53 100644
--- a/src/pages/micro/rss.xml.ts
+++ b/src/pages/micro/rss.xml.ts
@@ -1,8 +1,9 @@
 import { getCollection } from "astro:content";
 import rss from "@astrojs/rss";
+import type { APIContext } from "astro";
 import { siteConfig } from "@/site.config";
 
-export const GET = async () => {
+export const GET = async (context: APIContext) => {
 	// Get only Pleroma posts
 	const allMicro = await getCollection("micro").catch(() => []); // Fallback to empty array if micro collection fails
 
@@ -11,15 +12,38 @@
 		(a, b) => b.data.publishDate.getTime() - a.data.publishDate.getTime(),
 	);
 
-	return rss({
-		title: siteConfig.title,
-		description: siteConfig.description,
-		site: import.meta.env.SITE,
-		items: allMicroPosts.map((post) => ({
+	// Generate RSS items with full content and images
+	const items = allMicroPosts.map((post) => {
+		// Get the pre-rendered HTML from the post
+		let fullContent = post.rendered?.html || post.body || "";
+
+		// Append images if available
+		if (post.data.attachments && post.data.attachments.length > 0) {
+			const imagesHtml = post.data.attachments
+				.map(
+					(att: { url: string; type: string }) =>
+						`<p><img src="${att.url}" alt="Attachment" style="max-width: 100%; height: auto;" /></p>`,
+				)
+				.join("");
+			fullContent += imagesHtml;
+		}
+
+		return {
 			title: post.data.title,
 			pubDate: post.data.publishDate,
 			link: `micro/${post.id}/`,
 			description: post.data.description,
-		})),
+			content: fullContent,
+		};
+	});
+
+	const site = context.site || import.meta.env.SITE;
+
+	return rss({
+		title: siteConfig.title,
+		description: siteConfig.description,
+		site,
+		items,
+		customData: `<atom:link href="${site}micro/rss.xml" rel="self" type="application/rss+xml" xmlns:atom="http://www.w3.org/2005/Atom" />`,
 	});
 };
```

```diff
diff --git a/src/pages/rss.xml.ts b/src/pages/rss.xml.ts
index 8a6525d..39f3964 100644
--- a/src/pages/rss.xml.ts
+++ b/src/pages/rss.xml.ts
@@ -1,19 +1,21 @@
 import rss from "@astrojs/rss";
+import type { APIContext } from "astro";
 import { getAllPosts } from "@/data/post";
 import { siteConfig } from "@/site.config";
 
-export const GET = async () => {
+export const GET = async (context: APIContext) => {
 	const posts = await getAllPosts();
 
 	return rss({
 		title: siteConfig.title,
 		description: siteConfig.description,
-		site: import.meta.env.SITE,
+		site: context.site || import.meta.env.SITE,
 		items: posts.map((post) => ({
 			title: post.data.title,
 			description: post.data.description,
 			pubDate: post.data.publishDate,
 			link: `posts/${post.id}/`,
 		})),
+		customData: `<atom:link href="${context.site}rss.xml" rel="self" type="application/rss+xml" xmlns:atom="http://www.w3.org/2005/Atom" />`,
 	});
 };
```
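One assumption both feed endpoints now share: the self-referencing `<atom:link>` in `customData` is built by plain string concatenation, so it only resolves correctly when the site value's string form ends with a slash. A small sketch under that assumption; the site URL here stands in for whatever `astro.config` sets:

```ts
// Assumed site value; in these routes it comes from context.site or import.meta.env.SITE.
const site = new URL("https://rycerz.xyz/");

// Mirrors the customData template used in src/pages/rss.xml.ts above.
const selfLink = `<atom:link href="${site}rss.xml" rel="self" type="application/rss+xml" xmlns:atom="http://www.w3.org/2005/Atom" />`;

console.log(selfLink);
// <atom:link href="https://rycerz.xyz/rss.xml" rel="self" type="application/rss+xml" ... />
// If the site value were a plain string without the trailing slash, the href would
// collapse into "https://rycerz.xyzrss.xml", breaking the feed's self link.
```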
