summary | refs | log | tree | commit | diff
path: root/src/loaders
diff options
context:
space:
mode:
Diffstat (limited to 'src/loaders')
-rw-r--r-- src/loaders/pleroma.ts 155
1 files changed, 122 insertions, 33 deletions
diff --git a/src/loaders/pleroma.ts b/src/loaders/pleroma.ts
index 73d11da..7e1ccb8 100644
--- a/src/loaders/pleroma.ts
+++ b/src/loaders/pleroma.ts
@@ -38,6 +38,37 @@ interface PleromaStatus {
visibility: string;
}
/**
 * Extract the `max_id` cursor for the next page from an HTTP `Link` header.
 *
 * Typical header shape:
 *   <url?max_id=123>; rel="next", <url?since_id=456>; rel="prev"
 *
 * Each link is located by its `<...>` target rather than by splitting the
 * header on commas, so a comma inside a URL does not break parsing. The
 * `rel` parameter is accepted quoted or unquoted, in any letter case, and
 * may list several space-separated relation types. Relative link targets
 * are resolved against a placeholder origin, because only the query string
 * is inspected.
 *
 * @param linkHeader Raw `Link` header value, or `null` when the header is absent.
 * @returns The `max_id` query value of the first `next` link, or `null` when
 *   the header is missing/empty, has no `next` link, or that link carries no
 *   `max_id` parameter.
 */
function parseNextPageMaxId(linkHeader: string | null): string | null {
  if (!linkHeader) {
    return null;
  }

  // Group 1: link target inside the angle brackets.
  // Group 2: everything up to the next "<", i.e. this link's parameters.
  const linkPattern = /<([^>]+)>([^<]*)/g;
  // Matches `rel="a b"` (group 1) or `rel=a` (group 2).
  const relPattern = /;\s*rel\s*=\s*(?:"([^"]*)"|([^\s;,"]+))/i;

  for (const entry of linkHeader.matchAll(linkPattern)) {
    const target = entry[1] ?? "";
    const linkParams = entry[2] ?? "";

    const relMatch = relPattern.exec(linkParams);
    const relTypes = (relMatch?.[1] ?? relMatch?.[2] ?? "")
      .toLowerCase()
      .split(/\s+/);
    if (!relTypes.includes("next")) {
      continue;
    }

    try {
      // The base is only used when `target` is relative; it never leaks out
      // because only the search parameters are read.
      const url = new URL(target, "http://relative.invalid");
      return url.searchParams.get("max_id");
    } catch {
      // Target could not be parsed as a URL even with a base; fall through
      // and keep looking at the remaining links (best-effort parsing).
    }
  }

  return null;
}
+
async function getAccountId(
instanceUrl: string,
username: string,
@@ -86,50 +117,108 @@ async function fetchAccountStatuses(
maxPosts: number,
logger: any,
): Promise<PleromaStatus[]> {
- let response: Response | undefined;
- let lastError: unknown;
+ const allStatuses: PleromaStatus[] = [];
+ let maxId: string | null = null;
+ let pageCount = 0;
+ const pageLimit = 40; // Mastodon/Pleroma API max per page
+ const fetchAll = maxPosts === -1;
+
+ // Fetch pages until we have enough posts or no more pages available
+ while (fetchAll || allStatuses.length < maxPosts) {
+ pageCount++;
+ let response: Response | undefined;
+ let lastError: unknown;
+
+ // Build URL with pagination parameters
+ // If fetching all, always use pageLimit; otherwise calculate remaining
+ const requestLimit = fetchAll ? pageLimit : Math.min(pageLimit, maxPosts - allStatuses.length);
+ const params = new URLSearchParams({
+ limit: String(requestLimit),
+ exclude_replies: "true",
+ exclude_reblogs: "true",
+ });
- // Add retry logic for network issues
- for (let attempt = 1; attempt <= 3; attempt++) {
- try {
- logger.info(`Attempt ${attempt} to fetch statuses...`);
+ if (maxId) {
+ params.set("max_id", maxId);
+ }
- const statusesUrl = `${instanceUrl}/api/v1/accounts/${accountId}/statuses?limit=${maxPosts}&exclude_replies=true&exclude_reblogs=true`;
+ const statusesUrl = `${instanceUrl}/api/v1/accounts/${accountId}/statuses?${params.toString()}`;
- // Create timeout controller
- const controller = new AbortController();
- const timeoutId = setTimeout(() => controller.abort(), 10000);
+ // Add retry logic for network issues
+ for (let attempt = 1; attempt <= 3; attempt++) {
+ try {
+ const modeMsg = fetchAll ? " [fetching all posts]" : ` [target: ${maxPosts}]`;
+ logger.info(
+ `Attempt ${attempt} to fetch statuses page ${pageCount}${maxId ? ` (max_id: ${maxId})` : ""}${modeMsg}...`,
+ );
+
+ // Create timeout controller
+ const controller = new AbortController();
+ const timeoutId = setTimeout(() => controller.abort(), 10000);
+
+ response = await fetch(statusesUrl, {
+ headers: {
+ "User-Agent": "Astro Blog (pleroma-loader)",
+ },
+ signal: controller.signal,
+ });
- response = await fetch(statusesUrl, {
- headers: {
- "User-Agent": "Astro Blog (pleroma-loader)",
- },
- signal: controller.signal,
- });
+ clearTimeout(timeoutId);
- clearTimeout(timeoutId);
+ if (response.ok) {
+ break; // Success, exit retry loop
+ }
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+ } catch (error) {
+ lastError = error;
+ logger.warn(`Attempt ${attempt} failed: ${error}`);
- if (response.ok) {
- break; // Success, exit retry loop
- }
- throw new Error(`HTTP ${response.status}: ${response.statusText}`);
- } catch (error) {
- lastError = error;
- logger.warn(`Attempt ${attempt} failed: ${error}`);
-
- if (attempt < 3) {
- logger.info("Retrying in 2 seconds...");
- await new Promise((resolve) => setTimeout(resolve, 2000));
+ if (attempt < 3) {
+ logger.info("Retrying in 2 seconds...");
+ await new Promise((resolve) => setTimeout(resolve, 2000));
+ }
}
}
- }
- if (!response || !response.ok) {
- throw new Error(`Failed to fetch statuses after 3 attempts. Last error: ${lastError}`);
+ if (!response || !response.ok) {
+ throw new Error(`Failed to fetch statuses after 3 attempts. Last error: ${lastError}`);
+ }
+
+ const statuses: PleromaStatus[] = await response.json();
+ logger.info(`Fetched ${statuses.length} statuses from page ${pageCount}`);
+
+ // If no statuses returned, we've reached the end
+ if (statuses.length === 0) {
+ logger.info("No more statuses available");
+ break;
+ }
+
+ // Add statuses to our accumulated list
+ allStatuses.push(...statuses);
+
+ // Parse Link header to get next page max_id
+ const linkHeader = response.headers.get("link");
+ const nextMaxId = parseNextPageMaxId(linkHeader);
+
+ if (!nextMaxId) {
+ logger.info("No more pages available (no next link in header)");
+ break;
+ }
+
+ // If the max_id hasn't changed, we're stuck in a loop - break
+ if (nextMaxId === maxId) {
+ logger.warn("Pagination returned same max_id, stopping to prevent infinite loop");
+ break;
+ }
+
+ maxId = nextMaxId;
}
- const statuses: PleromaStatus[] = await response.json();
- return statuses;
+ const summaryMsg = fetchAll
+ ? `Total fetched: ${allStatuses.length} statuses (all available) across ${pageCount} page(s)`
+ : `Total fetched: ${allStatuses.length} statuses (target: ${maxPosts}) across ${pageCount} page(s)`;
+ logger.info(summaryMsg);
+ return allStatuses;
}
function isFilteredStatus(status: PleromaStatus): boolean {