use crate::state;
use anyhow::{Context as _, Result};
use log::{debug, info, warn};
use std::path::Path;

/// Result of a cleanup operation.
#[derive(Debug, Default)]
pub struct CleanupResult {
    /// Number of build directories removed.
    pub builds_removed: u32,
    /// Number of log files removed.
    pub logs_removed: u32,
}

/// Clean up old build directories and their corresponding log files.
///
/// Keeps the `max_to_keep` most recent builds and removes older ones.
/// Also removes the corresponding log files for each removed build.
///
/// # Arguments
/// * `base_dir` - Base witryna directory (e.g., /var/lib/witryna)
/// * `log_dir` - Log directory (e.g., /var/log/witryna)
/// * `site_name` - The site name
/// * `max_to_keep` - Maximum number of builds to keep (0 = keep all)
///
/// # Errors
///
/// Returns an error if the builds directory cannot be listed. Individual
/// removal failures are logged as warnings but do not cause the function
/// to return an error.
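///
/// # Examples
///
/// A minimal usage sketch; the `witryna::cleanup` module path and the
/// filesystem paths are assumptions for illustration:
///
/// ```ignore
/// # async fn run() -> anyhow::Result<()> {
/// use std::path::Path;
///
/// // Keep the five most recent builds for "my-site" (illustrative paths).
/// let result = witryna::cleanup::cleanup_old_builds(
///     Path::new("/var/lib/witryna"),
///     Path::new("/var/log/witryna"),
///     "my-site",
///     5,
/// )
/// .await?;
/// println!("removed {} builds", result.builds_removed);
/// # Ok(())
/// # }
/// ```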
pub async fn cleanup_old_builds(
    base_dir: &Path,
    log_dir: &Path,
    site_name: &str,
    max_to_keep: u32,
) -> Result<CleanupResult> {
    // If max_to_keep is 0, keep all builds
    if max_to_keep == 0 {
        debug!("[{site_name}] max_builds_to_keep is 0, skipping cleanup");
        return Ok(CleanupResult::default());
    }

    let builds_dir = base_dir.join("builds").join(site_name);
    let site_log_dir = log_dir.join(site_name);

    // Check if builds directory exists
    if !builds_dir.exists() {
        debug!("[{site_name}] builds directory does not exist, skipping cleanup");
        return Ok(CleanupResult::default());
    }

    // List all build directories (excluding the 'current' symlink)
    let mut build_timestamps = list_build_timestamps(&builds_dir).await?;

    // Sort in descending order (newest first)
    build_timestamps.sort_by(|a, b| b.cmp(a));

    let mut result = CleanupResult::default();

    // Calculate how many to remove
    let to_remove = build_timestamps.len().saturating_sub(max_to_keep as usize);
    if to_remove == 0 {
        debug!(
            "[{site_name}] no builds to remove: count={} max={max_to_keep}",
            build_timestamps.len()
        );
    }

    // Remove the oldest builds (they sit at the end after the descending sort)
    let mut removed_timestamps = Vec::new();
    for timestamp in build_timestamps.iter().skip(max_to_keep as usize) {
        let build_path = builds_dir.join(timestamp);
        let log_path = site_log_dir.join(format!("{timestamp}.log"));

        // Remove build directory
        match tokio::fs::remove_dir_all(&build_path).await {
            Ok(()) => {
                debug!("removed old build: {}", build_path.display());
                result.builds_removed += 1;
                removed_timestamps.push(timestamp.clone());
            }
            Err(e) => {
                warn!(
                    "failed to remove old build: path={} error={e}",
                    build_path.display()
                );
            }
        }

        // Remove corresponding log file (if it exists)
        if log_path.exists() {
            match tokio::fs::remove_file(&log_path).await {
                Ok(()) => {
                    debug!("removed old log: {}", log_path.display());
                    result.logs_removed += 1;
                }
                Err(e) => {
                    warn!(
                        "failed to remove old log: path={} error={e}",
                        log_path.display()
                    );
                }
            }
        }

        // Remove corresponding hook log file (if it exists)
        let hook_log_path = site_log_dir.join(format!("{timestamp}-hook.log"));
        match tokio::fs::remove_file(&hook_log_path).await {
            Ok(()) => {
                debug!("removed old hook log: {}", hook_log_path.display());
                result.logs_removed += 1;
            }
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                // Not every build has a hook; silently skip
            }
            Err(e) => {
                warn!(
                    "failed to remove old hook log: path={} error={e}",
                    hook_log_path.display()
                );
            }
        }
    }

    // Prune removed builds from state.json
    state::remove_builds(base_dir, site_name, &removed_timestamps).await;

    // Remove orphaned temp files (crash recovery)
    if site_log_dir.exists()
        && let Ok(mut entries) = tokio::fs::read_dir(&site_log_dir).await
    {
        while let Ok(Some(entry)) = entries.next_entry().await {
            let name = entry.file_name();
            if name.to_string_lossy().ends_with(".tmp") {
                let path = entry.path();
                match tokio::fs::remove_file(&path).await {
                    Ok(()) => {
                        debug!("removed orphaned temp file: {}", path.display());
                    }
                    Err(e) => {
                        warn!(
                            "failed to remove orphaned temp file: path={} error={e}",
                            path.display()
                        );
                    }
                }
            }
        }
    }

    if result.builds_removed > 0 || result.logs_removed > 0 {
        info!(
            "[{site_name}] cleanup completed: builds_removed={} logs_removed={}",
            result.builds_removed, result.logs_removed
        );
    }

    Ok(result)
}

/// List all build timestamps in a builds directory.
///
/// Returns directory names that look like timestamps, excluding the 'current' symlink.
///
/// # Errors
///
/// Returns an error if the builds directory cannot be read or entries cannot be inspected.
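///
/// # Examples
///
/// A minimal sketch; the `witryna::cleanup` module path and the directory
/// are assumptions for illustration:
///
/// ```ignore
/// # async fn run() -> anyhow::Result<()> {
/// let builds_dir = std::path::Path::new("/var/lib/witryna/builds/my-site");
/// // Timestamps come back unsorted; sort descending for newest-first,
/// // as cleanup_old_builds does internally.
/// let mut timestamps = witryna::cleanup::list_build_timestamps(builds_dir).await?;
/// timestamps.sort_by(|a, b| b.cmp(a));
/// # Ok(())
/// # }
/// ```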
pub async fn list_build_timestamps(builds_dir: &Path) -> Result<Vec<String>> {
    let mut timestamps = Vec::new();

    let mut entries = tokio::fs::read_dir(builds_dir)
        .await
        .with_context(|| format!("failed to read builds directory: {}", builds_dir.display()))?;

    while let Some(entry) = entries.next_entry().await? {
        let name = entry.file_name();
        let name_str = name.to_string_lossy();

        // Skip 'current' symlink and any other non-timestamp entries
        if name_str == "current" {
            continue;
        }

        // Verify it's a directory (not a file or broken symlink)
        let file_type = entry.file_type().await?;
        if !file_type.is_dir() {
            continue;
        }

        // Basic timestamp format validation: YYYYMMDD-HHMMSS-...
        if looks_like_timestamp(&name_str) {
            timestamps.push(name_str.to_string());
        }
    }

    Ok(timestamps)
}

/// Check if a string looks like a valid timestamp format.
///
/// Expected format: YYYYMMDD-HHMMSS-microseconds (e.g., 20260126-143000-123456)
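///
/// # Examples
///
/// A small illustrative check (the `witryna::cleanup` module path is assumed):
///
/// ```ignore
/// use witryna::cleanup::looks_like_timestamp;
///
/// assert!(looks_like_timestamp("20260126-143000-123456"));
/// assert!(!looks_like_timestamp("current"));
/// assert!(!looks_like_timestamp("2026-01-26"));
/// ```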
#[must_use]
pub fn looks_like_timestamp(s: &str) -> bool {
    let parts: Vec<&str> = s.split('-').collect();
    let [date, time, micros, ..] = parts.as_slice() else {
        return false;
    };

    // First part should be 8 digits (YYYYMMDD)
    if date.len() != 8 || !date.chars().all(|c| c.is_ascii_digit()) {
        return false;
    }

    // Second part should be 6 digits (HHMMSS)
    if time.len() != 6 || !time.chars().all(|c| c.is_ascii_digit()) {
        return false;
    }

    // Third part should be microseconds (digits)
    if micros.is_empty() || !micros.chars().all(|c| c.is_ascii_digit()) {
        return false;
    }

    true
}

#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::indexing_slicing)]
mod tests {
    use super::*;
    use crate::test_support::{cleanup, temp_dir};
    use tokio::fs;

    async fn create_build_and_log(base_dir: &Path, log_dir: &Path, site: &str, timestamp: &str) {
        let build_dir = base_dir.join("builds").join(site).join(timestamp);
        let site_log_dir = log_dir.join(site);
        let log_file = site_log_dir.join(format!("{timestamp}.log"));

        fs::create_dir_all(&build_dir).await.unwrap();
        fs::create_dir_all(&site_log_dir).await.unwrap();
        fs::write(&log_file, "test log content").await.unwrap();
        fs::write(build_dir.join("index.html"), "<html></html>")
            .await
            .unwrap();
    }

    #[tokio::test]
    async fn cleanup_removes_old_builds_and_logs() {
        let base_dir = temp_dir("cleanup-test").await;
        let log_dir = base_dir.join("logs");
        let site = "test-site";

        // Create 7 builds (keep 5, remove 2)
        let timestamps = [
            "20260126-100000-000001",
            "20260126-100000-000002",
            "20260126-100000-000003",
            "20260126-100000-000004",
            "20260126-100000-000005",
            "20260126-100000-000006",
            "20260126-100000-000007",
        ];
        for ts in &timestamps {
            create_build_and_log(&base_dir, &log_dir, site, ts).await;
        }

        let result = cleanup_old_builds(&base_dir, &log_dir, site, 5).await;
        assert!(result.is_ok(), "cleanup should succeed: {result:?}");
        let result = result.unwrap();
        assert_eq!(result.builds_removed, 2, "should remove 2 builds");
        assert_eq!(result.logs_removed, 2, "should remove 2 logs");

        // Verify oldest 2 are gone
        let builds_dir = base_dir.join("builds").join(site);
        assert!(!builds_dir.join("20260126-100000-000001").exists());
        assert!(!builds_dir.join("20260126-100000-000002").exists());

        // Verify newest 5 remain
        assert!(builds_dir.join("20260126-100000-000003").exists());
        assert!(builds_dir.join("20260126-100000-000007").exists());

        // Verify log cleanup
        let site_logs = log_dir.join(site);
        assert!(!site_logs.join("20260126-100000-000001.log").exists());
        assert!(site_logs.join("20260126-100000-000003.log").exists());

        cleanup(&base_dir).await;
    }

    #[tokio::test]
    async fn cleanup_with_fewer_builds_than_max() {
        let base_dir = temp_dir("cleanup-test").await;
        let log_dir = base_dir.join("logs");
        let site = "test-site";

        // Create only 3 builds (max is 5)
        for ts in &[
            "20260126-100000-000001",
            "20260126-100000-000002",
            "20260126-100000-000003",
        ] {
            create_build_and_log(&base_dir, &log_dir, site, ts).await;
        }

        let result = cleanup_old_builds(&base_dir, &log_dir, site, 5).await;
        assert!(result.is_ok());
        let result = result.unwrap();
        assert_eq!(result.builds_removed, 0, "should not remove any builds");
        assert_eq!(result.logs_removed, 0, "should not remove any logs");

        // Verify all builds remain
        let builds_dir = base_dir.join("builds").join(site);
        assert!(builds_dir.join("20260126-100000-000001").exists());
        assert!(builds_dir.join("20260126-100000-000002").exists());
        assert!(builds_dir.join("20260126-100000-000003").exists());

        cleanup(&base_dir).await;
    }

    #[tokio::test]
    async fn cleanup_preserves_current_symlink() {
        let base_dir = temp_dir("cleanup-test").await;
        let log_dir = base_dir.join("logs");
        let site = "test-site";

        // Create builds
        create_build_and_log(&base_dir, &log_dir, site, "20260126-100000-000001").await;
        create_build_and_log(&base_dir, &log_dir, site, "20260126-100000-000002").await;
        create_build_and_log(&base_dir, &log_dir, site, "20260126-100000-000003").await;

        // Create 'current' symlink
        let builds_dir = base_dir.join("builds").join(site);
        let current = builds_dir.join("current");
        let target = builds_dir.join("20260126-100000-000003");
        tokio::fs::symlink(&target, &current).await.unwrap();

        let result = cleanup_old_builds(&base_dir, &log_dir, site, 2).await;
        assert!(result.is_ok());
        let result = result.unwrap();
        assert_eq!(result.builds_removed, 1, "should remove 1 build");

        // Verify symlink still exists and points correctly
        assert!(current.exists(), "current symlink should exist");
        let link_target = fs::read_link(&current).await.unwrap();
        assert_eq!(link_target, target);

        cleanup(&base_dir).await;
    }

    #[tokio::test]
    async fn cleanup_handles_missing_logs_gracefully() {
        let base_dir = temp_dir("cleanup-test").await;
        let log_dir = base_dir.join("logs");
        let site = "test-site";

        // Create builds but only some logs
        let builds_dir = base_dir.join("builds").join(site);
        fs::create_dir_all(builds_dir.join("20260126-100000-000001"))
            .await
            .unwrap();
        fs::create_dir_all(builds_dir.join("20260126-100000-000002"))
            .await
            .unwrap();
        fs::create_dir_all(builds_dir.join("20260126-100000-000003"))
            .await
            .unwrap();

        // Only create log for one build
        let site_logs = log_dir.join(site);
        fs::create_dir_all(&site_logs).await.unwrap();
        fs::write(site_logs.join("20260126-100000-000001.log"), "log")
            .await
            .unwrap();

        let result = cleanup_old_builds(&base_dir, &log_dir, site, 2).await;
        assert!(result.is_ok(), "should succeed even with missing logs");
        let result = result.unwrap();
        assert_eq!(result.builds_removed, 1, "should remove 1 build");
        assert_eq!(result.logs_removed, 1, "should remove 1 log");

        cleanup(&base_dir).await;
    }

    #[tokio::test]
    async fn cleanup_with_max_zero_keeps_all() {
        let base_dir = temp_dir("cleanup-test").await;
        let log_dir = base_dir.join("logs");
        let site = "test-site";

        // Create builds
        for ts in &[
            "20260126-100000-000001",
            "20260126-100000-000002",
            "20260126-100000-000003",
        ] {
            create_build_and_log(&base_dir, &log_dir, site, ts).await;
        }

        let result = cleanup_old_builds(&base_dir, &log_dir, site, 0).await;
        assert!(result.is_ok());
        let result = result.unwrap();
        assert_eq!(result.builds_removed, 0, "max 0 should keep all");
        assert_eq!(result.logs_removed, 0);

        // Verify all builds remain
        let builds_dir = base_dir.join("builds").join(site);
        assert!(builds_dir.join("20260126-100000-000001").exists());
        assert!(builds_dir.join("20260126-100000-000002").exists());
        assert!(builds_dir.join("20260126-100000-000003").exists());

        cleanup(&base_dir).await;
    }

    #[tokio::test]
    async fn cleanup_nonexistent_builds_dir() {
        let base_dir = temp_dir("cleanup-test").await;
        let site = "nonexistent-site";
        let log_dir = base_dir.join("logs");

        let result = cleanup_old_builds(&base_dir, &log_dir, site, 5).await;
        assert!(result.is_ok(), "should succeed for nonexistent dir");
        let result = result.unwrap();
        assert_eq!(result.builds_removed, 0);
        assert_eq!(result.logs_removed, 0);

        cleanup(&base_dir).await;
    }
    #[tokio::test]
    async fn cleanup_does_not_delete_state_json() {
        let base_dir = temp_dir("cleanup-test").await;
        let log_dir = base_dir.join("logs");
        let site = "test-site";

        // Create 3 builds (keep 1 → remove 2)
        for ts in &[
            "20260126-100000-000001",
            "20260126-100000-000002",
            "20260126-100000-000003",
        ] {
            create_build_and_log(&base_dir, &log_dir, site, ts).await;
        }

        // Write a state.json in the builds dir
        let state_path = base_dir.join("builds").join(site).join("state.json");
        fs::write(&state_path, r#"{"status":"success"}"#)
            .await
            .unwrap();

        let result = cleanup_old_builds(&base_dir, &log_dir, site, 1).await;
        assert!(result.is_ok());
        let result = result.unwrap();
        assert_eq!(result.builds_removed, 2);

        // state.json must still exist
        assert!(state_path.exists(), "state.json must not be deleted");

        cleanup(&base_dir).await;
    }

    #[tokio::test]
    async fn cleanup_removes_orphaned_tmp_files() {
        let base_dir = temp_dir("cleanup-test").await;
        let log_dir = base_dir.join("logs");
        let site = "test-site";

        // Create a build so cleanup runs
        create_build_and_log(&base_dir, &log_dir, site, "20260126-100000-000001").await;

        // Create orphaned temp files in site log dir
        let site_log_dir = log_dir.join(site);
        fs::write(
            site_log_dir.join("20260126-100000-000001-stdout.tmp"),
            "orphan",
        )
        .await
        .unwrap();
        fs::write(
            site_log_dir.join("20260126-100000-000001-stderr.tmp"),
            "orphan",
        )
        .await
        .unwrap();
        fs::write(site_log_dir.join("random.tmp"), "orphan")
            .await
            .unwrap();

        assert!(
            site_log_dir
                .join("20260126-100000-000001-stdout.tmp")
                .exists()
        );

        // Run cleanup (max_to_keep=5 means no builds removed, but tmp files should go)
        let result = cleanup_old_builds(&base_dir, &log_dir, site, 5).await;
        assert!(result.is_ok());

        // Temp files should be gone
        assert!(
            !site_log_dir
                .join("20260126-100000-000001-stdout.tmp")
                .exists()
        );
        assert!(
            !site_log_dir
                .join("20260126-100000-000001-stderr.tmp")
                .exists()
        );
        assert!(!site_log_dir.join("random.tmp").exists());

        // Log file should still exist
        assert!(site_log_dir.join("20260126-100000-000001.log").exists());

        cleanup(&base_dir).await;
    }
}