diff options
Diffstat (limited to 'src/git.rs')
| -rw-r--r-- | src/git.rs | 1320 |
1 files changed, 1320 insertions, 0 deletions
diff --git a/src/git.rs b/src/git.rs
new file mode 100644
index 0000000..2193add
--- /dev/null
+++ b/src/git.rs
@@ -0,0 +1,1320 @@
use anyhow::{Context as _, Result, bail};
use std::path::Path;
use std::time::Duration;
use tokio::process::Command;
use tracing::{debug, error, info, warn};

/// Default timeout for git operations (used when not configured).
pub const GIT_TIMEOUT_DEFAULT: Duration = Duration::from_secs(60);

/// Default git clone depth (shallow clone with 1 commit).
pub const GIT_DEPTH_DEFAULT: u32 = 1;

/// Timeout for LFS operations (longer due to large file downloads)
const LFS_TIMEOUT: Duration = Duration::from_secs(300);

/// LFS pointer file signature (per Git LFS spec)
const LFS_POINTER_SIGNATURE: &str = "version https://git-lfs.github.com/spec/v1";

/// Maximum size for a valid LFS pointer file (per spec)
const LFS_POINTER_MAX_SIZE: u64 = 1024;

/// Build a `git` [`Command`] that ignores inherited repository overrides.
///
/// `GIT_DIR`, `GIT_WORK_TREE` and `GIT_INDEX_FILE` are removed from the
/// child's environment so that git locates the repository from the working
/// directory chosen via `.current_dir()` rather than from variables leaked
/// in by the parent process.
///
/// This is a defensive measure: production runs do not set these variables,
/// but a git hook that invokes `cargo test` (e.g. pre-commit) does, and the
/// tests would otherwise operate on the wrong repository.
fn git_command() -> Command {
    let mut git = Command::new("git");
    for inherited in ["GIT_DIR", "GIT_WORK_TREE", "GIT_INDEX_FILE"] {
        git.env_remove(inherited);
    }
    git
}

/// Create a git Command that allows the file:// protocol.
///
/// Git ≥ 2.38.1 disables file:// by default (CVE-2022-39253), but the
/// restriction targets local-clone hardlink attacks, not file:// transport.
/// Submodule URLs come from the trusted config, so this is safe.
/// Used only for submodule operations whose internal clones may use file://.
+fn git_command_allow_file_transport() -> Command { + let mut cmd = git_command(); + cmd.env("GIT_CONFIG_COUNT", "1") + .env("GIT_CONFIG_KEY_0", "protocol.file.allow") + .env("GIT_CONFIG_VALUE_0", "always"); + cmd +} + +/// Run a git command with timeout and standard error handling. +/// +/// Builds a `git` `Command`, optionally sets the working directory, +/// enforces a timeout, and converts non-zero exit into an `anyhow` error. +async fn run_git(args: &[&str], dir: Option<&Path>, timeout: Duration, op: &str) -> Result<()> { + run_git_cmd(git_command(), args, dir, timeout, op).await +} + +/// Like [`run_git`] but uses a pre-built `Command` (e.g. one that allows +/// the file:// protocol for submodule clones). +async fn run_git_cmd( + mut cmd: Command, + args: &[&str], + dir: Option<&Path>, + timeout: Duration, + op: &str, +) -> Result<()> { + cmd.args(args); + if let Some(d) = dir { + cmd.current_dir(d); + } + + let output = tokio::time::timeout(timeout, cmd.output()) + .await + .with_context(|| format!("{op} timed out"))? + .with_context(|| format!("failed to execute {op}"))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + bail!("{op} failed: {}", stderr.trim()); + } + + Ok(()) +} + +/// Synchronize a Git repository: clone if not exists, pull if exists. +/// Automatically initializes submodules and fetches LFS objects if needed. +/// +/// # Errors +/// +/// Returns an error if the clone, pull, submodule init, or LFS fetch fails. 
+pub async fn sync_repo( + repo_url: &str, + branch: &str, + clone_dir: &Path, + timeout: Duration, + depth: u32, +) -> Result<()> { + let is_pull = clone_dir.exists(); + + if is_pull { + pull(clone_dir, branch, timeout, depth).await?; + } else if let Err(e) = clone(repo_url, branch, clone_dir, timeout, depth).await { + if clone_dir.exists() { + warn!(path = %clone_dir.display(), "cleaning up partial clone after failure"); + if let Err(cleanup_err) = tokio::fs::remove_dir_all(clone_dir).await { + error!(path = %clone_dir.display(), error = %cleanup_err, + "failed to clean up partial clone"); + } + } + return Err(e); + } + + // Initialize submodules before LFS (submodule files may contain LFS pointers) + maybe_init_submodules(clone_dir, timeout, depth, is_pull).await?; + + // Handle LFS after clone/pull + submodules + maybe_fetch_lfs(clone_dir).await?; + + Ok(()) +} + +/// Check if the remote branch has new commits compared to local HEAD. +/// Returns `Ok(true)` if new commits are available, `Ok(false)` if up-to-date. +/// +/// This function: +/// 1. Returns true if `clone_dir` doesn't exist (needs initial clone) +/// 2. Runs `git fetch` to update remote refs (with `--depth` if depth > 0) +/// 3. Compares local HEAD with `origin/{branch}` +/// 4. Does NOT modify the working directory (no reset/checkout) +/// +/// # Errors +/// +/// Returns an error if git fetch or rev-parse fails. 
+pub async fn has_remote_changes( + clone_dir: &Path, + branch: &str, + timeout: Duration, + depth: u32, +) -> Result<bool> { + // If clone directory doesn't exist, treat as "needs update" + if !clone_dir.exists() { + debug!(path = %clone_dir.display(), "clone directory does not exist, needs initial clone"); + return Ok(true); + } + + // Fetch from remote (update refs only, no working tree changes) + debug!(path = %clone_dir.display(), branch, "fetching remote refs"); + let depth_str = depth.to_string(); + let mut fetch_args = vec!["fetch"]; + if depth > 0 { + fetch_args.push("--depth"); + fetch_args.push(&depth_str); + } + fetch_args.extend_from_slice(&["origin", branch]); + run_git(&fetch_args, Some(clone_dir), timeout, "git fetch").await?; + + // Get local HEAD commit + let local_head = get_commit_hash(clone_dir, "HEAD").await?; + + // Get remote branch commit + let remote_ref = format!("origin/{branch}"); + let remote_head = get_commit_hash(clone_dir, &remote_ref).await?; + + debug!( + path = %clone_dir.display(), + local = %local_head, + remote = %remote_head, + "comparing commits" + ); + + Ok(local_head != remote_head) +} + +/// Get the full commit hash for a ref (HEAD, branch name, etc.) 
+async fn get_commit_hash(clone_dir: &Path, ref_name: &str) -> Result<String> { + let output = git_command() + .args(["rev-parse", ref_name]) + .current_dir(clone_dir) + .output() + .await + .context("failed to execute git rev-parse")?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + bail!("git rev-parse {} failed: {}", ref_name, stderr.trim()); + } + + Ok(String::from_utf8_lossy(&output.stdout).trim().to_owned()) +} + +async fn clone( + repo_url: &str, + branch: &str, + clone_dir: &Path, + timeout: Duration, + depth: u32, +) -> Result<()> { + info!(repo_url, branch, path = %clone_dir.display(), "cloning repository"); + + // Create parent directory if needed + if let Some(parent) = clone_dir.parent() { + tokio::fs::create_dir_all(parent) + .await + .with_context(|| format!("failed to create parent directory: {}", parent.display()))?; + } + + let clone_dir_str = clone_dir.display().to_string(); + let depth_str = depth.to_string(); + let mut args = vec!["clone", "--branch", branch, "--single-branch"]; + if depth > 0 { + args.push("--depth"); + args.push(&depth_str); + } + args.push(repo_url); + args.push(clone_dir_str.as_str()); + run_git(&args, None, timeout, "git clone").await?; + + debug!(path = %clone_dir.display(), "clone completed"); + Ok(()) +} + +async fn pull(clone_dir: &Path, branch: &str, timeout: Duration, depth: u32) -> Result<()> { + info!(branch, path = %clone_dir.display(), "pulling latest changes"); + + // Fetch from origin (shallow or full depending on depth) + let depth_str = depth.to_string(); + let mut fetch_args = vec!["fetch"]; + if depth > 0 { + fetch_args.push("--depth"); + fetch_args.push(&depth_str); + } + fetch_args.extend_from_slice(&["origin", branch]); + run_git(&fetch_args, Some(clone_dir), timeout, "git fetch").await?; + + // Reset to origin/branch to discard any local changes + let reset_ref = format!("origin/{branch}"); + run_git( + &["reset", "--hard", &reset_ref], + Some(clone_dir), + 
timeout, + "git reset", + ) + .await?; + + debug!(path = %clone_dir.display(), "pull completed"); + Ok(()) +} + +/// Check if the repository has LFS configured via .gitattributes. +async fn has_lfs_configured(clone_dir: &Path) -> bool { + let gitattributes = clone_dir.join(".gitattributes"); + + tokio::fs::read_to_string(&gitattributes) + .await + .is_ok_and(|content| content.contains("filter=lfs")) +} + +/// Scan repository for LFS pointer files. +/// Returns true if any tracked file matches the LFS pointer signature. +async fn has_lfs_pointers(clone_dir: &Path) -> Result<bool> { + // Use git ls-files to get tracked files + let output = git_command() + .args(["ls-files", "-z"]) // -z for null-separated output + .current_dir(clone_dir) + .output() + .await + .context("failed to list git files")?; + + if !output.status.success() { + // If ls-files fails, assume pointers might exist (conservative) + return Ok(true); + } + + let files_str = String::from_utf8_lossy(&output.stdout); + + for file_path in files_str.split('\0').filter(|s| !s.is_empty()) { + let full_path = clone_dir.join(file_path); + + // Check file size first (pointer files are < 1024 bytes) + let Ok(metadata) = tokio::fs::metadata(&full_path).await else { + continue; + }; + if metadata.len() >= LFS_POINTER_MAX_SIZE || !metadata.is_file() { + continue; + } + + // Read and check for LFS signature + let Ok(content) = tokio::fs::read_to_string(&full_path).await else { + continue; + }; + if content.starts_with(LFS_POINTER_SIGNATURE) { + debug!(file = %file_path, "found LFS pointer"); + return Ok(true); + } + } + + Ok(false) +} + +async fn is_lfs_available() -> bool { + git_command() + .args(["lfs", "version"]) + .output() + .await + .map(|o| o.status.success()) + .unwrap_or(false) +} + +async fn lfs_pull(clone_dir: &Path) -> Result<()> { + info!(path = %clone_dir.display(), "fetching LFS objects"); + + run_git( + &["lfs", "pull"], + Some(clone_dir), + LFS_TIMEOUT, + "git lfs pull", + ) + .await?; + + 
debug!(path = %clone_dir.display(), "LFS pull completed"); + Ok(()) +} + +/// Detect and fetch LFS objects if needed. +/// +/// Detection strategy: +/// 1. Check .gitattributes for `filter=lfs` +/// 2. If configured, scan for actual pointer files +/// 3. If pointers exist, verify git-lfs is available +/// 4. Run `git lfs pull` to fetch objects +async fn maybe_fetch_lfs(clone_dir: &Path) -> Result<()> { + // Step 1: Quick check for LFS configuration + if !has_lfs_configured(clone_dir).await { + debug!(path = %clone_dir.display(), "no LFS configuration found"); + return Ok(()); + } + + info!(path = %clone_dir.display(), "LFS configured, checking for pointers"); + + // Step 2: Scan for actual pointer files + match has_lfs_pointers(clone_dir).await { + Ok(true) => { + // Pointers found, need to fetch + } + Ok(false) => { + debug!(path = %clone_dir.display(), "no LFS pointers found"); + return Ok(()); + } + Err(e) => { + // If scan fails, try to fetch anyway (conservative approach) + debug!(error = %e, "LFS pointer scan failed, attempting fetch"); + } + } + + // Step 3: Verify git-lfs is available + if !is_lfs_available().await { + bail!("repository requires git-lfs but git-lfs is not installed"); + } + + // Step 4: Fetch LFS objects + lfs_pull(clone_dir).await +} + +/// Check if the repository has submodules configured via .gitmodules. +async fn has_submodules(clone_dir: &Path) -> bool { + let gitmodules = clone_dir.join(".gitmodules"); + tokio::fs::read_to_string(&gitmodules) + .await + .is_ok_and(|content| !content.trim().is_empty()) +} + +/// Detect and initialize submodules if needed. +/// +/// Detection: checks for `.gitmodules` (single stat call when absent). +/// On pull: runs `git submodule sync --recursive` first to handle URL changes. +/// Then: `git submodule update --init --recursive [--depth 1]`. 
+async fn maybe_init_submodules( + clone_dir: &Path, + timeout: Duration, + depth: u32, + is_pull: bool, +) -> Result<()> { + if !has_submodules(clone_dir).await { + debug!(path = %clone_dir.display(), "no submodules configured"); + return Ok(()); + } + + info!(path = %clone_dir.display(), "submodules detected, initializing"); + + // On pull, sync URLs first (handles upstream submodule URL changes) + if is_pull { + run_git( + &["submodule", "sync", "--recursive"], + Some(clone_dir), + timeout, + "git submodule sync", + ) + .await?; + } + + // Initialize and update submodules. + // Uses file-transport-allowing command because `git submodule update` + // internally clones each submodule, and URLs may use the file:// scheme. + let depth_str = depth.to_string(); + let mut args = vec!["submodule", "update", "--init", "--recursive"]; + if depth > 0 { + args.push("--depth"); + args.push(&depth_str); + } + run_git_cmd( + git_command_allow_file_transport(), + &args, + Some(clone_dir), + timeout, + "git submodule update", + ) + .await?; + + debug!(path = %clone_dir.display(), "submodule initialization completed"); + Ok(()) +} + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::expect_used)] +mod tests { + use super::*; + use crate::test_support::{cleanup, temp_dir}; + use tokio::fs; + use tokio::process::Command; + + /// Alias for `git_command_allow_file_transport()` — tests use file:// + /// URLs for bare repos, so the file protocol must be allowed. + fn git_cmd() -> Command { + git_command_allow_file_transport() + } + + async fn configure_test_git_user(dir: &Path) { + git_cmd() + .args(["config", "user.email", "test@test.com"]) + .current_dir(dir) + .output() + .await + .unwrap(); + git_cmd() + .args(["config", "user.name", "Test"]) + .current_dir(dir) + .output() + .await + .unwrap(); + } + + /// Create a local bare git repository with an initial commit on the specified branch. + /// Returns a file:// URL that works with git clone --depth 1. 
+ async fn create_local_repo(temp: &Path, branch: &str) -> String { + let bare_repo = temp.join("origin.git"); + fs::create_dir_all(&bare_repo).await.unwrap(); + + // Initialize bare repo with explicit initial branch + let output = git_cmd() + .args(["init", "--bare", "--initial-branch", branch]) + .current_dir(&bare_repo) + .output() + .await + .unwrap(); + assert!(output.status.success(), "git init failed"); + + // Create a working copy to make initial commit + let work_dir = temp.join("work"); + let output = git_cmd() + .args([ + "clone", + bare_repo.to_str().unwrap(), + work_dir.to_str().unwrap(), + ]) + .output() + .await + .unwrap(); + assert!( + output.status.success(), + "git clone failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + // Configure git user for commit + configure_test_git_user(&work_dir).await; + + // Checkout the target branch (in case clone defaulted to something else) + let output = git_cmd() + .args(["checkout", "-B", branch]) + .current_dir(&work_dir) + .output() + .await + .unwrap(); + assert!( + output.status.success(), + "git checkout failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + // Create initial commit + fs::write(work_dir.join("README.md"), "# Test Repo") + .await + .unwrap(); + let output = git_cmd() + .args(["add", "README.md"]) + .current_dir(&work_dir) + .output() + .await + .unwrap(); + assert!(output.status.success(), "git add failed"); + + let output = git_cmd() + .args(["commit", "-m", "Initial commit"]) + .current_dir(&work_dir) + .output() + .await + .unwrap(); + assert!( + output.status.success(), + "git commit failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + // Push to origin + let output = git_cmd() + .args(["push", "-u", "origin", branch]) + .current_dir(&work_dir) + .output() + .await + .unwrap(); + assert!( + output.status.success(), + "git push failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + // Clean up working copy + let _ = 
fs::remove_dir_all(&work_dir).await; + + // Return file:// URL so --depth works correctly + format!("file://{}", bare_repo.to_str().unwrap()) + } + + #[tokio::test] + async fn clone_creates_directory_and_clones_repo() { + let temp = temp_dir("git-test").await; + let repo_url = create_local_repo(&temp, "main").await; + let clone_dir = temp.join("test-repo"); + + let result = clone(&repo_url, "main", &clone_dir, GIT_TIMEOUT_DEFAULT, 1).await; + + assert!(result.is_ok(), "clone should succeed: {result:?}"); + assert!(clone_dir.exists(), "clone directory should exist"); + assert!( + clone_dir.join(".git").exists(), + ".git directory should exist" + ); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn clone_invalid_url_returns_error() { + let temp = temp_dir("git-test").await; + let clone_dir = temp.join("invalid-repo"); + + let result = clone( + "/nonexistent/path/to/repo.git", + "main", + &clone_dir, + GIT_TIMEOUT_DEFAULT, + 1, + ) + .await; + + assert!(result.is_err(), "clone should fail for invalid URL"); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn clone_invalid_branch_returns_error() { + let temp = temp_dir("git-test").await; + let repo_url = create_local_repo(&temp, "main").await; + let clone_dir = temp.join("invalid-branch"); + + let result = clone( + &repo_url, + "nonexistent-branch-xyz", + &clone_dir, + GIT_TIMEOUT_DEFAULT, + 1, + ) + .await; + + assert!(result.is_err(), "clone should fail for invalid branch"); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn pull_updates_existing_repo() { + let temp = temp_dir("git-test").await; + let repo_url = create_local_repo(&temp, "main").await; + let clone_dir = temp.join("pull-test"); + + // First clone + clone(&repo_url, "main", &clone_dir, GIT_TIMEOUT_DEFAULT, 1) + .await + .expect("initial clone should succeed"); + + // Push a new commit to origin + let work_dir = temp.join("work-pull"); + push_new_commit(&repo_url, &work_dir, "pulled.txt", "pulled content").await; + + // Pull 
should fetch the new commit + pull(&clone_dir, "main", GIT_TIMEOUT_DEFAULT, 1) + .await + .expect("pull should succeed"); + + // Verify the new file appeared in the working copy + let pulled_file = clone_dir.join("pulled.txt"); + assert!(pulled_file.exists(), "pulled file should exist after pull"); + let content = fs::read_to_string(&pulled_file).await.unwrap(); + assert_eq!(content, "pulled content"); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn pull_invalid_branch_returns_error() { + let temp = temp_dir("git-test").await; + let repo_url = create_local_repo(&temp, "main").await; + let clone_dir = temp.join("pull-invalid-branch"); + + // First clone + clone(&repo_url, "main", &clone_dir, GIT_TIMEOUT_DEFAULT, 1) + .await + .expect("initial clone should succeed"); + + // Pull with invalid branch + let result = pull(&clone_dir, "nonexistent-branch-xyz", GIT_TIMEOUT_DEFAULT, 1).await; + + assert!(result.is_err(), "pull should fail for invalid branch"); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn sync_repo_clones_when_not_exists() { + let temp = temp_dir("git-test").await; + let repo_url = create_local_repo(&temp, "main").await; + let clone_dir = temp.join("sync-clone"); + + let result = sync_repo(&repo_url, "main", &clone_dir, GIT_TIMEOUT_DEFAULT, 1).await; + + assert!(result.is_ok(), "sync should succeed: {result:?}"); + assert!(clone_dir.exists(), "clone directory should exist"); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn sync_repo_pulls_when_exists() { + let temp = temp_dir("git-test").await; + let repo_url = create_local_repo(&temp, "main").await; + let clone_dir = temp.join("sync-pull"); + + // First sync (clone) + sync_repo(&repo_url, "main", &clone_dir, GIT_TIMEOUT_DEFAULT, 1) + .await + .expect("initial sync should succeed"); + + // Push a new commit to origin + let work_dir = temp.join("work-sync"); + push_new_commit(&repo_url, &work_dir, "synced.txt", "synced content").await; + + // Second sync should pull the 
new commit + sync_repo(&repo_url, "main", &clone_dir, GIT_TIMEOUT_DEFAULT, 1) + .await + .expect("second sync should succeed"); + + // Verify the new file appeared + let synced_file = clone_dir.join("synced.txt"); + assert!(synced_file.exists(), "synced file should exist after pull"); + let content = fs::read_to_string(&synced_file).await.unwrap(); + assert_eq!(content, "synced content"); + + cleanup(&temp).await; + } + + // LFS tests + + #[tokio::test] + async fn has_lfs_configured_with_lfs() { + let temp = temp_dir("git-test").await; + fs::write( + temp.join(".gitattributes"), + "*.bin filter=lfs diff=lfs merge=lfs -text\n", + ) + .await + .unwrap(); + + assert!(has_lfs_configured(&temp).await); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn has_lfs_configured_without_lfs() { + let temp = temp_dir("git-test").await; + fs::write(temp.join(".gitattributes"), "*.txt text\n") + .await + .unwrap(); + + assert!(!has_lfs_configured(&temp).await); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn has_lfs_configured_no_file() { + let temp = temp_dir("git-test").await; + // No .gitattributes file + + assert!(!has_lfs_configured(&temp).await); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn has_lfs_pointers_detects_pointer() { + let temp = temp_dir("git-test").await; + + // Initialize git repo + init_git_repo(&temp).await; + + // Create LFS pointer file + let pointer_content = "version https://git-lfs.github.com/spec/v1\n\ + oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393\n\ + size 12345\n"; + fs::write(temp.join("large.bin"), pointer_content) + .await + .unwrap(); + + // Stage the file + stage_file(&temp, "large.bin").await; + + let result = has_lfs_pointers(&temp).await; + assert!(result.is_ok()); + assert!(result.unwrap()); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn has_lfs_pointers_ignores_non_pointers() { + let temp = temp_dir("git-test").await; + + // Initialize git repo + 
init_git_repo(&temp).await; + + // Create normal small file + fs::write(temp.join("readme.txt"), "Hello World") + .await + .unwrap(); + stage_file(&temp, "readme.txt").await; + + let result = has_lfs_pointers(&temp).await; + assert!(result.is_ok()); + assert!(!result.unwrap()); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn has_lfs_pointers_ignores_large_files() { + let temp = temp_dir("git-test").await; + + init_git_repo(&temp).await; + + // Create large file that starts with LFS signature (edge case) + let mut content = String::from("version https://git-lfs.github.com/spec/v1\n"); + content.push_str(&"x".repeat(2000)); // > 1024 bytes + fs::write(temp.join("large.txt"), &content).await.unwrap(); + stage_file(&temp, "large.txt").await; + + let result = has_lfs_pointers(&temp).await; + assert!(result.is_ok()); + assert!(!result.unwrap()); // Should be ignored due to size + + cleanup(&temp).await; + } + + #[tokio::test] + async fn maybe_fetch_lfs_no_config() { + let temp = temp_dir("git-test").await; + init_git_repo(&temp).await; + + // No .gitattributes = no LFS + let result = maybe_fetch_lfs(&temp).await; + assert!(result.is_ok()); + + cleanup(&temp).await; + } + + // Helper functions for LFS tests + + async fn init_git_repo(dir: &Path) { + git_cmd() + .args(["init"]) + .current_dir(dir) + .output() + .await + .unwrap(); + configure_test_git_user(dir).await; + } + + async fn stage_file(dir: &Path, filename: &str) { + git_cmd() + .args(["add", filename]) + .current_dir(dir) + .output() + .await + .unwrap(); + } + + /// Clone a bare repo into `work_dir`, commit a new file, and push it. 
+ async fn push_new_commit(repo_url: &str, work_dir: &Path, filename: &str, content: &str) { + git_cmd() + .args(["clone", repo_url, work_dir.to_str().unwrap()]) + .output() + .await + .unwrap(); + configure_test_git_user(work_dir).await; + + fs::write(work_dir.join(filename), content).await.unwrap(); + + git_cmd() + .args(["add", filename]) + .current_dir(work_dir) + .output() + .await + .unwrap(); + + git_cmd() + .args(["commit", "-m", "New commit"]) + .current_dir(work_dir) + .output() + .await + .unwrap(); + + git_cmd() + .args(["push"]) + .current_dir(work_dir) + .output() + .await + .unwrap(); + } + + // has_remote_changes tests + + #[tokio::test] + async fn has_remote_changes_nonexistent_dir_returns_true() { + let temp = temp_dir("git-test").await; + let nonexistent = temp.join("does-not-exist"); + + let result = has_remote_changes(&nonexistent, "main", GIT_TIMEOUT_DEFAULT, 1).await; + assert!(result.is_ok()); + assert!(result.unwrap(), "nonexistent directory should return true"); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn has_remote_changes_up_to_date_returns_false() { + let temp = temp_dir("git-test").await; + let repo_url = create_local_repo(&temp, "main").await; + let clone_dir = temp.join("test-clone"); + + // Clone the repo + clone(&repo_url, "main", &clone_dir, GIT_TIMEOUT_DEFAULT, 1) + .await + .unwrap(); + + // Check for changes - should be false since we just cloned + let result = has_remote_changes(&clone_dir, "main", GIT_TIMEOUT_DEFAULT, 1).await; + assert!(result.is_ok(), "has_remote_changes failed: {result:?}"); + assert!(!result.unwrap(), "freshly cloned repo should be up-to-date"); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn has_remote_changes_detects_new_commits() { + let temp = temp_dir("git-test").await; + let repo_url = create_local_repo(&temp, "main").await; + let clone_dir = temp.join("test-clone"); + + // Clone the repo + clone(&repo_url, "main", &clone_dir, GIT_TIMEOUT_DEFAULT, 1) + .await + .unwrap(); 
+ + // Push a new commit to the origin + let work_dir = temp.join("work2"); + push_new_commit(&repo_url, &work_dir, "new-file.txt", "new content").await; + + // Now check for changes - should detect the new commit + let result = has_remote_changes(&clone_dir, "main", GIT_TIMEOUT_DEFAULT, 1).await; + assert!(result.is_ok(), "has_remote_changes failed: {result:?}"); + assert!(result.unwrap(), "should detect new commits on remote"); + + cleanup(&temp).await; + } + + // git_depth tests + + #[tokio::test] + async fn clone_full_depth_creates_complete_history() { + let temp = temp_dir("git-test").await; + let repo_url = create_local_repo(&temp, "main").await; + + // Push a second commit so we have more than 1 commit in history + let work_dir = temp.join("work-depth"); + push_new_commit(&repo_url, &work_dir, "second.txt", "second commit").await; + + let clone_dir = temp.join("full-clone"); + + // Clone with depth=0 (full clone) + clone(&repo_url, "main", &clone_dir, GIT_TIMEOUT_DEFAULT, 0) + .await + .expect("full clone should succeed"); + + // Verify we have more than 1 commit (full history) + let output = git_cmd() + .args(["rev-list", "--count", "HEAD"]) + .current_dir(&clone_dir) + .output() + .await + .unwrap(); + let count: u32 = String::from_utf8_lossy(&output.stdout) + .trim() + .parse() + .unwrap(); + assert!( + count > 1, + "full clone should have multiple commits, got {count}" + ); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn sync_repo_full_depth_preserves_history() { + let temp = temp_dir("git-test").await; + let repo_url = create_local_repo(&temp, "main").await; + + // Push a second commit + let work_dir = temp.join("work-depth2"); + push_new_commit(&repo_url, &work_dir, "second.txt", "second").await; + + let clone_dir = temp.join("sync-full"); + + // sync_repo with depth=0 should do a full clone + sync_repo(&repo_url, "main", &clone_dir, GIT_TIMEOUT_DEFAULT, 0) + .await + .expect("sync with full depth should succeed"); + + let output = 
git_cmd() + .args(["rev-list", "--count", "HEAD"]) + .current_dir(&clone_dir) + .output() + .await + .unwrap(); + let count: u32 = String::from_utf8_lossy(&output.stdout) + .trim() + .parse() + .unwrap(); + assert!( + count > 1, + "full sync should have multiple commits, got {count}" + ); + + cleanup(&temp).await; + } + + // Submodule tests + + #[tokio::test] + async fn has_submodules_with_gitmodules_file() { + let temp = temp_dir("git-test").await; + fs::write( + temp.join(".gitmodules"), + "[submodule \"lib\"]\n\tpath = lib\n\turl = ../lib.git\n", + ) + .await + .unwrap(); + + assert!(has_submodules(&temp).await); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn has_submodules_without_gitmodules() { + let temp = temp_dir("git-test").await; + + assert!(!has_submodules(&temp).await); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn has_submodules_empty_gitmodules() { + let temp = temp_dir("git-test").await; + fs::write(temp.join(".gitmodules"), "").await.unwrap(); + + assert!(!has_submodules(&temp).await); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn maybe_init_submodules_no_submodules_is_noop() { + let temp = temp_dir("git-test").await; + let repo_url = create_local_repo(&temp, "main").await; + let clone_dir = temp.join("no-submodules"); + + clone(&repo_url, "main", &clone_dir, GIT_TIMEOUT_DEFAULT, 1) + .await + .expect("clone should succeed"); + + // No .gitmodules → should be a no-op + let result = maybe_init_submodules(&clone_dir, GIT_TIMEOUT_DEFAULT, 1, false).await; + assert!( + result.is_ok(), + "noop submodule init should succeed: {result:?}" + ); + + cleanup(&temp).await; + } + + /// Create a parent repo with a submodule wired up. + /// Returns (parent_url, submodule_url). + async fn create_repo_with_submodule(temp: &Path, branch: &str) -> (String, String) { + // 1. 
Create bare submodule repo with a file + let sub_bare = temp.join("sub.git"); + fs::create_dir_all(&sub_bare).await.unwrap(); + git_cmd() + .args(["init", "--bare", "--initial-branch", branch]) + .current_dir(&sub_bare) + .output() + .await + .unwrap(); + + let sub_work = temp.join("sub-work"); + git_cmd() + .args([ + "clone", + sub_bare.to_str().unwrap(), + sub_work.to_str().unwrap(), + ]) + .output() + .await + .unwrap(); + configure_test_git_user(&sub_work).await; + git_cmd() + .args(["checkout", "-B", branch]) + .current_dir(&sub_work) + .output() + .await + .unwrap(); + fs::write(sub_work.join("sub-file.txt"), "submodule content") + .await + .unwrap(); + git_cmd() + .args(["add", "sub-file.txt"]) + .current_dir(&sub_work) + .output() + .await + .unwrap(); + let output = git_cmd() + .args(["commit", "-m", "sub initial"]) + .current_dir(&sub_work) + .output() + .await + .unwrap(); + assert!( + output.status.success(), + "sub commit failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + let output = git_cmd() + .args(["push", "-u", "origin", branch]) + .current_dir(&sub_work) + .output() + .await + .unwrap(); + assert!( + output.status.success(), + "sub push failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + // 2. 
Create bare parent repo with a submodule reference + let parent_bare = temp.join("parent.git"); + fs::create_dir_all(&parent_bare).await.unwrap(); + git_cmd() + .args(["init", "--bare", "--initial-branch", branch]) + .current_dir(&parent_bare) + .output() + .await + .unwrap(); + + let parent_work = temp.join("parent-work"); + git_cmd() + .args([ + "clone", + parent_bare.to_str().unwrap(), + parent_work.to_str().unwrap(), + ]) + .output() + .await + .unwrap(); + configure_test_git_user(&parent_work).await; + git_cmd() + .args(["checkout", "-B", branch]) + .current_dir(&parent_work) + .output() + .await + .unwrap(); + fs::write(parent_work.join("README.md"), "# Parent") + .await + .unwrap(); + git_cmd() + .args(["add", "README.md"]) + .current_dir(&parent_work) + .output() + .await + .unwrap(); + + // Add submodule using file:// URL + let sub_url = format!("file://{}", sub_bare.to_str().unwrap()); + let output = git_cmd() + .args(["submodule", "add", &sub_url, "lib"]) + .current_dir(&parent_work) + .output() + .await + .unwrap(); + assert!( + output.status.success(), + "git submodule add failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + git_cmd() + .args(["commit", "-m", "add submodule"]) + .current_dir(&parent_work) + .output() + .await + .unwrap(); + let output = git_cmd() + .args(["push", "-u", "origin", branch]) + .current_dir(&parent_work) + .output() + .await + .unwrap(); + assert!( + output.status.success(), + "git push failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let _ = fs::remove_dir_all(&sub_work).await; + let _ = fs::remove_dir_all(&parent_work).await; + + let parent_url = format!("file://{}", parent_bare.to_str().unwrap()); + (parent_url, sub_url) + } + + #[tokio::test] + async fn sync_repo_initializes_submodules() { + let temp = temp_dir("git-test").await; + let (parent_url, _sub_url) = create_repo_with_submodule(&temp, "main").await; + let clone_dir = temp.join("clone-with-sub"); + + sync_repo(&parent_url, "main", 
&clone_dir, GIT_TIMEOUT_DEFAULT, 1) + .await + .expect("sync should succeed"); + + // Verify submodule content is present + let sub_file = clone_dir.join("lib").join("sub-file.txt"); + assert!(sub_file.exists(), "submodule file should exist after sync"); + let content = fs::read_to_string(&sub_file).await.unwrap(); + assert_eq!(content, "submodule content"); + + cleanup(&temp).await; + } + + #[tokio::test] + async fn sync_repo_updates_submodules_on_pull() { + let temp = temp_dir("git-test").await; + let (parent_url, sub_url) = create_repo_with_submodule(&temp, "main").await; + let clone_dir = temp.join("pull-sub"); + + // First sync (clone + submodule init) + sync_repo(&parent_url, "main", &clone_dir, GIT_TIMEOUT_DEFAULT, 1) + .await + .expect("initial sync should succeed"); + + // Push a new commit to the submodule + let sub_work = temp.join("sub-update"); + git_cmd() + .args(["clone", &sub_url, sub_work.to_str().unwrap()]) + .output() + .await + .unwrap(); + configure_test_git_user(&sub_work).await; + fs::write(sub_work.join("new-sub-file.txt"), "updated submodule") + .await + .unwrap(); + git_cmd() + .args(["add", "new-sub-file.txt"]) + .current_dir(&sub_work) + .output() + .await + .unwrap(); + let output = git_cmd() + .args(["commit", "-m", "update sub"]) + .current_dir(&sub_work) + .output() + .await + .unwrap(); + assert!( + output.status.success(), + "sub commit failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + let output = git_cmd() + .args(["push"]) + .current_dir(&sub_work) + .output() + .await + .unwrap(); + assert!( + output.status.success(), + "sub push failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + // Update parent to point to new submodule commit + let parent_work = temp.join("parent-update"); + let parent_bare = temp.join("parent.git"); + git_cmd() + .args([ + "clone", + parent_bare.to_str().unwrap(), + parent_work.to_str().unwrap(), + ]) + .output() + .await + .unwrap(); + configure_test_git_user(&parent_work).await; + 
// Init submodule in parent work copy, then update to latest + git_cmd() + .args(["submodule", "update", "--init", "--remote", "lib"]) + .current_dir(&parent_work) + .output() + .await + .unwrap(); + git_cmd() + .args(["add", "lib"]) + .current_dir(&parent_work) + .output() + .await + .unwrap(); + let output = git_cmd() + .args(["commit", "-m", "bump submodule"]) + .current_dir(&parent_work) + .output() + .await + .unwrap(); + assert!( + output.status.success(), + "parent bump commit failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + let output = git_cmd() + .args(["push"]) + .current_dir(&parent_work) + .output() + .await + .unwrap(); + assert!( + output.status.success(), + "parent push failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + // Second sync (pull + submodule update) + sync_repo(&parent_url, "main", &clone_dir, GIT_TIMEOUT_DEFAULT, 1) + .await + .expect("second sync should succeed"); + + // Verify the new submodule content is present + let new_sub_file = clone_dir.join("lib").join("new-sub-file.txt"); + assert!( + new_sub_file.exists(), + "updated submodule file should exist after pull" + ); + let content = fs::read_to_string(&new_sub_file).await.unwrap(); + assert_eq!(content, "updated submodule"); + + cleanup(&temp).await; + } +} |
