From 126ce542dca21e62b38049f67c7f4c951284a2c2 Mon Sep 17 00:00:00 2001 From: Dawid Rycerz Date: Wed, 28 May 2025 15:58:05 +0200 Subject: feat(gitlab_glab): add diff content filtering to get_mr_diff - Add filter_diff_content method to filter out unwanted file extensions - Default filtering excludes .lock and .log files to reduce noise - Smart lock file detection handles package-lock.json, yarn.lock, etc. - Customizable filtering via filter_extensions parameter - Filtering can be disabled by passing empty list - Add comprehensive tests for all filtering scenarios - Update README.md with filtering documentation - Maintain backward compatibility with existing API --- servers/gitlab_glab/README.md | 13 +- .../src/mcp_server_gitlab_glab/server.py | 72 +++++- servers/gitlab_glab/tests/test_server.py | 287 +++++++++++++++++++++ 3 files changed, 369 insertions(+), 3 deletions(-) (limited to 'servers') diff --git a/servers/gitlab_glab/README.md b/servers/gitlab_glab/README.md index 19b6cd7..cef74c2 100644 --- a/servers/gitlab_glab/README.md +++ b/servers/gitlab_glab/README.md @@ -151,7 +151,7 @@ The function returns a dictionary containing: ### get_mr_diff -Get the diff for a merge request using `glab mr diff`. +Get the diff for a merge request using `glab mr diff` with optional content filtering. ```python result = use_mcp_tool( @@ -164,12 +164,14 @@ result = use_mcp_tool( "color": "never", # Use color in diff output: always, never, auto (default: never) "raw": True, # Use raw diff format (default: False) "repo": "group/project", # Project path with namespace - "max_size_kb": 100 # Maximum size in KB before saving to temp file (default: 100) + "max_size_kb": 100, # Maximum size in KB before saving to temp file (default: 100) + "filter_extensions": [".lock", ".log"] # File extensions to exclude from diff (default: [".lock", ".log"]) } ) ``` The function returns a dictionary containing: + - `diff`: The diff content (if small enough) - `size_kb`: The size of the diff in KB - `temp_file_path`: Path to temporary file if diff is too large (None otherwise) @@ -178,6 +180,13 @@ The function returns a dictionary containing: - `message`: Human-readable message about temp file creation (if applicable) - `error`: Error message if the operation failed +**Content Filtering**: By default, this tool filters out changes to files with `.lock` and `.log` extensions to reduce noise in diffs. This includes: + +- Lock files: `package-lock.json`, `yarn.lock`, `composer.lock`, etc. +- Log files: `debug.log`, `error.log`, `application.log`, etc. + +You can customize the filtering by providing a `filter_extensions` list, or disable filtering entirely by passing an empty list `[]`. + **Note on Large Diffs**: To prevent overwhelming LLMs with extremely large diffs, this tool automatically saves diffs larger than `max_size_kb` (default: 100KB) to a temporary file and returns the file path instead of the content. This allows you to process large merge request diffs without hitting token limits. ### run_ci_pipeline diff --git a/servers/gitlab_glab/src/mcp_server_gitlab_glab/server.py b/servers/gitlab_glab/src/mcp_server_gitlab_glab/server.py index b661255..f239246 100644 --- a/servers/gitlab_glab/src/mcp_server_gitlab_glab/server.py +++ b/servers/gitlab_glab/src/mcp_server_gitlab_glab/server.py @@ -310,6 +310,63 @@ class GitLabServer: logger.error(f"Failed to extract issue URL from output: {result}") return {"error": "Failed to extract issue URL from command output"} + def filter_diff_content( + self, + diff_content: str, + exclude_extensions: list[str], + ) -> str: + """Filter out files with specified extensions from diff content. + + Args: + diff_content: The original diff content. + exclude_extensions: List of file extensions to exclude + (e.g., [".lock", ".log"]). + + Returns: + Filtered diff content with excluded files removed. + """ + if not exclude_extensions: + return diff_content + + lines = diff_content.split('\n') + filtered_lines = [] + skip_block = False + + for line in lines: + if line.startswith("diff --git a/"): + parts = line.split() + if len(parts) >= 4: + file_a = parts[2] + file_b = parts[3] + # Check if either file should be excluded + should_exclude = False + for ext in exclude_extensions: + # Handle both exact extension matches and lock file patterns + if ext == ".lock": + # Special handling for lock files + # (package-lock.json, yarn.lock, etc.) + if (file_a.endswith('.lock') or + file_b.endswith('.lock') or + 'lock.' in file_a or 'lock.' in file_b or + file_a.endswith('-lock.json') or + file_b.endswith('-lock.json')): + should_exclude = True + break + else: + # Standard extension matching + if file_a.endswith(ext) or file_b.endswith(ext): + should_exclude = True + break + + skip_block = should_exclude + else: + skip_block = False + + if not skip_block: + filtered_lines.append(line) + + return '\n'.join(filtered_lines) + def get_mr_diff( self, working_directory: str, @@ -318,6 +375,7 @@ class GitLabServer: raw: bool = False, repo: str | None = None, max_size_kb: int = 100, + filter_extensions: list[str] | None = None, ) -> dict[str, Any]: """Get the diff for a merge request. @@ -329,11 +387,17 @@ class GitLabServer: repo: Select another repository (OWNER/REPO or GROUP/NAMESPACE/REPO format). max_size_kb: Maximum size in KB before saving to temporary file (default: 100). + filter_extensions: List of file extensions to exclude from diff + (default: [".lock", ".log"]). Returns: A dictionary containing the diff content or a path to temporary file if too large. """ + # Set default filter extensions if not provided + if filter_extensions is None: + filter_extensions = [".lock", ".log"] + # Build command arguments args = ["mr", "diff"] @@ -359,8 +423,10 @@ class GitLabServer: if not success: return result + # Apply filtering to remove unwanted file extensions + diff_content = self.filter_diff_content(result, filter_extensions) + # Check if the diff is too large - diff_content = result diff_size_kb = len(diff_content.encode('utf-8')) / 1024 if diff_size_kb > max_size_kb: @@ -664,6 +730,7 @@ def create_server(host: str = "127.0.0.1", port: int = 8080) -> FastMCP: raw: bool = False, repo: str | None = None, max_size_kb: int = 100, + filter_extensions: list[str] | None = None, ) -> dict[str, Any]: """Get the diff for a merge request. @@ -675,6 +742,8 @@ def create_server(host: str = "127.0.0.1", port: int = 8080) -> FastMCP: repo: Select another repository (OWNER/REPO or GROUP/NAMESPACE/REPO format). max_size_kb: Maximum size in KB before saving to temporary file (default: 100). + filter_extensions: List of file extensions to exclude from diff + (default: [".lock", ".log"]). Returns: A dictionary containing the diff content or a path to temporary file @@ -687,6 +756,7 @@ def create_server(host: str = "127.0.0.1", port: int = 8080) -> FastMCP: raw=raw, repo=repo, max_size_kb=max_size_kb, + filter_extensions=filter_extensions, ) @mcp.tool() diff --git a/servers/gitlab_glab/tests/test_server.py b/servers/gitlab_glab/tests/test_server.py index 867b2c0..13f90af 100644 --- a/servers/gitlab_glab/tests/test_server.py +++ b/servers/gitlab_glab/tests/test_server.py @@ -1020,3 +1020,290 @@ index 1234567..abcdefg 100644 ["ci", "run"], working_dir, ) + + @patch.object(GitLabServer, "execute_glab_command") + def test_get_mr_diff_with_default_filtering(self, mock_execute: MagicMock) -> None: + """Test MR diff with default filtering (.lock and .log files).""" + # Mock diff content with .lock and .log files + diff_content = """diff --git a/package-lock.json b/package-lock.json +index 1234567..abcdefg 100644 +--- a/package-lock.json ++++ b/package-lock.json +@@ -1,3 +1,4 @@ + { + "name": "test", ++ "version": "1.0.0", + "lockfileVersion": 1 +} +diff --git a/debug.log b/debug.log +index 1234567..abcdefg 100644 +--- a/debug.log ++++ b/debug.log +@@ -1,2 +1,3 @@ + [INFO] Starting application ++[INFO] Loading configuration + [INFO] Application ready +diff --git a/src/main.py b/src/main.py +index 1234567..abcdefg 100644 +--- a/src/main.py ++++ b/src/main.py +@@ -1,3 +1,4 @@ + def main(): + print("Hello") ++ print("World") + return 0""" + mock_execute.return_value = (True, diff_content) + + server = GitLabServer() + working_dir = "/test/directory" + result = server.get_mr_diff(working_directory=working_dir) + + # Check that .lock and .log files are filtered out + filtered_diff = result["diff"] + assert "package-lock.json" not in filtered_diff + assert "debug.log" not in filtered_diff + assert "src/main.py" in filtered_diff + assert 'print("World")' in filtered_diff + + @patch.object(GitLabServer, "execute_glab_command") + def test_get_mr_diff_with_custom_filtering(self, mock_execute: MagicMock) -> None: + """Test MR diff with custom filtering extensions.""" + # Mock diff content with various file types + diff_content = """diff --git a/test.txt b/test.txt +index 1234567..abcdefg 100644 +--- a/test.txt ++++ b/test.txt +@@ -1,2 +1,3 @@ + line 1 ++new line + line 2 +diff --git a/config.json b/config.json +index 1234567..abcdefg 100644 +--- a/config.json ++++ b/config.json +@@ -1,3 +1,4 @@ + { + "setting": "value" ++ "new_setting": "new_value" + } +diff --git a/temp.tmp b/temp.tmp +index 1234567..abcdefg 100644 +--- a/temp.tmp ++++ b/temp.tmp +@@ -1,2 +1,3 @@ + temp data ++more temp data + end""" + mock_execute.return_value = (True, diff_content) + + server = GitLabServer() + working_dir = "/test/directory" + result = server.get_mr_diff( + working_directory=working_dir, + filter_extensions=[".tmp", ".json"] + ) + + # Check that .tmp and .json files are filtered out + filtered_diff = result["diff"] + assert "temp.tmp" not in filtered_diff + assert "config.json" not in filtered_diff + assert "test.txt" in filtered_diff + assert "new line" in filtered_diff + + @patch.object(GitLabServer, "execute_glab_command") + def test_get_mr_diff_with_no_filtering(self, mock_execute: MagicMock) -> None: + """Test MR diff with filtering disabled (empty list).""" + # Mock diff content with .lock file + diff_content = """diff --git a/package-lock.json b/package-lock.json +index 1234567..abcdefg 100644 +--- a/package-lock.json ++++ b/package-lock.json +@@ -1,3 +1,4 @@ + { + "name": "test", ++ "version": "1.0.0", + "lockfileVersion": 1 +}""" + mock_execute.return_value = (True, diff_content) + + server = GitLabServer() + working_dir = "/test/directory" + result = server.get_mr_diff( + working_directory=working_dir, + filter_extensions=[] + ) + + # Check that no filtering occurred + filtered_diff = result["diff"] + assert "package-lock.json" in filtered_diff + assert '"version": "1.0.0"' in filtered_diff + + def test_filter_diff_content_basic(self) -> None: + """Test basic diff content filtering.""" + server = GitLabServer() + + diff_content = """diff --git a/src/main.py b/src/main.py +index 1234567..abcdefg 100644 +--- a/src/main.py ++++ b/src/main.py +@@ -1,3 +1,4 @@ + def main(): + print("Hello") ++ print("World") + return 0 +diff --git a/package-lock.json b/package-lock.json +index 1234567..abcdefg 100644 +--- a/package-lock.json ++++ b/package-lock.json +@@ -1,3 +1,4 @@ + { + "name": "test", ++ "version": "1.0.0", + "lockfileVersion": 1 +}""" + + filtered = server.filter_diff_content(diff_content, [".lock"]) + + assert "src/main.py" in filtered + assert 'print("World")' in filtered + assert "package-lock.json" not in filtered + assert '"version": "1.0.0"' not in filtered + + def test_filter_diff_content_multiple_extensions(self) -> None: + """Test filtering with multiple extensions.""" + server = GitLabServer() + + diff_content = """diff --git a/app.py b/app.py +index 1234567..abcdefg 100644 +--- a/app.py ++++ b/app.py +@@ -1,2 +1,3 @@ + import logging ++logging.basicConfig() + app = Flask(__name__) +diff --git a/debug.log b/debug.log +index 1234567..abcdefg 100644 +--- a/debug.log ++++ b/debug.log +@@ -1,2 +1,3 @@ + [INFO] Starting ++[DEBUG] Debug message + [INFO] Ready +diff --git a/yarn.lock b/yarn.lock +index 1234567..abcdefg 100644 +--- a/yarn.lock ++++ b/yarn.lock +@@ -1,3 +1,4 @@ + # THIS IS AN AUTOGENERATED FILE ++# Dependencies + package@1.0.0: + version "1.0.0""" + + filtered = server.filter_diff_content(diff_content, [".log", ".lock"]) + + assert "app.py" in filtered + assert "logging.basicConfig()" in filtered + assert "debug.log" not in filtered + assert "yarn.lock" not in filtered + assert "[DEBUG] Debug message" not in filtered + assert "# Dependencies" not in filtered + + def test_filter_diff_content_empty_extensions(self) -> None: + """Test filtering with empty extensions list.""" + server = GitLabServer() + + diff_content = """diff --git a/package-lock.json b/package-lock.json +index 1234567..abcdefg 100644 +--- a/package-lock.json ++++ b/package-lock.json +@@ -1,2 +1,3 @@ + { ++ "name": "test", + "lockfileVersion": 1 +}""" + + filtered = server.filter_diff_content(diff_content, []) + + # Should return original content when no extensions to filter + assert filtered == diff_content + + def test_filter_diff_content_malformed_diff_header(self) -> None: + """Test filtering with malformed diff headers.""" + server = GitLabServer() + + diff_content = """diff --git a/file.py +index 1234567..abcdefg 100644 +--- a/file.py ++++ b/file.py +@@ -1,2 +1,3 @@ + line 1 ++new line + line 2 +diff --git a/package-lock.json b/package-lock.json +index 1234567..abcdefg 100644 +--- a/package-lock.json ++++ b/package-lock.json +@@ -1,2 +1,3 @@ + { ++ "version": "1.0.0", + "lockfileVersion": 1 +}""" + + filtered = server.filter_diff_content(diff_content, [".lock"]) + + # Malformed header should not be filtered + assert "diff --git a/file.py" in filtered + assert "new line" in filtered + # Properly formed header with .lock extension should be filtered + assert "package-lock.json" not in filtered + assert '"version": "1.0.0"' not in filtered + + @patch.object(GitLabServer, "execute_glab_command") + def test_get_mr_diff_filtering_with_large_file(self, mock_execute: MagicMock) -> None: + """Test that filtering works correctly even when creating temp files.""" + # Create a large diff that will still be large after filtering + large_diff_base = """diff --git a/src/main.py b/src/main.py +index 1234567..abcdefg 100644 +--- a/src/main.py ++++ b/src/main.py +@@ -1,3 +1,4 @@ + def main(): + print("Hello") ++ print("World") + return 0 +""" + # Add a lot of content to main.py to make it large + large_diff = large_diff_base + "+ # " + "Large comment line\n" * 3000 + + # Add package-lock.json content that should be filtered out + large_diff += """diff --git a/package-lock.json b/package-lock.json +index 1234567..abcdefg 100644 +--- a/package-lock.json ++++ b/package-lock.json +@@ -1,500 +1,501 @@ + { + "name": "test", ++ "version": "1.0.0", +""" + " \"dependency\": \"^1.0.0\",\n" * 1000 + "}\n" + + mock_execute.return_value = (True, large_diff) + + server = GitLabServer() + working_dir = "/test/directory" + + with patch("tempfile.NamedTemporaryFile") as mock_temp: + mock_file = MagicMock() + mock_file.name = "/tmp/test_diff.diff" + mock_temp.return_value.__enter__.return_value = mock_file + + result = server.get_mr_diff( + working_directory=working_dir, + max_size_kb=1 # Force temp file creation + ) + + # Should still filter before saving to temp file + assert result["diff_too_large"] is True + # The filtered content should be written to temp file + written_content = mock_file.write.call_args[0][0] + assert "src/main.py" in written_content + assert "package-lock.json" not in written_content -- cgit v1.2.3