From 642dd0ea5f5d18343e92ec25f863541e1c2a7c5c Mon Sep 17 00:00:00 2001 From: jleu3482 Date: Sun, 11 Jan 2026 07:39:12 +0800 Subject: [PATCH] Analysis: Git vs Go-Git comparison and recommendation --- GIT_VS_GOGIT_COMPARISON.md | 172 ++++++++++++++++++++++++ auto_commit_gogit.py | 262 +++++++++++++++++++++++++++++++++++++ 2 files changed, 434 insertions(+) create mode 100644 GIT_VS_GOGIT_COMPARISON.md create mode 100644 auto_commit_gogit.py diff --git a/GIT_VS_GOGIT_COMPARISON.md b/GIT_VS_GOGIT_COMPARISON.md new file mode 100644 index 00000000..4499d6b2 --- /dev/null +++ b/GIT_VS_GOGIT_COMPARISON.md @@ -0,0 +1,172 @@ +# Git vs Go-Git: Comparison and Recommendation for LightRAG Project + +## Executive Summary + +**Recommendation: Stick with Standard Git** + +After implementing both approaches, **standard Git** is the better choice for the LightRAG project due to: +1. **Already working perfectly** with auto-commit functionality +2. **Better performance** for large repositories (2.6 GB, 42,417 files) +3. **Full feature set** including SHA256 support +4. **VS Code integration** works seamlessly +5. **Mature tooling** with extensive documentation and community support + +## Detailed Comparison + +### Current Implementation (Standard Git) + +#### ✅ **Advantages** +1. **Performance**: Optimized for large repositories + - Delta compression reduces push size + - Efficient change detection via `.git` index + - Fast operations even with 42,417 files + +2. **Features**: Complete Git feature set + - SHA256 hash support (future-proof) + - All Git commands available + - Branching, merging, rebasing, etc. + +3. **Integration**: Excellent tool support + - VS Code Git integration works out of the box + - Git CLI available for advanced operations + - Compatible with all Git clients + +4. **Reliability**: Battle-tested + - Used by millions of developers worldwide + - Robust error handling + - Comprehensive documentation + +5. 
**Auto-Commit Script**: Already implemented and tested + - `auto_commit_final.py` works perfectly + - Tested with multiple commits + - Includes error handling and credential fallback + +#### ⚠️ **Disadvantages** +1. **External Dependency**: Requires Git installation + - Already resolved (Git 2.49.0 in PATH) + - No longer an issue + +### Go-Git Implementation + +#### ✅ **Advantages** +1. **No External Dependencies**: Built into Gitea +2. **Simplified Deployment**: One less component to manage +3. **Consistent Environment**: Same implementation everywhere + +#### ❌ **Disadvantages** +1. **Performance Issues**: Not optimized for large repos + - Would need to scan all 42,417 files on each commit + - SHA1 calculation for each file is CPU-intensive + - API calls for each file would be extremely slow + +2. **Limited Features**: Missing advanced Git capabilities + - SHA256 support disabled (warning in Gitea) + - Limited to basic Git operations + - No mature CLI interface + +3. **Complex Implementation**: API-based approach is cumbersome + - Need to track entire repository state + - Complex error handling + - Would require significant development time + +4. **Tooling Limitations**: Poor VS Code integration + - VS Code expects standard Git + - Limited debugging capabilities + - Fewer community resources + +## Performance Analysis + +### Repository Statistics +- **Total Files**: 42,417 +- **Repository Size**: 2.6 GB +- **Initial Commit Time**: ~1 minute (with standard Git) +- **Subsequent Commits**: Seconds (delta compression) + +### Go-Git Performance Estimate +- **File Scanning**: ~76,317 file checks (including subdirectories) +- **SHA1 Calculation**: 2.6 GB of data to hash +- **API Calls**: Potentially thousands of requests +- **Estimated Time**: 5-10 minutes per commit vs seconds with standard Git + +## Implementation Status + +### ✅ **Standard Git (Current) - COMPLETE** +1. ✅ Git installed and in PATH (version 2.49.0) +2. ✅ Repository initialized and configured +3. 
✅ All files committed (42,417 files) +4. ✅ Pushed to Gitea successfully +5. ✅ Auto-commit script created and tested +6. ✅ Documentation created + +### ⚠️ **Go-Git (Alternative) - PARTIAL** +1. ⚠️ Basic API client created +2. ❌ Performance issues with large repository +3. ❌ Complex state management required +4. ❌ Not tested at scale +5. ❌ Would require significant rework + +## Migration Considerations + +### If Switching to Go-Git: +1. **Performance Impact**: Commit times would increase from seconds to minutes +2. **Development Time**: 2-3 days to implement robust solution +3. **Maintenance**: More complex code to maintain +4. **User Experience**: Slower development workflow + +### Benefits of Staying with Standard Git: +1. **Immediate Productivity**: System is already working +2. **Future Flexibility**: Can use any Git tool or service +3. **Team Collaboration**: Standard workflow familiar to all developers +4. **Scalability**: Handles repository growth efficiently + +## Technical Details + +### Standard Git Auto-Commit (`auto_commit_final.py`) +```python +# Key features: +# - Uses `git status` for efficient change detection +# - Leverages Git's built-in delta compression +# - Handles credentials gracefully +# - Works with any Git repository +# - Tested and proven +``` + +### Go-Git Auto-Commit (`auto_commit_gogit.py`) +```python +# Key limitations: +# - Must scan all files manually +# - Calculates SHA1 for each file +# - Makes multiple API calls +# - Complex error handling +# - Untested at scale +``` + +## Recommendation Rationale + +1. **"If it ain't broke, don't fix it"**: The current system works perfectly +2. **Performance Matters**: Developers need fast commit/push cycles +3. **Ecosystem Support**: Standard Git has better tooling +4. **Future Proofing**: SHA256 support will be important +5. **Maintenance Simplicity**: Less custom code to maintain + +## Conclusion + +**Stay with Standard Git** for the LightRAG project. 
The investment in getting Git working has already paid off, and the system is now fully functional with: + +1. ✅ **Working auto-commit** for major changes +2. ✅ **Clickable document downloads** in search results +3. ✅ **Complete version control** via Gitea +4. ✅ **Comprehensive documentation** for maintenance +5. ✅ **Tested workflow** that developers can use immediately + +The Go-Git approach, while interesting from an architectural perspective, offers no practical benefits for this project and would introduce significant performance and complexity issues. + +## Next Steps + +1. **Continue using** `python auto_commit_final.py "Description of changes"` +2. **Monitor performance** of Git operations +3. **Consider Git LFS** if binary files become an issue +4. **Explore Git hooks** for automated quality checks +5. **Document best practices** for team collaboration + +The current implementation meets all requirements and provides a solid foundation for the project's version control needs. \ No newline at end of file diff --git a/auto_commit_gogit.py b/auto_commit_gogit.py new file mode 100644 index 00000000..d977698f --- /dev/null +++ b/auto_commit_gogit.py @@ -0,0 +1,262 @@ +#!/usr/bin/env python3 +""" +Go-Git Auto-Commit Script for LightRAG project. +Uses Gitea API directly instead of external Git. 
# Body of auto_commit_gogit.py in normal Python layout (the patch text had
# collapsed the whole script onto a handful of physical lines).
import base64
import hashlib
import json
import os
import sys
from datetime import datetime
from pathlib import Path

try:
    import requests
except ImportError:
    # Hashing and directory scanning need no HTTP support; the client
    # constructor reports the missing dependency when it is actually needed.
    requests = None


class GiteaAPIError(Exception):
    """Raised when the Gitea API answers with an unexpected status code."""


class GoGitAutoCommit:
    """Detect local file changes and commit them through the Gitea REST API.

    The client authenticates with HTTP basic auth on a shared
    ``requests.Session`` and always works against the ``master`` branch.
    Only added and modified files are detected; files deleted locally are
    not reported.
    """

    # Files are hashed in pieces of this size so large files are never held
    # in memory as a whole.
    _HASH_CHUNK_SIZE = 1024 * 1024

    # Path components that mark version-control metadata to be skipped.
    _IGNORED_PARTS = frozenset({".git"})

    def __init__(self, gitea_url, username, password, repo_owner, repo_name):
        """Store the connection settings and open an authenticated session.

        Args:
            gitea_url: Base URL of the Gitea server; a trailing slash is removed.
            username: Account used for basic authentication.
            password: Password for ``username``.
            repo_owner: Owner (user or organisation) of the repository.
            repo_name: Name of the repository.

        Raises:
            RuntimeError: If the ``requests`` package is not installed.
        """
        if requests is None:
            raise RuntimeError(
                "The 'requests' package is required to talk to the Gitea API"
            )
        self.gitea_url = gitea_url.rstrip('/')
        self.username = username
        self.password = password
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        self.session = requests.Session()
        self.session.auth = (username, password)

    @staticmethod
    def _blob_header(size):
        """Return the ``blob <size>\\0`` prefix git hashes in front of file data."""
        return f"blob {size}\0".encode("ascii")

    @classmethod
    def _is_ignored(cls, relative_path):
        """Return True when a path component is exactly an ignored name.

        Whole components are compared, so ``.gitignore`` or ``.github/`` are
        not mistaken for the ``.git`` directory.
        """
        return any(part in cls._IGNORED_PARTS for part in Path(relative_path).parts)

    def get_auth_token(self):
        """Return an API access token, creating one when none is usable.

        Returns:
            The token secret as a string.

        Raises:
            GiteaAPIError: If the server refuses to create a token.
        """
        tokens_url = f"{self.gitea_url}/api/v1/users/{self.username}/tokens"
        response = self.session.get(tokens_url)

        if response.status_code == 200:
            for token in response.json() or []:
                # NOTE(review): Gitea appears to reveal the secret only in the
                # creation response, so listed tokens may carry an empty
                # 'sha1' - confirm. Only a non-empty value is usable.
                secret = token.get('sha1')
                if secret:
                    return secret

        token_data = {
            "name": f"auto-commit-{datetime.now().strftime('%Y%m%d')}",
            "scopes": ["write:repository", "read:repository"]
        }

        response = self.session.post(tokens_url, json=token_data)
        if response.status_code == 201:
            return response.json()['sha1']
        raise GiteaAPIError(f"Failed to create token: {response.text}")

    def calculate_file_hash(self, file_path):
        """Compute the git blob id of a file.

        Git does not hash the raw bytes: the id is the SHA-1 of the header
        ``blob <size>\\0`` followed by the content. Using the same scheme is
        what makes the result comparable with the ``sha`` values in a tree
        listing.

        Args:
            file_path: Path of the file to hash.

        Returns:
            A ``(hex_sha1, size_in_bytes)`` tuple.
        """
        with open(file_path, 'rb') as f:
            size = os.fstat(f.fileno()).st_size
            digest = hashlib.sha1(self._blob_header(size))
            for chunk in iter(lambda: f.read(self._HASH_CHUNK_SIZE), b""):
                digest.update(chunk)
        return digest.hexdigest(), size

    def create_file_content(self, file_path, relative_path):
        """Build a dict describing a file: path, blob id, size and base64 body.

        Args:
            file_path: Path of the file on disk.
            relative_path: Path of the file inside the repository.

        Returns:
            A dict with the keys ``path``, ``sha``, ``size``, ``content``
            (base64 text) and ``encoding`` (always ``"base64"``).
        """
        # Read once and derive hash, size and payload from the same bytes so
        # the three values can never disagree.
        with open(file_path, 'rb') as f:
            content = f.read()

        size = len(content)
        sha1 = hashlib.sha1(self._blob_header(size) + content).hexdigest()

        return {
            "path": relative_path,
            "sha": sha1,
            "size": size,
            "content": base64.b64encode(content).decode('utf-8'),
            "encoding": "base64"
        }

    def get_repo_tree(self, ref="master"):
        """Fetch the repository tree for ``ref``, including nested entries.

        Args:
            ref: Branch, tag or commit to list. Defaults to ``"master"``.

        Returns:
            The decoded JSON tree on HTTP 200; otherwise an empty placeholder
            ``{"tree": [], "sha": None}`` (the repository may be empty).
        """
        url = f"{self.gitea_url}/api/v1/repos/{self.repo_owner}/{self.repo_name}/git/trees/{ref}"
        # Without the recursive flag only top-level entries are listed and
        # every file in a subdirectory would look new.
        # NOTE(review): large trees may come back paginated/truncated -
        # confirm whether the 'truncated' field needs handling for this repo.
        response = self.session.get(url, params={"recursive": "true"})

        if response.status_code == 200:
            return response.json()
        # Any other status is treated like an empty repository.
        return {"tree": [], "sha": None}

    def find_changed_files(self, base_dir="."):
        """List local files that are new or differ from the remote tree.

        Args:
            base_dir: Directory to scan recursively.

        Returns:
            A list of ``(change_type, relative_path, file_path)`` tuples in
            sorted path order. ``change_type`` is ``"added"`` or
            ``"modified"``, ``relative_path`` uses forward slashes and
            ``file_path`` is the ``Path`` on disk.
        """
        base_path = Path(base_dir)
        current_tree = self.get_repo_tree()
        current_files = {item['path']: item['sha'] for item in current_tree.get('tree', [])}

        changed_files = []
        for file_path in sorted(base_path.rglob('*')):
            if not file_path.is_file():
                continue

            relative = file_path.relative_to(base_path)
            if self._is_ignored(relative):
                continue

            # Tree listings use '/' separators on every platform.
            relative_path = relative.as_posix()

            if relative_path not in current_files:
                # Unknown to the server: no need to hash it to know it is new.
                changed_files.append(("added", relative_path, file_path))
                continue

            current_sha1, _ = self.calculate_file_hash(file_path)
            if current_sha1 != current_files[relative_path]:
                changed_files.append(("modified", relative_path, file_path))

        return changed_files

    def create_commit(self, message, changed_files, base_dir="."):
        """Create a commit on ``master`` from the given changes.

        The method builds a tree from the changed files, creates a commit
        pointing at it and then moves (or creates) ``refs/heads/master``.

        NOTE(review): this is the GitHub-style "git data" flow. Nothing here
        uploads the blob contents, and Gitea's documented API appears to
        expose ``git/trees`` and ``git/commits`` as read-only; the multi-file
        ``POST /repos/{owner}/{repo}/contents`` endpoint is probably the
        supported route. Confirm against the target server before relying on
        this method.

        Args:
            message: Commit message.
            changed_files: Tuples as returned by ``find_changed_files``.
            base_dir: Unused; kept for interface compatibility.

        Returns:
            The SHA of the new commit.

        Raises:
            ValueError: If ``changed_files`` holds no added/modified entry.
            GiteaAPIError: If any API call fails.
        """
        repo_api = f"{self.gitea_url}/api/v1/repos/{self.repo_owner}/{self.repo_name}"
        ref_url = f"{repo_api}/git/refs/heads/master"
        response = self.session.get(ref_url)

        if response.status_code == 404:
            # Branch doesn't exist yet (empty repo)
            parent_sha = None
        elif response.status_code == 200:
            payload = response.json()
            if isinstance(payload, list):
                # A ref lookup may answer with a list of matching refs.
                payload = next(
                    (ref for ref in payload if ref.get('ref') == 'refs/heads/master'),
                    None,
                )
            parent_sha = payload['object']['sha'] if payload else None
        else:
            raise GiteaAPIError(f"Failed to get ref: {response.text}")

        tree_items = []
        for change_type, relative_path, file_path in changed_files:
            if change_type not in ("added", "modified"):
                continue
            # Only the blob id is needed here, so the file is hashed in
            # chunks instead of being loaded and base64-encoded.
            blob_sha, _ = self.calculate_file_hash(file_path)
            tree_items.append({
                "path": relative_path,
                "mode": "100644",  # Regular file
                "type": "blob",
                "sha": blob_sha
            })

        if not tree_items:
            raise ValueError("No added or modified files to commit")

        tree_data = {"tree": tree_items}
        if parent_sha:
            # Omitted for the first commit rather than sent as null.
            tree_data["base_tree"] = parent_sha

        response = self.session.post(f"{repo_api}/git/trees", json=tree_data)
        if response.status_code != 201:
            raise GiteaAPIError(f"Failed to create tree: {response.text}")
        tree_sha = response.json()['sha']

        commit_data = {
            "message": message,
            "tree": tree_sha,
            "parents": [parent_sha] if parent_sha else []
        }
        response = self.session.post(f"{repo_api}/git/commits", json=commit_data)
        if response.status_code != 201:
            raise GiteaAPIError(f"Failed to create commit: {response.text}")
        commit_sha = response.json()['sha']

        # Fast-forward the branch; fall back to creating it when that fails.
        response = self.session.patch(ref_url, json={"sha": commit_sha, "force": False})
        if response.status_code != 200:
            response = self.session.post(
                f"{repo_api}/git/refs",
                json={"ref": "refs/heads/master", "sha": commit_sha},
            )
            if response.status_code != 201:
                raise GiteaAPIError(f"Failed to update ref: {response.text}")

        return commit_sha

    def auto_commit(self, message=None, base_dir="."):
        """Scan ``base_dir`` and commit whatever changed, reporting on stdout.

        Args:
            message: Commit message; a timestamped default is used when empty.
            base_dir: Directory to scan for changes.

        Returns:
            True when there was nothing to commit or the commit succeeded,
            False when creating the commit failed.
        """
        if not message:
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            message = f"Go-Git Auto-Commit: {timestamp}"

        print(f"Go-Git Auto-Commit starting with message: {message}")
        print("=" * 60)

        print("1. Scanning for changed files...")
        changed_files = self.find_changed_files(base_dir)

        if not changed_files:
            print("No changes detected.")
            return True

        print(f"Found {len(changed_files)} changed files:")
        for change_type, relative_path, _ in changed_files[:10]:  # Show first 10
            print(f"  {change_type}: {relative_path}")
        if len(changed_files) > 10:
            print(f"  ... and {len(changed_files) - 10} more")

        print(f"\n2. Creating commit: '{message}'")
        try:
            commit_sha = self.create_commit(message, changed_files, base_dir)
        except Exception as e:
            # Top-level boundary of the script: report and signal failure.
            print(f"Error creating commit: {e}")
            return False

        print(f"Commit created successfully: {commit_sha}")
        commit_url = f"{self.gitea_url}/{self.repo_owner}/{self.repo_name}/commit/{commit_sha}"
        print(f"Commit URL: {commit_url}")
        return True


def main():
    """Command-line entry point: commit local changes with an optional message.

    Settings come from the environment: ``GITEA_URL``, ``GITEA_USERNAME``,
    ``GITEA_REPO_OWNER`` and ``GITEA_REPO_NAME`` are optional overrides,
    ``GITEA_PASSWORD`` is required. The process exits with 0 on success,
    1 when the commit fails and 2 when the password is missing.
    """
    gitea_url = os.environ.get("GITEA_URL", "https://git.mtrcompute.com")
    username = os.environ.get("GITEA_USERNAME", "jleu3482")
    repo_owner = os.environ.get("GITEA_REPO_OWNER", "jleu3482")
    repo_name = os.environ.get("GITEA_REPO_NAME", "railseek6")

    # Secrets must not live in source control. The password that used to be
    # hard-coded here should be treated as leaked and rotated.
    password = os.environ.get("GITEA_PASSWORD")
    if not password:
        print("GITEA_PASSWORD environment variable is not set.", file=sys.stderr)
        sys.exit(2)

    # Get commit message from command line
    if len(sys.argv) > 1:
        message = sys.argv[1]
    else:
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        message = f"Go-Git Auto-Commit: {timestamp}"

    print("Go-Git Auto-Commit Script for LightRAG")
    print("=" * 60)

    gogit = GoGitAutoCommit(gitea_url, username, password, repo_owner, repo_name)
    success = gogit.auto_commit(message)

    print("\n" + "=" * 60)
    if success:
        print("Go-Git auto-commit completed successfully!")
        sys.exit(0)
    else:
        print("Go-Git auto-commit failed!")
        sys.exit(1)


if __name__ == "__main__":
    main()