# Files
# railseek6/git_setup.py
#
# 471 lines
# 16 KiB
# Python

#!/usr/bin/env python3
"""
Git setup script for LightRAG project with Gitea integration.
This script sets up a Git repository on git.mtrcompute.com and implements
auto-commit functionality for major changes.
"""
import os
import sys
import json
import requests
import subprocess
import time
from pathlib import Path
from datetime import datetime
from typing import Optional, List, Dict, Any
# Configure GitPython through environment variables before `import git` below;
# the assignments are placed ahead of the import so they are already set when
# the package is loaded.
# Windows-specific location of git.exe (presumably the one bundled with a local
# Gitea install - verify on the target machine).
git_exe_path = r"C:\Program Files\gitea\git.exe"
if os.path.exists(git_exe_path):
# NOTE(review): indentation is missing in this copy of the file, so it cannot be
# told from here whether only the first or both of the next two assignments
# belong to the `if` body - confirm against the original before re-indenting.
os.environ['GIT_PYTHON_GIT_EXECUTABLE'] = git_exe_path
# presumably keeps GitPython quiet if its import-time git lookup fails - TODO confirm
os.environ['GIT_PYTHON_REFRESH'] = 'quiet'
import git
class GiteaManager:
    """Small client for the Gitea v1 REST endpoints that manage a user's repositories.

    Every request goes through one ``requests.Session`` configured with HTTP
    basic auth for ``username``/``password`` and is bounded by ``timeout`` so
    an unresponsive server cannot hang the script indefinitely.
    """

    def __init__(self, base_url: str, username: str, password: str, timeout: float = 30.0):
        """Store the connection settings and open an authenticated session.

        Args:
            base_url: Root URL of the Gitea instance; a trailing ``/`` is stripped.
            username: Account used for basic auth and as the repository owner
                in ``/repos/{owner}/{repo}`` URLs.
            password: Password for basic auth.
            timeout: Seconds passed as ``timeout=`` to every HTTP request.
        """
        self.base_url = base_url.rstrip('/')
        self.username = username
        self.password = password
        self.auth = (username, password)
        self.timeout = timeout
        self.session = requests.Session()
        self.session.auth = self.auth

    def create_repository(self, repo_name: str, description: str = "", private: bool = False) -> Dict[str, Any]:
        """Create a repository owned by the authenticated user.

        Args:
            repo_name: Name of the new repository.
            description: Free-text description sent to the API.
            private: Whether the repository is created private.

        Returns:
            The decoded JSON body of the API response.

        Raises:
            requests.exceptions.RequestException: On connection errors,
                timeouts, or a non-2xx status; details are printed first and
                the exception is re-raised unchanged.
        """
        url = f"{self.base_url}/api/v1/user/repos"
        # auto_init is disabled so the first push from the local repository
        # does not collide with server-generated commits.
        # NOTE(review): the template fields below are presumably only honoured
        # by Gitea when auto_init is true - confirm against the API docs.
        data = {
            "name": repo_name,
            "description": description,
            "private": private,
            "auto_init": False,
            "gitignores": "Python",
            "license": "mit",
            "readme": "Default",
        }
        try:
            response = self.session.post(url, json=data, timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error creating repository: {e}")
            # Not every RequestException carries a response (e.g. timeouts).
            if getattr(e, 'response', None) is not None:
                print(f"Response: {e.response.text}")
            raise

    def get_repository(self, repo_name: str) -> Optional[Dict[str, Any]]:
        """Fetch metadata for ``{username}/{repo_name}``.

        Returns:
            The decoded JSON body on HTTP 200; ``None`` for any other status
            or when the request itself fails (the error is printed).
        """
        url = f"{self.base_url}/api/v1/repos/{self.username}/{repo_name}"
        try:
            response = self.session.get(url, timeout=self.timeout)
        except requests.exceptions.RequestException as e:
            print(f"Error getting repository: {e}")
            return None
        if response.status_code == 200:
            return response.json()
        return None

    def delete_repository(self, repo_name: str) -> bool:
        """Delete ``{username}/{repo_name}``.

        Returns:
            ``True`` only when the server answers HTTP 204; ``False`` for any
            other status or when the request fails (the error is printed).
        """
        url = f"{self.base_url}/api/v1/repos/{self.username}/{repo_name}"
        try:
            response = self.session.delete(url, timeout=self.timeout)
        except requests.exceptions.RequestException as e:
            print(f"Error deleting repository: {e}")
            return False
        return response.status_code == 204
class GitAutoCommit:
    """Commit-and-push helper around a local repository opened with GitPython.

    On construction the repository at ``repo_path`` is opened, or initialised
    if the directory is not a git repository yet. For ``https://`` remotes the
    credentials are embedded in the URL that is registered as ``origin``.

    NOTE(security): the credential-bearing URL is handed to git as the remote
    URL, so it is presumably persisted in the repository's config in clear
    text - prefer a credential helper or an access token where possible.
    """

    # Written to <repo_path>/.gitignore when a repository is first initialised
    # and no .gitignore exists yet.
    _DEFAULT_GITIGNORE = """# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Virtual Environment
venv/
env/
ENV/
env.bak/
venv.bak/
# IDE
.vscode/
.idea/
*.swp
*.swo
*~
# OS
.DS_Store
Thumbs.db
# LightRAG specific
rag_storage/
inputs/__enqueued__/
extracted_images/
extracted_images_test/
openclip_env/
openclip_gpu_env/
*.log
stderr.txt
stdout.txt
"""

    def __init__(self, repo_path: str, remote_url: str, username: str, password: str):
        """Remember the settings and open (or create) the repository.

        Args:
            repo_path: Directory of the working tree.
            remote_url: Clone URL of the remote; may be empty/``None`` for a
                local-only repository.
            username: Account embedded in ``https://`` remote URLs.
            password: Password embedded in ``https://`` remote URLs.
        """
        self.repo_path = Path(repo_path)
        self.remote_url = remote_url
        self.username = username
        self.password = password
        self.auth_remote_url = self._build_auth_url(remote_url, username, password)
        self.repo = None
        self.initialize_repo()

    @staticmethod
    def _build_auth_url(remote_url, username, password):
        """Return ``remote_url`` with credentials embedded when it is an https URL.

        Empty/``None`` URLs and non-https URLs are returned unchanged. The
        credentials are percent-encoded so characters such as ``@``, ``:`` or
        ``/`` cannot corrupt the authority part of the URL.
        """
        from urllib.parse import quote

        if not remote_url:
            return remote_url
        scheme = "https://"
        if not remote_url.startswith(scheme):
            return remote_url
        user = quote(username, safe="")
        secret = quote(password, safe="")
        return f"{scheme}{user}:{secret}@{remote_url[len(scheme):]}"

    def initialize_repo(self):
        """Open the repository at ``repo_path``, initialising it if necessary.

        A freshly initialised repository gets a default ``.gitignore`` (an
        existing one is left untouched) and, when a remote URL was given, an
        ``origin`` remote. Failure to add the remote is only reported.
        """
        try:
            self.repo = git.Repo(self.repo_path)
            print(f"Opened existing repository at {self.repo_path}")
            return
        except git.exc.InvalidGitRepositoryError:
            print(f"Initializing new repository at {self.repo_path}")

        self.repo = git.Repo.init(self.repo_path)

        # Create initial .gitignore, but never clobber one the project already has.
        gitignore_path = self.repo_path / ".gitignore"
        if not gitignore_path.exists():
            gitignore_path.write_text(self._DEFAULT_GITIGNORE, encoding='utf-8')

        # Add remote
        if self.remote_url:
            try:
                self.repo.create_remote('origin', self.auth_remote_url)
                print(f"Added remote origin: {self.remote_url}")
            except Exception as e:
                print(f"Warning: Could not add remote: {e}")

    def setup_remote(self):
        """Make sure ``origin`` exists and points at the authenticated URL.

        Returns:
            ``True`` on success; ``False`` when no remote URL was provided or
            the remote could not be created/updated (the error is printed).
        """
        if not self.remote_url:
            print("No remote URL provided")
            return False
        try:
            if 'origin' in self.repo.remotes:
                origin = self.repo.remotes.origin
                if origin.url != self.auth_remote_url:
                    origin.set_url(self.auth_remote_url)
                    # Only the credential-free URL is ever printed.
                    print(f"Updated remote origin URL to: {self.remote_url}")
            else:
                self.repo.create_remote('origin', self.auth_remote_url)
                print(f"Created remote origin: {self.remote_url}")
            return True
        except Exception as e:
            print(f"Error setting up remote: {e}")
            return False

    def _has_staged_changes(self) -> bool:
        """Tell whether the index differs from what HEAD points at.

        In a repository without any commit there is no HEAD to diff against,
        so any entry in the index counts as a staged change.
        """
        if self.repo.head.is_valid():
            return bool(self.repo.index.diff("HEAD"))
        return bool(self.repo.index.entries)

    def commit_changes(self, message: str, files: Optional[List[str]] = None):
        """Stage and commit changes.

        Args:
            message: Commit message.
            files: Paths relative to ``repo_path`` to stage; missing files are
                skipped with a warning. When empty/``None`` everything is
                staged (``git add -A``).

        Returns:
            ``True`` when a commit was created; ``False`` when nothing was
            staged or an error occurred (the error is printed).
        """
        try:
            if files:
                for file in files:
                    file_path = self.repo_path / file
                    if file_path.exists():
                        self.repo.index.add([str(file_path)])
                    else:
                        print(f"Warning: File {file} does not exist")
            else:
                # Stage all changes
                self.repo.git.add(A=True)

            # Only staged content can be committed; untracked files that were
            # not staged must not trigger an (empty) commit.
            if not self._has_staged_changes():
                print("No changes to commit")
                return False

            self.repo.index.commit(message)
            print(f"Committed: {message}")
            return True
        except Exception as e:
            print(f"Error committing changes: {e}")
            return False

    def push_changes(self, branch: str = "main"):
        """Push the local branch ``branch`` to ``origin`` and set it as upstream.

        NOTE(review): the local branch must actually be called ``branch``; a
        repository initialised with a different default branch name will
        presumably need that name passed explicitly - confirm.

        Returns:
            ``True`` on success; ``False`` when ``origin`` is missing or the
            push fails (the error is printed).
        """
        try:
            if 'origin' not in self.repo.remotes:
                print("No remote 'origin' configured")
                return False
            # --set-upstream is harmless when the upstream is already set.
            self.repo.git.push('--set-upstream', 'origin', branch)
            print(f"Pushed changes to origin/{branch}")
            return True
        except Exception as e:
            print(f"Error pushing changes: {e}")
            return False

    def auto_commit_major_changes(self, change_description: str):
        """Commit everything with a timestamped message and push it.

        Returns:
            ``True`` only when both the commit and the push succeeded.
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        message = f"{change_description}\n\nTimestamp: {timestamp}\nAuto-commit by GitAutoCommit"
        if not self.commit_changes(message):
            return False
        if self.push_changes():
            print(f"Successfully auto-committed and pushed: {change_description}")
            return True
        print(f"Committed but failed to push: {change_description}")
        return False

    def get_status(self):
        """Summarise the repository state.

        Returns:
            A dict with ``branch``, ``dirty``, ``untracked_files`` (a count),
            ``ahead`` (local commits missing on ``origin/<branch>``) and
            ``behind`` (remote commits missing locally), or ``None`` when any
            of these cannot be determined (the error is printed).
        """
        try:
            branch = str(self.repo.active_branch)
            remote_branch = f"origin/{branch}"
            # "A..B" selects commits reachable from B but not from A.
            return {
                'branch': branch,
                'dirty': self.repo.is_dirty(),
                'untracked_files': len(self.repo.untracked_files),
                'ahead': sum(1 for _ in self.repo.iter_commits(f"{remote_branch}..{branch}")),
                'behind': sum(1 for _ in self.repo.iter_commits(f"{branch}..{remote_branch}")),
            }
        except Exception as e:
            print(f"Error getting status: {e}")
            return None
def _build_helper_script() -> str:
    """Return the source text of the generated ``git_auto_commit.py`` helper.

    The template is a plain (non-f) string on purpose: the ``{config[...]}``
    placeholders belong to an f-string inside the helper and must be evaluated
    when the helper runs, and no backslash escapes are used so the emitted
    code stays valid Python.
    """
    return '''#!/usr/bin/env python3

"""
Auto-commit helper for LightRAG project.
Usage: python git_auto_commit.py "Description of changes"
"""
import sys
import json
import os
from pathlib import Path


def auto_commit(description):
    """Perform auto-commit with given description"""
    config_path = Path(__file__).parent / 'git_auto_commit.json'
    if not config_path.exists():
        print("Error: git_auto_commit.json not found")
        return False
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)
    # Import git manager
    sys.path.insert(0, str(Path(__file__).parent))
    try:
        from git_setup import GitAutoCommit
    except ImportError:
        print("Error: Could not import GitAutoCommit from git_setup.py")
        return False
    # Initialize git manager
    git_manager = GitAutoCommit(
        config['project_path'],
        f"{config['gitea_url']}/{config['username']}/{config['repository']}.git",
        config['username'],
        os.environ.get('GITEA_PASSWORD', '')  # Password is taken from the environment, never stored here
    )
    # Perform auto-commit
    return git_manager.auto_commit_major_changes(description)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print('Usage: python git_auto_commit.py "Description of changes"')
        sys.exit(1)
    description = sys.argv[1]
    if auto_commit(description):
        print("Auto-commit completed successfully")
        sys.exit(0)
    else:
        print("Auto-commit failed")
        sys.exit(1)
'''


def setup_gitea_repository():
    """Create the Gitea repository, wire up the local repository and commit.

    Steps performed, in order:
      1. Look up the repository on Gitea and create it if it does not exist;
         on failure continue with a local-only repository.
      2. Open or initialise the local repository in this file's directory.
      3. Point ``origin`` at the Gitea repository and make the initial commit,
         pushing it when a remote is configured.
      4. Write ``git_auto_commit.json`` next to this file.
      5. Write the ``git_auto_commit.py`` helper script next to this file.
      6. Print the repository status.

    The account can be overridden with the ``GITEA_USERNAME`` and
    ``GITEA_PASSWORD`` environment variables.

    Returns:
        The ``GitAutoCommit`` instance managing the local repository.
    """
    # Configuration
    GITEA_URL = "https://git.mtrcompute.com"
    # NOTE(security): the literal fallbacks keep existing setups working, but a
    # password committed to source control should be rotated and supplied via
    # GITEA_PASSWORD instead.
    USERNAME = os.environ.get("GITEA_USERNAME", "jleu3482")
    PASSWORD = os.environ.get("GITEA_PASSWORD", "jleu1212")
    REPO_NAME = "lightrag-project"
    REPO_DESCRIPTION = "LightRAG - GPU-accelerated RAG system with OCR and image classification"
    PROJECT_PATH = os.path.dirname(os.path.abspath(__file__))

    print("=" * 60)
    print("Gitea Repository Setup for LightRAG Project")
    print("=" * 60)
    print(f"Gitea URL: {GITEA_URL}")
    print(f"Username: {USERNAME}")
    print(f"Project Path: {PROJECT_PATH}")
    print(f"Repository Name: {REPO_NAME}")
    print()

    # Step 1: Create repository on Gitea (or reuse the existing one)
    print("Step 1: Creating repository on Gitea...")
    gitea = GiteaManager(GITEA_URL, USERNAME, PASSWORD)
    existing_repo = gitea.get_repository(REPO_NAME)
    if existing_repo:
        print(f"Repository already exists: {existing_repo['html_url']}")
        repo_url = existing_repo['clone_url']
    else:
        try:
            repo = gitea.create_repository(REPO_NAME, REPO_DESCRIPTION, private=False)
            print(f"Repository created successfully: {repo['html_url']}")
            repo_url = repo['clone_url']
        except Exception as e:
            print(f"Failed to create repository: {e}")
            print("Continuing with local repository only...")
            repo_url = None

    # Step 2: Initialize local git repository with auto-commit
    print("\nStep 2: Initializing local git repository...")
    # An empty string (not None) is passed for the local-only case so the
    # URL handling in GitAutoCommit always receives a str.
    git_manager = GitAutoCommit(PROJECT_PATH, repo_url or "", USERNAME, PASSWORD)

    # Set up remote (no step banner is printed for this)
    if repo_url:
        if git_manager.setup_remote():
            print("Remote repository configured successfully")
        else:
            print("Warning: Could not configure remote repository")

    # Step 3: Initial commit
    print("\nStep 3: Making initial commit...")
    initial_message = "Initial commit: LightRAG project setup\n\nIncludes:\n- LightRAG main codebase\n- GPU-accelerated OCR pipeline\n- Image classification system\n- Web UI with document download functionality\n- API endpoints for search and document management"
    if git_manager.commit_changes(initial_message):
        print("Initial commit successful")
        # Push to remote if configured
        if repo_url and 'origin' in git_manager.repo.remotes:
            if git_manager.push_changes():
                print("Initial commit pushed to remote repository")
            else:
                print("Warning: Could not push initial commit to remote")
    else:
        print("No changes for initial commit (repository may already be initialized)")

    # Step 4: Create auto-commit configuration (the password is deliberately not stored)
    print("\nStep 4: Creating auto-commit configuration...")
    config = {
        'gitea_url': GITEA_URL,
        'username': USERNAME,
        'repository': REPO_NAME,
        'project_path': PROJECT_PATH,
        'auto_commit_enabled': True,
        'major_change_patterns': [
            '*.py',  # Python source changes
            '*.html',  # Web UI changes
            '*.js',  # JavaScript changes
            '*.md',  # Documentation changes
            'requirements.txt',  # Dependency changes
            '*.json',  # Configuration changes
        ]
    }
    config_path = os.path.join(PROJECT_PATH, 'git_auto_commit.json')
    with open(config_path, 'w', encoding='utf-8') as f:
        json.dump(config, f, indent=2)
    print(f"Auto-commit configuration saved to: {config_path}")

    # Step 5: Create helper script for auto-committing
    print("\nStep 5: Creating auto-commit helper script...")
    helper_path = os.path.join(PROJECT_PATH, 'git_auto_commit.py')
    with open(helper_path, 'w', encoding='utf-8') as f:
        f.write(_build_helper_script())
    print(f"Auto-commit helper script created: {helper_path}")

    # Step 6: Show repository status
    print("\nStep 6: Repository status:")
    status = git_manager.get_status()
    if status:
        print(f" Branch: {status['branch']}")
        print(f" Dirty: {status['dirty']}")
        print(f" Untracked files: {status['untracked_files']}")
        print(f" Ahead of remote: {status['ahead']}")
        print(f" Behind remote: {status['behind']}")

    print("\n" + "=" * 60)
    print("Setup completed successfully!")
    print("=" * 60)
    print("\nNext steps:")
    print("1. Use 'python git_auto_commit.py \"Description of changes\"' to commit major changes")
    print("2. The system will automatically commit and push changes to Gitea")
    # Derived from the configuration so it cannot drift from the real repository.
    print(f"3. Monitor your repository at: {GITEA_URL}/{USERNAME}/{REPO_NAME}")
    print("\nExample usage after making changes:")
    print(' python git_auto_commit.py "Added document download endpoint to API"')
    print(' python git_auto_commit.py "Updated web UI with clickable document references"')
    print(' python git_auto_commit.py "Fixed OCR processing pipeline for PDF files"')
    return git_manager
if __name__ == "__main__":
    # Script entry point: run the complete setup, then exercise the
    # auto-commit path once so a broken remote is noticed right away.
    # Any exception is reported with a traceback and exit status 1.
    try:
        git_manager = setup_gitea_repository()

        # Test auto-commit with current changes
        rule = "=" * 60
        print("\n" + rule)
        print("Testing auto-commit with current changes...")
        print(rule)

        test_message = (
            "Setup Git repository and auto-commit functionality\n\n"
            "Changes include:\n"
            "- Created git_setup.py for Gitea integration\n"
            "- Added document download endpoint to LightRAG API\n"
            "- Updated web UI with clickable document references\n"
            "- Implemented auto-commit system for major changes"
        )

        pushed = git_manager.auto_commit_major_changes(test_message)
        if pushed:
            outcome = (
                "\n✓ Auto-commit test successful!",
                "✓ Repository setup complete!",
                "✓ All changes committed and pushed to Gitea",
            )
        else:
            outcome = (
                "\n⚠ Auto-commit test completed with warnings",
                "⚠ Some changes may not have been pushed",
                "⚠ Check repository status manually",
            )
        for line in outcome:
            print(line)
    except Exception as e:
        print(f"\n✗ Setup failed with error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)