Files
railseek6/test_workflow.py

398 lines
13 KiB
Python

#!/usr/bin/env python3
"""
Test script for the complete workflow with clickable document references.
This tests:
1. Search API to get document references
2. Download endpoint functionality
3. Web UI integration
"""
import requests
import json
import sys
from pathlib import Path
def test_search_api():
    """Query the search API and collect the document references it returns.

    Posts a fixed test query to the local search endpoint and gathers the
    distinct ``metadata.source`` values found in the ``results`` list of the
    JSON response.

    Returns:
        A sorted list of the unique source references. The list is empty when
        the request fails, the server answers with a non-200 status, or no
        result carries a source.
    """
    print("Testing Search API...")
    url = "http://localhost:3015/api/search"
    # NOTE(review): the bearer token is hard-coded in source; consider moving
    # it to configuration or an environment variable.
    headers = {
        "Authorization": "Bearer jleu1212",
        "Content-Type": "application/json"
    }
    # Test query
    data = {
        "query": "test document",
        "top_k": 5,
        "mode": "default"
    }
    try:
        response = requests.post(url, headers=headers, json=data, timeout=10)
        if response.status_code != 200:
            print(f"✗ Search API failed: {response.status_code}")
            print(f" Response: {response.text}")
            return []

        result = response.json()
        # A null "results" value is treated like a missing one.
        results = result.get('results') or []
        print(f"✓ Search API successful")
        print(f" Found {len(results)} results")

        # Extract document references. A result whose metadata is missing or
        # null is skipped instead of aborting the whole extraction.
        references = set()
        for item in results:
            metadata = item.get('metadata') or {}
            source = metadata.get('source')
            if source:
                references.add(source)

        # Sort so that the printed sample and the first element used by
        # callers are reproducible from run to run (set order is arbitrary).
        ordered = sorted(references, key=str)
        print(f" Unique document references: {len(ordered)}")
        for ref in ordered[:5]:  # Show first 5
            print(f" - {ref}")
        return ordered
    except Exception as e:
        # Deliberate best-effort boundary: this is a smoke test, so any
        # failure is reported and turned into "no references".
        print(f"✗ Search API error: {e}")
        return []
def test_download_endpoint(filename):
    """Request a document from the download endpoint and save a small sample.

    Args:
        filename: Name (or source reference) of the document to download. It
            is percent-encoded before being placed in the URL.

    Returns:
        True when the endpoint answers with HTTP 200 and the first chunk of
        the body (up to 1024 bytes, possibly empty) was written to
        ``download_test/sample_<name>``; False on any other status or error.
    """
    from urllib.parse import quote

    print(f"\nTesting Download Endpoint for: {filename}")
    # Percent-encode the name so characters such as '#', '?' or '%' are sent
    # as part of the path instead of being parsed as fragment/query.
    url = f"http://localhost:3015/api/documents/download/{quote(str(filename))}"
    # NOTE(review): the bearer token is hard-coded in source.
    headers = {
        "Authorization": "Bearer jleu1212"
    }
    try:
        # The context manager releases the streamed connection even though
        # only the first chunk of the body is consumed.
        with requests.get(url, headers=headers, timeout=10, stream=True) as response:
            if response.status_code != 200:
                print(f"✗ Download endpoint failed: {response.status_code}")
                print(f" Response: {response.text[:200] if response.text else 'No response body'}")
                return False

            # Check content type
            content_type = response.headers.get('content-type', '')
            content_disposition = response.headers.get('content-disposition', '')
            print(f"✓ Download endpoint successful")
            print(f" Content-Type: {content_type}")
            print(f" Content-Disposition: {content_disposition}")
            print(f" Content-Length: {response.headers.get('content-length', 'unknown')}")

            # Read first 1024 bytes; an empty body yields an empty sample
            # instead of raising StopIteration.
            chunk = next(response.iter_content(chunk_size=1024), b"")

            # Save a small sample to verify. Only the final path component of
            # the reference is used so the sample always lands directly
            # inside download_test/, whatever separators the name contains.
            test_dir = Path("download_test")
            test_dir.mkdir(exist_ok=True)
            safe_name = Path(str(filename).replace("\\", "/")).name or "download"
            sample_path = test_dir / f"sample_{safe_name}"
            sample_path.write_bytes(chunk)
            print(f" Sample saved to: {sample_path}")
            print(f" Sample size: {sample_path.stat().st_size} bytes")
            return True
    except Exception as e:
        # Deliberate best-effort boundary for the smoke test.
        print(f"✗ Download endpoint error: {e}")
        return False
def test_webui_integration():
    """Check that the Web UI page contains the clickable-reference markers.

    Reads ``LightRAG-main/webui/index.html`` (relative to the current working
    directory) and looks for three things: the ``downloadDocument`` function,
    a download link pointing at the local download endpoint, and a references
    section or container.

    Returns:
        True only when the file could be read and all three checks passed;
        False when the file is missing, unreadable, or any marker is absent.
    """
    print("\nTesting Web UI Integration...")
    # Check if web UI index.html has been updated
    webui_path = Path("LightRAG-main/webui/index.html")
    if not webui_path.exists():
        print(f"✗ Web UI file not found: {webui_path}")
        return False
    try:
        content = webui_path.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as e:
        print(f"✗ Web UI check error: {e}")
        return False

    # Check for clickable links in the displaySearchResults function
    has_download_fn = 'downloadDocument' in content
    if has_download_fn:
        print("✓ downloadDocument function found in Web UI")
    else:
        print("✗ downloadDocument function not found in Web UI")

    has_links = 'href="http://localhost:3015/api/documents/download/' in content
    if has_links:
        print("✓ Clickable download links found in Web UI")
    else:
        print("✗ Clickable download links not found in Web UI")

    has_references = True
    if 'References Section' in content or 'references-section' in content:
        print("✓ References section found in Web UI")
    elif 'referencesContainer' in content or 'id="references"' in content:
        # Check for references container
        print("✓ References container found in Web UI")
    else:
        print("✗ References section not found in Web UI")
        has_references = False

    # Report failure when any marker is missing, so callers do not announce
    # success right after a "✗" line was printed.
    return has_download_fn and has_links and has_references
def test_complete_workflow():
    """Run the server, Web UI, search and download checks and print a summary.

    The server root is probed first; if it cannot be reached the remaining
    checks are skipped. Otherwise the Web UI check, the search API check and
    a download check are run. The download uses the first reference returned
    by the search, or ``ocr.pdf`` when the search returned none.

    Returns:
        True when the Web UI check passed, the search returned at least one
        reference and the download succeeded; False otherwise (including
        when the server is not reachable).
    """
    print("=" * 60)
    print("Testing Complete Workflow with Clickable Document References")
    print("=" * 60)

    # Check if server is running
    print("\n1. Checking if server is running...")
    try:
        response = requests.get("http://localhost:3015/", timeout=5)
        if response.status_code in [200, 307]:
            print("✓ Server is running on http://localhost:3015")
        else:
            print(f"✗ Server returned status: {response.status_code}")
            return False
    except Exception as e:
        print(f"✗ Cannot connect to server: {e}")
        print(" Make sure the server is running on port 3015")
        return False

    # Test Web UI updates
    webui_ok = test_webui_integration()

    # Test search API
    references = test_search_api()

    # Test download endpoint: prefer the first reference from the search,
    # fall back to a known file when the search returned nothing.
    test_file = references[0] if references else "ocr.pdf"
    download_ok = test_download_endpoint(test_file)

    # Summary
    print("\n" + "=" * 60)
    print("WORKFLOW TEST SUMMARY")
    print("=" * 60)

    # Every stage listed in the success message must actually have passed;
    # bool() keeps the return value a plain boolean rather than the list.
    all_tests_passed = bool(webui_ok and references and download_ok)
    if all_tests_passed:
        print("✓ All tests passed!")
        print("\nThe workflow is working correctly:")
        print("1. Web UI has been updated with clickable document references")
        print("2. Search API returns document metadata with source information")
        print("3. Download endpoint serves files correctly")
        print("\nUsers can now:")
        print("- Search for documents in the Web UI")
        print("- See clickable document references in search results")
        print("- Download original files by clicking the links")
    else:
        print("⚠ Some tests failed or had warnings")
        print("\nIssues found:")
        if not webui_ok:
            print("- Web UI may not have been updated with clickable links")
        if not references:
            print("- Search API may not be returning document references")
        if not download_ok:
            print("- Download endpoint may not be working")
        print("\nPossible solutions:")
        print("1. Restart the LightRAG server to pick up new endpoint")
        print("2. Check server logs for errors")
        print("3. Verify the download endpoint is registered in document_routes.py")
    return all_tests_passed
def create_gitea_repository_simple():
    """Create the project repository on the Gitea server via its REST API.

    Sends a create-repository request for ``lightrag-project``. On success
    (HTTP 201) a ``README.md`` with setup instructions is written to the
    current working directory. When the repository already exists (HTTP 409)
    the existing repository is looked up instead.

    Returns:
        The repository's clone URL as reported by the server, or None when
        the repository could not be created or looked up.
    """
    print("\n" + "=" * 60)
    print("Gitea Repository Setup (Simplified)")
    print("=" * 60)
    gitea_url = "https://git.mtrcompute.com"
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from the environment or a credentials store.
    username = "jleu3482"
    password = "jleu1212"
    repo_name = "lightrag-project"
    print(f"Creating repository: {repo_name}")
    print(f"URL: {gitea_url}")
    print(f"Username: {username}")

    # Create repository via API
    api_url = f"{gitea_url}/api/v1/user/repos"
    data = {
        "name": repo_name,
        "description": "LightRAG - GPU-accelerated RAG system with OCR and image classification",
        "private": False,
        "auto_init": True,
        "gitignores": "Python",
        "license": "mit",
        "readme": "Default"
    }
    try:
        response = requests.post(api_url, auth=(username, password), json=data, timeout=10)
        if response.status_code == 201:
            repo_info = response.json()
            clone_url = repo_info.get('clone_url')
            print(f"✓ Repository created successfully!")
            print(f" URL: {repo_info.get('html_url')}")
            print(f" Clone URL: {clone_url}")
            # Create README with setup instructions.
            # Literal braces in the template are doubled: the JSON example
            # and the "{filename}" URL placeholder are documentation text,
            # not interpolated values.
            # NOTE(review): the README embeds the API bearer token and the
            # repository is created as public.
            readme_content = f"""# LightRAG Project
GPU-accelerated RAG (Retrieval-Augmented Generation) system with OCR and image classification capabilities.
## Features
- **GPU-accelerated OCR**: Fast document processing with PaddleOCR
- **Image Classification**: OpenCLIP-based image analysis
- **Document Search**: Semantic search with vector embeddings
- **Web UI**: User-friendly interface with clickable document references
- **Auto-commit**: Automatic Git commits for major changes
## Setup
1. Clone the repository:
```bash
git clone {clone_url}
cd lightrag-project
```
2. Install dependencies:
```bash
pip install -r requirements.txt
```
3. Start the server:
```bash
python start_server_fixed.py
```
4. Access the Web UI:
- Open http://localhost:3015 in your browser
## Auto-commit System
The project includes an auto-commit system that automatically commits major changes:
- Use `python git_auto_commit.py "Description of changes"` to commit changes
- Changes are automatically pushed to this repository
## Recent Updates
- Added document download endpoint to API
- Updated Web UI with clickable document references
- Implemented auto-commit functionality for Git
- Fixed OCR processing pipeline for better performance
## API Documentation
### Search API
```
POST /api/search
Authorization: Bearer jleu1212
Content-Type: application/json
{{
"query": "search text",
"top_k": 5,
"mode": "default"
}}
```
### Document Download
```
GET /api/documents/download/{{filename}}
Authorization: Bearer jleu1212
```
Files are served from the `inputs/__enqueued__` directory.
"""
            # Save README locally (overwrites an existing README.md in the
            # current working directory).
            readme_path = Path("README.md")
            readme_path.write_text(readme_content, encoding='utf-8')
            print(f"✓ README.md created with setup instructions")
            return clone_url
        elif response.status_code == 409:
            print(f"⚠ Repository already exists")
            # Get existing repository info
            repo_url = f"{gitea_url}/api/v1/repos/{username}/{repo_name}"
            repo_response = requests.get(repo_url, auth=(username, password), timeout=10)
            if repo_response.status_code == 200:
                repo_info = repo_response.json()
                print(f" Existing repository: {repo_info.get('html_url')}")
                return repo_info.get('clone_url')
            return None
        else:
            print(f"✗ Failed to create repository: {response.status_code}")
            print(f" Response: {response.text[:200]}")
            return None
    except Exception as e:
        # Deliberate best-effort boundary: report and signal failure.
        print(f"✗ Error creating repository: {e}")
        return None
def main():
    """Run the workflow test, set up the Gitea repository, and report.

    When a clone URL is obtained, Git setup instructions are printed and
    written to ``GIT_SETUP_INSTRUCTIONS.txt`` in the current directory.

    Returns:
        The process exit code: 0 when the workflow test passed, 1 otherwise.
        The outcome of the repository setup does not affect the exit code.
    """
    # Test the complete workflow
    workflow_ok = test_complete_workflow()

    # Create Gitea repository
    print("\n" + "=" * 60)
    print("Setting up Gitea Repository")
    print("=" * 60)
    clone_url = create_gitea_repository_simple()
    if clone_url:
        # Derive the browser URL once. Only a trailing ".git" is stripped so
        # that a ".git" sequence elsewhere in the URL (for example inside a
        # host name) is left intact.
        web_url = clone_url[:-len('.git')] if clone_url.endswith('.git') else clone_url
        web_url = web_url.replace('git@', 'https://').replace('ssh://', 'https://')

        print(f"\n✓ Repository setup complete!")
        print(f" Clone URL: {clone_url}")
        print(f" Web URL: {web_url}")
        # Create simple auto-commit instructions
        instructions = f"""
## Next Steps for Git Setup:
1. Initialize local Git repository:
```
git init
git add .
git commit -m "Initial commit: LightRAG project with document download and auto-commit"
```
2. Add remote repository:
```
git remote add origin {clone_url}
```
3. Push to Gitea:
```
git push -u origin main
```
4. For auto-committing future changes:
```
python git_auto_commit.py "Description of changes made"
```
5. View your repository at:
{web_url}
"""
        print(instructions)
        # Save instructions to file
        with open("GIT_SETUP_INSTRUCTIONS.txt", "w", encoding='utf-8') as f:
            f.write(instructions)
        print("✓ Git setup instructions saved to GIT_SETUP_INSTRUCTIONS.txt")

    print("\n" + "=" * 60)
    if workflow_ok:
        print("✓ COMPLETE WORKFLOW TEST SUCCESSFUL!")
    else:
        print("⚠ WORKFLOW TEST COMPLETED WITH ISSUES")
    print("=" * 60)
    return 0 if workflow_ok else 1


if __name__ == "__main__":
    sys.exit(main())