398 lines
13 KiB
Python
398 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
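Test script for the complete workflow with clickable document references.

This tests:
1. Search API to get document references
2. Download endpoint functionality
3. Web UI integration

When run as a script it additionally tries to create a Gitea repository
through the Gitea API and may write README.md and
GIT_SETUP_INSTRUCTIONS.txt to the current working directory.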
|
|
"""
|
|
|
|
import requests
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
def test_search_api():
    """Query the search API and collect the document references it returns.

    Sends a POST with a fixed test query to the local ``/api/search``
    endpoint and gathers the ``metadata.source`` value of every result.

    Returns:
        list: Unique, non-empty ``source`` values in the order the results
        list them, so the caller's ``references[0]`` is the same on every
        run for the same response. Empty when the status is not 200 or when
        any error occurs.
    """
    print("Testing Search API...")

    url = "http://localhost:3015/api/search"
    # NOTE(review): the bearer token is hard-coded in this script.
    headers = {
        "Authorization": "Bearer jleu1212",
        "Content-Type": "application/json"
    }

    # Test query
    data = {
        "query": "test document",
        "top_k": 5,
        "mode": "default"
    }

    try:
        response = requests.post(url, headers=headers, json=data, timeout=10)
        if response.status_code != 200:
            print(f"✗ Search API failed: {response.status_code}")
            print(f" Response: {response.text}")
            return []

        results = response.json().get('results', [])
        print("✓ Search API successful")
        print(f" Found {len(results)} results")

        # Extract document references. A "metadata" key holding null must not
        # abort the whole extraction, hence the `or {}` fallback.
        sources = (
            (item.get('metadata') or {}).get('source')
            for item in results
        )
        # dict.fromkeys de-duplicates while keeping first-seen order; a set
        # would make the order (and the file picked for the download check)
        # vary from run to run.
        references = list(dict.fromkeys(src for src in sources if src))

        print(f" Unique document references: {len(references)}")
        for ref in references[:5]:  # Show first 5
            print(f" - {ref}")

        return references
    except Exception as e:
        # Deliberately broad: this is a diagnostic script, so connection
        # errors, bad JSON and unexpected payload shapes are all reported
        # as a failed check instead of crashing the run.
        print(f"✗ Search API error: {e}")
        return []
|
|
|
|
def test_download_endpoint(filename):
    """Check that the download endpoint serves *filename*.

    Requests the file from the local ``/api/documents/download/`` endpoint,
    prints the response headers and stores up to the first 1024 bytes of
    the body under ``download_test/`` as a sample.

    Args:
        filename: Document reference appended to the download URL.

    Returns:
        bool: True when the endpoint answered 200 and the sample was
        written, False on any other status or on any error.
    """
    print(f"\nTesting Download Endpoint for: {filename}")

    url = f"http://localhost:3015/api/documents/download/{filename}"
    # NOTE(review): the bearer token is hard-coded in this script.
    headers = {
        "Authorization": "Bearer jleu1212"
    }

    try:
        # With stream=True the connection stays checked out until the body
        # is consumed or the response is closed. Only the first chunk is
        # read here, so the context manager is what releases the connection.
        with requests.get(url, headers=headers, timeout=10, stream=True) as response:
            if response.status_code != 200:
                print(f"✗ Download endpoint failed: {response.status_code}")
                print(f" Response: {response.text[:200] if response.text else 'No response body'}")
                return False

            # Check content type
            content_type = response.headers.get('content-type', '')
            content_disposition = response.headers.get('content-disposition', '')

            print("✓ Download endpoint successful")
            print(f" Content-Type: {content_type}")
            print(f" Content-Disposition: {content_disposition}")
            print(f" Content-Length: {response.headers.get('content-length', 'unknown')}")

            # Save a small sample to verify
            test_dir = Path("download_test")
            test_dir.mkdir(exist_ok=True)

            # Use only the final path component: a reference that contains
            # directories would otherwise point into a folder that does not
            # exist below download_test/.
            sample_path = test_dir / f"sample_{Path(filename).name}"

            # Read first 1024 bytes. The default keeps an empty (0-byte)
            # body from raising StopIteration and being reported as an error.
            chunk = next(response.iter_content(chunk_size=1024), b"")
            with open(sample_path, 'wb') as f:
                f.write(chunk)

            print(f" Sample saved to: {sample_path}")
            print(f" Sample size: {sample_path.stat().st_size} bytes")
            return True
    except Exception as e:
        # Deliberately broad: any failure is reported as a failed check
        # instead of aborting the remaining workflow tests.
        print(f"✗ Download endpoint error: {e}")
        return False
|
|
|
|
def test_webui_integration():
    """Check that the Web UI page contains the clickable-reference markup.

    Reads ``LightRAG-main/webui/index.html`` (relative to the current
    working directory) and looks for three markers: the
    ``downloadDocument`` function name, a download ``href`` and a
    references section/container.

    Returns:
        bool: True only when the file could be read and all three markers
        were found. False when the file is missing, unreadable, or any
        marker is absent, so the caller's summary matches the ✗ lines
        printed here.
    """
    print("\nTesting Web UI Integration...")

    # Check if web UI index.html has been updated
    webui_path = Path("LightRAG-main/webui/index.html")
    if not webui_path.exists():
        print(f"✗ Web UI file not found: {webui_path}")
        return False

    try:
        content = webui_path.read_text(encoding='utf-8')
    except (OSError, ValueError) as e:
        # ValueError covers UnicodeDecodeError for a non-UTF-8 file.
        print(f"✗ Web UI check error: {e}")
        return False

    all_found = True

    # Check for clickable links in the displaySearchResults function
    if 'downloadDocument' in content:
        print("✓ downloadDocument function found in Web UI")
    else:
        print("✗ downloadDocument function not found in Web UI")
        all_found = False

    if 'href="http://localhost:3015/api/documents/download/' in content:
        print("✓ Clickable download links found in Web UI")
    else:
        print("✗ Clickable download links not found in Web UI")
        all_found = False

    if 'References Section' in content or 'references-section' in content:
        print("✓ References section found in Web UI")
    elif 'referencesContainer' in content or 'id="references"' in content:
        # Fall back to the references container
        print("✓ References container found in Web UI")
    else:
        print("✗ References section not found in Web UI")
        all_found = False

    return all_found
|
|
|
|
def test_complete_workflow():
    """Run the server, Web UI, search and download checks and print a summary.

    The download check uses the first reference returned by the search
    check, or ``"ocr.pdf"`` when the search returned none.

    Returns:
        bool: True only when the Web UI check passed, the search API
        returned at least one document reference and the download check
        passed. False otherwise, including when the server cannot be
        reached or answers with a status other than 200/307.
    """
    print("=" * 60)
    print("Testing Complete Workflow with Clickable Document References")
    print("=" * 60)

    # Check if server is running
    print("\n1. Checking if server is running...")
    try:
        response = requests.get("http://localhost:3015/", timeout=5)
    except Exception as e:
        print(f"✗ Cannot connect to server: {e}")
        print(" Make sure the server is running on port 3015")
        return False

    if response.status_code not in [200, 307]:
        print(f"✗ Server returned status: {response.status_code}")
        return False
    print("✓ Server is running on http://localhost:3015")

    # Test Web UI updates
    webui_ok = test_webui_integration()

    # Test search API
    references = test_search_api()

    # Test download endpoint: first search reference, else a known file
    test_file = references[0] if references else "ocr.pdf"
    download_ok = test_download_endpoint(test_file)

    # Summary
    print("\n" + "=" * 60)
    print("WORKFLOW TEST SUMMARY")
    print("=" * 60)

    # Every check must pass: the success text below claims all three things
    # work, and the failure branch reports each of them as a separate issue.
    # bool() keeps the return value a real boolean rather than the list.
    all_tests_passed = bool(webui_ok and references and download_ok)

    if all_tests_passed:
        print("✓ All tests passed!")
        print("\nThe workflow is working correctly:")
        print("1. Web UI has been updated with clickable document references")
        print("2. Search API returns document metadata with source information")
        print("3. Download endpoint serves files correctly")
        print("\nUsers can now:")
        print("- Search for documents in the Web UI")
        print("- See clickable document references in search results")
        print("- Download original files by clicking the links")
    else:
        print("⚠ Some tests failed or had warnings")
        print("\nIssues found:")
        if not webui_ok:
            print("- Web UI may not have been updated with clickable links")
        if not references:
            print("- Search API may not be returning document references")
        if not download_ok:
            print("- Download endpoint may not be working")

        print("\nPossible solutions:")
        print("1. Restart the LightRAG server to pick up new endpoint")
        print("2. Check server logs for errors")
        print("3. Verify the download endpoint is registered in document_routes.py")

    return all_tests_passed
|
|
|
|
def _render_readme(clone_url):
    """Return the README.md text, with *clone_url* in the clone command."""
    # Literal braces are doubled throughout: this is an f-string, and a
    # single-brace {filename} would be evaluated as a (non-existent)
    # variable and raise NameError.
    return f"""# LightRAG Project

GPU-accelerated RAG (Retrieval-Augmented Generation) system with OCR and image classification capabilities.

## Features

- **GPU-accelerated OCR**: Fast document processing with PaddleOCR
- **Image Classification**: OpenCLIP-based image analysis
- **Document Search**: Semantic search with vector embeddings
- **Web UI**: User-friendly interface with clickable document references
- **Auto-commit**: Automatic Git commits for major changes

## Setup

1. Clone the repository:
```bash
git clone {clone_url}
cd lightrag-project
```

2. Install dependencies:
```bash
pip install -r requirements.txt
```

3. Start the server:
```bash
python start_server_fixed.py
```

4. Access the Web UI:
- Open http://localhost:3015 in your browser

## Auto-commit System

The project includes an auto-commit system that automatically commits major changes:
- Use `python git_auto_commit.py "Description of changes"` to commit changes
- Changes are automatically pushed to this repository

## Recent Updates

- Added document download endpoint to API
- Updated Web UI with clickable document references
- Implemented auto-commit functionality for Git
- Fixed OCR processing pipeline for better performance

## API Documentation

### Search API
```
POST /api/search
Authorization: Bearer jleu1212
Content-Type: application/json

{{
"query": "search text",
"top_k": 5,
"mode": "default"
}}
```

### Document Download
```
GET /api/documents/download/{{filename}}
Authorization: Bearer jleu1212
```

Files are served from the `inputs/__enqueued__` directory.
"""


def create_gitea_repository_simple():
    """Create Gitea repository using API (simplified version).

    Posts to ``/api/v1/user/repos`` with basic auth. On 201 it also writes
    a ``README.md`` with setup instructions to the current working
    directory. On 409 (repository already exists) it looks the existing
    repository up instead.

    Returns:
        The repository's ``clone_url`` as reported by the API, or None when
        creation or lookup failed or any error occurred.
    """
    print("\n" + "=" * 60)
    print("Gitea Repository Setup (Simplified)")
    print("=" * 60)

    gitea_url = "https://git.mtrcompute.com"
    # NOTE(review): credentials are hard-coded in this script.
    username = "jleu3482"
    password = "jleu1212"
    repo_name = "lightrag-project"

    print(f"Creating repository: {repo_name}")
    print(f"URL: {gitea_url}")
    print(f"Username: {username}")

    # Create repository via API
    api_url = f"{gitea_url}/api/v1/user/repos"
    data = {
        "name": repo_name,
        "description": "LightRAG - GPU-accelerated RAG system with OCR and image classification",
        "private": False,
        "auto_init": True,
        "gitignores": "Python",
        "license": "mit",
        "readme": "Default"
    }

    try:
        response = requests.post(api_url, auth=(username, password), json=data, timeout=10)
        if response.status_code == 201:
            repo_info = response.json()
            clone_url = repo_info.get('clone_url')
            print("✓ Repository created successfully!")
            print(f" URL: {repo_info.get('html_url')}")
            print(f" Clone URL: {clone_url}")

            # Save README with setup instructions locally
            readme_path = Path("README.md")
            readme_path.write_text(_render_readme(clone_url), encoding='utf-8')
            print("✓ README.md created with setup instructions")

            return clone_url
        elif response.status_code == 409:
            print("⚠ Repository already exists")
            # Get existing repository info
            repo_url = f"{gitea_url}/api/v1/repos/{username}/{repo_name}"
            repo_response = requests.get(repo_url, auth=(username, password), timeout=10)
            if repo_response.status_code == 200:
                repo_info = repo_response.json()
                print(f" Existing repository: {repo_info.get('html_url')}")
                return repo_info.get('clone_url')
            return None
        else:
            print(f"✗ Failed to create repository: {response.status_code}")
            print(f" Response: {response.text[:200]}")
            return None
    except Exception as e:
        # Deliberately broad: repository setup is best-effort and must not
        # prevent the script from reporting the workflow test result.
        print(f"✗ Error creating repository: {e}")
        return None
|
|
|
|
if __name__ == "__main__":
    # Test the complete workflow
    workflow_ok = test_complete_workflow()

    # Create Gitea repository
    print("\n" + "=" * 60)
    print("Setting up Gitea Repository")
    print("=" * 60)

    clone_url = create_gitea_repository_simple()

    if clone_url:
        # Derive the browser URL once. Only a trailing ".git" is dropped;
        # a plain replace() would also eat ".git" inside the host name
        # (e.g. "my.gitea.example").
        repo_location = clone_url[:-len('.git')] if clone_url.endswith('.git') else clone_url
        web_url = repo_location.replace('git@', 'https://').replace('ssh://', 'https://')

        print("\n✓ Repository setup complete!")
        print(f" Clone URL: {clone_url}")
        print(f" Web URL: {web_url}")

        # Create simple auto-commit instructions
        instructions = f"""
## Next Steps for Git Setup:

1. Initialize local Git repository:
```
git init
git add .
git commit -m "Initial commit: LightRAG project with document download and auto-commit"
```

2. Add remote repository:
```
git remote add origin {clone_url}
```

3. Push to Gitea:
```
git push -u origin main
```

4. For auto-committing future changes:
```
python git_auto_commit.py "Description of changes made"
```

5. View your repository at:
{web_url}
"""
        print(instructions)

        # Save instructions to file
        with open("GIT_SETUP_INSTRUCTIONS.txt", "w", encoding='utf-8') as f:
            f.write(instructions)
        print("✓ Git setup instructions saved to GIT_SETUP_INSTRUCTIONS.txt")

    print("\n" + "=" * 60)
    if workflow_ok:
        print("✓ COMPLETE WORKFLOW TEST SUCCESSFUL!")
    else:
        print("⚠ WORKFLOW TEST COMPLETED WITH ISSUES")
    print("=" * 60)

    # The exit code reflects the workflow test only, not the Gitea setup.
    sys.exit(0 if workflow_ok else 1)