#!/usr/bin/env python3
"""
Test script for the complete workflow with clickable document references.

This tests:
1. Search API to get document references
2. Download endpoint functionality
3. Web UI integration

After the checks it creates (or looks up) the project's Gitea repository and
writes Git setup instructions to GIT_SETUP_INSTRUCTIONS.txt.
"""

import json
import os
import sys
from pathlib import Path
from urllib.parse import quote

import requests

BASE_URL = "http://localhost:3015"

# NOTE(review): the fallback values below keep the script working unchanged,
# but they are real credentials committed to source. Prefer exporting the
# environment variables and rotating the committed values.
API_KEY = os.environ.get("LIGHTRAG_API_KEY", "jleu1212")
GITEA_PASSWORD = os.environ.get("GITEA_PASSWORD", "jleu1212")


def test_search_api():
    """Query the search API and collect the document references it returns.

    Returns:
        list: Unique ``metadata.source`` values in first-seen order. Empty
        when the request fails, the payload is malformed, or no result
        carries a source.
    """
    print("Testing Search API...")

    url = f"{BASE_URL}/api/search"
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    data = {
        "query": "test document",
        "top_k": 5,
        "mode": "default",
    }

    try:
        response = requests.post(url, headers=headers, json=data, timeout=10)
        if response.status_code != 200:
            print(f"✗ Search API failed: {response.status_code}")
            print(f" Response: {response.text}")
            return []

        results = response.json().get("results", [])
        print("✓ Search API successful")
        print(f" Found {len(results)} results")

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # file picked for the download test is the top search hit rather than
        # an arbitrary element of a set.
        sources = (item.get("metadata", {}).get("source") for item in results)
        references = list(dict.fromkeys(src for src in sources if src))

        print(f" Unique document references: {len(references)}")
        for ref in references[:5]:  # Show first 5
            print(f" - {ref}")
        return references
    except (requests.RequestException, ValueError, AttributeError, TypeError) as e:
        # ValueError: body is not JSON. AttributeError/TypeError: the JSON
        # does not have the expected dict/list shape.
        print(f"✗ Search API error: {e}")
        return []


def test_download_endpoint(filename):
    """Download *filename* from the API and save its first KiB as a sample.

    Args:
        filename: Document name as returned in a search result's
            ``metadata.source``.

    Returns:
        bool: True when the endpoint answered 200 and the sample was written
        under ``download_test/``; False otherwise.
    """
    print(f"\nTesting Download Endpoint for: {filename}")

    # Percent-encode the name so spaces, '#', '?' etc. do not corrupt the URL.
    url = f"{BASE_URL}/api/documents/download/{quote(str(filename))}"
    headers = {"Authorization": f"Bearer {API_KEY}"}

    try:
        # A streamed response holds its connection open until it is closed;
        # the context manager releases it on every exit path.
        with requests.get(url, headers=headers, timeout=10, stream=True) as response:
            if response.status_code != 200:
                print(f"✗ Download endpoint failed: {response.status_code}")
                print(f" Response: {response.text[:200] if response.text else 'No response body'}")
                return False

            content_type = response.headers.get("content-type", "")
            content_disposition = response.headers.get("content-disposition", "")

            print("✓ Download endpoint successful")
            print(f" Content-Type: {content_type}")
            print(f" Content-Disposition: {content_disposition}")
            print(f" Content-Length: {response.headers.get('content-length', 'unknown')}")

            # Save a small sample to verify
            test_dir = Path("download_test")
            test_dir.mkdir(exist_ok=True)
            # Keep only the basename: a reference containing directories must
            # not place the sample outside download_test/.
            sample_path = test_dir / f"sample_{Path(str(filename)).name}"

            # An empty body yields no chunks; default to b"" instead of
            # letting StopIteration escape as an error with no message.
            chunk = next(response.iter_content(chunk_size=1024), b"")
            sample_path.write_bytes(chunk)

            print(f" Sample saved to: {sample_path}")
            print(f" Sample size: {sample_path.stat().st_size} bytes")
            return True
    except (requests.RequestException, OSError) as e:
        print(f"✗ Download endpoint error: {e}")
        return False


def test_webui_integration():
    """Check that the Web UI source contains the clickable-reference markup.

    Looks in ``LightRAG-main/webui/index.html`` for the ``downloadDocument``
    function, a download link, and a references section or container.

    Returns:
        bool: True only when the file is readable and all three checks pass.
    """
    print("\nTesting Web UI Integration...")

    webui_path = Path("LightRAG-main/webui/index.html")
    if not webui_path.exists():
        print(f"✗ Web UI file not found: {webui_path}")
        return False

    try:
        content = webui_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        print(f"✗ Web UI check error: {e}")
        return False

    has_download_fn = "downloadDocument" in content
    if has_download_fn:
        print("✓ downloadDocument function found in Web UI")
    else:
        print("✗ downloadDocument function not found in Web UI")

    has_links = 'href="http://localhost:3015/api/documents/download/' in content
    if has_links:
        print("✓ Clickable download links found in Web UI")
    else:
        print("✗ Clickable download links not found in Web UI")

    has_section = "References Section" in content or "references-section" in content
    has_container = "referencesContainer" in content or 'id="references"' in content
    if has_section:
        print("✓ References section found in Web UI")
    elif has_container:
        print("✓ References container found in Web UI")
    else:
        print("✗ References section not found in Web UI")

    # Report the real outcome: a failed check must not count as a pass.
    return has_download_fn and has_links and (has_section or has_container)


def test_complete_workflow():
    """Run the server, Web UI, search and download checks and summarise them.

    Returns:
        bool: True only when the Web UI check passed, the search API returned
        at least one document reference, and the download succeeded.
    """
    print("=" * 60)
    print("Testing Complete Workflow with Clickable Document References")
    print("=" * 60)

    print("\n1. Checking if server is running...")
    try:
        response = requests.get(f"{BASE_URL}/", timeout=5)
    except requests.RequestException as e:
        print(f"✗ Cannot connect to server: {e}")
        print(" Make sure the server is running on port 3015")
        return False

    if response.status_code in (200, 307):
        print("✓ Server is running on http://localhost:3015")
    else:
        print(f"✗ Server returned status: {response.status_code}")
        return False

    webui_ok = test_webui_integration()
    references = test_search_api()

    # Download the first search hit, or fall back to a known file when the
    # search produced no references.
    test_file = references[0] if references else "ocr.pdf"
    download_ok = test_download_endpoint(test_file)

    print("\n" + "=" * 60)
    print("WORKFLOW TEST SUMMARY")
    print("=" * 60)

    # Every step must succeed: the success message below asserts all three.
    all_tests_passed = bool(webui_ok and references and download_ok)

    if all_tests_passed:
        print("✓ All tests passed!")
        print("\nThe workflow is working correctly:")
        print("1. Web UI has been updated with clickable document references")
        print("2. Search API returns document metadata with source information")
        print("3. Download endpoint serves files correctly")
        print("\nUsers can now:")
        print("- Search for documents in the Web UI")
        print("- See clickable document references in search results")
        print("- Download original files by clicking the links")
    else:
        print("⚠ Some tests failed or had warnings")
        print("\nIssues found:")
        if not webui_ok:
            print("- Web UI may not have been updated with clickable links")
        if not references:
            print("- Search API may not be returning document references")
        if not download_ok:
            print("- Download endpoint may not be working")
        print("\nPossible solutions:")
        print("1. Restart the LightRAG server to pick up new endpoint")
        print("2. Check server logs for errors")
        print("3. Verify the download endpoint is registered in document_routes.py")

    return all_tests_passed


def _web_url(clone_url):
    """Return the browser URL that corresponds to a Git clone URL."""
    base = clone_url[:-4] if clone_url.endswith(".git") else clone_url
    return base.replace("git@", "https://").replace("ssh://", "https://")


def _build_readme(clone_url, repo_name):
    """Return the README.md text for the freshly created repository."""
    # Literal braces (the JSON body and the {filename} path parameter) are
    # doubled so the f-string emits them verbatim instead of evaluating them.
    # The bearer token is a placeholder: the repository is public, so the
    # real key must not be written into its README.
    return f"""# LightRAG Project

GPU-accelerated RAG (Retrieval-Augmented Generation) system with OCR and image classification capabilities.

## Features

- **GPU-accelerated OCR**: Fast document processing with PaddleOCR
- **Image Classification**: OpenCLIP-based image analysis
- **Document Search**: Semantic search with vector embeddings
- **Web UI**: User-friendly interface with clickable document references
- **Auto-commit**: Automatic Git commits for major changes

## Setup

1. Clone the repository:
   ```bash
   git clone {clone_url}
   cd {repo_name}
   ```

2. Install dependencies:
   ```bash
   pip install -r requirements.txt
   ```

3. Start the server:
   ```bash
   python start_server_fixed.py
   ```

4. Access the Web UI:
   - Open http://localhost:3015 in your browser

## Auto-commit System

The project includes an auto-commit system that automatically commits major changes:

- Use `python git_auto_commit.py "Description of changes"` to commit changes
- Changes are automatically pushed to this repository

## Recent Updates

- Added document download endpoint to API
- Updated Web UI with clickable document references
- Implemented auto-commit functionality for Git
- Fixed OCR processing pipeline for better performance

## API Documentation

### Search API
```
POST /api/search
Authorization: Bearer <your-api-key>
Content-Type: application/json

{{
  "query": "search text",
  "top_k": 5,
  "mode": "default"
}}
```

### Document Download
```
GET /api/documents/download/{{filename}}
Authorization: Bearer <your-api-key>
```

Files are served from the `inputs/__enqueued__` directory.
"""


def create_gitea_repository_simple():
    """Create the Gitea repository via the REST API (simplified version).

    On creation (HTTP 201) a README.md with setup instructions is written to
    the current directory. If the repository already exists (HTTP 409) its
    details are looked up instead.

    Returns:
        str | None: The repository's clone URL, or None when it could neither
        be created nor looked up.
    """
    print("\n" + "=" * 60)
    print("Gitea Repository Setup (Simplified)")
    print("=" * 60)

    gitea_url = "https://git.mtrcompute.com"
    username = "jleu3482"
    repo_name = "lightrag-project"
    auth = (username, GITEA_PASSWORD)

    print(f"Creating repository: {repo_name}")
    print(f"URL: {gitea_url}")
    print(f"Username: {username}")

    api_url = f"{gitea_url}/api/v1/user/repos"
    data = {
        "name": repo_name,
        "description": "LightRAG - GPU-accelerated RAG system with OCR and image classification",
        "private": False,
        "auto_init": True,
        "gitignores": "Python",
        "license": "mit",
        "readme": "Default",
    }

    try:
        response = requests.post(api_url, auth=auth, json=data, timeout=10)

        if response.status_code == 201:
            repo_info = response.json()
            clone_url = repo_info.get("clone_url")
            print("✓ Repository created successfully!")
            print(f" URL: {repo_info.get('html_url')}")
            print(f" Clone URL: {clone_url}")

            readme_path = Path("README.md")
            readme_path.write_text(_build_readme(clone_url, repo_name), encoding="utf-8")
            print("✓ README.md created with setup instructions")
            return clone_url

        if response.status_code == 409:
            print("⚠ Repository already exists")
            repo_url = f"{gitea_url}/api/v1/repos/{username}/{repo_name}"
            repo_response = requests.get(repo_url, auth=auth, timeout=10)
            if repo_response.status_code == 200:
                repo_info = repo_response.json()
                print(f" Existing repository: {repo_info.get('html_url')}")
                return repo_info.get("clone_url")
            return None

        print(f"✗ Failed to create repository: {response.status_code}")
        print(f" Response: {response.text[:200]}")
        return None
    except (requests.RequestException, ValueError, OSError) as e:
        # Network failure, non-JSON response body, or README write failure.
        print(f"✗ Error creating repository: {e}")
        return None


def main():
    """Run the workflow test, set up the Gitea repository, and exit.

    The exit status is 0 when the workflow test passed and 1 otherwise; the
    repository setup result does not affect it.
    """
    workflow_ok = test_complete_workflow()

    print("\n" + "=" * 60)
    print("Setting up Gitea Repository")
    print("=" * 60)

    clone_url = create_gitea_repository_simple()

    if clone_url:
        web_url = _web_url(clone_url)
        print("\n✓ Repository setup complete!")
        print(f" Clone URL: {clone_url}")
        print(f" Web URL: {web_url}")

        instructions = f"""
## Next Steps for Git Setup:

1. Initialize local Git repository:
   ```
   git init
   git add .
   git commit -m "Initial commit: LightRAG project with document download and auto-commit"
   ```

2. Add remote repository:
   ```
   git remote add origin {clone_url}
   ```

3. Push to Gitea:
   ```
   git push -u origin main
   ```

4. For auto-committing future changes:
   ```
   python git_auto_commit.py "Description of changes made"
   ```

5. View your repository at: {web_url}
"""
        print(instructions)

        with open("GIT_SETUP_INSTRUCTIONS.txt", "w", encoding="utf-8") as f:
            f.write(instructions)
        print("✓ Git setup instructions saved to GIT_SETUP_INSTRUCTIONS.txt")

    print("\n" + "=" * 60)
    if workflow_ok:
        print("✓ COMPLETE WORKFLOW TEST SUCCESSFUL!")
    else:
        print("⚠ WORKFLOW TEST COMPLETED WITH ISSUES")
    print("=" * 60)

    sys.exit(0 if workflow_ok else 1)


if __name__ == "__main__":
    main()