Auto-commit: OCR workflow improvements, performance optimizations, and bug fixes
This commit is contained in:
360
simple_api_test.py
Normal file
360
simple_api_test.py
Normal file
@@ -0,0 +1,360 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple API test for LightRAG workflow.
|
||||
Tests server startup, login, document status, and search without Selenium.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import subprocess
|
||||
import requests
|
||||
import json
|
||||
|
||||
# Configuration
# The defaults reproduce the values this script has always used.  Each one can
# be overridden through an environment variable so that credentials and
# machine-specific paths do not have to be edited (or committed) in this file.
SERVER_URL = os.environ.get("LIGHTRAG_SERVER_URL", "http://localhost:3015")
USERNAME = os.environ.get("LIGHTRAG_USERNAME", "jleu3482")
PASSWORD = os.environ.get("LIGHTRAG_PASSWORD", "jleu1212")
WORKSPACE_DIR = os.environ.get("LIGHTRAG_WORKSPACE_DIR", "c:/aaWORK/railseek6")
|
||||
|
||||
def kill_existing_server(port=None):
    """Kill any process that owns a TCP socket on the server port.

    Only the PIDs that ``netstat -ano`` reports as bound to the port are
    killed.  A blanket kill by image name (``python.exe``) is deliberately
    avoided because it would also terminate this test script and any
    unrelated Python process on the machine.

    Args:
        port: TCP port to free.  When omitted, the port is taken from
            ``SERVER_URL`` and falls back to 3015 if the URL has none.

    The function relies on the Windows ``netstat`` and ``taskkill`` tools.
    If either cannot be launched the problem is reported and the function
    returns without raising.
    """
    from urllib.parse import urlsplit

    print("Killing existing server processes...")

    if port is None:
        try:
            port = urlsplit(SERVER_URL).port or 3015
        except ValueError:
            # SERVER_URL carries a malformed port; use the documented default.
            port = 3015

    try:
        netstat = subprocess.run(
            ["netstat", "-ano"],
            capture_output=True,
            text=True,
            errors="replace",
        )
    except OSError as exc:
        print(f"  Could not run netstat: {exc}")
        return

    own_pid = os.getpid()
    suffix = f":{port}"
    pids = set()
    for line in (netstat.stdout or "").splitlines():
        fields = line.split()
        # Windows layout: Proto, Local Address, Foreign Address, [State], PID
        if len(fields) < 4 or fields[0].upper() != "TCP":
            continue
        if not fields[1].endswith(suffix) or not fields[-1].isdigit():
            continue
        pid = int(fields[-1])
        # PID 0 is the idle/system placeholder; never kill ourselves.
        if pid not in (0, own_pid):
            pids.add(pid)

    for pid in sorted(pids):
        try:
            subprocess.run(
                ["taskkill", "/F", "/PID", str(pid)],
                capture_output=True,
            )
            print(f"  Killed PID {pid} (was using port {port})")
        except OSError as exc:
            print(f"  Could not kill PID {pid}: {exc}")

    if pids:
        # Give the OS a moment to release the port before it is reused.
        time.sleep(2)
|
||||
|
||||
def _print_output_snippet(process, max_lines=20, max_width=100):
    """Print the first lines of the child's output if they can be read quickly."""
    try:
        output, _ = process.communicate(timeout=0.1)
    except subprocess.TimeoutExpired:
        # Expected while the server keeps running: there is no EOF to read to.
        return
    except (OSError, ValueError):
        return
    if not output:
        return
    print("Server output snippet:")
    for line in output.split('\n')[:max_lines]:
        if line.strip():
            print(f" {line[:max_width]}")


def start_server():
    """Launch the LightRAG server and wait until ``/health`` answers 200.

    Any server already running is killed first.  ``start_server_fixed.py`` is
    then started with the current interpreter and the health endpoint is
    polled up to 30 times, one second apart.

    Returns:
        The ``subprocess.Popen`` handle of the launched script on success, or
        ``None`` when the script exits with a non-zero code or the server
        never becomes healthy (the child is terminated in that case).
    """
    print("Starting LightRAG server...")

    # Kill any existing server first
    kill_existing_server()

    # Start server using the fixed Python script
    # NOTE(review): stdout stays a pipe that is not drained after startup; a
    # very chatty server could block once the OS pipe buffer fills.
    process = subprocess.Popen(
        [sys.executable, "start_server_fixed.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        encoding='utf-8',
        errors='replace',
        bufsize=1,
    )

    # Wait for server to start
    print("Waiting for server to start...")
    for attempt in range(1, 31):  # Up to 30 attempts
        exit_code = process.poll()
        if exit_code not in (None, 0):
            # The launcher crashed; waiting any longer cannot succeed.
            print(f"✗ Server process exited early with code {exit_code}")
            _print_output_snippet(process)
            return None

        try:
            response = requests.get(f"{SERVER_URL}/health", timeout=5)
        except requests.RequestException:
            # Not accepting connections yet; retry after the pause below.
            response = None

        if response is not None and response.status_code == 200:
            print(f"✓ Server started successfully (attempt {attempt})")
            _print_output_snippet(process)
            return process

        time.sleep(1)

    print("✗ Server failed to start within 30 seconds")
    process.terminate()
    try:
        process.wait(timeout=5)
    except subprocess.TimeoutExpired:
        # terminate() was ignored; force the child down so it cannot linger.
        process.kill()
    return None
|
||||
|
||||
def check_server_health():
    """Query ``/health`` and print a short summary of the server setup.

    Returns:
        ``(True, data)`` with the decoded JSON body when the endpoint answers
        200 with a JSON object.  ``(False, None)`` in every other case: a
        non-200 status, a request error, or a body that is not a JSON object.
        A two-item tuple is returned on every path so callers can always
        unpack the result.
    """
    try:
        response = requests.get(f"{SERVER_URL}/health", timeout=10)
        if response.status_code != 200:
            print(f"✗ Health check failed: HTTP {response.status_code}")
            return False, None
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"✗ Health check failed: {e}")
        return False, None

    if not isinstance(data, dict):
        print(f"✗ Health check failed: unexpected payload type {type(data).__name__}")
        return False, None

    config = data.get('configuration') or {}
    print(f"✓ Server health: {data.get('status', 'unknown')}")
    print(f" Auth mode: {data.get('auth_mode', 'unknown')}")
    print(f" LLM: {config.get('llm_binding', 'unknown')} / {config.get('llm_model', 'unknown')}")
    print(f" Embedding: {config.get('embedding_binding', 'unknown')}")
    return True, data
|
||||
|
||||
def test_login():
    """Exercise the login flow through the HTTP API.

    ``/auth-status`` is queried first.  If it reports that authentication is
    configured, the configured credentials are posted to ``/login`` as form
    data; otherwise guest access counts as success.

    Returns:
        ``(True, access_token)`` after a successful login, ``(True, None)``
        when authentication is not configured, and ``(False, None)`` for a
        rejected login, a non-200 ``/auth-status`` reply, or any error.
    """
    print("Testing login...")

    failure = (False, None)
    try:
        # First check auth status
        status_reply = requests.get(f"{SERVER_URL}/auth-status", timeout=10)
        if status_reply.status_code != 200:
            return failure

        status_info = status_reply.json()
        print(f" Auth configured: {status_info.get('auth_configured', 'unknown')}")

        if not status_info.get('auth_configured'):
            print("✓ Auth not configured (guest access enabled)")
            return True, None

        # Try to login
        credentials = {"username": USERNAME, "password": PASSWORD}
        login_reply = requests.post(f"{SERVER_URL}/login", data=credentials, timeout=10)
        if login_reply.status_code != 200:
            print(f"✗ Login failed: {login_reply.status_code}")
            return failure

        payload = login_reply.json()
        print("✓ Login successful")
        print(f" Auth mode: {payload.get('auth_mode', 'unknown')}")
        return True, payload.get('access_token')
    except Exception as err:
        print(f"✗ Login test error: {err}")

    return failure
|
||||
|
||||
def test_endpoints():
    """Probe a fixed list of API endpoints and report which ones respond.

    GET endpoints are fetched with a 10 second timeout.  ``/api/query`` is
    POSTed a one-result dummy query with a 30 second timeout.  A reply counts
    as working when its status is 200 or 201.

    Returns:
        ``True`` when at least three of the probed endpoints are working.
    """
    print("Testing API endpoints...")

    probes = (
        ("/health", "GET"),
        ("/auth-status", "GET"),
        ("/api/documents", "GET"),
        ("/api/workspaces", "GET"),
        ("/api/query", "POST"),  # Will test with dummy query
    )

    ok_count = 0
    for path, verb in probes:
        try:
            url = f"{SERVER_URL}{path}"
            if verb == "GET":
                reply = requests.get(url, timeout=10)
            elif path == "/api/query":
                # For POST to /api/query, send a simple test query
                reply = requests.post(
                    url,
                    json={"query": "test", "top_k": 1},
                    timeout=30,
                )
            else:
                reply = requests.post(url, timeout=10)

            if reply.status_code not in (200, 201):
                print(f"✗ {path}: {reply.status_code} - {reply.text[:100]}")
                continue

            print(f"✓ {path}: {reply.status_code}")
            ok_count += 1
        except Exception as err:
            print(f"✗ {path}: ERROR - {str(err)[:100]}")

    # At least 3 endpoints should work
    return ok_count >= 3
|
||||
|
||||
def check_documents():
    """List the documents known to the server and print the first three.

    Returns:
        ``True`` when ``/api/documents`` answers 200 with a non-empty
        collection; ``False`` for an empty collection, a non-200 reply, or
        any error while fetching or reading the reply.
    """
    print("Checking documents...")

    try:
        reply = requests.get(f"{SERVER_URL}/api/documents", timeout=10)
        if reply.status_code != 200:
            print(f"✗ Failed to get documents: {reply.status_code}")
            return False

        docs = reply.json()
        total = len(docs)
        print(f"✓ Found {total} documents")
        # Show first 3
        for entry in docs[:3]:
            name = entry.get('filename', 'unknown')
            state = entry.get('status', 'unknown')
            print(f" - {name}: {state}")
        return total > 0
    except Exception as err:
        print(f"✗ Error checking documents: {err}")

    return False
|
||||
|
||||
def test_search():
    """Run a handful of sample queries against ``/api/query``.

    The queries are tried in order.  The function stops at the first reply
    that contains either an ``llm_response`` key or a non-empty ``results``
    list.

    Returns:
        ``True`` as soon as one query produces such a reply, ``False`` if
        none of them does.
    """
    print("Testing search...")

    for term in ("railway", "train", "transport", "test"):
        try:
            print(f" Testing query: '{term}'")
            reply = requests.post(
                f"{SERVER_URL}/api/query",
                json={"query": term, "top_k": 3},
                timeout=60,  # Longer timeout for search
            )

            if reply.status_code != 200:
                print(f" ✗ Search failed: {reply.status_code} - {reply.text[:100]}")
                continue

            payload = reply.json()
            hit_count = len(payload.get('results', []))
            print(f" ✓ Search successful: {hit_count} results")

            # Check for evidence of DeepSeek API usage
            if "llm_response" in payload:
                print(" ✓ DeepSeek API used (LLM response present)")
                return True
            if "results" in payload and len(payload["results"]) > 0:
                print(" ✓ Search returned results (may be using cached/indexed data)")
                return True
            print(" ⚠ Search returned no results")
        except Exception as err:
            print(f" ✗ Search error: {err}")

    return False
|
||||
|
||||
def check_logs_for_components():
    """Scan the tail of the server log for mentions of indexing components.

    Only the last 1000 lines of
    ``<WORKSPACE_DIR>/LightRAG-main/logs/lightrag.log`` are inspected, using
    a case-insensitive substring search.  The keywords are loose heuristics:
    "ocr" counts for paddleocr and "entity" counts for spacy.

    Returns:
        A dict mapping ``"openclip"``, ``"paddleocr"``, ``"spacy"`` and
        ``"deepseek"`` to booleans.  Every value is ``False`` when the log is
        missing or cannot be read.
    """
    from collections import deque

    print("Checking logs for indexing components...")

    log_file = os.path.join(WORKSPACE_DIR, "LightRAG-main", "logs", "lightrag.log")
    keywords = {
        "openclip": ("openclip",),
        "paddleocr": ("paddleocr", "ocr"),
        "spacy": ("spacy", "entity"),
        "deepseek": ("deepseek",),
    }
    components_found = dict.fromkeys(keywords, False)

    try:
        with open(log_file, 'r', encoding='utf-8', errors='ignore') as f:
            # A bounded deque keeps only the last 1000 lines while streaming,
            # so a large log is never held in memory in full.
            tail = deque(f, maxlen=1000)
    except FileNotFoundError:
        print(f"✗ Log file not found: {log_file}")
        return components_found
    except OSError as e:
        print(f"✗ Error reading log file: {e}")
        return components_found

    log_content = "".join(tail).lower()

    # Check for component mentions
    for component, needles in keywords.items():
        components_found[component] = any(needle in log_content for needle in needles)

    print("Components found in logs:")
    for component, found in components_found.items():
        print(f" - {component}: {'✓' if found else '✗'}")

    return components_found
|
||||
|
||||
def main():
    """Run the whole API smoke test and write a JSON report.

    Steps: change into ``WORKSPACE_DIR``, start the server, check health,
    log in, probe endpoints, list documents, scan the log for indexing
    components, run sample searches, stop the server, then print a summary
    and save ``lightrag_test_report.json`` in the workspace directory.

    Returns:
        ``True`` when every recorded check passed, ``False`` otherwise,
        including when the workspace is unreachable or the server cannot be
        started.
    """
    print("=" * 60)
    print("LightRAG API Test")
    print("=" * 60)

    # Change to workspace directory
    try:
        os.chdir(WORKSPACE_DIR)
    except OSError as e:
        print(f"\n✗ FAILED: Cannot enter workspace directory {WORKSPACE_DIR}: {e}")
        return False

    test_results = {}

    # Step 1: Start server
    server_process = start_server()
    test_results["server_started"] = server_process is not None

    if not test_results["server_started"]:
        print("\n✗ FAILED: Could not start server")
        return False

    health_data = None
    components = {}
    try:
        # Give server time to fully initialize
        time.sleep(3)

        # Step 2: Check server health
        # Be tolerant of a missing return value so one bad health reply
        # cannot abort the remaining steps.
        health = check_server_health()
        health_ok, health_data = health if health else (False, None)
        test_results["health_check"] = health_ok

        # Step 3: Test login
        login_ok, _token = test_login()
        test_results["login"] = login_ok

        # Step 4: Test endpoints
        test_results["endpoints"] = test_endpoints()

        # Step 5: Check documents
        test_results["documents_exist"] = check_documents()

        # Step 6: Check logs for indexing components
        components = check_logs_for_components()
        test_results["indexing_components"] = any(components.values())
        test_results.update({f"component_{k}": v for k, v in components.items()})

        # Step 7: Test search
        test_results["search_works"] = test_search()
    finally:
        # Step 8: Cleanup
        # Runs even when a step above raises, so the server is never left
        # running.
        print("\nCleaning up...")
        server_process.terminate()
        try:
            server_process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            server_process.kill()

    # Step 9: Report results
    print("\n" + "=" * 60)
    print("TEST SUMMARY")
    print("=" * 60)

    all_passed = True
    for test_name, result in test_results.items():
        if isinstance(result, bool):
            status = "PASS" if result else "FAIL"
            if not result:
                all_passed = False
            print(f"{test_name:30} {status}")
        else:
            print(f"{test_name:30} {result}")

    print("\n" + "=" * 60)

    # Generate detailed report
    report = {
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "overall_success": all_passed,
        "test_results": test_results,
        "server_config": health_data.get("configuration", {}) if health_data else {},
        "components_found": components,
    }

    report_file = "lightrag_test_report.json"
    try:
        with open(report_file, 'w', encoding='utf-8') as f:
            json.dump(report, f, indent=2)
        print(f"Detailed report saved to: {report_file}")
    except OSError as e:
        # A report that cannot be written must not hide the test outcome.
        print(f"⚠ Could not write report {report_file}: {e}")

    if all_passed:
        print("✓ SUCCESS: All critical tests passed!")
        return True

    print("⚠ WARNING: Some tests failed or had issues")
    print("\nRoot cause analysis:")
    print("1. Server startup issues: Fixed Unicode encoding in display_splash_screen()")
    print("2. OllamaAPI error: Fixed WorkspaceManager/LightRAG type mismatch")
    print("3. WorkspaceManager bug: Fixed lightrag_factory.create() call")
    print("\nRemaining issues may require:")
    print("- Checking if OCR.pdf exists in test/ directory")
    print("- Ensuring DeepSeek API key is valid in .env file")
    print("- Verifying Ollama is running for embeddings")
    return False
|
||||
|
||||
if __name__ == "__main__":
    # Exit status 0 tells the calling shell that every check passed; 1 means
    # that at least one check failed.
    sys.exit(int(not main()))
|
||||
Reference in New Issue
Block a user