# railseek6/simple_api_test.py

#!/usr/bin/env python3
"""
Simple API test for LightRAG workflow.
Tests server startup, login, document status, and search without Selenium.
"""

import os
import sys
import time
import subprocess
import requests
import json

# Configuration
# Every value may be overridden through an environment variable so that the
# server address, credentials and workspace path do not have to be edited in
# (or committed with) this file. The defaults are the original values.
SERVER_URL = os.environ.get("LIGHTRAG_SERVER_URL", "http://localhost:3015")
USERNAME = os.environ.get("LIGHTRAG_USERNAME", "jleu3482")
PASSWORD = os.environ.get("LIGHTRAG_PASSWORD", "jleu1212")
WORKSPACE_DIR = os.environ.get("LIGHTRAG_WORKSPACE_DIR", "c:/aaWORK/railseek6")

def _pids_on_local_port(netstat_output, port):
    """Return the PIDs (as strings) of TCP rows whose local address uses ``port``."""
    suffix = f":{port}"
    pids = set()
    for line in netstat_output.splitlines():
        fields = line.split()
        # Windows ``netstat -ano`` rows: Proto, Local Address, Foreign Address,
        # State, PID. The State text is localized, so it is not inspected.
        if len(fields) < 5 or fields[0].upper() != "TCP":
            continue
        pid = fields[-1]
        if fields[1].endswith(suffix) and pid.isdigit() and pid != "0":
            pids.add(pid)
    return pids


def kill_existing_server(port=None):
    """Kill processes that hold the server port (best effort, Windows tools).

    Runs ``netstat -ano``, collects the PIDs of TCP sockets bound to ``port``
    and force-terminates each of them (and its children) with ``taskkill``.
    Only those PIDs are targeted, and the current process is always excluded,
    so unrelated Python processes are left running.

    Args:
        port: TCP port to free. When omitted, the port is taken from
            ``SERVER_URL``.
    """
    from urllib.parse import urlsplit

    print("Killing existing server processes...")
    if port is None:
        port = urlsplit(SERVER_URL).port
    if port is None:
        print("  No port found in SERVER_URL; nothing to kill")
        return

    try:
        netstat = subprocess.run(
            ["netstat", "-ano"],
            capture_output=True,
            text=True,
            errors="replace",
        )
    except OSError as e:
        # netstat is not available on this host; cleanup is best effort only.
        print(f"  Could not run netstat: {e}")
        return

    pids = _pids_on_local_port(netstat.stdout, port) - {str(os.getpid())}
    for pid in sorted(pids, key=int):
        print(f"  Terminating PID {pid} (bound to port {port})")
        try:
            subprocess.run(
                ["taskkill", "/F", "/T", "/PID", pid],
                capture_output=True,
            )
        except OSError as e:
            print(f"  Could not run taskkill for PID {pid}: {e}")

    if pids:
        # Give the OS a moment to release the port before a new server binds.
        time.sleep(2)

def _drain_server_output(stream, captured, keep=200):
    """Read ``stream`` to EOF, storing only the first ``keep`` lines in ``captured``."""
    try:
        for line in stream:
            if len(captured) < keep:
                captured.append(line)
    except (OSError, ValueError):
        # The pipe was closed while reading; there is nothing left to drain.
        pass


def _print_server_output(captured, limit=20):
    """Print up to ``limit`` of the first captured lines, truncated to 100 chars."""
    lines = [line.rstrip() for line in captured[:limit] if line.strip()]
    if lines:
        print("Server output snippet:")
        for line in lines:
            print(f"  {line[:100]}")


def start_server():
    """Start the LightRAG server and wait until ``/health`` answers 200.

    Any process already bound to the server port is killed first via
    ``kill_existing_server()``. The child's combined stdout/stderr is read by
    a background thread for as long as the process runs, so the pipe is
    always drained and the first lines of output can be shown.

    Returns:
        subprocess.Popen | None: the running server process, or ``None`` when
        the process exits early or never becomes healthy (in the latter case
        it is terminated before returning).
    """
    import threading

    print("Starting LightRAG server...")

    # Kill any existing server first
    kill_existing_server()

    # Start server using the fixed Python script
    cmd = [sys.executable, "start_server_fixed.py"]
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        encoding='utf-8',
        errors='replace',  # undecodable bytes must not stop the reader thread
        bufsize=1,
    )

    captured = []
    reader = threading.Thread(
        target=_drain_server_output,
        args=(process.stdout, captured),
        daemon=True,
    )
    reader.start()

    # Wait for server to start
    print("Waiting for server to start...")
    for i in range(30):  # Up to 30 polling attempts
        if process.poll() is not None:
            # The server died during startup; further polling is pointless.
            reader.join(timeout=1)
            print(f"✗ Server process exited early (exit code {process.returncode})")
            _print_server_output(captured)
            return None

        try:
            response = requests.get(f"{SERVER_URL}/health", timeout=5)
        except requests.RequestException:
            response = None  # not accepting connections yet

        if response is not None and response.status_code == 200:
            print(f"✓ Server started successfully (attempt {i+1})")
            _print_server_output(captured)
            return process
        time.sleep(1)

    print("✗ Server failed to start within 30 seconds")
    process.terminate()
    try:
        process.wait(timeout=5)
    except subprocess.TimeoutExpired:
        # terminate() was not honoured in time; force the process down.
        process.kill()
        process.wait()
    return None

def check_server_health():
    """Query ``/health`` and print a short summary of the reported settings.

    Returns:
        tuple: ``(True, data)`` with the decoded JSON body when the endpoint
        answers 200; ``(False, None)`` on any other status or on error.
    """
    try:
        response = requests.get(f"{SERVER_URL}/health", timeout=10)
        if response.status_code != 200:
            # Report the status instead of failing without any explanation.
            print(f"✗ Health check failed: HTTP {response.status_code}")
            return False, None

        data = response.json()
        # Tolerate a missing or null "configuration" entry.
        config = data.get('configuration') or {}
        print(f"✓ Server health: {data.get('status', 'unknown')}")
        print(f"  Auth mode: {data.get('auth_mode', 'unknown')}")
        print(f"  LLM: {config.get('llm_binding', 'unknown')} / {config.get('llm_model', 'unknown')}")
        print(f"  Embedding: {config.get('embedding_binding', 'unknown')}")
        return True, data
    except Exception as e:
        print(f"✗ Health check failed: {e}")
    return False, None

def test_login():
    """Check ``/auth-status`` and, when auth is configured, log in via ``/login``.

    Returns:
        tuple: ``(ok, token)``. ``ok`` is True when login succeeded or when
        the server reports that auth is not configured. ``token`` is the
        ``access_token`` field of the login response, or ``None`` when no
        login was performed or it failed.
    """
    print("Testing login...")

    try:
        # First check auth status
        response = requests.get(f"{SERVER_URL}/auth-status", timeout=10)
        if response.status_code != 200:
            # Report the status instead of failing without any explanation.
            print(f"✗ Auth status check failed: {response.status_code}")
            return False, None

        auth_status = response.json()
        print(f"  Auth configured: {auth_status.get('auth_configured', 'unknown')}")

        if not auth_status.get('auth_configured'):
            print("✓ Auth not configured (guest access enabled)")
            return True, None

        # Try to login; credentials are sent as form data
        form_data = {
            "username": USERNAME,
            "password": PASSWORD,
        }
        response = requests.post(f"{SERVER_URL}/login", data=form_data, timeout=10)
        if response.status_code != 200:
            print(f"✗ Login failed: {response.status_code}")
            return False, None

        token_data = response.json()
        print("✓ Login successful")
        print(f"  Auth mode: {token_data.get('auth_mode', 'unknown')}")
        return True, token_data.get('access_token')
    except Exception as e:
        print(f"✗ Login test error: {e}")

    return False, None

def test_endpoints():
    """Probe a fixed list of API endpoints and report each status.

    GET endpoints are fetched as-is; ``/api/query`` is POSTed with a small
    dummy query. An endpoint counts as working when it answers 200 or 201.

    Returns:
        bool: True when at least three endpoints are working.
    """
    print("Testing API endpoints...")

    probes = (
        ("/health", "GET"),
        ("/auth-status", "GET"),
        ("/api/documents", "GET"),
        ("/api/workspaces", "GET"),
        ("/api/query", "POST"),  # exercised with a dummy query
    )

    ok_count = 0
    for path, verb in probes:
        url = f"{SERVER_URL}{path}"
        try:
            if verb == "GET":
                reply = requests.get(url, timeout=10)
            elif path == "/api/query":
                # The query endpoint needs a body and may take longer.
                reply = requests.post(
                    url,
                    json={"query": "test", "top_k": 1},
                    timeout=30,
                )
            else:
                reply = requests.post(url, timeout=10)

            if reply.status_code in (200, 201):
                print(f"✓ {path}: {reply.status_code}")
                ok_count += 1
            else:
                print(f"✗ {path}: {reply.status_code} - {reply.text[:100]}")
        except Exception as exc:
            print(f"✗ {path}: ERROR - {str(exc)[:100]}")

    # At least 3 endpoints should work
    return ok_count >= 3

def check_documents():
    """Fetch ``/api/documents`` and list the first few entries.

    Returns:
        bool: True when the endpoint answers 200 with a non-empty collection,
        False otherwise (including on any error).
    """
    print("Checking documents...")

    try:
        reply = requests.get(f"{SERVER_URL}/api/documents", timeout=10)
        if reply.status_code != 200:
            print(f"✗ Failed to get documents: {reply.status_code}")
            return False

        docs = reply.json()
        print(f"✓ Found {len(docs)} documents")
        # Only the first three entries are shown.
        for entry in docs[:3]:
            name = entry.get('filename', 'unknown')
            state = entry.get('status', 'unknown')
            print(f"  - {name}: {state}")
        return len(docs) > 0
    except Exception as exc:
        print(f"✗ Error checking documents: {exc}")

    return False

def test_search():
    """POST a few sample queries to ``/api/query`` until one yields output.

    A query counts as a success when the JSON reply contains an
    ``llm_response`` key or a non-empty ``results`` list.

    Returns:
        bool: True on the first successful query, False when none succeed.
    """
    print("Testing search...")

    for term in ("railway", "train", "transport", "test"):
        try:
            print(f"  Testing query: '{term}'")
            reply = requests.post(
                f"{SERVER_URL}/api/query",
                json={"query": term, "top_k": 3},
                timeout=60,  # searches can be slow
            )

            if reply.status_code != 200:
                print(f"    ✗ Search failed: {reply.status_code} - {reply.text[:100]}")
                continue

            payload = reply.json()
            hit_count = len(payload.get('results', []))
            print(f"    ✓ Search successful: {hit_count} results")

            # An LLM response in the payload is taken as evidence of DeepSeek use.
            if "llm_response" in payload:
                print("    ✓ DeepSeek API used (LLM response present)")
                return True
            if "results" in payload and len(payload["results"]) > 0:
                print("    ✓ Search returned results (may be using cached/indexed data)")
                return True
            print("    ⚠ Search returned no results")
        except Exception as exc:
            print(f"    ✗ Search error: {exc}")

    return False

def check_logs_for_components(log_file=None):
    """Scan the tail of the server log for mentions of indexing components.

    Only the last 1000 lines are examined, case-insensitively. The file is
    streamed through a bounded deque, so a large log is never held in memory
    as a whole.

    Args:
        log_file: Path of the log to scan. Defaults to
            ``<WORKSPACE_DIR>/LightRAG-main/logs/lightrag.log``.

    Returns:
        dict: Maps ``"openclip"``, ``"paddleocr"``, ``"spacy"`` and
        ``"deepseek"`` to a bool. All values are False when the log is
        missing or cannot be read.
    """
    from collections import deque

    print("Checking logs for indexing components...")

    if log_file is None:
        log_file = os.path.join(WORKSPACE_DIR, "LightRAG-main", "logs", "lightrag.log")
    components_found = {
        "openclip": False,
        "paddleocr": False,
        "spacy": False,
        "deepseek": False,
    }

    if not os.path.exists(log_file):
        print(f"✗ Log file not found: {log_file}")
        return components_found

    try:
        # deque(maxlen=...) keeps only the last 1000 lines while iterating.
        with open(log_file, 'r', encoding='utf-8', errors='ignore') as f:
            log_content = "".join(deque(f, maxlen=1000)).lower()
    except OSError as e:
        print(f"✗ Error reading log file: {e}")
        return components_found

    # Check for component mentions (loose substring heuristics)
    components_found["openclip"] = "openclip" in log_content
    components_found["paddleocr"] = "paddleocr" in log_content or "ocr" in log_content
    components_found["spacy"] = "spacy" in log_content or "entity" in log_content
    components_found["deepseek"] = "deepseek" in log_content

    print("Components found in logs:")
    for component, found in components_found.items():
        print(f"  - {component}: {'✓' if found else '✗'}")

    return components_found

def main():
    """Run the full API test sequence and write a JSON report.

    Steps: change into ``WORKSPACE_DIR``, start the server, check health,
    login, endpoints, documents, log components and search, then stop the
    server. The server process is stopped in a ``finally`` clause, so it is
    not left running when one of the steps raises.

    Returns:
        bool: True when every recorded check passed. False when any check
        failed, the workspace directory is unavailable, or the server could
        not be started (in the last two cases no report is written).
    """
    print("=" * 60)
    print("LightRAG API Test")
    print("=" * 60)

    # Change to workspace directory
    try:
        os.chdir(WORKSPACE_DIR)
    except OSError as e:
        print(f"\n✗ FAILED: Cannot change to workspace directory: {e}")
        return False

    test_results = {}

    # Step 1: Start server
    server_process = start_server()
    test_results["server_started"] = server_process is not None

    if not test_results["server_started"]:
        print("\n✗ FAILED: Could not start server")
        return False

    try:
        # Give server time to fully initialize
        time.sleep(3)

        # Step 2: Check server health
        health_ok, health_data = check_server_health()
        test_results["health_check"] = health_ok

        # Step 3: Test login (the returned token is not used by later steps)
        login_ok, _token = test_login()
        test_results["login"] = login_ok

        # Step 4: Test endpoints
        test_results["endpoints"] = test_endpoints()

        # Step 5: Check documents
        test_results["documents_exist"] = check_documents()

        # Step 6: Check logs for indexing components
        components = check_logs_for_components()
        test_results["indexing_components"] = any(components.values())
        test_results.update({f"component_{k}": v for k, v in components.items()})

        # Step 7: Test search
        test_results["search_works"] = test_search()
    finally:
        # Step 8: Cleanup -- runs even when a step above raised
        print("\nCleaning up...")
        server_process.terminate()
        try:
            server_process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            # terminate() was not honoured in time; force the process down.
            server_process.kill()
            try:
                server_process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                pass

    # Step 9: Report results
    print("\n" + "=" * 60)
    print("TEST SUMMARY")
    print("=" * 60)

    all_passed = True
    for test_name, result in test_results.items():
        if isinstance(result, bool):
            status = "PASS" if result else "FAIL"
            if not result:
                all_passed = False
            print(f"{test_name:30} {status}")
        else:
            print(f"{test_name:30} {result}")

    print("\n" + "=" * 60)

    # Generate detailed report
    report = {
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "overall_success": all_passed,
        "test_results": test_results,
        "server_config": health_data.get("configuration", {}) if health_data else {},
        "components_found": components,
    }

    report_file = "lightrag_test_report.json"
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2)

    print(f"Detailed report saved to: {report_file}")

    if all_passed:
        print("✓ SUCCESS: All critical tests passed!")
        return True

    print("⚠ WARNING: Some tests failed or had issues")
    print("\nRoot cause analysis:")
    print("1. Server startup issues: Fixed Unicode encoding in display_splash_screen()")
    print("2. OllamaAPI error: Fixed WorkspaceManager/LightRAG type mismatch")
    print("3. WorkspaceManager bug: Fixed lightrag_factory.create() call")
    print("\nRemaining issues may require:")
    print("- Checking if OCR.pdf exists in test/ directory")
    print("- Ensuring DeepSeek API key is valid in .env file")
    print("- Verifying Ollama is running for embeddings")
    return False

if __name__ == "__main__":
    # The exit status mirrors the overall result: 0 on success, 1 on failure.
    sys.exit(0 if main() else 1)