# Files
# railseek6/test_deepseek_retrieval.py
#
# 181 lines
# 6.1 KiB
# Python
#
#!/usr/bin/env python3
"""
Test script to verify DeepSeek API retrieval functionality
"""
import requests
import json
import time
# Configuration
# Base URL of the local search server under test.
BASE_URL = "http://localhost:3015"
# NOTE(review): hardcoded credential — consider loading from .env instead.
API_KEY = "jleu1212"
def test_health():
    """Check the server health endpoint.

    Returns:
        bool: True when GET /api/health answers 200, False on any
        non-200 status or request error (connection refused, timeout, ...).
    """
    print("🔍 Testing server health...")
    try:
        # timeout prevents the whole test run from hanging if the server stalls
        response = requests.get(f"{BASE_URL}/api/health", timeout=10)
        if response.status_code == 200:
            print("✅ Server is healthy")
            return True
        else:
            print(f"❌ Server health check failed: {response.status_code}")
            return False
    except Exception as e:
        print(f"❌ Server health check error: {e}")
        return False
def test_search_with_deepseek():
    """Exercise POST /api/search with several queries and use_llm=True.

    For each query, reports whether vector-search results came back and
    whether an `llm_response` field is present (i.e. the DeepSeek LLM
    integration is active). Purely diagnostic: prints status, returns None.
    """
    print("\n🔍 Testing DeepSeek API retrieval...")
    test_queries = [
        "railway transportation",
        "table data",
        "document processing",
        "search functionality"
    ]
    # Headers are identical for every query — build them once outside the loop.
    headers = {
        'X-API-Key': API_KEY,
        'Content-Type': 'application/json'
    }
    for query in test_queries:
        print(f"\n Testing query: '{query}'")
        try:
            data = {
                'query': query,
                'top_k': 5,
                'use_llm': True  # Force LLM usage to test DeepSeek integration
            }
            print(f" Sending request to {BASE_URL}/api/search...")
            # Generous timeout: LLM-backed search can be slow, but must not hang forever.
            response = requests.post(f"{BASE_URL}/api/search", headers=headers,
                                     json=data, timeout=60)
            if response.status_code == 200:
                results = response.json()
                print(f" ✅ Search successful (Status: {response.status_code})")
                # Check if we have results
                if results.get('results'):
                    print(f" 📊 Found {len(results['results'])} results")
                    # Show first result details
                    first_result = results['results'][0]
                    print(f" 📄 First result:")
                    print(f" - Text: {first_result.get('text', '')[:100]}...")
                    print(f" - Score: {first_result.get('score', 'N/A')}")
                    print(f" - Source: {first_result.get('source', 'N/A')}")
                    # Check if LLM response is present (DeepSeek API working)
                    if results.get('llm_response'):
                        print(f" 🤖 DeepSeek LLM Response: {results['llm_response'][:200]}...")
                        print(" ✅ DeepSeek API is working for retrieval!")
                    else:
                        print(" ⚠️ No LLM response in results (vector search only)")
                else:
                    print(f" ⚠️ No search results for query: '{query}'")
            else:
                print(f" ❌ Search failed: {response.status_code}")
                print(f" Error details: {response.text}")
        except Exception as e:
            print(f" ❌ Search error: {e}")
def test_llm_only_query():
    """Send a summarization query that requires LLM processing.

    Returns:
        bool: True when the response contains a non-empty `llm_response`
        (DeepSeek is answering), False on missing LLM output, non-200
        status, or request error.
    """
    print("\n🧠 Testing LLM-intensive query...")
    query = "Summarize the main topics in the documents"
    try:
        headers = {
            'X-API-Key': API_KEY,
            'Content-Type': 'application/json'
        }
        data = {
            'query': query,
            'top_k': 3,
            'use_llm': True
        }
        # Generous timeout: summarization is slow, but the test must not hang forever.
        response = requests.post(f"{BASE_URL}/api/search", headers=headers,
                                 json=data, timeout=60)
        if response.status_code == 200:
            results = response.json()
            print(" ✅ LLM query successful")
            if results.get('llm_response'):
                print(f" 🤖 DeepSeek Response:")
                print(f" {results['llm_response']}")
                return True
            else:
                print(" ❌ No LLM response received")
                return False
        else:
            print(f" ❌ LLM query failed: {response.status_code}")
            print(f" Error: {response.text}")
            return False
    except Exception as e:
        print(f" ❌ LLM query error: {e}")
        return False
def check_server_logs_for_errors():
    """Print a reminder to inspect the server terminal for DeepSeek API errors.

    This function performs no checking itself — the server logs are not
    reachable from this script, so it only tells the operator what to look for.
    """
    print("\n📋 Checking for DeepSeek API errors...")
    # The server logs should show DeepSeek API calls
    advisory = (
        " Check server terminal for DeepSeek API activity",
        " Look for messages like:",
        " - 'OpenAI LLM Options'",
        " - 'DeepSeek API calls'",
        " - Any authentication or API errors",
    )
    for line in advisory:
        print(line)
def main():
    """Run the DeepSeek API retrieval test suite.

    Returns:
        bool: True when the LLM-intensive query produced a DeepSeek
        response, False otherwise (including when the server is down).
    """
    banner = "=" * 60
    print(banner)
    print("🧪 DEEPSEEK API RETRIEVAL TEST")
    print(banner)
    # Step 1: a dead server makes every later step meaningless — bail out early.
    if not test_health():
        print("❌ Cannot proceed - server is not healthy")
        return False
    # Step 2: basic search with DeepSeek enabled.
    test_search_with_deepseek()
    # Step 3: the LLM-intensive query decides overall success.
    llm_working = test_llm_only_query()
    # Step 4: remind the operator to inspect the server logs.
    check_server_logs_for_errors()
    print("\n" + banner)
    print("🎯 DEEPSEEK API RETRIEVAL STATUS")
    print(banner)
    if llm_working:
        status = (
            "✅ DeepSeek API is working for retrieval",
            "✅ LLM responses are being generated",
            "✅ Vector search is operational",
            "✅ Complete RAG pipeline is functional",
        )
    else:
        status = (
            "⚠️ DeepSeek API may have issues",
            "⚠️ Check API key and connectivity",
            "✅ Vector search is working (fallback mode)",
        )
    for line in status:
        print(line)
    print("\n📋 Next steps:")
    next_steps = (
        "1. Check server logs for DeepSeek API activity",
        "2. Verify API key in .env file",
        "3. Test with different document types",
        "4. Monitor response times and quality",
    )
    for step in next_steps:
        print(step)
    return llm_working
if __name__ == "__main__":
    # raise SystemExit instead of the interactive helper exit(): exit() is
    # injected by the site module and is not guaranteed in all environments.
    # Exit code 0 on success, 1 on failure so CI can key off the status.
    raise SystemExit(0 if main() else 1)