# Files
# railseek6/test_deepseek_retrieval.py
#
# 181 lines
# 6.1 KiB
# Python
#
#!/usr/bin/env python3
"""
Test script to verify DeepSeek API retrieval functionality
"""
import requests
import json
import time
# Configuration
# Base URL of the local search server under test.
BASE_URL = "http://localhost:3015"
# NOTE(review): hardcoded credential — consider loading from .env instead.
API_KEY = "jleu1212"
def test_health():
    """Check the server health endpoint.

    Returns:
        bool: True when GET /api/health answers 200, False on any
        non-200 status or request error (connection refused, timeout, ...).
    """
    print("🔍 Testing server health...")
    try:
        # timeout prevents the whole test run from hanging if the server stalls
        response = requests.get(f"{BASE_URL}/api/health", timeout=10)
        if response.status_code == 200:
            print("✅ Server is healthy")
            return True
        else:
            print(f"❌ Server health check failed: {response.status_code}")
            return False
    except Exception as e:
        print(f"❌ Server health check error: {e}")
        return False
def test_search_with_deepseek():
    """Exercise POST /api/search with several queries and use_llm=True.

    For each query, reports whether vector-search results came back and
    whether an `llm_response` field is present (i.e. the DeepSeek LLM
    integration is active). Purely diagnostic: prints status, returns None.
    """
    print("\n🔍 Testing DeepSeek API retrieval...")
    test_queries = [
        "railway transportation",
        "table data",
        "document processing",
        "search functionality"
    ]
    # Headers are identical for every query — build them once outside the loop.
    headers = {
        'X-API-Key': API_KEY,
        'Content-Type': 'application/json'
    }
    for query in test_queries:
        print(f"\n Testing query: '{query}'")
        try:
            data = {
                'query': query,
                'top_k': 5,
                'use_llm': True  # Force LLM usage to test DeepSeek integration
            }
            print(f" Sending request to {BASE_URL}/api/search...")
            # Generous timeout: LLM-backed search can be slow, but must not hang forever.
            response = requests.post(f"{BASE_URL}/api/search", headers=headers,
                                     json=data, timeout=60)
            if response.status_code == 200:
                results = response.json()
                print(f" ✅ Search successful (Status: {response.status_code})")
                # Check if we have results
                if results.get('results'):
                    print(f" 📊 Found {len(results['results'])} results")
                    # Show first result details
                    first_result = results['results'][0]
                    print(f" 📄 First result:")
                    print(f" - Text: {first_result.get('text', '')[:100]}...")
                    print(f" - Score: {first_result.get('score', 'N/A')}")
                    print(f" - Source: {first_result.get('source', 'N/A')}")
                    # Check if LLM response is present (DeepSeek API working)
                    if results.get('llm_response'):
                        print(f" 🤖 DeepSeek LLM Response: {results['llm_response'][:200]}...")
                        print(" ✅ DeepSeek API is working for retrieval!")
                    else:
                        print(" ⚠️ No LLM response in results (vector search only)")
                else:
                    print(f" ⚠️ No search results for query: '{query}'")
            else:
                print(f" ❌ Search failed: {response.status_code}")
                print(f" Error details: {response.text}")
        except Exception as e:
            print(f" ❌ Search error: {e}")
def test_llm_only_query():
    """Send a summarization query that requires LLM processing.

    Returns:
        bool: True when the response contains a non-empty `llm_response`
        (DeepSeek is answering), False on missing LLM output, non-200
        status, or request error.
    """
    print("\n🧠 Testing LLM-intensive query...")
    query = "Summarize the main topics in the documents"
    try:
        headers = {
            'X-API-Key': API_KEY,
            'Content-Type': 'application/json'
        }
        data = {
            'query': query,
            'top_k': 3,
            'use_llm': True
        }
        # Generous timeout: summarization is slow, but the test must not hang forever.
        response = requests.post(f"{BASE_URL}/api/search", headers=headers,
                                 json=data, timeout=60)
        if response.status_code == 200:
            results = response.json()
            print(" ✅ LLM query successful")
            if results.get('llm_response'):
                print(f" 🤖 DeepSeek Response:")
                print(f" {results['llm_response']}")
                return True
            else:
                print(" ❌ No LLM response received")
                return False
        else:
            print(f" ❌ LLM query failed: {response.status_code}")
            print(f" Error: {response.text}")
            return False
    except Exception as e:
        print(f" ❌ LLM query error: {e}")
        return False
def check_server_logs_for_errors():
    """Print a reminder to inspect the server terminal for DeepSeek API errors.

    This function performs no checking itself — the server logs are not
    reachable from this script, so it only tells the operator what to look for.
    """
    print("\n📋 Checking for DeepSeek API errors...")
    # The server logs should show DeepSeek API calls
    advisory = (
        " Check server terminal for DeepSeek API activity",
        " Look for messages like:",
        " - 'OpenAI LLM Options'",
        " - 'DeepSeek API calls'",
        " - Any authentication or API errors",
    )
    for line in advisory:
        print(line)
def main():
    """Run the DeepSeek API retrieval test suite.

    Returns:
        bool: True when the LLM-intensive query produced a DeepSeek
        response, False otherwise (including when the server is down).
    """
    banner = "=" * 60
    print(banner)
    print("🧪 DEEPSEEK API RETRIEVAL TEST")
    print(banner)
    # Step 1: a dead server makes every later step meaningless — bail out early.
    if not test_health():
        print("❌ Cannot proceed - server is not healthy")
        return False
    # Step 2: basic search with DeepSeek enabled.
    test_search_with_deepseek()
    # Step 3: the LLM-intensive query decides overall success.
    llm_working = test_llm_only_query()
    # Step 4: remind the operator to inspect the server logs.
    check_server_logs_for_errors()
    print("\n" + banner)
    print("🎯 DEEPSEEK API RETRIEVAL STATUS")
    print(banner)
    if llm_working:
        status = (
            "✅ DeepSeek API is working for retrieval",
            "✅ LLM responses are being generated",
            "✅ Vector search is operational",
            "✅ Complete RAG pipeline is functional",
        )
    else:
        status = (
            "⚠️ DeepSeek API may have issues",
            "⚠️ Check API key and connectivity",
            "✅ Vector search is working (fallback mode)",
        )
    for line in status:
        print(line)
    print("\n📋 Next steps:")
    next_steps = (
        "1. Check server logs for DeepSeek API activity",
        "2. Verify API key in .env file",
        "3. Test with different document types",
        "4. Monitor response times and quality",
    )
    for step in next_steps:
        print(step)
    return llm_working
if __name__ == "__main__":
    # raise SystemExit instead of the interactive helper exit(): exit() is
    # injected by the site module and is not guaranteed in all environments.
    # Exit code 0 on success, 1 on failure so CI can key off the status.
    raise SystemExit(0 if main() else 1)