#!/usr/bin/env python3 """ Comprehensive verification to know for sure if Ollama reranker was used """ import requests import time import json import sys def check_server_configuration(): """Check server startup configuration""" print("=== 1. SERVER CONFIGURATION CHECK ===") # Check what command the server was started with print("Checking server configuration files...") config_files = { "start_server.py": "LightRAG-main/start_server.py", "zrun.bat": "LightRAG-main/zrun.bat" } for name, path in config_files.items(): try: with open(path, 'r') as f: content = f.read() if '--rerank-binding ollama' in content: print(f"✅ {name}: Configured for Ollama rerank") elif '--rerank-binding jina' in content: print(f"❌ {name}: Still configured for Jina rerank") else: print(f"⚠️ {name}: No rerank binding found") except Exception as e: print(f"⚠️ {name}: Could not read ({e})") return True def check_ollama_logs(): """Check Ollama logs for rerank activity""" print("\n=== 2. OLLAMA LOGS CHECK ===") # Test if Ollama is responding to embedding requests test_payload = { "model": "jina-reranker-v2:latest", "prompt": "test query for verification" } try: print("Sending test embedding request to Ollama...") start_time = time.time() response = requests.post( "http://localhost:11434/api/embed", json=test_payload, timeout=10 ) end_time = time.time() if response.status_code == 200: print(f"✅ Ollama embedding API is working ({end_time-start_time:.2f}s)") result = response.json() embedding_len = len(result.get('embedding', [])) print(f" Embedding dimension: {embedding_len}") return True else: print(f"❌ Ollama returned status {response.status_code}") return False except Exception as e: print(f"❌ Cannot connect to Ollama: {e}") return False def check_lightrag_logs(): """Check LightRAG server logs for rerank activity""" print("\n=== 3. LIGHTRAG SERVER LOGS ===") # Check if server is running try: response = requests.get("http://localhost:3015/health", timeout=5) if response.status_code == 200: print("✅ LightRAG server is running") # Try to get server logs (if endpoint exists) try: logs_response = requests.get("http://localhost:3015/logs", timeout=5) if logs_response.status_code == 200: logs = logs_response.text if 'ollama' in logs.lower() or 'rerank' in logs.lower(): print("✅ Found rerank references in server logs") else: print("⚠️ No rerank references in logs (may be clean)") else: print("⚠️ Logs endpoint not available") except: print("⚠️ Could not access logs endpoint") return True else: print(f"❌ Server returned status {response.status_code}") return False except Exception as e: print(f"❌ Cannot connect to LightRAG server: {e}") return False def perform_live_rerank_test(): """Perform a live test to verify rerank is working""" print("\n=== 4. LIVE RERANK TEST ===") # Create a test query test_query = { "query": "artificial intelligence machine learning", "workspace": "default", "top_k": 3, "history_turns": 0, "enable_rerank": True # Ensure rerank is enabled } try: print("Sending query with rerank enabled...") start_time = time.time() response = requests.post( "http://localhost:3015/api/query", json=test_query, headers={"Content-Type": "application/json"}, timeout=30 ) end_time = time.time() if response.status_code == 200: result = response.json() print(f"✅ Query successful ({end_time-start_time:.2f}s)") # Check for rerank indicators chunks = result.get('chunks', []) reranked_chunks = result.get('reranked_chunks', []) if reranked_chunks: print(f"✅ Explicit reranked_chunks found: {len(reranked_chunks)}") # Check if they have scores if reranked_chunks and 'score' in reranked_chunks[0]: print(f"✅ Rerank scores present: {reranked_chunks[0]['score']}") return True elif chunks: print(f"✅ {len(chunks)} chunks returned") # Check if chunks are sorted by relevance (indicating rerank) if len(chunks) > 1 and 'score' in chunks[0]: scores = [c.get('score', 0) for c in chunks] if scores == sorted(scores, reverse=True): print("✅ Chunks are sorted by score (rerank likely used)") return True else: print("⚠️ Chunks not sorted by score") return False else: print("⚠️ No scores in chunks (rerank may not be used)") return False else: print("⚠️ No chunks in response (may be no documents)") return True else: print(f"❌ Query failed: {response.status_code}") print(f"Response: {response.text[:200]}") return False except Exception as e: print(f"❌ Error during live test: {e}") return False def check_with_and_without_rerank(): """Compare results with and without rerank""" print("\n=== 5. COMPARISON TEST (With vs Without Rerank) ===") test_cases = [ {"enable_rerank": True, "name": "WITH rerank"}, {"enable_rerank": False, "name": "WITHOUT rerank"} ] results = {} for test_case in test_cases: test_query = { "query": "test artificial intelligence", "workspace": "default", "top_k": 3, "history_turns": 0, "enable_rerank": test_case["enable_rerank"] } try: print(f"Testing {test_case['name']}...") start_time = time.time() response = requests.post( "http://localhost:3015/api/query", json=test_query, headers={"Content-Type": "application/json"}, timeout=30 ) end_time = time.time() if response.status_code == 200: result = response.json() chunks = result.get('chunks', []) results[test_case["name"]] = { "time": end_time - start_time, "chunk_count": len(chunks), "has_scores": bool(chunks and 'score' in chunks[0]) } print(f" ✅ {len(chunks)} chunks in {end_time-start_time:.2f}s") else: print(f" ❌ Failed: {response.status_code}") results[test_case["name"]] = {"error": response.status_code} except Exception as e: print(f" ❌ Error: {e}") results[test_case["name"]] = {"error": str(e)} # Compare results print("\n--- Comparison Results ---") if "WITH rerank" in results and "WITHOUT rerank" in results: with_rerank = results["WITH rerank"] without_rerank = results["WITHOUT rerank"] if "time" in with_rerank and "time" in without_rerank: time_diff = with_rerank["time"] - without_rerank["time"] if time_diff > 0.5: # Rerank should take noticeably longer print(f"✅ Rerank takes {time_diff:.2f}s longer (expected)") else: print(f"⚠️ Rerank time difference small: {time_diff:.2f}s") if with_rerank.get("has_scores", False) and not without_rerank.get("has_scores", False): print("✅ Scores only present WITH rerank (good indicator)") else: print("⚠️ Score presence doesn't differentiate") return True def monitor_ollama_activity(): """Monitor Ollama for real-time activity""" print("\n=== 6. REAL-TIME OLLAMA MONITORING ===") print("Monitoring Ollama activity for 10 seconds...") print("Perform a search in LightRAG UI now to see if Ollama is called.") # Get initial Ollama stats try: initial_response = requests.get("http://localhost:11434/api/version", timeout=5) if initial_response.status_code == 200: print(f"Ollama version: {initial_response.json().get('version', 'unknown')}") except: pass # Monitor for embedding calls print("Waiting for activity... (perform a search now)") # Simple monitoring by checking if Ollama responds to a quick test # In a real scenario, you'd check Ollama logs or metrics print("\nTo monitor Ollama usage in real-time:") print("1. Check Ollama logs: Look for 'embed' or 'jina-reranker' entries") print("2. Monitor GPU usage: nvidia-smi should show activity during searches") print("3. Check network traffic: Wireshark on port 11434") return True def create_definitive_verification_script(): """Create a script for ongoing verification""" print("\n=== 7. ONGOING VERIFICATION SCRIPT ===") script_content = '''#!/usr/bin/env python3 """ Quick verification that Ollama rerank is being used Run this after performing a search in LightRAG """ import requests import time def verify_ollama_rerank(): # 1. Check Ollama is reachable try: resp = requests.get("http://localhost:11434/api/tags", timeout=5) if "jina-reranker-v2" in resp.text: print("✅ Ollama has Jina rerank model") else: print("❌ Jina rerank model not found") except: print("❌ Cannot connect to Ollama") return False # 2. Perform a test query query = { "query": "test verification query", "workspace": "default", "top_k": 2, "enable_rerank": True } try: start = time.time() resp = requests.post("http://localhost:3015/api/query", json=query, timeout=30) elapsed = time.time() - start if resp.status_code == 200: data = resp.json() chunks = data.get('chunks', []) if chunks and len(chunks) > 0: if 'score' in chunks[0]: print(f"✅ Rerank used (scores present, took {elapsed:.2f}s)") print(f" Top score: {chunks[0].get('score', 'N/A')}") return True else: print(f"⚠️ No scores (rerank may not be used)") return False else: print("⚠️ No chunks returned") return False else: print(f"❌ Query failed: {resp.status_code}") return False except Exception as e: print(f"❌ Error: {e}") return False if __name__ == "__main__": verify_ollama_rerank() ''' with open("verify_rerank_quick.py", "w") as f: f.write(script_content) print("✅ Created quick verification script: verify_rerank_quick.py") print(" Run: python verify_rerank_quick.py") return True def main(): """Run all verification steps""" print("=" * 60) print("DEFINITIVE VERIFICATION: Is Ollama Rerank Being Used?") print("=" * 60) steps = [ ("Configuration Check", check_server_configuration), ("Ollama Logs", check_ollama_logs), ("LightRAG Logs", check_lightrag_logs), ("Live Rerank Test", perform_live_rerank_test), ("Comparison Test", check_with_and_without_rerank), ("Ollama Monitoring", monitor_ollama_activity), ("Create Verification Script", create_definitive_verification_script) ] results = [] for step_name, step_func in steps: print(f"\n{'='*40}") print(f"STEP: {step_name}") print(f"{'='*40}") try: result = step_func() results.append((step_name, result)) except Exception as e: print(f"Error in {step_name}: {e}") results.append((step_name, False)) # Summary print("\n" + "=" * 60) print("VERIFICATION SUMMARY") print("=" * 60) all_passed = True for step_name, passed in results: status = "✅ PASS" if passed else "⚠️ CHECK" if not passed: all_passed = False print(f"{step_name:30} {status}") print("\n" + "=" * 60) if all_passed: print("🎉 CONCLUSIVE: Ollama rerank IS being used") else: print("⚠️ INCONCLUSIVE: Some checks need attention") print("\n" + "=" * 60) print("DEFINITIVE WAYS TO KNOW:") print("1. ✅ Check server logs for 'ollama_rerank' calls") print("2. ✅ Monitor Ollama port 11434 for embedding requests") print("3. ✅ Check GPU usage (nvidia-smi) during searches") print("4. ✅ Compare query times with/without 'Enable rank'") print("5. ✅ Look for 'score' field in API responses") print("\nIMMEDIATE VERIFICATION:") print("Run the created script: python verify_rerank_quick.py") return 0 if all_passed else 1 if __name__ == "__main__": sys.exit(main())