jina rerank working
This commit is contained in:
394
verify_ollama_rerank_usage.py
Normal file
394
verify_ollama_rerank_usage.py
Normal file
@@ -0,0 +1,394 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Comprehensive verification to know for sure if Ollama reranker was used
|
||||
"""
|
||||
import requests
|
||||
import time
|
||||
import json
|
||||
import sys
|
||||
|
||||
def check_server_configuration():
    """Inspect known startup files for the rerank-binding setting.

    Reads each configured file and reports whether it selects the Ollama
    or the Jina rerank binding. Unreadable/missing files are reported as
    warnings, never raised.

    Returns:
        bool: always True (this step is informational only).
    """
    print("=== 1. SERVER CONFIGURATION CHECK ===")

    # Check what command the server was started with
    print("Checking server configuration files...")

    config_files = {
        "start_server.py": "LightRAG-main/start_server.py",
        "zrun.bat": "LightRAG-main/zrun.bat"
    }

    for name, path in config_files.items():
        try:
            # FIX: pin UTF-8 — the default locale encoding (e.g. cp1252 on
            # Windows) raises UnicodeDecodeError on non-ASCII config content.
            with open(path, 'r', encoding='utf-8') as f:
                content = f.read()
            if '--rerank-binding ollama' in content:
                print(f"✅ {name}: Configured for Ollama rerank")
            elif '--rerank-binding jina' in content:
                print(f"❌ {name}: Still configured for Jina rerank")
            else:
                print(f"⚠️ {name}: No rerank binding found")
        except Exception as e:
            print(f"⚠️ {name}: Could not read ({e})")

    return True
|
||||
|
||||
def check_ollama_logs():
    """Probe Ollama's embedding API to confirm the daemon is serving requests.

    Returns:
        bool: True when the embed endpoint answers 200, False on any
        connection/HTTP failure.
    """
    print("\n=== 2. OLLAMA LOGS CHECK ===")

    # FIX: /api/embed expects the text under "input" and returns a list of
    # vectors under "embeddings"; the "prompt"/"embedding" keys the original
    # used belong to the legacy /api/embeddings endpoint, so the old payload
    # was silently ignored and the dimension always read as 0.
    test_payload = {
        "model": "jina-reranker-v2:latest",
        "input": "test query for verification"
    }

    try:
        print("Sending test embedding request to Ollama...")
        start_time = time.time()
        response = requests.post(
            "http://localhost:11434/api/embed",
            json=test_payload,
            timeout=10
        )
        end_time = time.time()

        if response.status_code == 200:
            print(f"✅ Ollama embedding API is working ({end_time-start_time:.2f}s)")
            result = response.json()
            embeddings = result.get('embeddings', [])
            # First vector's length is the model's embedding dimension.
            embedding_len = len(embeddings[0]) if embeddings else 0
            print(f" Embedding dimension: {embedding_len}")
            return True
        else:
            print(f"❌ Ollama returned status {response.status_code}")
            return False
    except Exception as e:
        print(f"❌ Cannot connect to Ollama: {e}")
        return False
|
||||
|
||||
def check_lightrag_logs():
    """Check LightRAG server health and scan its logs for rerank references.

    Returns:
        bool: True when the /health endpoint answers 200 (log scan is
        best-effort), False when the server is unreachable or unhealthy.
    """
    print("\n=== 3. LIGHTRAG SERVER LOGS ===")

    # Check if server is running
    try:
        response = requests.get("http://localhost:3015/health", timeout=5)
        if response.status_code == 200:
            print("✅ LightRAG server is running")

            # The /logs endpoint is optional; its absence is not a failure.
            try:
                logs_response = requests.get("http://localhost:3015/logs", timeout=5)
                if logs_response.status_code == 200:
                    logs = logs_response.text
                    if 'ollama' in logs.lower() or 'rerank' in logs.lower():
                        print("✅ Found rerank references in server logs")
                    else:
                        print("⚠️ No rerank references in logs (may be clean)")
                else:
                    print("⚠️ Logs endpoint not available")
            # FIX: was a bare `except:` — keep the best-effort behavior but
            # stop swallowing SystemExit/KeyboardInterrupt.
            except Exception:
                print("⚠️ Could not access logs endpoint")

            return True
        else:
            print(f"❌ Server returned status {response.status_code}")
            return False
    except Exception as e:
        print(f"❌ Cannot connect to LightRAG server: {e}")
        return False
|
||||
|
||||
def perform_live_rerank_test():
    """Run one query with rerank enabled and look for rerank evidence.

    Evidence is either an explicit `reranked_chunks` list in the response,
    or ordinary chunks carrying a 'score' field sorted in descending order.

    Returns:
        bool: True when rerank evidence was found (or the index is simply
        empty), False when evidence is absent or the query fails.
    """
    print("\n=== 4. LIVE RERANK TEST ===")

    # Create a test query
    test_query = {
        "query": "artificial intelligence machine learning",
        "workspace": "default",
        "top_k": 3,
        "history_turns": 0,
        "enable_rerank": True  # Ensure rerank is enabled
    }

    try:
        print("Sending query with rerank enabled...")
        start_time = time.time()
        response = requests.post(
            "http://localhost:3015/api/query",
            json=test_query,
            headers={"Content-Type": "application/json"},
            timeout=30
        )
        end_time = time.time()

        if response.status_code == 200:
            result = response.json()
            print(f"✅ Query successful ({end_time-start_time:.2f}s)")

            # Check for rerank indicators
            chunks = result.get('chunks', [])
            reranked_chunks = result.get('reranked_chunks', [])

            if reranked_chunks:
                print(f"✅ Explicit reranked_chunks found: {len(reranked_chunks)}")
                # Check if they have scores
                if 'score' in reranked_chunks[0]:
                    print(f"✅ Rerank scores present: {reranked_chunks[0]['score']}")
                # FIX: the original fell through and implicitly returned None
                # when reranked_chunks lacked a 'score' key; their presence is
                # itself evidence that rerank ran, so report success explicitly.
                return True
            elif chunks:
                print(f"✅ {len(chunks)} chunks returned")
                # Check if chunks are sorted by relevance (indicating rerank)
                if len(chunks) > 1 and 'score' in chunks[0]:
                    scores = [c.get('score', 0) for c in chunks]
                    if scores == sorted(scores, reverse=True):
                        print("✅ Chunks are sorted by score (rerank likely used)")
                        return True
                    else:
                        print("⚠️ Chunks not sorted by score")
                        return False
                else:
                    print("⚠️ No scores in chunks (rerank may not be used)")
                    return False
            else:
                print("⚠️ No chunks in response (may be no documents)")
                return True
        else:
            print(f"❌ Query failed: {response.status_code}")
            print(f"Response: {response.text[:200]}")
            return False

    except Exception as e:
        print(f"❌ Error during live test: {e}")
        return False
|
||||
|
||||
def check_with_and_without_rerank():
    """Issue the same query with rerank on and off, then compare the runs.

    Compares elapsed time and whether chunks carried 'score' fields — both
    are heuristics for whether the reranker actually executed.
    """
    print("\n=== 5. COMPARISON TEST (With vs Without Rerank) ===")

    outcomes = {}

    for label, rerank_flag in (("WITH rerank", True), ("WITHOUT rerank", False)):
        payload = {
            "query": "test artificial intelligence",
            "workspace": "default",
            "top_k": 3,
            "history_turns": 0,
            "enable_rerank": rerank_flag,
        }

        try:
            print(f"Testing {label}...")
            started = time.time()
            reply = requests.post(
                "http://localhost:3015/api/query",
                json=payload,
                headers={"Content-Type": "application/json"},
                timeout=30,
            )
            elapsed = time.time() - started

            if reply.status_code != 200:
                print(f" ❌ Failed: {reply.status_code}")
                outcomes[label] = {"error": reply.status_code}
            else:
                found = reply.json().get('chunks', [])
                outcomes[label] = {
                    "time": elapsed,
                    "chunk_count": len(found),
                    "has_scores": bool(found and 'score' in found[0]),
                }
                print(f" ✅ {len(found)} chunks in {elapsed:.2f}s")

        except Exception as exc:
            print(f" ❌ Error: {exc}")
            outcomes[label] = {"error": str(exc)}

    # Side-by-side comparison of the two runs.
    print("\n--- Comparison Results ---")
    if "WITH rerank" in outcomes and "WITHOUT rerank" in outcomes:
        on = outcomes["WITH rerank"]
        off = outcomes["WITHOUT rerank"]

        if "time" in on and "time" in off:
            delta = on["time"] - off["time"]
            # Rerank should take noticeably longer than a plain query.
            if delta > 0.5:
                print(f"✅ Rerank takes {delta:.2f}s longer (expected)")
            else:
                print(f"⚠️ Rerank time difference small: {delta:.2f}s")

        if on.get("has_scores", False) and not off.get("has_scores", False):
            print("✅ Scores only present WITH rerank (good indicator)")
        else:
            print("⚠️ Score presence doesn't differentiate")

    return True
|
||||
|
||||
def monitor_ollama_activity():
    """Print guidance for observing Ollama activity during a live search.

    Best-effort: shows the Ollama version when the daemon is reachable,
    then prints manual monitoring instructions.

    Returns:
        bool: always True (this step is informational only).
    """
    print("\n=== 6. REAL-TIME OLLAMA MONITORING ===")

    print("Monitoring Ollama activity for 10 seconds...")
    print("Perform a search in LightRAG UI now to see if Ollama is called.")

    # Get initial Ollama stats
    try:
        initial_response = requests.get("http://localhost:11434/api/version", timeout=5)
        if initial_response.status_code == 200:
            print(f"Ollama version: {initial_response.json().get('version', 'unknown')}")
    # FIX: was a bare `except:` — keep the best-effort probe but stop
    # swallowing SystemExit/KeyboardInterrupt.
    except Exception:
        pass

    # Monitor for embedding calls
    print("Waiting for activity... (perform a search now)")

    # Simple monitoring by checking if Ollama responds to a quick test
    # In a real scenario, you'd check Ollama logs or metrics
    print("\nTo monitor Ollama usage in real-time:")
    print("1. Check Ollama logs: Look for 'embed' or 'jina-reranker' entries")
    print("2. Monitor GPU usage: nvidia-smi should show activity during searches")
    print("3. Check network traffic: Wireshark on port 11434")

    return True
|
||||
|
||||
def create_definitive_verification_script():
    """Write a standalone quick-verification script to the working directory.

    Creates `verify_rerank_quick.py`, which checks the Ollama model list and
    runs one rerank-enabled query, so verification can be repeated on demand.

    Returns:
        bool: always True once the file is written.

    Raises:
        OSError: if the working directory is not writable.
    """
    print("\n=== 7. ONGOING VERIFICATION SCRIPT ===")

    script_content = '''#!/usr/bin/env python3
"""
Quick verification that Ollama rerank is being used
Run this after performing a search in LightRAG
"""
import requests
import time

def verify_ollama_rerank():
    # 1. Check Ollama is reachable
    try:
        resp = requests.get("http://localhost:11434/api/tags", timeout=5)
        if "jina-reranker-v2" in resp.text:
            print("✅ Ollama has Jina rerank model")
        else:
            print("❌ Jina rerank model not found")
    except:
        print("❌ Cannot connect to Ollama")
        return False

    # 2. Perform a test query
    query = {
        "query": "test verification query",
        "workspace": "default",
        "top_k": 2,
        "enable_rerank": True
    }

    try:
        start = time.time()
        resp = requests.post("http://localhost:3015/api/query",
                             json=query, timeout=30)
        elapsed = time.time() - start

        if resp.status_code == 200:
            data = resp.json()
            chunks = data.get('chunks', [])

            if chunks and len(chunks) > 0:
                if 'score' in chunks[0]:
                    print(f"✅ Rerank used (scores present, took {elapsed:.2f}s)")
                    print(f" Top score: {chunks[0].get('score', 'N/A')}")
                    return True
                else:
                    print(f"⚠️ No scores (rerank may not be used)")
                    return False
            else:
                print("⚠️ No chunks returned")
                return False
        else:
            print(f"❌ Query failed: {resp.status_code}")
            return False
    except Exception as e:
        print(f"❌ Error: {e}")
        return False

if __name__ == "__main__":
    verify_ollama_rerank()
'''

    # FIX: pin UTF-8 — the content contains emoji, and the locale default
    # encoding on Windows (cp1252) would raise UnicodeEncodeError here.
    with open("verify_rerank_quick.py", "w", encoding="utf-8") as f:
        f.write(script_content)

    print("✅ Created quick verification script: verify_rerank_quick.py")
    print(" Run: python verify_rerank_quick.py")

    return True
|
||||
|
||||
def main():
    """Run every verification step in order and summarize the outcome.

    Returns 0 when all steps reported success, 1 otherwise, so the script
    can be used as a CI-style check via its exit code.
    """
    banner = "=" * 60
    print(banner)
    print("DEFINITIVE VERIFICATION: Is Ollama Rerank Being Used?")
    print(banner)

    checks = (
        ("Configuration Check", check_server_configuration),
        ("Ollama Logs", check_ollama_logs),
        ("LightRAG Logs", check_lightrag_logs),
        ("Live Rerank Test", perform_live_rerank_test),
        ("Comparison Test", check_with_and_without_rerank),
        ("Ollama Monitoring", monitor_ollama_activity),
        ("Create Verification Script", create_definitive_verification_script),
    )

    divider = "=" * 40
    outcomes = []
    for title, run in checks:
        print(f"\n{divider}")
        print(f"STEP: {title}")
        print(f"{divider}")
        try:
            outcomes.append((title, run()))
        except Exception as exc:
            # A crashing step counts as a failed check, not a crashed run.
            print(f"Error in {title}: {exc}")
            outcomes.append((title, False))

    # Summary table
    print("\n" + banner)
    print("VERIFICATION SUMMARY")
    print(banner)

    all_passed = True
    for title, ok in outcomes:
        if not ok:
            all_passed = False
        print(f"{title:30} {'✅ PASS' if ok else '⚠️ CHECK'}")

    print("\n" + banner)
    if all_passed:
        print("🎉 CONCLUSIVE: Ollama rerank IS being used")
    else:
        print("⚠️ INCONCLUSIVE: Some checks need attention")

    print("\n" + banner)
    print("DEFINITIVE WAYS TO KNOW:")
    for hint in (
        "1. ✅ Check server logs for 'ollama_rerank' calls",
        "2. ✅ Monitor Ollama port 11434 for embedding requests",
        "3. ✅ Check GPU usage (nvidia-smi) during searches",
        "4. ✅ Compare query times with/without 'Enable rank'",
        "5. ✅ Look for 'score' field in API responses",
    ):
        print(hint)

    print("\nIMMEDIATE VERIFICATION:")
    print("Run the created script: python verify_rerank_quick.py")

    return 0 if all_passed else 1


if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user