# railseek6/get_search_results.py

import requests
import json
import base64

def test_search_with_auth(
    base_url: str = "http://localhost:3015",
    username: str = "jleu3482",
    password: str = "jleu1212",
    queries=None,
    top_k: int = 5,
    timeout: float = 30.0,
) -> None:
    """Run a set of search queries against the service and print the hits.

    Each query is POSTed as JSON (``{"query": ..., "top_k": ...}``) to
    ``{base_url}/search`` with an HTTP Basic ``Authorization`` header. For
    every hit in the response's ``results`` list the ``content`` (first 200
    characters), ``score`` and ``source`` fields are printed. A failure on
    one query is reported and the remaining queries are still attempted.

    Args:
        base_url: Root URL of the search service (a trailing ``/`` is ignored).
        username: User name for HTTP Basic authentication.
        password: Password for HTTP Basic authentication.
        queries: Iterable of query strings; ``None`` selects the built-in
            OCR-related sample queries.
        top_k: Value sent as ``top_k`` in each request payload.
        timeout: Seconds to wait for the server before giving up on a
            request. Without it ``requests`` would wait indefinitely.

    Returns:
        None. All output goes to stdout.
    """
    # NOTE(review): the default credentials are committed in source; prefer
    # passing them in (or reading them from the environment) outside of
    # local testing.
    if queries is None:
        queries = [
            "OCR",
            "text extraction",
            "document processing",
            "optical character recognition",
            "PDF conversion",
        ]

    # Create basic auth header
    credentials = f"{username}:{password}"
    encoded_credentials = base64.b64encode(credentials.encode()).decode()
    headers = {
        "Authorization": f"Basic {encoded_credentials}",
        "Content-Type": "application/json"
    }
    search_url = f"{base_url.rstrip('/')}/search"

    print("🔍 SEARCH RESULTS FOR OCR PDF CONTENT")
    print("=" * 70)

    for query in queries:
        print(f"\n📝 Query: '{query}'")
        print("-" * 40)

        # Deliberately broad catch: this is a best-effort diagnostic loop, so
        # any failure on one query is reported and the next query still runs.
        try:
            response = requests.post(
                search_url,
                json={"query": query, "top_k": top_k},
                headers=headers,
                timeout=timeout,
            )

            if response.status_code != 200:
                print(f"❌ Search failed: {response.status_code} - {response.text}")
                continue

            # `or []` also covers a response carrying `"results": null`.
            hits = response.json().get('results') or []
            print(f"✅ Search successful - {len(hits)} results found")

            # Display results
            for i, result in enumerate(hits, start=1):
                # A JSON null content must not break slicing.
                content = str(result.get('content') or '')
                score = result.get('score', 0)
                try:
                    score_text = f"{float(score):.4f}"
                except (TypeError, ValueError):
                    # Missing/non-numeric score: show it verbatim rather
                    # than abandoning the rest of the hits.
                    score_text = str(score)

                print(f"\n  Result {i}:")
                print(f"    Content: {content[:200]}...")
                print(f"    Score: {score_text}")
                print(f"    Source: {result.get('source', 'Unknown')}")

        except Exception as e:
            print(f"❌ Search error: {e}")

    print("\n" + "=" * 70)
    print("📊 SEARCH SUMMARY")
    print("=" * 70)
    print("If results are found, the OCR PDF has been successfully:")
    print("✅ Uploaded to the system")
    print("✅ Processed with OCR text extraction")
    print("✅ Indexed across all databases")
    print("✅ Made searchable via vector similarity")
    print("\nIf no results are found, the document may not be indexed yet.")
    print("Please upload the OCR PDF through the Web UI first.")

# Script entry point: run the search check only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    test_search_with_auth()