87 lines
2.5 KiB
Python
87 lines
2.5 KiB
Python
|
|
"""
|
|
LightRAG DLL Issue Workaround
|
|
This module provides fallbacks when PyTorch DLL loading fails
|
|
"""
|
|
|
|
import logging
|
|
|
|
# Module-level logger obtained under the fixed name "lightrag".
logger = logging.getLogger("lightrag")
|
|
|
|
class EntityExtractionFallback:
    """Regex-based entity extraction used when spaCy cannot be used.

    The extractor is a set of rough regular-expression heuristics, not a
    trained model. It never produces relations.
    """

    # Regex source per entity label, applied in this order. These are crude:
    # e.g. PERSON matches any two capitalised words separated by one space,
    # so a single span of text can be reported under more than one label.
    _PATTERNS = {
        'PERSON': r'[A-Z][a-z]+ [A-Z][a-z]+',
        'ORG': r'[A-Z][a-z]+ (?:Inc|Corp|Company|Ltd)',
        'GPE': r'[A-Z][a-z]+, [A-Z]{2}',
        'DATE': r'\d{1,2}/\d{1,2}/\d{4}',
        'MONEY': r'\$\d+(?:\.\d{2})?',
    }

    def __init__(self):
        # True once _setup_fallback() has confirmed the regex module imports.
        self.available = False
        self._setup_fallback()

    def _setup_fallback(self):
        """Probe for the ``re`` module and set ``self.available`` accordingly.

        Logs at INFO level on success and at WARNING level on failure; it
        never raises.
        """
        try:
            # Only an availability probe: this name is local to this method,
            # so extract_entities_fallback() performs its own import.
            import re  # noqa: F401
            self.available = True
            logger.info("Using regex-based entity extraction fallback")
        except Exception as e:
            logger.warning(f"Entity extraction fallback not available: {e}")

    def extract_entities_fallback(self, text):
        """Extract entities from *text* using the regex heuristics.

        Args:
            text: String to scan. A falsy value (``None`` or ``""``) yields
                empty results instead of an error.

        Returns:
            A ``(entities, relations)`` tuple. ``entities`` is a list of
            dicts with keys ``'text'``, ``'label'``, ``'start'`` and
            ``'end'`` (character offsets into *text*), grouped by label in
            ``_PATTERNS`` order. ``relations`` is always an empty list.
            Both lists are empty when the fallback is unavailable.
        """
        if not self.available or not text:
            return [], []

        # Imported here on purpose: the import in _setup_fallback() is scoped
        # to that method, so relying on it raised NameError at this point.
        import re

        entities = []
        relations = []

        for entity_type, pattern in self._PATTERNS.items():
            entities.extend(
                {
                    'text': match.group(),
                    'label': entity_type,
                    'start': match.start(),
                    'end': match.end(),
                }
                for match in re.finditer(pattern, text)
            )

        return entities, relations
|
|
|
|
# Global fallback instance
|
|
# Created at import time; the constructor runs _setup_fallback() immediately.
entity_fallback = EntityExtractionFallback()
|
|
|
|
# Loaded spaCy pipeline, cached after the first successful load so the model
# is not re-read on every call. Stays None until a load succeeds.
_spacy_pipeline = None


def _get_spacy_pipeline():
    """Return the cached "en_core_web_sm" pipeline, loading it on first use."""
    global _spacy_pipeline
    if _spacy_pipeline is None:
        # Imported lazily so that an import/DLL failure surfaces inside the
        # caller's try block instead of at module import time.
        import spacy
        _spacy_pipeline = spacy.load("en_core_web_sm")
    return _spacy_pipeline


def safe_extract_entities(text):
    """
    Safely extract entities with fallback if spaCy fails

    Args:
        text: Text to analyse.

    Returns:
        A ``(entities, relations)`` tuple. ``entities`` is a list of dicts
        with keys ``'text'``, ``'label'``, ``'start'`` and ``'end'``;
        ``relations`` is currently always an empty list. If importing,
        loading or running spaCy raises any ``Exception``, a warning is
        logged and the result of
        ``entity_fallback.extract_entities_fallback(text)`` is returned.
    """
    try:
        # Try to use spaCy first
        doc = _get_spacy_pipeline()(text)

        entities = [
            {
                'text': ent.text,
                'label': ent.label_,
                'start': ent.start_char,
                'end': ent.end_char,
            }
            for ent in doc.ents
        ]

        # Simple relation extraction (placeholder)
        relations = []

        return entities, relations

    except Exception as e:
        # Deliberately broad: any spaCy failure should degrade to the regex
        # fallback rather than propagate.
        logger.warning(f"spaCy entity extraction failed, using fallback: {e}")
        return entity_fallback.extract_entities_fallback(text)
|