Files
crm.clientright.ru/aiassist/ner_dp_extraction.py
Fedor ac7467f0b4 Major CRM updates: AI Assistant, Court Status API, S3 integration improvements, and extensive file storage system
- Added comprehensive AI Assistant system (aiassist/ directory):
  * Vector search and embedding capabilities
  * Typebot proxy integration
  * Elasticsearch functionality
  * Message classification and chat history
  * MCP proxy for external integrations

- Implemented Court Status API (GetCourtStatus.php):
  * Real-time court document status checking
  * Integration with external court systems
  * Comprehensive error handling and logging

- Enhanced S3 integration:
  * Improved file backup system with metadata
  * Batch processing capabilities
  * Enhanced error logging and recovery
  * Copy operations with URL fixing

- Added Telegram contact creation API
- Improved error logging across all modules
- Enhanced callback system for AI responses
- Extensive backup file storage with timestamps
- Updated documentation and README files

- File storage improvements:
  * Thousands of backup files with proper metadata
  * Fix operations for broken file references
  * Project-specific backup and recovery systems
  * Comprehensive file integrity checking

Total: 26,461+ files added/modified including AWS SDK, vendor dependencies, and extensive backup system.
2025-10-16 11:17:21 +03:00

63 lines
2.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
import sys
import json
from deeppavlov import build_model, configs
def group_entities(tokens, labels):
    """Merge per-token BIO labels into whole entities.

    Args:
        tokens: Sequence of token strings.
        labels: Sequence of label strings parallel to ``tokens``. A label
            starting with ``"B-"`` opens an entity, a label starting with
            ``"I-"`` continues the open entity when its type matches, and
            any other label closes the open entity.

    Returns:
        A list of ``(entity_text, entity_type)`` tuples in order of
        appearance. ``entity_text`` is the entity's tokens joined by single
        spaces; ``entity_type`` is everything after the first hyphen of the
        label (e.g. ``"PER"`` or ``"ORG"``).

    Tokens and labels are paired with ``zip``, so surplus items in the longer
    sequence are ignored. An ``"I-"`` label with no open entity, or with a
    type different from the open entity, does not start a new entity: its
    token is dropped.
    """
    entities = []
    current_tokens = []
    current_type = None

    for token, label in zip(tokens, labels):
        # Split on the FIRST hyphen only so that hyphenated types such as
        # "WORK-OF-ART" are kept whole instead of being truncated to "WORK".
        prefix, sep, label_type = label.partition("-")
        has_type = bool(sep)

        if (
            has_type
            and prefix == "I"
            and current_tokens
            and label_type == current_type
        ):
            current_tokens.append(token)
            continue

        # Every other label terminates the entity that is currently open.
        if current_tokens:
            entities.append((" ".join(current_tokens), current_type))

        if has_type and prefix == "B":
            current_tokens = [token]
            current_type = label_type
        else:
            current_tokens = []
            current_type = None

    # Flush an entity that runs to the end of the input.
    if current_tokens:
        entities.append((" ".join(current_tokens), current_type))
    return entities
# Lazily-built DeepPavlov NER model, shared by every call in this process.
_NER_MODEL = None


def _get_ner_model():
    """Return the DeepPavlov NER model, building it on first use only."""
    global _NER_MODEL
    if _NER_MODEL is None:
        _NER_MODEL = build_model(configs.ner.ner_rus_bert, download=True)
    return _NER_MODEL


def extract_entities_dp(text):
    """Extract plaintiff, defendant and a short dispute summary from text.

    Runs the DeepPavlov ``ner_rus_bert`` model on ``text``, groups the token
    labels into entities with ``group_entities`` and picks the first ``PER``
    entity as the plaintiff and the first ``ORG`` entity as the defendant.
    The dispute summary is simply the first 100 characters of ``text``.

    Args:
        text: Input text to analyse. May be empty or ``None``.

    Returns:
        A dict with the keys ``"истец"`` (plaintiff), ``"ответчик"``
        (defendant) and ``"суть_спора"`` (dispute summary). Any value that
        cannot be determined is the placeholder ``"Не определено"``.
    """
    undefined = "Не определено"
    dispute_summary = text[:100] if text else undefined

    # Nothing to recognise in empty / whitespace-only input: answer without
    # loading the model at all.
    if not text or not text.strip():
        return {
            "истец": undefined,
            "ответчик": undefined,
            "суть_спора": dispute_summary
        }

    # The model is built once per process instead of on every call.
    result = _get_ner_model()([text])

    # Expected result layout (per the original author's note): a pair of
    # batches, one entry per input text, e.g.
    # [
    #   [["Иванов", "Иван", "Иванович", "заключил", "договор", "с", "ООО", ...]],
    #   [["B-PER", "I-PER", "I-PER", "O", "O", "O", "B-ORG", ...]]
    # ]
    tokens = result[0][0] if result and result[0] else []
    # Guard the second element too, so a short result does not raise.
    labels = result[1][0] if result and len(result) > 1 and result[1] else []

    grouped = group_entities(tokens, labels)

    # First PER entity is taken as the plaintiff, first ORG entity as the
    # defendant.
    plaintiff = next((entity for entity, typ in grouped if typ == "PER"), undefined)
    defendant = next((entity for entity, typ in grouped if typ == "ORG"), undefined)

    return {
        "истец": plaintiff,
        "ответчик": defendant,
        "суть_спора": dispute_summary
    }
if __name__ == '__main__':
    # Command-line entry point: every argument after the script name is
    # joined with spaces into the text to analyse; the result is printed to
    # stdout as JSON.
    cli_words = sys.argv[1:]

    if not cli_words:
        # No text supplied: emit the placeholder answer (compact JSON) and
        # exit with status 0.
        placeholder = "Не определено"
        fallback = {
            "истец": placeholder,
            "ответчик": placeholder,
            "суть_спора": placeholder
        }
        print(json.dumps(fallback, ensure_ascii=False))
        sys.exit(0)

    extracted = extract_entities_dp(" ".join(cli_words))
    # ensure_ascii=False keeps the Cyrillic text readable in the output.
    print(json.dumps(extracted, ensure_ascii=False, indent=2))