Проект аудита отелей: основные скрипты и документация
- Краулеры: smart_crawler.py, regional_crawler.py
- Аудит: audit_orel_to_excel.py, audit_chukotka_to_excel.py
- РКН проверка: check_rkn_registry.py, recheck_unclear_rkn.py
- Отчёты: create_orel_horizontal_report.py
- Обработка: process_all_hotels_embeddings.py
- Документация: README.md, DB_SCHEMA_REFERENCE.md
This commit is contained in:
77
check_graphiti_data.py
Normal file
77
check_graphiti_data.py
Normal file
@@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env python3
"""Diagnostic script: report which Graphiti data Neo4j holds for one group_id.

Prints the number of ``Episode`` nodes carrying the target ``group_id``.
When there are some, it also prints up to three sample episodes, the
``Entity`` node count and the relationship count for that group. When there
are none, it lists the group_ids of episodes created during the last
10 minutes, which helps spot data that was loaded under a different group_id.
"""
import os

from neo4j import GraphDatabase

# Connection settings. Environment variables take precedence; the fallbacks
# are the values this script has always used, so running it without any
# environment configured behaves as before.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
# NOTE(review): a default password committed to the repository is a security
# smell; prefer supplying NEO4J_PASSWORD through the environment.
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "supersecret")

# The group whose data is inspected. Passed to Cypher as the $group_id
# parameter instead of being spliced into every query string.
GROUP_ID = "hotel_spb"


def _fetch_count(session, query, key):
    """Run an aggregate *query* for GROUP_ID and return the value under *key*."""
    return session.run(query, group_id=GROUP_ID).single()[key]


def _print_group_details(session):
    """Print sample episodes plus entity and relationship counts for GROUP_ID."""
    samples = session.run(
        """
        MATCH (e:Episode)
        WHERE e.group_id = $group_id
        RETURN e.name AS name, e.content AS content,
               size(e.embedding) AS emb_size
        LIMIT 3
        """,
        group_id=GROUP_ID,
    )
    print("\n🔍 Примеры эпизодов:")
    for r in samples:
        # An episode may have no content property; slicing None would raise
        # TypeError, so fall back to an empty preview.
        preview = (r["content"] or "")[:120]
        print(f" Name: {r['name']}")
        print(f" Embedding: {r['emb_size']} размерность")
        print(f" Content: {preview}...")
        print()

    # Entity nodes in the group.
    entities = _fetch_count(
        session,
        """
        MATCH (e:Entity)
        WHERE e.group_id = $group_id
        RETURN count(e) AS count
        """,
        "count",
    )
    print(f"🏷️ Сущностей: {entities}")

    # Relationships (edges) tagged with the group.
    edges = _fetch_count(
        session,
        """
        MATCH ()-[r]->()
        WHERE r.group_id = $group_id
        RETURN count(r) AS count
        """,
        "count",
    )
    print(f"🔗 Рёбер: {edges}")


def _print_recent_episodes(session):
    """Print per-group_id counts of episodes created in the last 10 minutes."""
    recent = session.run(
        """
        MATCH (e:Episode)
        WHERE e.created_at > datetime() - duration('PT10M')
        RETURN e.group_id AS group_id, count(e) AS count
        """
    )
    print("\n Эпизоды созданные за последние 10 минут:")
    for r in recent:
        print(f" group_id='{r['group_id']}': {r['count']} эпизодов")


def main():
    """Connect to Neo4j, print the report for GROUP_ID, and close the driver."""
    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
    try:
        with driver.session() as session:
            print("=" * 70)
            print(f"🔍 ПРОВЕРКА ДАННЫХ В NEO4J (group_id='{GROUP_ID}')")
            print("=" * 70)

            # Count the episodes first: everything else depends on whether
            # the group has any data at all.
            episode_count = _fetch_count(
                session,
                """
                MATCH (e:Episode)
                WHERE e.group_id = $group_id
                RETURN count(e) AS episode_count
                """,
                "episode_count",
            )
            print(f"\n📄 Эпизодов в {GROUP_ID}: {episode_count}")

            if episode_count > 0:
                _print_group_details(session)
            else:
                print("\n❌ Данных НЕТ!")
                print(" Возможно данные загружались с другим group_id")
                # Look for recently created episodes under any group_id.
                _print_recent_episodes(session)

            print("\n" + "=" * 70)
    finally:
        # Always release the driver's connections, even when a query fails.
        driver.close()


if __name__ == "__main__":
    main()
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user