- Краулеры: smart_crawler.py, regional_crawler.py
- Аудит: audit_orel_to_excel.py, audit_chukotka_to_excel.py
- РКН проверка: check_rkn_registry.py, recheck_unclear_rkn.py
- Отчёты: create_orel_horizontal_report.py
- Обработка: process_all_hotels_embeddings.py
- Документация: README.md, DB_SCHEMA_REFERENCE.md
78 lines
2.5 KiB
Python
78 lines
2.5 KiB
Python
#!/usr/bin/env python3
"""Sanity-check the data stored in Neo4j for a single group_id.

Prints the number of Episode nodes whose group_id equals GROUP_ID. When at
least one exists, it also prints up to three sample episodes (name, embedding
size, first 120 characters of content) and the counts of Entity nodes and
relationships carrying the same group_id. When none exist, it lists the
group_ids of episodes created during the last ten minutes, which helps spot
data that was loaded under a different group_id.
"""

from neo4j import GraphDatabase

# NOTE(review): connection settings are hard-coded; presumably this targets a
# local development instance -- confirm before using against a shared database.
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "supersecret"

# The group inspected by every query below. It is passed to Cypher as a query
# parameter so the value lives in exactly one place.
GROUP_ID = "hotel_spb"

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

# try/finally guarantees the driver is closed even if a query raises.
try:
    with driver.session() as session:
        print("=" * 70)
        print(f"🔍 ПРОВЕРКА ДАННЫХ В NEO4J (group_id='{GROUP_ID}')")
        print("=" * 70)

        # Count the episodes in the group.
        result = session.run(
            """
            MATCH (e:Episode)
            WHERE e.group_id = $group_id
            RETURN count(e) AS episode_count
            """,
            group_id=GROUP_ID,
        )
        episode_count = result.single()["episode_count"]
        print(f"\n📄 Эпизодов в {GROUP_ID}: {episode_count}")

        if episode_count > 0:
            # Sample episodes.
            result = session.run(
                """
                MATCH (e:Episode)
                WHERE e.group_id = $group_id
                RETURN e.name AS name, e.content AS content,
                       size(e.embedding) AS emb_size
                LIMIT 3
                """,
                group_id=GROUP_ID,
            )
            print("\n🔍 Примеры эпизодов:")
            for r in result:
                # A missing content property comes back as None; fall back to
                # an empty string so slicing cannot raise TypeError.
                content = r["content"] or ""
                print(f" Name: {r['name']}")
                print(f" Embedding: {r['emb_size']} размерность")
                print(f" Content: {content[:120]}...")
                print()

            # Entities.
            result = session.run(
                """
                MATCH (e:Entity)
                WHERE e.group_id = $group_id
                RETURN count(e) AS count
                """,
                group_id=GROUP_ID,
            )
            entities = result.single()["count"]
            print(f"🏷️ Сущностей: {entities}")

            # Relationships (edges).
            result = session.run(
                """
                MATCH ()-[r]->()
                WHERE r.group_id = $group_id
                RETURN count(r) AS count
                """,
                group_id=GROUP_ID,
            )
            edges = result.single()["count"]
            print(f"🔗 Рёбер: {edges}")
        else:
            print("\n❌ Данных НЕТ!")
            print(" Возможно данные загружались с другим group_id")

            # Look for recently created episodes under any group_id.
            result = session.run(
                """
                MATCH (e:Episode)
                WHERE e.created_at > datetime() - duration('PT10M')
                RETURN e.group_id AS group_id, count(e) AS count
                """
            )
            print("\n Эпизоды созданные за последние 10 минут:")
            for r in result:
                print(f" group_id='{r['group_id']}': {r['count']} эпизодов")

        print("\n" + "=" * 70)
finally:
    driver.close()
|
||
|
||
|
||
|
||
|