88 lines
2.8 KiB
Python
88 lines
2.8 KiB
Python
|
|
#!/usr/bin/env python3
"""
Clean up duplicate entries in documents_meta.
"""
import asyncio
import asyncpg
import json
|
||
|
|
|
||
|
|
# PostgreSQL connection settings.
# SECURITY NOTE(review): credentials are hard-coded in source — move them to
# environment variables or a secrets store before this file is shared/committed.
POSTGRES_HOST = "147.45.189.234"
POSTGRES_PORT = 5432
POSTGRES_DB = "default_db"
POSTGRES_USER = "gen_user"
POSTGRES_PASSWORD = "2~~9_^kVsU?2\\S"

# Target claim: matched against clpr_claims.id or payload->>'claim_id'.
CLAIM_ID = "bddb6815-8e17-4d54-a721-5e94382942c7"
|
||
|
|
|
||
|
|
async def fix_duplicates():
    """Remove duplicate documents (keyed by ``file_id``) from one claim's
    ``documents_meta`` list in the ``clpr_claims`` table.

    The claim is located by ``CLAIM_ID`` (matched against the row id or
    ``payload->>'claim_id'``), the first occurrence of each ``file_id`` is
    kept, the cleaned payload is written back, and a short before/after
    report is printed.
    """
    conn = await asyncpg.connect(
        host=POSTGRES_HOST,
        port=POSTGRES_PORT,
        database=POSTGRES_DB,
        user=POSTGRES_USER,
        password=POSTGRES_PASSWORD,
    )

    try:
        # The most recently updated matching row wins if several rows
        # share the claim id.
        row = await conn.fetchrow("""
            SELECT id, payload
            FROM clpr_claims
            WHERE id::text = $1 OR payload->>'claim_id' = $1
            ORDER BY updated_at DESC
            LIMIT 1
        """, CLAIM_ID)

        if not row:
            print(f"❌ Черновик {CLAIM_ID} не найден!")
            return

        # asyncpg may hand back jsonb as a dict or a raw JSON string
        # depending on registered codecs — accept both.
        payload = row['payload'] if isinstance(row['payload'], dict) else json.loads(row['payload'])
        documents_meta = payload.get('documents_meta', [])

        print(f"📋 Было документов в documents_meta: {len(documents_meta)}")

        # Drop duplicates by file_id, keeping the first occurrence.
        seen_file_ids = set()
        unique_documents_meta = []

        for doc in documents_meta:
            file_id = doc.get('file_id')
            if not file_id:
                # BUGFIX: entries without a file_id are not duplicates of
                # anything — the original loop silently discarded them.
                unique_documents_meta.append(doc)
            elif file_id not in seen_file_ids:
                seen_file_ids.add(file_id)
                unique_documents_meta.append(doc)
            else:
                print(f" ⚠️ Пропущен дубликат: {file_id[:80]}...")

        print(f"📋 Стало документов в documents_meta: {len(unique_documents_meta)}")

        # Write the cleaned list back into the payload.
        payload['documents_meta'] = unique_documents_meta

        # BUGFIX: update exactly the row selected above.  The original
        # OR-filter (id OR payload->>'claim_id', no LIMIT) could overwrite
        # several rows with this one row's payload.
        await conn.execute("""
            UPDATE clpr_claims
            SET
                payload = $1::jsonb,
                updated_at = now()
            WHERE id = $2
        """, json.dumps(payload, ensure_ascii=False), row['id'])

        print(f"\n✅ Дубликаты удалены!")

        # Re-read the same row to confirm the stored document count.
        row_after = await conn.fetchrow("""
            SELECT jsonb_array_length(payload->'documents_meta') as docs_count
            FROM clpr_claims
            WHERE id = $1
        """, row['id'])

        print(f"📊 Результат: {row_after['docs_count']} документов в documents_meta")

    finally:
        await conn.close()
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Entry point: run the one-off cleanup coroutine to completion.
    asyncio.run(fix_duplicates())
|
||
|
|
|