feat: adds Sync statistics.
This commit is contained in:
128
sync.py
128
sync.py
@@ -76,25 +76,62 @@ def convert_to_pacific_time(date_str):
|
||||
|
||||
def extract_domains_from_emails(emails: List[str]) -> List[str]:
    """Extract unique domains from a list of email addresses.

    Args:
        emails (List[str]): List of email addresses. Falsy entries and
            entries without an ``@`` are ignored.

    Returns:
        List[str]: Sorted list of unique, lower-cased domains extracted
            from the emails. Addresses whose domain part is empty
            (e.g. ``"user@"``) contribute nothing.
    """
    if not emails:
        return []

    domains = set()
    for email in emails:
        if not email or '@' not in email:
            continue
        # The domain is whatever follows the LAST '@': a quoted local part
        # may itself contain '@', so splitting on the first one is wrong.
        domain = email.rpartition('@')[2].strip().lower()
        # Skip malformed addresses such as "user@" instead of recording "".
        if domain:
            domains.add(domain)

    return sorted(domains)
|
||||
|
||||
|
||||
def record_sync_stats(db, recent_successes: int, oldest_successes: int, failures: int):
    """Accumulate today's sync counters in the ``sync_stats`` collection.

    The document id is ``sync_<YYYY-MM-DD>``, where the date is the current
    day in the America/Los_Angeles timezone. If that document already
    exists, the given counts are added to its stored counters and
    ``updated_at`` is refreshed; otherwise a new document is created with
    the given counts, the date, and ``created_at``.

    Any exception raised while talking to the database is caught and
    printed, so this function does not propagate storage errors.

    Args:
        db: Firestore client
        recent_successes: Number of recently active projects updated
        oldest_successes: Number of oldest projects updated
        failures: Number of failed updates
    """
    from datetime import datetime as dt

    la_tz = pytz.timezone('America/Los_Angeles')
    today = dt.now(la_tz).strftime('%Y-%m-%d')
    stats_doc_id = f"sync_{today}"

    # Counters contributed by this run, keyed by their stored field names.
    increments = {
        "recent_successes": recent_successes,
        "oldest_successes": oldest_successes,
        "failures": failures,
    }

    try:
        stats_ref = db.collection("sync_stats").document(stats_doc_id)
        snapshot = stats_ref.get()
        if not snapshot.exists:
            # First run of the day: create the document from scratch.
            stats_ref.set({
                "date": today,
                **increments,
                "created_at": dt.now(pytz.UTC).isoformat(),
            })
        else:
            # Later run: add this run's counts on top of the stored totals.
            previous = snapshot.to_dict()
            payload = {
                field: previous.get(field, 0) + delta
                for field, delta in increments.items()
            }
            payload["updated_at"] = dt.now(pytz.UTC).isoformat()
            stats_ref.update(payload)
        print(f"[STATS] Recorded sync stats: recent={recent_successes}, oldest={oldest_successes}, failures={failures}")
    except Exception as e:
        print(f"[ERROR] Failed to record sync stats: {e}")
|
||||
|
||||
from models.project_model import ProjectModel
|
||||
from filevine_client import FilevineClient
|
||||
|
||||
@@ -383,9 +420,9 @@ def get_oldest_unsynced_projects(db, fraction: float = 0.2) -> List[int]:
|
||||
def main():
|
||||
"""Main function to fetch and sync projects"""
|
||||
import argparse
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(description='Sync Filevine projects to Firestore')
|
||||
parser.add_argument('--mode', choices=['full', 'last_n', 'oldest_percent', 'hybrid', 'single'],
|
||||
parser.add_argument('--mode', choices=['full', 'last_n', 'oldest_percent', 'hybrid', 'single'],
|
||||
default='hybrid', help='Sync mode: full=all projects, last_n=recently active, oldest_percent=oldest by last_synced_at, hybrid=last_n+oldest_percent, single=one project')
|
||||
parser.add_argument('--days', type=int, default=14, help='Number of days for last_n mode (default: 14)')
|
||||
parser.add_argument('--percent', type=float, default=20.0, help='Percentage for oldest_percent mode (default: 20)')
|
||||
@@ -401,28 +438,72 @@ def main():
|
||||
client.get_bearer_token()
|
||||
from app import db
|
||||
|
||||
recent_successes = 0
|
||||
oldest_successes = 0
|
||||
total_failures = 0
|
||||
|
||||
if args.mode == 'full':
|
||||
print("[MODE] Full sync - fetching all projects")
|
||||
projects = client.list_all_projects()
|
||||
|
||||
detailed_rows = process_projects_parallel(projects, client, max_workers=10)
|
||||
documents = []
|
||||
for row in detailed_rows:
|
||||
if row.get('ProjectId'):
|
||||
row['is_archived'] = (row.get('phase_name') == 'Archived')
|
||||
documents.append((row.get('ProjectId'), row))
|
||||
batch_write_to_firestore(db, "projects", documents)
|
||||
success_count = sum(1 for r in detailed_rows if r.get('ProjectId'))
|
||||
fail_count = len(detailed_rows) - success_count
|
||||
record_sync_stats(db, success_count, 0, fail_count)
|
||||
|
||||
elif args.mode == 'last_n':
|
||||
days_ago = (datetime.now() - timedelta(days=args.days)).strftime('%Y-%m-%d')
|
||||
print(f"[MODE] Last {args.days} days - fetching active since {days_ago}")
|
||||
projects = client.list_all_projects(latest_activity_since=days_ago)
|
||||
|
||||
detailed_rows = process_projects_parallel(projects, client, max_workers=10)
|
||||
documents = []
|
||||
for row in detailed_rows:
|
||||
if row.get('ProjectId'):
|
||||
row['is_archived'] = (row.get('phase_name') == 'Archived')
|
||||
documents.append((row.get('ProjectId'), row))
|
||||
batch_write_to_firestore(db, "projects", documents)
|
||||
success_count = sum(1 for r in detailed_rows if r.get('ProjectId'))
|
||||
fail_count = len(detailed_rows) - success_count
|
||||
record_sync_stats(db, success_count, 0, fail_count)
|
||||
|
||||
elif args.mode == 'oldest_percent':
|
||||
fraction = args.percent / 100.0
|
||||
oldest_ids = get_oldest_unsynced_projects(db, fraction=fraction)
|
||||
print(f"[MODE] Oldest {args.percent}% - fetching {len(oldest_ids)} projects")
|
||||
|
||||
|
||||
all_projects = client.list_all_projects()
|
||||
projects = [p for p in all_projects if p.get("projectId", {}).get("native") in set(oldest_ids)]
|
||||
|
||||
detailed_rows = process_projects_parallel(projects, client, max_workers=10)
|
||||
documents = []
|
||||
for row in detailed_rows:
|
||||
if row.get('ProjectId'):
|
||||
row['is_archived'] = (row.get('phase_name') == 'Archived')
|
||||
documents.append((row.get('ProjectId'), row))
|
||||
batch_write_to_firestore(db, "projects", documents)
|
||||
success_count = sum(1 for r in detailed_rows if r.get('ProjectId'))
|
||||
fail_count = len(detailed_rows) - success_count
|
||||
record_sync_stats(db, 0, success_count, fail_count)
|
||||
|
||||
elif args.mode == 'single':
|
||||
print(f"[MODE] Single project - fetching project {args.project_id}")
|
||||
project_detail = client.fetch_project_detail(args.project_id)
|
||||
projects = [project_detail] if project_detail else []
|
||||
|
||||
detailed_rows = process_projects_parallel(projects, client, max_workers=10)
|
||||
documents = []
|
||||
for row in detailed_rows:
|
||||
if row.get('ProjectId'):
|
||||
row['is_archived'] = (row.get('phase_name') == 'Archived')
|
||||
documents.append((row.get('ProjectId'), row))
|
||||
batch_write_to_firestore(db, "projects", documents)
|
||||
success_count = sum(1 for r in detailed_rows if r.get('ProjectId'))
|
||||
fail_count = len(detailed_rows) - success_count
|
||||
record_sync_stats(db, success_count, 0, fail_count)
|
||||
|
||||
elif args.mode == 'hybrid':
|
||||
print("[MODE] Hybrid - active + oldest")
|
||||
|
||||
@@ -439,17 +520,22 @@ def main():
|
||||
|
||||
all_projects = client.list_all_projects()
|
||||
projects = [p for p in all_projects if p.get("projectId", {}).get("native") in all_ids_to_sync]
|
||||
detailed_rows = process_projects_parallel(projects, client, max_workers=10)
|
||||
|
||||
# Process projects in parallel
|
||||
detailed_rows = process_projects_parallel(projects, client, max_workers=10)
|
||||
# Classify successes by source
|
||||
project_ids_synced = {r.get('ProjectId') for r in detailed_rows if r.get('ProjectId')}
|
||||
recent_successes = len([pid for pid in project_ids_synced if pid in active_ids])
|
||||
oldest_successes = len([pid for pid in project_ids_synced if pid in oldest_ids])
|
||||
|
||||
# Batch write all results to Firestore
|
||||
documents = []
|
||||
for row in detailed_rows:
|
||||
if row.get('ProjectId'):
|
||||
row['is_archived'] = (row.get('phase_name') == 'Archived')
|
||||
documents.append((row.get('ProjectId'), row))
|
||||
batch_write_to_firestore(db, "projects", documents)
|
||||
documents = []
|
||||
for row in detailed_rows:
|
||||
if row.get('ProjectId'):
|
||||
row['is_archived'] = (row.get('phase_name') == 'Archived')
|
||||
documents.append((row.get('ProjectId'), row))
|
||||
batch_write_to_firestore(db, "projects", documents)
|
||||
|
||||
total_failures = len(detailed_rows) - len(project_ids_synced)
|
||||
record_sync_stats(db, recent_successes, oldest_successes, total_failures)
|
||||
|
||||
print(f"[SYNC] Complete - {len(documents)} projects saved to Firestore")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user