feat: adds Sync statistics.

This commit is contained in:
2026-05-13 07:39:10 -07:00
parent 8dd7ae8c95
commit 275820b502
4 changed files with 248 additions and 26 deletions

128
sync.py
View File

@@ -76,25 +76,62 @@ def convert_to_pacific_time(date_str):
def extract_domains_from_emails(emails: List[str]) -> List[str]:
    """Extract unique domains from a list of email addresses.

    The domain is the text after the *last* ``@`` in each address, with
    surrounding whitespace removed and lower-cased. Entries that are not
    strings, contain no ``@``, or have nothing after the ``@`` are ignored.

    Args:
        emails (List[str]): List of email addresses

    Returns:
        List[str]: Sorted list of unique domains extracted from the emails
    """
    if not emails:
        return []

    domains = set()
    for email in emails:
        # Skip None and other non-string values instead of raising.
        if not isinstance(email, str):
            continue
        # rpartition splits on the last '@', so an address with an '@' in
        # its local part still yields the real domain.
        _, at_sign, domain = email.strip().rpartition('@')
        domain = domain.strip().lower()
        # No '@' at all, or nothing after it: there is no domain to record.
        if at_sign and domain:
            domains.add(domain)
    return sorted(domains)
def record_sync_stats(db, recent_successes: int, oldest_successes: int, failures: int):
    """Add this run's sync counters to today's document in ``sync_stats``.

    The document id is ``sync_<YYYY-MM-DD>``, where the date is the current
    day in the America/Los_Angeles timezone. If the document already exists,
    the given counts are added to the stored ones; otherwise a new document
    is created with the given counts.

    Any exception raised while talking to the database is caught and
    printed, so this function does not propagate storage errors.

    Args:
        db: Firestore client
        recent_successes: Number of recently active projects updated
        oldest_successes: Number of oldest projects updated
        failures: Number of failed updates
    """
    from datetime import datetime as dt

    la_zone = pytz.timezone('America/Los_Angeles')
    today = dt.now(la_zone).strftime('%Y-%m-%d')
    doc_id = f"sync_{today}"

    try:
        stats_ref = db.collection("sync_stats").document(doc_id)
        snapshot = stats_ref.get()

        if not snapshot.exists:
            # First run of the day: create the document from scratch.
            new_stats = {
                "date": today,
                "recent_successes": recent_successes,
                "oldest_successes": oldest_successes,
                "failures": failures,
                "created_at": dt.now(pytz.UTC).isoformat(),
            }
            stats_ref.set(new_stats)
        else:
            # NOTE(review): this is a read-then-write, so two runs finishing
            # at the same time could overwrite each other's totals.
            stored = snapshot.to_dict()
            merged_stats = {
                "recent_successes": stored.get("recent_successes", 0) + recent_successes,
                "oldest_successes": stored.get("oldest_successes", 0) + oldest_successes,
                "failures": stored.get("failures", 0) + failures,
                "updated_at": dt.now(pytz.UTC).isoformat(),
            }
            stats_ref.update(merged_stats)

        print(f"[STATS] Recorded sync stats: recent={recent_successes}, oldest={oldest_successes}, failures={failures}")
    except Exception as e:
        print(f"[ERROR] Failed to record sync stats: {e}")
from models.project_model import ProjectModel
from filevine_client import FilevineClient
@@ -383,9 +420,9 @@ def get_oldest_unsynced_projects(db, fraction: float = 0.2) -> List[int]:
def main():
"""Main function to fetch and sync projects"""
import argparse
parser = argparse.ArgumentParser(description='Sync Filevine projects to Firestore')
parser.add_argument('--mode', choices=['full', 'last_n', 'oldest_percent', 'hybrid', 'single'],
parser.add_argument('--mode', choices=['full', 'last_n', 'oldest_percent', 'hybrid', 'single'],
default='hybrid', help='Sync mode: full=all projects, last_n=recently active, oldest_percent=oldest by last_synced_at, hybrid=last_n+oldest_percent, single=one project')
parser.add_argument('--days', type=int, default=14, help='Number of days for last_n mode (default: 14)')
parser.add_argument('--percent', type=float, default=20.0, help='Percentage for oldest_percent mode (default: 20)')
@@ -401,28 +438,72 @@ def main():
client.get_bearer_token()
from app import db
recent_successes = 0
oldest_successes = 0
total_failures = 0
if args.mode == 'full':
print("[MODE] Full sync - fetching all projects")
projects = client.list_all_projects()
detailed_rows = process_projects_parallel(projects, client, max_workers=10)
documents = []
for row in detailed_rows:
if row.get('ProjectId'):
row['is_archived'] = (row.get('phase_name') == 'Archived')
documents.append((row.get('ProjectId'), row))
batch_write_to_firestore(db, "projects", documents)
success_count = sum(1 for r in detailed_rows if r.get('ProjectId'))
fail_count = len(detailed_rows) - success_count
record_sync_stats(db, success_count, 0, fail_count)
elif args.mode == 'last_n':
days_ago = (datetime.now() - timedelta(days=args.days)).strftime('%Y-%m-%d')
print(f"[MODE] Last {args.days} days - fetching active since {days_ago}")
projects = client.list_all_projects(latest_activity_since=days_ago)
detailed_rows = process_projects_parallel(projects, client, max_workers=10)
documents = []
for row in detailed_rows:
if row.get('ProjectId'):
row['is_archived'] = (row.get('phase_name') == 'Archived')
documents.append((row.get('ProjectId'), row))
batch_write_to_firestore(db, "projects", documents)
success_count = sum(1 for r in detailed_rows if r.get('ProjectId'))
fail_count = len(detailed_rows) - success_count
record_sync_stats(db, success_count, 0, fail_count)
elif args.mode == 'oldest_percent':
fraction = args.percent / 100.0
oldest_ids = get_oldest_unsynced_projects(db, fraction=fraction)
print(f"[MODE] Oldest {args.percent}% - fetching {len(oldest_ids)} projects")
all_projects = client.list_all_projects()
projects = [p for p in all_projects if p.get("projectId", {}).get("native") in set(oldest_ids)]
detailed_rows = process_projects_parallel(projects, client, max_workers=10)
documents = []
for row in detailed_rows:
if row.get('ProjectId'):
row['is_archived'] = (row.get('phase_name') == 'Archived')
documents.append((row.get('ProjectId'), row))
batch_write_to_firestore(db, "projects", documents)
success_count = sum(1 for r in detailed_rows if r.get('ProjectId'))
fail_count = len(detailed_rows) - success_count
record_sync_stats(db, 0, success_count, fail_count)
elif args.mode == 'single':
print(f"[MODE] Single project - fetching project {args.project_id}")
project_detail = client.fetch_project_detail(args.project_id)
projects = [project_detail] if project_detail else []
detailed_rows = process_projects_parallel(projects, client, max_workers=10)
documents = []
for row in detailed_rows:
if row.get('ProjectId'):
row['is_archived'] = (row.get('phase_name') == 'Archived')
documents.append((row.get('ProjectId'), row))
batch_write_to_firestore(db, "projects", documents)
success_count = sum(1 for r in detailed_rows if r.get('ProjectId'))
fail_count = len(detailed_rows) - success_count
record_sync_stats(db, success_count, 0, fail_count)
elif args.mode == 'hybrid':
print("[MODE] Hybrid - active + oldest")
@@ -439,17 +520,22 @@ def main():
all_projects = client.list_all_projects()
projects = [p for p in all_projects if p.get("projectId", {}).get("native") in all_ids_to_sync]
detailed_rows = process_projects_parallel(projects, client, max_workers=10)
# Process projects in parallel
detailed_rows = process_projects_parallel(projects, client, max_workers=10)
# Classify successes by source
project_ids_synced = {r.get('ProjectId') for r in detailed_rows if r.get('ProjectId')}
recent_successes = len([pid for pid in project_ids_synced if pid in active_ids])
oldest_successes = len([pid for pid in project_ids_synced if pid in oldest_ids])
# Batch write all results to Firestore
documents = []
for row in detailed_rows:
if row.get('ProjectId'):
row['is_archived'] = (row.get('phase_name') == 'Archived')
documents.append((row.get('ProjectId'), row))
batch_write_to_firestore(db, "projects", documents)
documents = []
for row in detailed_rows:
if row.get('ProjectId'):
row['is_archived'] = (row.get('phase_name') == 'Archived')
documents.append((row.get('ProjectId'), row))
batch_write_to_firestore(db, "projects", documents)
total_failures = len(detailed_rows) - len(project_ids_synced)
record_sync_stats(db, recent_successes, oldest_successes, total_failures)
print(f"[SYNC] Complete - {len(documents)} projects saved to Firestore")