Compare commits

..

7 Commits

Author SHA1 Message Date
8dd7ae8c95 feat(dashboard): add admin-only Last Synced column
Adds a "Last Synced" column visible only to admins, positioned as the
last column. Displays the sync timestamp formatted as YYYY-MM-DD.
Includes the column in the visibility toggle modal.
2026-05-12 23:57:26 -07:00
616ffde402 fix: exclude archived projects from oldest-percent sync selection
Archived projects should not count toward the N% fraction nor be selected for syncing.
2026-05-12 23:44:52 -07:00
eb78676cdb feat: add script to backfill is_archived field on existing projects
One-time migration to set is_archived = (phase_name == 'Archived') for all projects already in Firestore.
2026-05-12 23:40:46 -07:00
3633923fa7 refactor: overhaul sync script with CLI modes, batch writes, and archive tracking
Add argparse with full/last_n/oldest_percent/hybrid/single sync modes.

Implement batch Firestore writes to reduce API overhead.

Add is_archived flag based on phase_name during sync.

Track last_synced_at on each project for incremental sync.

Improve logging with structured prefixes and worker summaries.

Remove dead code (duplicate date function, sync_single helper).
2026-05-12 23:40:25 -07:00
c62de705de feat: exclude archived projects from dashboard and query results
Add is_archived == False filter to all project queries so archived cases are hidden from users.
2026-05-12 23:39:49 -07:00
3ed260ef23 feat: add last_synced_at field to ProjectModel
Track when each project was last synced from Filevine for incremental sync strategies.
2026-05-12 23:39:30 -07:00
9df9e003c1 refactor: use requests Session for connection pooling and clean up debug output
Replace per-request requests.get/post with a shared Session for connection reuse.

Remove verbose print statements and add structured [WARN] prefixes to error logs.
2026-05-12 23:38:46 -07:00
7 changed files with 263 additions and 96 deletions

13
app.py
View File

@@ -55,7 +55,7 @@ def projects_for(profile, case_email_match, per_page, offset):
# Check if case_email_match is a valid email address (contains @) # Check if case_email_match is a valid email address (contains @)
if '@' in case_email_match_lower and not case_email_match_lower.startswith('@'): if '@' in case_email_match_lower and not case_email_match_lower.startswith('@'):
# If it's a complete email address, filter by exact match in viewing_emails # If it's a complete email address, filter by exact match in viewing_emails
projects_ref = db.collection("projects").where("viewing_emails", "array_contains", case_email_match_lower) projects_ref = db.collection("projects").where("viewing_emails", "array_contains", case_email_match_lower).where("is_archived", "==", False)
cnt = int(projects_ref.count().get()[0][0].value) cnt = int(projects_ref.count().get()[0][0].value)
projects = [] projects = []
for doc in projects_ref.order_by("matter_description").limit(per_page).offset(offset).stream(): for doc in projects_ref.order_by("matter_description").limit(per_page).offset(offset).stream():
@@ -69,7 +69,7 @@ def projects_for(profile, case_email_match, per_page, offset):
domain_search = domain_search[1:] # Remove the @ sign domain_search = domain_search[1:] # Remove the @ sign
# Filter by domain match in viewing_emails # Filter by domain match in viewing_emails
projects_ref = db.collection("projects").where("viewing_domains", "array_contains", domain_search) projects_ref = db.collection("projects").where("viewing_domains", "array_contains", domain_search).where("is_archived", "==", False)
print("HERE domain", domain_search) print("HERE domain", domain_search)
cnt = int(projects_ref.count().get()[0][0].value) cnt = int(projects_ref.count().get()[0][0].value)
@@ -79,7 +79,7 @@ def projects_for(profile, case_email_match, per_page, offset):
return (projects, cnt) return (projects, cnt)
else: else:
projects_ref = db.collection("projects") projects_ref = db.collection("projects").where("is_archived", "==", False)
else: else:
# For non-admin users, check if they have domain email or specific case email # For non-admin users, check if they have domain email or specific case email
@@ -89,10 +89,10 @@ def projects_for(profile, case_email_match, per_page, offset):
if case_domain_email: if case_domain_email:
# Use exact match on viewing_domains field # Use exact match on viewing_domains field
domain_lower = case_domain_email.lower() domain_lower = case_domain_email.lower()
projects_ref = db.collection("projects").where("viewing_domains", "array_contains", domain_lower) projects_ref = db.collection("projects").where("viewing_domains", "array_contains", domain_lower).where("is_archived", "==", False)
elif case_email: elif case_email:
# Use the original logic for specific case email match # Use the original logic for specific case email match
projects_ref = db.collection("projects").where("viewing_emails", "array_contains", case_email.lower()) projects_ref = db.collection("projects").where("viewing_emails", "array_contains", case_email.lower()).where("is_archived", "==", False)
else: else:
return ([], 0) return ([], 0)
@@ -278,7 +278,8 @@ def dashboard(page=1):
current_page=page, current_page=page,
total_pages=total_pages, total_pages=total_pages,
total_projects=total_projects, total_projects=total_projects,
per_page=per_page) per_page=per_page,
is_admin=is_admin)

61
backfill_is_archived.py Normal file
View File

@@ -0,0 +1,61 @@
#!/usr/bin/env python3
"""
One-off script to backfill the is_archived field on all projects in Firestore.

This sets is_archived = True for projects where phase_name == "Archived",
and is_archived = False for all other projects.

Usage:
    python backfill_is_archived.py
"""

import os
from firebase_admin import credentials, initialize_app, firestore

# Path to your staging service account JSON
CREDENTIALS_PATH = "./rothbard-staging2-12345-firebase-adminsdk-fbsvc-7f95268383.json"


def main(credentials_path: str = CREDENTIALS_PATH) -> None:
    """Backfill ``is_archived`` on every document in the ``projects`` collection.

    Each document gets ``is_archived = (phase_name == "Archived")``. Documents
    whose stored value already matches are left untouched, so the script can be
    re-run safely.

    Args:
        credentials_path: Path to the service-account JSON used to initialise
            the Firebase app. Defaults to ``CREDENTIALS_PATH``.
    """
    # Initialize Firebase Admin under a dedicated app name
    cred = credentials.Certificate(credentials_path)
    app = initialize_app(cred, name='backfill-is-archived')
    db = firestore.client(app=app)

    projects_ref = db.collection("projects")
    docs = list(projects_ref.stream())
    total = len(docs)
    archived_count = 0
    updated_count = 0
    batch_size = 500
    batch_total = (total + batch_size - 1) // batch_size

    print(f"Found {total} projects. Processing in batches of {batch_size}...")

    for batch_number, start in enumerate(range(0, total, batch_size), start=1):
        batch = db.batch()
        pending = 0

        for doc in docs[start:start + batch_size]:
            data = doc.to_dict() or {}
            is_archived = data.get("phase_name", "") == "Archived"

            if is_archived:
                archived_count += 1

            # Only update if the field is missing or different
            if data.get("is_archived") != is_archived:
                batch.update(doc.reference, {"is_archived": is_archived})
                pending += 1

        if pending:
            batch.commit()
            # Count only after the commit succeeded so the summary is accurate.
            updated_count += pending
            print(f"  Committed batch {batch_number}/{batch_total}")
        else:
            # Nothing queued for this slice: skip the empty round-trip.
            print(f"  Skipped batch {batch_number}/{batch_total} (no changes needed)")

    print("\nDone!")
    print(f"  Total projects: {total}")
    print(f"  Projects with phase_name == 'Archived': {archived_count}")
    print(f"  Documents updated: {updated_count}")


if __name__ == "__main__":
    main()

View File

@@ -19,6 +19,12 @@ class FilevineClient:
"x-fv-orgid": str(FV_ORG_ID), "x-fv-orgid": str(FV_ORG_ID),
"x-fv-userid": str(FV_USER_ID), "x-fv-userid": str(FV_USER_ID),
} }
self.session = requests.Session()
self.session.headers.update({
"Accept": "application/json",
"x-fv-orgid": str(FV_ORG_ID),
"x-fv-userid": str(FV_USER_ID),
})
self.get_bearer_token() self.get_bearer_token()
def get_bearer_token(self) -> str: def get_bearer_token(self) -> str:
@@ -33,14 +39,12 @@ class FilevineClient:
} }
headers = {"Accept": "application/json"} headers = {"Accept": "application/json"}
print("data is", data) resp = self.session.post(url, data=data, headers=headers, timeout=30)
print(data)
resp = requests.post(url, data=data, headers=headers, timeout=30)
resp.raise_for_status() resp.raise_for_status()
js = resp.json() js = resp.json()
token = js.get("access_token") token = js.get("access_token")
print(f"Got bearer js", js)
self.bearer_token = token self.bearer_token = token
self.session.headers["Authorization"] = f"Bearer {token}"
self.headers["Authorization"] = f"Bearer {token}" self.headers["Authorization"] = f"Bearer {token}"
return token return token
@@ -60,7 +64,6 @@ class FilevineClient:
while True: while True:
cnt = len(results) cnt = len(results)
print(f"list try {tries}, starting at {offset}, previous count {last_count}, currently at {cnt}")
tries += 1 tries += 1
url = base url = base
params = {} params = {}
@@ -72,7 +75,7 @@ class FilevineClient:
if latest_activity_since: if latest_activity_since:
params["latestActivitySince"] = latest_activity_since params["latestActivitySince"] = latest_activity_since
r = requests.get(url, headers=self.headers, params=params, timeout=30) r = self.session.get(url, headers=self.headers, params=params, timeout=30)
r.raise_for_status() r.raise_for_status()
page = r.json() page = r.json()
items = page.get("items", []) items = page.get("items", [])
@@ -89,35 +92,35 @@ class FilevineClient:
def fetch_project_detail(self, project_id_native: int) -> Dict[str, Any]: def fetch_project_detail(self, project_id_native: int) -> Dict[str, Any]:
"""Fetch detailed information for a specific project""" """Fetch detailed information for a specific project"""
url = f"{self.base_url}/Projects/{project_id_native}" url = f"{self.base_url}/Projects/{project_id_native}"
r = requests.get(url, headers=self.headers, timeout=30) r = self.session.get(url, headers=self.headers, timeout=30)
r.raise_for_status() r.raise_for_status()
return r.json() return r.json()
def fetch_project_team(self, project_id_native: int) -> List[Dict[str, Any]]: def fetch_project_team(self, project_id_native: int) -> List[Dict[str, Any]]:
"""Fetch team members for a specific project""" """Fetch team members for a specific project"""
url = f"{self.base_url}/Projects/{project_id_native}/team?limit=1000" url = f"{self.base_url}/Projects/{project_id_native}/team?limit=1000"
r = requests.get(url, headers=self.headers, timeout=30) r = self.session.get(url, headers=self.headers, timeout=30)
r.raise_for_status() r.raise_for_status()
return r.json().get('items') or [] return r.json().get('items') or []
def fetch_project_tasks(self, project_id_native: int) -> Dict[str, Any]: def fetch_project_tasks(self, project_id_native: int) -> Dict[str, Any]:
"""Fetch tasks for a specific project""" """Fetch tasks for a specific project"""
url = f"{self.base_url}/Projects/{project_id_native}/tasks" url = f"{self.base_url}/Projects/{project_id_native}/tasks"
r = requests.get(url, headers=self.headers, timeout=30) r = self.session.get(url, headers=self.headers, timeout=30)
r.raise_for_status() r.raise_for_status()
return r.json() return r.json()
def fetch_client(self, client_id_native: int) -> Dict[str, Any]: def fetch_client(self, client_id_native: int) -> Dict[str, Any]:
"""Fetch client information by client ID""" """Fetch client information by client ID"""
url = f"{self.base_url}/contacts/{client_id_native}" url = f"{self.base_url}/contacts/{client_id_native}"
r = requests.get(url, headers=self.headers, timeout=30) r = self.session.get(url, headers=self.headers, timeout=30)
r.raise_for_status() r.raise_for_status()
return r.json() return r.json()
def fetch_contacts(self, project_id_native: int) -> Optional[List[Dict[str, Any]]]: def fetch_contacts(self, project_id_native: int) -> Optional[List[Dict[str, Any]]]:
"""Fetch contacts for a specific project""" """Fetch contacts for a specific project"""
url = f"{self.base_url}/projects/{project_id_native}/contacts" url = f"{self.base_url}/projects/{project_id_native}/contacts"
r = requests.get(url, headers=self.headers, timeout=30) r = self.session.get(url, headers=self.headers, timeout=30)
r.raise_for_status() r.raise_for_status()
return r.json().get("items") return r.json().get("items")
@@ -125,20 +128,20 @@ class FilevineClient:
"""Fetch a specific form for a project""" """Fetch a specific form for a project"""
try: try:
url = f"{self.base_url}/Projects/{project_id_native}/Forms/{form}" url = f"{self.base_url}/Projects/{project_id_native}/Forms/{form}"
r = requests.get(url, headers=self.headers, timeout=30) r = self.session.get(url, headers=self.headers, timeout=30)
r.raise_for_status() r.raise_for_status()
return r.json() return r.json()
except Exception as e: except Exception as e:
print(e) print(f"[WARN] Failed to fetch form '{form}' for project {project_id_native}: {e}")
return {} return {}
def fetch_collection(self, project_id_native: int, collection: str) -> List[Dict[str, Any]]: def fetch_collection(self, project_id_native: int, collection: str) -> List[Dict[str, Any]]:
"""Fetch a collection for a project""" """Fetch a collection for a project"""
try: try:
url = f"{self.base_url}/Projects/{project_id_native}/Collections/{collection}" url = f"{self.base_url}/Projects/{project_id_native}/Collections/{collection}"
r = requests.get(url, headers=self.headers, timeout=30) r = self.session.get(url, headers=self.headers, timeout=30)
r.raise_for_status() r.raise_for_status()
return [x.get('dataObject') for x in r.json().get("items")] return [x.get('dataObject') for x in r.json().get("items")]
except Exception as e: except Exception as e:
print(e) print(f"[WARN] Failed to fetch collection '{collection}' for project {project_id_native}: {e}")
return {} return {}

View File

@@ -70,9 +70,10 @@ class ProjectModel:
project_name: str = "", project_name: str = "",
project_url: str = "", project_url: str = "",
property_contacts: Dict[str, Any] = None, property_contacts: Dict[str, Any] = None,
viewing_emails: List[str] = None, viewing_emails: List[str] = None,
viewing_domains: List[str] = None viewing_domains: List[str] = None,
): last_synced_at: str = ""
):
self.client = client self.client = client
self.matter_description = matter_description self.matter_description = matter_description
@@ -131,6 +132,7 @@ class ProjectModel:
self.property_contacts = property_contacts or {} self.property_contacts = property_contacts or {}
self.viewing_emails = viewing_emails or [] self.viewing_emails = viewing_emails or []
self.viewing_domains = viewing_domains or [] self.viewing_domains = viewing_domains or []
self.last_synced_at = last_synced_at
def to_dict(self) -> Dict[str, Any]: def to_dict(self) -> Dict[str, Any]:
"""Convert the ProjectModel to a dictionary for Firestore storage.""" """Convert the ProjectModel to a dictionary for Firestore storage."""
@@ -191,7 +193,8 @@ class ProjectModel:
"ProjectUrl": self.project_url, "ProjectUrl": self.project_url,
"property_contacts": self.property_contacts, "property_contacts": self.property_contacts,
"viewing_emails": self.viewing_emails, "viewing_emails": self.viewing_emails,
"viewing_domains": self.viewing_domains "viewing_domains": self.viewing_domains,
"last_synced_at": self.last_synced_at
} }
@classmethod @classmethod
@@ -254,5 +257,6 @@ class ProjectModel:
project_url=data.get("ProjectUrl", ""), project_url=data.get("ProjectUrl", ""),
property_contacts=data.get("property_contacts", {}), property_contacts=data.get("property_contacts", {}),
viewing_emails=data.get("viewing_emails", []), viewing_emails=data.get("viewing_emails", []),
viewing_domains=data.get("viewing_domains", []) viewing_domains=data.get("viewing_domains", []),
last_synced_at=data.get("last_synced_at", "")
) )

View File

@@ -73,13 +73,13 @@ def query_projects_for_user(
domain_lower = filter_domain.lower() domain_lower = filter_domain.lower()
projects_ref = db.collection("projects").where( projects_ref = db.collection("projects").where(
"viewing_domains", "array_contains", domain_lower "viewing_domains", "array_contains", domain_lower
) ).where("is_archived", "==", False)
else: else:
# Email-based search # Email-based search
email_lower = filter_email.lower() email_lower = filter_email.lower()
projects_ref = db.collection("projects").where( projects_ref = db.collection("projects").where(
"viewing_emails", "array_contains", email_lower "viewing_emails", "array_contains", email_lower
) ).where("is_archived", "==", False)
# Get total count # Get total count
total_count = int(projects_ref.count().get()[0][0].value) total_count = int(projects_ref.count().get()[0][0].value)

221
sync.py
View File

@@ -9,7 +9,7 @@ import os
import concurrent.futures import concurrent.futures
import threading import threading
from typing import List, Dict, Any, Optional from typing import List, Dict, Any, Optional
from datetime import datetime from datetime import datetime, timedelta
import pytz import pytz
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
@@ -17,6 +17,34 @@ load_dotenv()
# Add the current directory to the Python path so we can import app and models # Add the current directory to the Python path so we can import app and models
sys.path.append(os.path.dirname(os.path.abspath(__file__))) sys.path.append(os.path.dirname(os.path.abspath(__file__)))
def batch_write_to_firestore(db, collection_name: str, documents: List[tuple], batch_size: int = 500) -> int:
    """Write documents to Firestore in batches.

    Each batch is committed independently. A batch whose commit raises is
    logged and skipped, and the remaining batches are still attempted.

    Args:
        db: Firestore client
        collection_name: Name of the collection
        documents: List of (doc_id, data) tuples
        batch_size: Number of documents per batch (must be >= 1)

    Returns:
        Number of documents that were committed successfully.

    Raises:
        ValueError: If batch_size is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would silently write nothing; fail loudly instead.
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    collection = db.collection(collection_name)
    total = len(documents)
    written = 0
    failed = 0

    for start in range(0, total, batch_size):
        chunk = documents[start:start + batch_size]
        try:
            write_batch = db.batch()
            for doc_id, data in chunk:
                write_batch.set(collection.document(str(doc_id)), data)
            write_batch.commit()
        except Exception as e:
            # Best effort: keep going so one bad batch does not block the rest.
            failed += len(chunk)
            print(f"[ERROR] Batch write failed: {e}")
        else:
            written += len(chunk)
            print(f"[BATCH] Wrote {written}/{total} documents")

    if failed:
        print(f"[ERROR] {failed} documents were not written due to failed batches")
    print(f"[BATCH] Completed writing {written} documents to Firestore")
    return written
def convert_to_pacific_time(date_str): def convert_to_pacific_time(date_str):
"""Convert UTC date string to Pacific Time and format as YYYY-MM-DD. """Convert UTC date string to Pacific Time and format as YYYY-MM-DD.
@@ -67,22 +95,6 @@ def extract_domains_from_emails(emails: List[str]) -> List[str]:
return sorted(list(domains)) return sorted(list(domains))
try:
# Parse the UTC datetime
utc_time = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
# Set timezone to UTC
utc_time = utc_time.replace(tzinfo=pytz.UTC)
# Convert to Pacific Time
pacific_time = utc_time.astimezone(pytz.timezone('America/Los_Angeles'))
# Format as YYYY-MM-DD
return pacific_time.strftime('%m/%d/%Y')
except (ValueError, AttributeError) as e:
print(f"[WARN] Date conversion failed for '{date_str}': {e}")
return ''
from models.project_model import ProjectModel from models.project_model import ProjectModel
from filevine_client import FilevineClient from filevine_client import FilevineClient
@@ -111,18 +123,19 @@ def process_project(index: int, total: int, project_data: dict, client: Filevine
p = project_data p = project_data
pid = (p.get("projectId") or {}).get("native") pid = (p.get("projectId") or {}).get("native")
print(f"Working on {pid} ({index}/{total})")
client = get_filevine_client() client = get_filevine_client()
if pid is None: if pid is None:
print(f"[SKIP] Missing projectId for item {index}")
return {} return {}
project_name = p.get("projectName", "")
try: try:
c = client.fetch_client((p.get("clientId") or {}).get("native")) c = client.fetch_client((p.get("clientId") or {}).get("native"))
cs = client.fetch_contacts(pid) cs = client.fetch_contacts(pid)
detail = client.fetch_project_detail(pid) detail = client.fetch_project_detail(pid)
except Exception as e: except Exception as e:
print(f"[WARN] Failed to fetch essential data for {pid}: {e}") print(f"[ERROR] Failed to fetch essential data for project {pid} '{project_name}': {e}")
return {} return {}
defendant_one = next((c.get('orgContact', {}) for c in cs if "Defendant" in c.get('orgContact', {}).get('personTypes', [])), {}) defendant_one = next((c.get('orgContact', {}) for c in cs if "Defendant" in c.get('orgContact', {}).get('personTypes', [])), {})
@@ -171,9 +184,6 @@ def process_project(index: int, total: int, project_data: dict, client: Filevine
# Extract default date # Extract default date
default_date = convert_to_pacific_time(dates_and_deadlines.get("defaultDate")) or '' default_date = convert_to_pacific_time(dates_and_deadlines.get("defaultDate")) or ''
case_filed_date = convert_to_pacific_time(dates_and_deadlines.get("dateCaseFiled")) or '' case_filed_date = convert_to_pacific_time(dates_and_deadlines.get("dateCaseFiled")) or ''
cf = dates_and_deadlines.get("dateCaseFiled")
from pprint import pprint
print(f"CASE FILED {case_filed_date} {cf}")
# Extract motion hearing dates # Extract motion hearing dates
demurrer_hearing_date = convert_to_pacific_time(dates_and_deadlines.get("demurrerHearingDate")) or '' demurrer_hearing_date = convert_to_pacific_time(dates_and_deadlines.get("demurrerHearingDate")) or ''
@@ -219,12 +229,8 @@ def process_project(index: int, total: int, project_data: dict, client: Filevine
# Extract attorney fees and costs # Extract attorney fees and costs
attorney_fees = fees_and_costs.get("totalAttorneysFees") or '' attorney_fees = fees_and_costs.get("totalAttorneysFees") or ''
costs = fees_and_costs.get("totalCosts") or '' costs = fees_and_costs.get("totalCosts") or ''
from pprint import pprint
property_managers = [property_contacts.get('propertyManager1'), property_contacts.get('propertyManager2'), property_contacts.get('propertyManager3'), property_contacts.get('propertyManager4')] property_managers = [property_contacts.get('propertyManager1'), property_contacts.get('propertyManager2'), property_contacts.get('propertyManager3'), property_contacts.get('propertyManager4')]
import itertools
# valid_property_managers = list(itertools.chain(*))
valid_property_managers = [e.get('address').lower() for pm in property_managers if pm and pm.get('emails') for e in pm.get('emails') if e and e.get('address')] valid_property_managers = [e.get('address').lower() for pm in property_managers if pm and pm.get('emails') for e in pm.get('emails') if e and e.get('address')]
print(valid_property_managers)
row = ProjectModel( row = ProjectModel(
@@ -284,79 +290,168 @@ def process_project(index: int, total: int, project_data: dict, client: Filevine
project_url=p.get("projectUrl") or detail.get("projectUrl"), project_url=p.get("projectUrl") or detail.get("projectUrl"),
#property_contacts=property_contacts #property_contacts=property_contacts
viewing_emails = valid_property_managers, viewing_emails = valid_property_managers,
viewing_domains = extract_domains_from_emails(valid_property_managers) viewing_domains = extract_domains_from_emails(valid_property_managers),
last_synced_at=datetime.now(pytz.UTC).isoformat()
) )
# Store the results in Firestore print(f"[{index}/{total}] Saved: {pid} | Matter {row.number} | {project_name}")
from app import db # Import db from app
projects_ref = db.collection("projects")
from pprint import pprint
# pprint([p.get("number"), property_info, new_file_review])
# Add new projects
project_id = row.project_id
if project_id:
projects_ref.document(str(project_id)).set(row.to_dict())
print(f"Finished on {pid} Matter {row.number} ({index}/{total})")
return row.to_dict() return row.to_dict()
except Exception as e: except Exception as e:
print(f"[ERROR] Processing failed for {pid}: {e}") print(f"[ERROR] Failed to process project {pid} '{project_name}': {e}")
import traceback
traceback.print_exc()
return {} return {}
def process_projects_parallel(projects: List[dict], client: FilevineClient, max_workers: int = 9) -> List[Dict[str, Any]]: def process_projects_parallel(projects: List[dict], client: FilevineClient, max_workers: int = 10) -> List[Dict[str, Any]]:
""" """
Process projects in parallel using a worker pool. Process projects in parallel using a worker pool.
Args: Args:
projects: List of project data dictionaries projects: List of project data dictionaries
client: FilevineClient instance client: FilevineClient instance
max_workers: Number of concurrent workers (default 9) max_workers: Number of concurrent workers (default 10)
Returns: Returns:
List of processed project dictionaries List of processed project dictionaries
""" """
# Create a thread pool with specified number of workers
total = len(projects) total = len(projects)
success_count = 0
fail_count = 0
print(f"[WORKERS] Starting parallel processing of {total} projects with {max_workers} workers...")
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers, initializer=worker_init, initargs=(client,)) as executor: with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers, initializer=worker_init, initargs=(client,)) as executor:
# Submit all tasks to the executor
future_to_project = {executor.submit(process_project, indx, total, project, client): project for indx, project in enumerate(projects)} future_to_project = {executor.submit(process_project, indx, total, project, client): project for indx, project in enumerate(projects)}
# Collect results as they complete
results = [] results = []
for future in concurrent.futures.as_completed(future_to_project): for future in concurrent.futures.as_completed(future_to_project):
try: try:
result = future.result() result = future.result()
if result and result.get('ProjectId'):
success_count += 1
else:
fail_count += 1
results.append(result) results.append(result)
except Exception as e: except Exception as e:
print(f"[ERROR] Processing failed: {e}") fail_count += 1
# Add empty dict or handle error appropriately print(f"[ERROR] Worker thread failed: {e}")
results.append({}) results.append({})
print(f"[WORKERS] Completed: {success_count} succeeded, {fail_count} failed, {total} total")
return results return results
def get_oldest_unsynced_projects(db, fraction: float = 0.2) -> List[int]:
    """Get the oldest fraction of projects by last_synced_at from Firestore.

    Archived projects and documents whose ID is not an integer are left out of
    the pool before the fraction is applied. A document whose ID cannot be
    returned would otherwise keep sorting first and hold a slot in the
    selection without ever producing an ID.

    Args:
        db: Firestore client
        fraction: Fraction of projects to return (default 0.2 = 1/5th)

    Returns:
        List of project IDs (native) that need syncing, oldest first. An empty
        list is returned if reading from Firestore fails.
    """
    try:
        # (project_id, last_synced_at) for every syncable, non-archived project
        candidates = []
        for doc in db.collection("projects").stream():
            data = doc.to_dict() or {}
            # Exclude archived projects from the sync pool
            if data.get("is_archived"):
                continue
            try:
                project_id = int(doc.id)
            except (TypeError, ValueError):
                # e.g. an ID of "None": not a usable native project ID
                continue
            # Missing and None both sort as "never synced".
            # NOTE(review): assumes stored values are ISO-8601 strings - confirm
            candidates.append((project_id, data.get("last_synced_at") or ""))

        total = len(candidates)
        count_to_sync = max(1, int(total * fraction))

        # Sort by last_synced_at ascending (empty strings first, then oldest timestamps)
        candidates.sort(key=lambda item: item[1])
        selected = candidates[:count_to_sync]
        result_ids = [project_id for project_id, _ in selected]

        print(f"[SYNC STRATEGY] {total} active projects in Firestore, will sync oldest {len(result_ids)} ({fraction*100:.0f}%)")
        if selected:
            oldest_id, oldest_stamp = selected[0]
            print(f"[SYNC STRATEGY] Oldest: ID={oldest_id}, last_synced_at='{oldest_stamp or 'N/A'}'")
            if len(selected) > 1:
                cutoff_id, cutoff_stamp = selected[-1]
                print(f"[SYNC STRATEGY] Cutoff: ID={cutoff_id}, last_synced_at='{cutoff_stamp or 'N/A'}'")

        return result_ids
    except Exception as e:
        print(f"[ERROR] Failed to get oldest unsynced projects: {e}")
        import traceback
        traceback.print_exc()
        return []
def main(): def main():
"""Main function to fetch and sync projects""" """Main function to fetch and sync projects"""
print("Starting project sync...") import argparse
parser = argparse.ArgumentParser(description='Sync Filevine projects to Firestore')
parser.add_argument('--mode', choices=['full', 'last_n', 'oldest_percent', 'hybrid', 'single'],
default='hybrid', help='Sync mode: full=all projects, last_n=recently active, oldest_percent=oldest by last_synced_at, hybrid=last_n+oldest_percent, single=one project')
parser.add_argument('--days', type=int, default=14, help='Number of days for last_n mode (default: 14)')
parser.add_argument('--percent', type=float, default=20.0, help='Percentage for oldest_percent mode (default: 20)')
parser.add_argument('--project-id', type=int, help='Project ID for single mode (required when mode=single)')
args = parser.parse_args()
if args.mode == 'single' and not args.project_id:
parser.error("--project-id is required when mode is 'single'")
print(f"[SYNC] Starting sync - mode={args.mode}, workers=10")
try: try:
# Initialize Filevine client
client = FilevineClient() client = FilevineClient()
bearer = client.get_bearer_token() client.get_bearer_token()
from app import db
# List projects (all pages) with filter for projects updated in the last 7 days if args.mode == 'full':
from datetime import datetime, timedelta print("[MODE] Full sync - fetching all projects")
seven_days_ago = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d') projects = client.list_all_projects()
projects = client.list_all_projects(latest_activity_since=seven_days_ago)
#projects = [p for p in projects if (p.get("projectId") or {}).get("native") == 15914808] elif args.mode == 'last_n':
#projects = projects[:10] days_ago = (datetime.now() - timedelta(days=args.days)).strftime('%Y-%m-%d')
print(f"[MODE] Last {args.days} days - fetching active since {days_ago}")
projects = client.list_all_projects(latest_activity_since=days_ago)
elif args.mode == 'oldest_percent':
fraction = args.percent / 100.0
oldest_ids = get_oldest_unsynced_projects(db, fraction=fraction)
print(f"[MODE] Oldest {args.percent}% - fetching {len(oldest_ids)} projects")
all_projects = client.list_all_projects()
projects = [p for p in all_projects if p.get("projectId", {}).get("native") in set(oldest_ids)]
elif args.mode == 'single':
print(f"[MODE] Single project - fetching project {args.project_id}")
project_detail = client.fetch_project_detail(args.project_id)
projects = [project_detail] if project_detail else []
elif args.mode == 'hybrid':
print("[MODE] Hybrid - active + oldest")
days_ago = (datetime.now() - timedelta(days=args.days)).strftime('%Y-%m-%d')
active_projects = client.list_all_projects(latest_activity_since=days_ago)
active_ids = {p.get("projectId", {}).get("native") for p in active_projects}
print(f"[SYNC] {len(active_projects)} active since {days_ago}")
fraction = args.percent / 100.0
oldest_ids = get_oldest_unsynced_projects(db, fraction=fraction)
all_ids_to_sync = active_ids.union(set(oldest_ids))
print(f"[SYNC] {len(all_ids_to_sync)} total unique to sync")
all_projects = client.list_all_projects()
projects = [p for p in all_projects if p.get("projectId", {}).get("native") in all_ids_to_sync]
# Process projects in parallel # Process projects in parallel
detailed_rows = process_projects_parallel(projects, client, 9) detailed_rows = process_projects_parallel(projects, client, max_workers=10)
# Batch write all results to Firestore
documents = []
for row in detailed_rows:
if row.get('ProjectId'):
row['is_archived'] = (row.get('phase_name') == 'Archived')
documents.append((row.get('ProjectId'), row))
batch_write_to_firestore(db, "projects", documents)
print(f"Successfully synced {len(detailed_rows)} projects to Firestore") print(f"[SYNC] Complete - {len(documents)} projects saved to Firestore")
except Exception as e: except Exception as e:
print(f"Error during sync: {e}") print(f"Error during sync: {e}")
@@ -364,13 +459,5 @@ def main():
traceback.print_exc() traceback.print_exc()
sys.exit(1) sys.exit(1)
def sync_single(x):
client = FilevineClient()
z = process_project(0, 1, client.fetch_project_detail(x), client)
from pprint import pprint
#pprint(z)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -167,6 +167,9 @@
<th style="background-color: rgb(89, 121, 142);" class="px-4 py-3 w-32 sticky top-0 z-[40]" :class="{'hidden': !isColumnVisible('Date Possession Recovered')}">Date Possession Recovered</th> <th style="background-color: rgb(89, 121, 142);" class="px-4 py-3 w-32 sticky top-0 z-[40]" :class="{'hidden': !isColumnVisible('Date Possession Recovered')}">Date Possession Recovered</th>
<th style="background-color: rgb(89, 121, 142);" class="px-4 py-3 w-32 sticky top-0 z-[40]" :class="{'hidden': !isColumnVisible('Attorney\'s Fees')}">Attorney's Fees</th> <th style="background-color: rgb(89, 121, 142);" class="px-4 py-3 w-32 sticky top-0 z-[40]" :class="{'hidden': !isColumnVisible('Attorney\'s Fees')}">Attorney's Fees</th>
<th style="background-color: rgb(89, 121, 142);" class="px-4 py-3 w-32 sticky top-0 z-[40]" :class="{'hidden': !isColumnVisible('Costs')}">Costs</th> <th style="background-color: rgb(89, 121, 142);" class="px-4 py-3 w-32 sticky top-0 z-[40]" :class="{'hidden': !isColumnVisible('Costs')}">Costs</th>
{% if is_admin %}
<th style="background-color: rgb(89, 121, 142);" class="px-4 py-3 w-32 sticky top-0 z-[40]" :class="{'hidden': !isColumnVisible('Last Synced')}">Last Synced</th>
{% endif %}
</tr> </tr>
</thead> </thead>
<tbody class="bg-slate-100 divide-y divide-slate-300"> <tbody class="bg-slate-100 divide-y divide-slate-300">
@@ -542,6 +545,13 @@
{{ r.costs }} {{ r.costs }}
{% endcall %} {% endcall %}
</td> </td>
{% if is_admin %}
<td class="px-4 py-3 text-sm" :class="{'hidden': !isColumnVisible('Last Synced')}">
{% call expander() %}
{% if r.last_synced_at %}{{ r.last_synced_at.split('T')[0] }}{% endif %}
{% endcall %}
</td>
{% endif %}
</tr> </tr>
{% else %} {% else %}
<tr> <tr>
@@ -610,7 +620,8 @@
'Matter Gate or Entry Code', 'Matter Gate or Entry Code',
'Date Possession Recovered', 'Date Possession Recovered',
'Attorney\'s Fees', 'Attorney\'s Fees',
'Costs' 'Costs',
'Last Synced'
], ],
selectAll: true, selectAll: true,
visibleColumns: [], visibleColumns: [],