Compare commits
7 Commits
dc81c8e2a7
...
feat/admin
| Author | SHA1 | Date | |
|---|---|---|---|
| 8dd7ae8c95 | |||
| 616ffde402 | |||
| eb78676cdb | |||
| 3633923fa7 | |||
| c62de705de | |||
| 3ed260ef23 | |||
| 9df9e003c1 | |||
13
app.py
13
app.py
@@ -55,7 +55,7 @@ def projects_for(profile, case_email_match, per_page, offset):
|
|||||||
# Check if case_email_match is a valid email address (contains @)
|
# Check if case_email_match is a valid email address (contains @)
|
||||||
if '@' in case_email_match_lower and not case_email_match_lower.startswith('@'):
|
if '@' in case_email_match_lower and not case_email_match_lower.startswith('@'):
|
||||||
# If it's a complete email address, filter by exact match in viewing_emails
|
# If it's a complete email address, filter by exact match in viewing_emails
|
||||||
projects_ref = db.collection("projects").where("viewing_emails", "array_contains", case_email_match_lower)
|
projects_ref = db.collection("projects").where("viewing_emails", "array_contains", case_email_match_lower).where("is_archived", "==", False)
|
||||||
cnt = int(projects_ref.count().get()[0][0].value)
|
cnt = int(projects_ref.count().get()[0][0].value)
|
||||||
projects = []
|
projects = []
|
||||||
for doc in projects_ref.order_by("matter_description").limit(per_page).offset(offset).stream():
|
for doc in projects_ref.order_by("matter_description").limit(per_page).offset(offset).stream():
|
||||||
@@ -69,7 +69,7 @@ def projects_for(profile, case_email_match, per_page, offset):
|
|||||||
domain_search = domain_search[1:] # Remove the @ sign
|
domain_search = domain_search[1:] # Remove the @ sign
|
||||||
|
|
||||||
# Filter by domain match in viewing_emails
|
# Filter by domain match in viewing_emails
|
||||||
projects_ref = db.collection("projects").where("viewing_domains", "array_contains", domain_search)
|
projects_ref = db.collection("projects").where("viewing_domains", "array_contains", domain_search).where("is_archived", "==", False)
|
||||||
print("HERE domain", domain_search)
|
print("HERE domain", domain_search)
|
||||||
cnt = int(projects_ref.count().get()[0][0].value)
|
cnt = int(projects_ref.count().get()[0][0].value)
|
||||||
|
|
||||||
@@ -79,7 +79,7 @@ def projects_for(profile, case_email_match, per_page, offset):
|
|||||||
return (projects, cnt)
|
return (projects, cnt)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
projects_ref = db.collection("projects")
|
projects_ref = db.collection("projects").where("is_archived", "==", False)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# For non-admin users, check if they have domain email or specific case email
|
# For non-admin users, check if they have domain email or specific case email
|
||||||
@@ -89,10 +89,10 @@ def projects_for(profile, case_email_match, per_page, offset):
|
|||||||
if case_domain_email:
|
if case_domain_email:
|
||||||
# Use exact match on viewing_domains field
|
# Use exact match on viewing_domains field
|
||||||
domain_lower = case_domain_email.lower()
|
domain_lower = case_domain_email.lower()
|
||||||
projects_ref = db.collection("projects").where("viewing_domains", "array_contains", domain_lower)
|
projects_ref = db.collection("projects").where("viewing_domains", "array_contains", domain_lower).where("is_archived", "==", False)
|
||||||
elif case_email:
|
elif case_email:
|
||||||
# Use the original logic for specific case email match
|
# Use the original logic for specific case email match
|
||||||
projects_ref = db.collection("projects").where("viewing_emails", "array_contains", case_email.lower())
|
projects_ref = db.collection("projects").where("viewing_emails", "array_contains", case_email.lower()).where("is_archived", "==", False)
|
||||||
else:
|
else:
|
||||||
return ([], 0)
|
return ([], 0)
|
||||||
|
|
||||||
@@ -278,7 +278,8 @@ def dashboard(page=1):
|
|||||||
current_page=page,
|
current_page=page,
|
||||||
total_pages=total_pages,
|
total_pages=total_pages,
|
||||||
total_projects=total_projects,
|
total_projects=total_projects,
|
||||||
per_page=per_page)
|
per_page=per_page,
|
||||||
|
is_admin=is_admin)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
61
backfill_is_archived.py
Normal file
61
backfill_is_archived.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
One-off script to backfill is_archived field on all projects in Firestore.
|
||||||
|
|
||||||
|
This sets is_archived = True for projects where phase_name == "Archived",
|
||||||
|
and is_archived = False for all other projects.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python backfill_is_archived.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from firebase_admin import credentials, initialize_app, firestore
|
||||||
|
|
||||||
|
# Path to your staging service account JSON
|
||||||
|
CREDENTIALS_PATH = "./rothbard-staging2-12345-firebase-adminsdk-fbsvc-7f95268383.json"
|
||||||
|
|
||||||
|
def main():
    """Backfill the ``is_archived`` flag on every document in ``projects``.

    A project is considered archived when its ``phase_name`` field equals
    ``"Archived"``; every other project (including ones with no
    ``phase_name``) gets ``is_archived = False``. Documents whose stored
    ``is_archived`` value already matches are left untouched, so the script
    can be re-run safely.

    Credentials are read from the module-level ``CREDENTIALS_PATH``.
    Progress and a final summary are printed to stdout.
    """
    # Initialize Firebase Admin with staging credentials
    cred = credentials.Certificate(CREDENTIALS_PATH)
    app = initialize_app(cred, name='backfill-is-archived')
    db = firestore.client(app=app)

    projects_ref = db.collection("projects")
    # The whole collection is materialized up front so the total is known
    # before any writes start.
    docs = list(projects_ref.stream())

    total = len(docs)
    archived_count = 0
    updated_count = 0
    batch_size = 500
    total_batches = (total + batch_size - 1) // batch_size

    print(f"Found {total} projects. Processing in batches of {batch_size}...")

    for batch_number, start in enumerate(range(0, total, batch_size), start=1):
        batch = db.batch()
        pending = 0

        for doc in docs[start:start + batch_size]:
            # Guard against a snapshot without data so .get() cannot fail.
            data = doc.to_dict() or {}
            is_archived = data.get("phase_name", "") == "Archived"

            if is_archived:
                archived_count += 1

            # Only update if the field is missing or different
            if data.get("is_archived") != is_archived:
                ref = projects_ref.document(doc.id)
                batch.update(ref, {"is_archived": is_archived})
                pending += 1

        if pending:
            batch.commit()
            # Count only after the commit succeeded.
            updated_count += pending
            print(f"  Committed batch {batch_number}/{total_batches}")
        else:
            # Nothing changed in this slice: skip the empty commit round trip.
            print(f"  Skipped batch {batch_number}/{total_batches} (no changes)")

    print("\nDone!")
    print(f"  Total projects: {total}")
    print(f"  Projects with phase_name == 'Archived': {archived_count}")
    print(f"  Documents updated: {updated_count}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -19,6 +19,12 @@ class FilevineClient:
|
|||||||
"x-fv-orgid": str(FV_ORG_ID),
|
"x-fv-orgid": str(FV_ORG_ID),
|
||||||
"x-fv-userid": str(FV_USER_ID),
|
"x-fv-userid": str(FV_USER_ID),
|
||||||
}
|
}
|
||||||
|
self.session = requests.Session()
|
||||||
|
self.session.headers.update({
|
||||||
|
"Accept": "application/json",
|
||||||
|
"x-fv-orgid": str(FV_ORG_ID),
|
||||||
|
"x-fv-userid": str(FV_USER_ID),
|
||||||
|
})
|
||||||
self.get_bearer_token()
|
self.get_bearer_token()
|
||||||
|
|
||||||
def get_bearer_token(self) -> str:
|
def get_bearer_token(self) -> str:
|
||||||
@@ -33,14 +39,12 @@ class FilevineClient:
|
|||||||
}
|
}
|
||||||
|
|
||||||
headers = {"Accept": "application/json"}
|
headers = {"Accept": "application/json"}
|
||||||
print("data is", data)
|
resp = self.session.post(url, data=data, headers=headers, timeout=30)
|
||||||
print(data)
|
|
||||||
resp = requests.post(url, data=data, headers=headers, timeout=30)
|
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
js = resp.json()
|
js = resp.json()
|
||||||
token = js.get("access_token")
|
token = js.get("access_token")
|
||||||
print(f"Got bearer js", js)
|
|
||||||
self.bearer_token = token
|
self.bearer_token = token
|
||||||
|
self.session.headers["Authorization"] = f"Bearer {token}"
|
||||||
self.headers["Authorization"] = f"Bearer {token}"
|
self.headers["Authorization"] = f"Bearer {token}"
|
||||||
return token
|
return token
|
||||||
|
|
||||||
@@ -60,7 +64,6 @@ class FilevineClient:
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
cnt = len(results)
|
cnt = len(results)
|
||||||
print(f"list try {tries}, starting at {offset}, previous count {last_count}, currently at {cnt}")
|
|
||||||
tries += 1
|
tries += 1
|
||||||
url = base
|
url = base
|
||||||
params = {}
|
params = {}
|
||||||
@@ -72,7 +75,7 @@ class FilevineClient:
|
|||||||
if latest_activity_since:
|
if latest_activity_since:
|
||||||
params["latestActivitySince"] = latest_activity_since
|
params["latestActivitySince"] = latest_activity_since
|
||||||
|
|
||||||
r = requests.get(url, headers=self.headers, params=params, timeout=30)
|
r = self.session.get(url, headers=self.headers, params=params, timeout=30)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
page = r.json()
|
page = r.json()
|
||||||
items = page.get("items", [])
|
items = page.get("items", [])
|
||||||
@@ -89,35 +92,35 @@ class FilevineClient:
|
|||||||
def fetch_project_detail(self, project_id_native: int) -> Dict[str, Any]:
|
def fetch_project_detail(self, project_id_native: int) -> Dict[str, Any]:
|
||||||
"""Fetch detailed information for a specific project"""
|
"""Fetch detailed information for a specific project"""
|
||||||
url = f"{self.base_url}/Projects/{project_id_native}"
|
url = f"{self.base_url}/Projects/{project_id_native}"
|
||||||
r = requests.get(url, headers=self.headers, timeout=30)
|
r = self.session.get(url, headers=self.headers, timeout=30)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
return r.json()
|
return r.json()
|
||||||
|
|
||||||
def fetch_project_team(self, project_id_native: int) -> List[Dict[str, Any]]:
|
def fetch_project_team(self, project_id_native: int) -> List[Dict[str, Any]]:
|
||||||
"""Fetch team members for a specific project"""
|
"""Fetch team members for a specific project"""
|
||||||
url = f"{self.base_url}/Projects/{project_id_native}/team?limit=1000"
|
url = f"{self.base_url}/Projects/{project_id_native}/team?limit=1000"
|
||||||
r = requests.get(url, headers=self.headers, timeout=30)
|
r = self.session.get(url, headers=self.headers, timeout=30)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
return r.json().get('items') or []
|
return r.json().get('items') or []
|
||||||
|
|
||||||
def fetch_project_tasks(self, project_id_native: int) -> Dict[str, Any]:
|
def fetch_project_tasks(self, project_id_native: int) -> Dict[str, Any]:
|
||||||
"""Fetch tasks for a specific project"""
|
"""Fetch tasks for a specific project"""
|
||||||
url = f"{self.base_url}/Projects/{project_id_native}/tasks"
|
url = f"{self.base_url}/Projects/{project_id_native}/tasks"
|
||||||
r = requests.get(url, headers=self.headers, timeout=30)
|
r = self.session.get(url, headers=self.headers, timeout=30)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
return r.json()
|
return r.json()
|
||||||
|
|
||||||
def fetch_client(self, client_id_native: int) -> Dict[str, Any]:
|
def fetch_client(self, client_id_native: int) -> Dict[str, Any]:
|
||||||
"""Fetch client information by client ID"""
|
"""Fetch client information by client ID"""
|
||||||
url = f"{self.base_url}/contacts/{client_id_native}"
|
url = f"{self.base_url}/contacts/{client_id_native}"
|
||||||
r = requests.get(url, headers=self.headers, timeout=30)
|
r = self.session.get(url, headers=self.headers, timeout=30)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
return r.json()
|
return r.json()
|
||||||
|
|
||||||
def fetch_contacts(self, project_id_native: int) -> Optional[List[Dict[str, Any]]]:
|
def fetch_contacts(self, project_id_native: int) -> Optional[List[Dict[str, Any]]]:
|
||||||
"""Fetch contacts for a specific project"""
|
"""Fetch contacts for a specific project"""
|
||||||
url = f"{self.base_url}/projects/{project_id_native}/contacts"
|
url = f"{self.base_url}/projects/{project_id_native}/contacts"
|
||||||
r = requests.get(url, headers=self.headers, timeout=30)
|
r = self.session.get(url, headers=self.headers, timeout=30)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
return r.json().get("items")
|
return r.json().get("items")
|
||||||
|
|
||||||
@@ -125,20 +128,20 @@ class FilevineClient:
|
|||||||
"""Fetch a specific form for a project"""
|
"""Fetch a specific form for a project"""
|
||||||
try:
|
try:
|
||||||
url = f"{self.base_url}/Projects/{project_id_native}/Forms/{form}"
|
url = f"{self.base_url}/Projects/{project_id_native}/Forms/{form}"
|
||||||
r = requests.get(url, headers=self.headers, timeout=30)
|
r = self.session.get(url, headers=self.headers, timeout=30)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
return r.json()
|
return r.json()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(f"[WARN] Failed to fetch form '{form}' for project {project_id_native}: {e}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def fetch_collection(self, project_id_native: int, collection: str) -> List[Dict[str, Any]]:
|
def fetch_collection(self, project_id_native: int, collection: str) -> List[Dict[str, Any]]:
|
||||||
"""Fetch a collection for a project"""
|
"""Fetch a collection for a project"""
|
||||||
try:
|
try:
|
||||||
url = f"{self.base_url}/Projects/{project_id_native}/Collections/{collection}"
|
url = f"{self.base_url}/Projects/{project_id_native}/Collections/{collection}"
|
||||||
r = requests.get(url, headers=self.headers, timeout=30)
|
r = self.session.get(url, headers=self.headers, timeout=30)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
return [x.get('dataObject') for x in r.json().get("items")]
|
return [x.get('dataObject') for x in r.json().get("items")]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(f"[WARN] Failed to fetch collection '{collection}' for project {project_id_native}: {e}")
|
||||||
return {}
|
return {}
|
||||||
@@ -70,9 +70,10 @@ class ProjectModel:
|
|||||||
project_name: str = "",
|
project_name: str = "",
|
||||||
project_url: str = "",
|
project_url: str = "",
|
||||||
property_contacts: Dict[str, Any] = None,
|
property_contacts: Dict[str, Any] = None,
|
||||||
viewing_emails: List[str] = None,
|
viewing_emails: List[str] = None,
|
||||||
viewing_domains: List[str] = None
|
viewing_domains: List[str] = None,
|
||||||
):
|
last_synced_at: str = ""
|
||||||
|
):
|
||||||
|
|
||||||
self.client = client
|
self.client = client
|
||||||
self.matter_description = matter_description
|
self.matter_description = matter_description
|
||||||
@@ -131,6 +132,7 @@ class ProjectModel:
|
|||||||
self.property_contacts = property_contacts or {}
|
self.property_contacts = property_contacts or {}
|
||||||
self.viewing_emails = viewing_emails or []
|
self.viewing_emails = viewing_emails or []
|
||||||
self.viewing_domains = viewing_domains or []
|
self.viewing_domains = viewing_domains or []
|
||||||
|
self.last_synced_at = last_synced_at
|
||||||
|
|
||||||
def to_dict(self) -> Dict[str, Any]:
|
def to_dict(self) -> Dict[str, Any]:
|
||||||
"""Convert the ProjectModel to a dictionary for Firestore storage."""
|
"""Convert the ProjectModel to a dictionary for Firestore storage."""
|
||||||
@@ -191,7 +193,8 @@ class ProjectModel:
|
|||||||
"ProjectUrl": self.project_url,
|
"ProjectUrl": self.project_url,
|
||||||
"property_contacts": self.property_contacts,
|
"property_contacts": self.property_contacts,
|
||||||
"viewing_emails": self.viewing_emails,
|
"viewing_emails": self.viewing_emails,
|
||||||
"viewing_domains": self.viewing_domains
|
"viewing_domains": self.viewing_domains,
|
||||||
|
"last_synced_at": self.last_synced_at
|
||||||
}
|
}
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -254,5 +257,6 @@ class ProjectModel:
|
|||||||
project_url=data.get("ProjectUrl", ""),
|
project_url=data.get("ProjectUrl", ""),
|
||||||
property_contacts=data.get("property_contacts", {}),
|
property_contacts=data.get("property_contacts", {}),
|
||||||
viewing_emails=data.get("viewing_emails", []),
|
viewing_emails=data.get("viewing_emails", []),
|
||||||
viewing_domains=data.get("viewing_domains", [])
|
viewing_domains=data.get("viewing_domains", []),
|
||||||
|
last_synced_at=data.get("last_synced_at", "")
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -73,13 +73,13 @@ def query_projects_for_user(
|
|||||||
domain_lower = filter_domain.lower()
|
domain_lower = filter_domain.lower()
|
||||||
projects_ref = db.collection("projects").where(
|
projects_ref = db.collection("projects").where(
|
||||||
"viewing_domains", "array_contains", domain_lower
|
"viewing_domains", "array_contains", domain_lower
|
||||||
)
|
).where("is_archived", "==", False)
|
||||||
else:
|
else:
|
||||||
# Email-based search
|
# Email-based search
|
||||||
email_lower = filter_email.lower()
|
email_lower = filter_email.lower()
|
||||||
projects_ref = db.collection("projects").where(
|
projects_ref = db.collection("projects").where(
|
||||||
"viewing_emails", "array_contains", email_lower
|
"viewing_emails", "array_contains", email_lower
|
||||||
)
|
).where("is_archived", "==", False)
|
||||||
|
|
||||||
# Get total count
|
# Get total count
|
||||||
total_count = int(projects_ref.count().get()[0][0].value)
|
total_count = int(projects_ref.count().get()[0][0].value)
|
||||||
|
|||||||
221
sync.py
221
sync.py
@@ -9,7 +9,7 @@ import os
|
|||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import threading
|
import threading
|
||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
from datetime import datetime
|
from datetime import datetime, timedelta
|
||||||
import pytz
|
import pytz
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
@@ -17,6 +17,34 @@ load_dotenv()
|
|||||||
# Add the current directory to the Python path so we can import app and models
|
# Add the current directory to the Python path so we can import app and models
|
||||||
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
def batch_write_to_firestore(db, collection_name: str, documents: List[tuple], batch_size: int = 500):
    """Write documents to Firestore in batches from the main thread.

    Each batch is committed independently. A batch that fails is logged and
    skipped so the remaining batches are still attempted; its documents are
    counted as failed and are NOT retried.

    Args:
        db: Firestore client
        collection_name: Name of the collection
        documents: List of (doc_id, data) tuples; doc_id is converted with
            ``str()`` to form the document ID
        batch_size: Number of documents per batch (must be >= 1)

    Returns:
        The number of documents that were committed successfully.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would make range() empty and silently write nothing.
        raise ValueError("batch_size must be a positive integer")

    collection = db.collection(collection_name)
    total = len(documents)
    written = 0
    failed = 0

    for start in range(0, total, batch_size):
        chunk = documents[start:start + batch_size]
        try:
            write_batch = db.batch()
            for doc_id, data in chunk:
                ref = collection.document(str(doc_id))
                write_batch.set(ref, data)
            write_batch.commit()
        except Exception as e:
            # Best effort: keep going with the next batch, but remember the loss.
            failed += len(chunk)
            print(f"[ERROR] Batch write failed: {e}")
            continue

        written += len(chunk)
        print(f"[BATCH] Wrote {written}/{total} documents")

    print(f"[BATCH] Completed writing {written} documents to Firestore")
    if failed:
        print(f"[BATCH] {failed} documents were NOT written because their batch failed")
    return written
|
||||||
|
|
||||||
def convert_to_pacific_time(date_str):
|
def convert_to_pacific_time(date_str):
|
||||||
"""Convert UTC date string to Pacific Time and format as YYYY-MM-DD.
|
"""Convert UTC date string to Pacific Time and format as YYYY-MM-DD.
|
||||||
|
|
||||||
@@ -67,22 +95,6 @@ def extract_domains_from_emails(emails: List[str]) -> List[str]:
|
|||||||
|
|
||||||
return sorted(list(domains))
|
return sorted(list(domains))
|
||||||
|
|
||||||
try:
|
|
||||||
# Parse the UTC datetime
|
|
||||||
utc_time = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
|
|
||||||
|
|
||||||
# Set timezone to UTC
|
|
||||||
utc_time = utc_time.replace(tzinfo=pytz.UTC)
|
|
||||||
|
|
||||||
# Convert to Pacific Time
|
|
||||||
pacific_time = utc_time.astimezone(pytz.timezone('America/Los_Angeles'))
|
|
||||||
|
|
||||||
# Format as YYYY-MM-DD
|
|
||||||
return pacific_time.strftime('%m/%d/%Y')
|
|
||||||
except (ValueError, AttributeError) as e:
|
|
||||||
print(f"[WARN] Date conversion failed for '{date_str}': {e}")
|
|
||||||
return ''
|
|
||||||
|
|
||||||
from models.project_model import ProjectModel
|
from models.project_model import ProjectModel
|
||||||
from filevine_client import FilevineClient
|
from filevine_client import FilevineClient
|
||||||
|
|
||||||
@@ -111,18 +123,19 @@ def process_project(index: int, total: int, project_data: dict, client: Filevine
|
|||||||
|
|
||||||
p = project_data
|
p = project_data
|
||||||
pid = (p.get("projectId") or {}).get("native")
|
pid = (p.get("projectId") or {}).get("native")
|
||||||
print(f"Working on {pid} ({index}/{total})")
|
|
||||||
client = get_filevine_client()
|
client = get_filevine_client()
|
||||||
|
|
||||||
if pid is None:
|
if pid is None:
|
||||||
|
print(f"[SKIP] Missing projectId for item {index}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
project_name = p.get("projectName", "")
|
||||||
try:
|
try:
|
||||||
c = client.fetch_client((p.get("clientId") or {}).get("native"))
|
c = client.fetch_client((p.get("clientId") or {}).get("native"))
|
||||||
cs = client.fetch_contacts(pid)
|
cs = client.fetch_contacts(pid)
|
||||||
detail = client.fetch_project_detail(pid)
|
detail = client.fetch_project_detail(pid)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[WARN] Failed to fetch essential data for {pid}: {e}")
|
print(f"[ERROR] Failed to fetch essential data for project {pid} '{project_name}': {e}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
defendant_one = next((c.get('orgContact', {}) for c in cs if "Defendant" in c.get('orgContact', {}).get('personTypes', [])), {})
|
defendant_one = next((c.get('orgContact', {}) for c in cs if "Defendant" in c.get('orgContact', {}).get('personTypes', [])), {})
|
||||||
@@ -171,9 +184,6 @@ def process_project(index: int, total: int, project_data: dict, client: Filevine
|
|||||||
# Extract default date
|
# Extract default date
|
||||||
default_date = convert_to_pacific_time(dates_and_deadlines.get("defaultDate")) or ''
|
default_date = convert_to_pacific_time(dates_and_deadlines.get("defaultDate")) or ''
|
||||||
case_filed_date = convert_to_pacific_time(dates_and_deadlines.get("dateCaseFiled")) or ''
|
case_filed_date = convert_to_pacific_time(dates_and_deadlines.get("dateCaseFiled")) or ''
|
||||||
cf = dates_and_deadlines.get("dateCaseFiled")
|
|
||||||
from pprint import pprint
|
|
||||||
print(f"CASE FILED {case_filed_date} {cf}")
|
|
||||||
|
|
||||||
# Extract motion hearing dates
|
# Extract motion hearing dates
|
||||||
demurrer_hearing_date = convert_to_pacific_time(dates_and_deadlines.get("demurrerHearingDate")) or ''
|
demurrer_hearing_date = convert_to_pacific_time(dates_and_deadlines.get("demurrerHearingDate")) or ''
|
||||||
@@ -219,12 +229,8 @@ def process_project(index: int, total: int, project_data: dict, client: Filevine
|
|||||||
# Extract attorney fees and costs
|
# Extract attorney fees and costs
|
||||||
attorney_fees = fees_and_costs.get("totalAttorneysFees") or ''
|
attorney_fees = fees_and_costs.get("totalAttorneysFees") or ''
|
||||||
costs = fees_and_costs.get("totalCosts") or ''
|
costs = fees_and_costs.get("totalCosts") or ''
|
||||||
from pprint import pprint
|
|
||||||
property_managers = [property_contacts.get('propertyManager1'), property_contacts.get('propertyManager2'), property_contacts.get('propertyManager3'), property_contacts.get('propertyManager4')]
|
property_managers = [property_contacts.get('propertyManager1'), property_contacts.get('propertyManager2'), property_contacts.get('propertyManager3'), property_contacts.get('propertyManager4')]
|
||||||
import itertools
|
|
||||||
# valid_property_managers = list(itertools.chain(*))
|
|
||||||
valid_property_managers = [e.get('address').lower() for pm in property_managers if pm and pm.get('emails') for e in pm.get('emails') if e and e.get('address')]
|
valid_property_managers = [e.get('address').lower() for pm in property_managers if pm and pm.get('emails') for e in pm.get('emails') if e and e.get('address')]
|
||||||
print(valid_property_managers)
|
|
||||||
|
|
||||||
|
|
||||||
row = ProjectModel(
|
row = ProjectModel(
|
||||||
@@ -284,79 +290,168 @@ def process_project(index: int, total: int, project_data: dict, client: Filevine
|
|||||||
project_url=p.get("projectUrl") or detail.get("projectUrl"),
|
project_url=p.get("projectUrl") or detail.get("projectUrl"),
|
||||||
#property_contacts=property_contacts
|
#property_contacts=property_contacts
|
||||||
viewing_emails = valid_property_managers,
|
viewing_emails = valid_property_managers,
|
||||||
viewing_domains = extract_domains_from_emails(valid_property_managers)
|
viewing_domains = extract_domains_from_emails(valid_property_managers),
|
||||||
|
last_synced_at=datetime.now(pytz.UTC).isoformat()
|
||||||
)
|
)
|
||||||
# Store the results in Firestore
|
print(f"[{index}/{total}] Saved: {pid} | Matter {row.number} | {project_name}")
|
||||||
from app import db # Import db from app
|
|
||||||
|
|
||||||
projects_ref = db.collection("projects")
|
|
||||||
from pprint import pprint
|
|
||||||
# pprint([p.get("number"), property_info, new_file_review])
|
|
||||||
|
|
||||||
# Add new projects
|
|
||||||
project_id = row.project_id
|
|
||||||
if project_id:
|
|
||||||
projects_ref.document(str(project_id)).set(row.to_dict())
|
|
||||||
|
|
||||||
print(f"Finished on {pid} Matter {row.number} ({index}/{total})")
|
|
||||||
return row.to_dict()
|
return row.to_dict()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[ERROR] Processing failed for {pid}: {e}")
|
print(f"[ERROR] Failed to process project {pid} '{project_name}': {e}")
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def process_projects_parallel(projects: List[dict], client: FilevineClient, max_workers: int = 9) -> List[Dict[str, Any]]:
|
def process_projects_parallel(projects: List[dict], client: FilevineClient, max_workers: int = 10) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Process projects in parallel using a worker pool.
|
Process projects in parallel using a worker pool.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
projects: List of project data dictionaries
|
projects: List of project data dictionaries
|
||||||
client: FilevineClient instance
|
client: FilevineClient instance
|
||||||
max_workers: Number of concurrent workers (default 9)
|
max_workers: Number of concurrent workers (default 10)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of processed project dictionaries
|
List of processed project dictionaries
|
||||||
"""
|
"""
|
||||||
# Create a thread pool with specified number of workers
|
|
||||||
total = len(projects)
|
total = len(projects)
|
||||||
|
success_count = 0
|
||||||
|
fail_count = 0
|
||||||
|
|
||||||
|
print(f"[WORKERS] Starting parallel processing of {total} projects with {max_workers} workers...")
|
||||||
|
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers, initializer=worker_init, initargs=(client,)) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers, initializer=worker_init, initargs=(client,)) as executor:
|
||||||
# Submit all tasks to the executor
|
|
||||||
future_to_project = {executor.submit(process_project, indx, total, project, client): project for indx, project in enumerate(projects)}
|
future_to_project = {executor.submit(process_project, indx, total, project, client): project for indx, project in enumerate(projects)}
|
||||||
|
|
||||||
# Collect results as they complete
|
|
||||||
results = []
|
results = []
|
||||||
for future in concurrent.futures.as_completed(future_to_project):
|
for future in concurrent.futures.as_completed(future_to_project):
|
||||||
try:
|
try:
|
||||||
result = future.result()
|
result = future.result()
|
||||||
|
if result and result.get('ProjectId'):
|
||||||
|
success_count += 1
|
||||||
|
else:
|
||||||
|
fail_count += 1
|
||||||
results.append(result)
|
results.append(result)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[ERROR] Processing failed: {e}")
|
fail_count += 1
|
||||||
# Add empty dict or handle error appropriately
|
print(f"[ERROR] Worker thread failed: {e}")
|
||||||
results.append({})
|
results.append({})
|
||||||
|
|
||||||
|
print(f"[WORKERS] Completed: {success_count} succeeded, {fail_count} failed, {total} total")
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
def get_oldest_unsynced_projects(db, fraction: float = 0.2) -> List[int]:
    """Get the oldest fraction of projects by last_synced_at from Firestore.

    Streams the whole ``projects`` collection, drops documents whose
    ``is_archived`` field is truthy, orders the rest by ``last_synced_at``
    ascending (missing, ``None`` or empty values sort first) and returns the
    IDs of the first ``fraction`` of them. At least one document is selected
    whenever any active project exists.

    Archived projects are filtered client-side, so documents that have no
    ``is_archived`` field at all still count as active.

    Args:
        db: Firestore client
        fraction: Fraction of projects to return (default 0.2 = 1/5th)

    Returns:
        List of project IDs (native) that need syncing. Documents whose ID is
        empty, ``"None"`` or not an integer are left out. Returns an empty
        list if reading from Firestore fails.
    """
    try:
        projects_ref = db.collection("projects")

        # Read each snapshot's data once: (sort_key, doc_id, value_for_log).
        candidates = []
        for doc in projects_ref.stream():
            data = doc.to_dict() or {}
            # Exclude archived projects from the sync pool
            if data.get("is_archived"):
                continue
            # None would not compare against str in the sort, so it is
            # treated like "never synced".
            # NOTE(review): assumes last_synced_at is stored as an ISO string - confirm.
            sort_key = data.get("last_synced_at") or ""
            candidates.append((sort_key, doc.id, data.get("last_synced_at", "N/A")))

        total = len(candidates)
        count_to_sync = max(1, int(total * fraction))

        # Sort by last_synced_at ascending (empty strings first, then oldest
        # timestamps). Sorting on the key alone keeps stream order for ties.
        candidates.sort(key=lambda item: item[0])

        selected = []
        for _, doc_id, shown in candidates[:count_to_sync]:
            if not doc_id or doc_id == "None":
                continue
            try:
                selected.append((int(doc_id), shown))
            except (TypeError, ValueError):
                # One malformed ID must not discard the whole selection.
                print(f"[WARN] Skipping project with non-numeric document ID '{doc_id}'")

        result_ids = [project_id for project_id, _ in selected]

        print(f"[SYNC STRATEGY] {total} active projects in Firestore, will sync oldest {len(result_ids)} ({fraction*100:.0f}%)")
        if selected:
            oldest_id, oldest_synced = selected[0]
            print(f"[SYNC STRATEGY] Oldest: ID={oldest_id}, last_synced_at='{oldest_synced}'")
        if len(selected) > 1:
            cutoff_id, cutoff_synced = selected[-1]
            print(f"[SYNC STRATEGY] Cutoff: ID={cutoff_id}, last_synced_at='{cutoff_synced}'")
        return result_ids

    except Exception as e:
        print(f"[ERROR] Failed to get oldest unsynced projects: {e}")
        import traceback
        traceback.print_exc()
        return []
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main function to fetch and sync projects"""
|
"""Main function to fetch and sync projects"""
|
||||||
print("Starting project sync...")
|
import argparse
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(description='Sync Filevine projects to Firestore')
|
||||||
|
parser.add_argument('--mode', choices=['full', 'last_n', 'oldest_percent', 'hybrid', 'single'],
|
||||||
|
default='hybrid', help='Sync mode: full=all projects, last_n=recently active, oldest_percent=oldest by last_synced_at, hybrid=last_n+oldest_percent, single=one project')
|
||||||
|
parser.add_argument('--days', type=int, default=14, help='Number of days for last_n mode (default: 14)')
|
||||||
|
parser.add_argument('--percent', type=float, default=20.0, help='Percentage for oldest_percent mode (default: 20)')
|
||||||
|
parser.add_argument('--project-id', type=int, help='Project ID for single mode (required when mode=single)')
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if args.mode == 'single' and not args.project_id:
|
||||||
|
parser.error("--project-id is required when mode is 'single'")
|
||||||
|
|
||||||
|
print(f"[SYNC] Starting sync - mode={args.mode}, workers=10")
|
||||||
try:
|
try:
|
||||||
# Initialize Filevine client
|
|
||||||
client = FilevineClient()
|
client = FilevineClient()
|
||||||
bearer = client.get_bearer_token()
|
client.get_bearer_token()
|
||||||
|
from app import db
|
||||||
|
|
||||||
# List projects (all pages) with filter for projects updated in the last 7 days
|
if args.mode == 'full':
|
||||||
from datetime import datetime, timedelta
|
print("[MODE] Full sync - fetching all projects")
|
||||||
seven_days_ago = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')
|
projects = client.list_all_projects()
|
||||||
projects = client.list_all_projects(latest_activity_since=seven_days_ago)
|
|
||||||
|
|
||||||
#projects = [p for p in projects if (p.get("projectId") or {}).get("native") == 15914808]
|
elif args.mode == 'last_n':
|
||||||
#projects = projects[:10]
|
days_ago = (datetime.now() - timedelta(days=args.days)).strftime('%Y-%m-%d')
|
||||||
|
print(f"[MODE] Last {args.days} days - fetching active since {days_ago}")
|
||||||
|
projects = client.list_all_projects(latest_activity_since=days_ago)
|
||||||
|
|
||||||
|
elif args.mode == 'oldest_percent':
|
||||||
|
fraction = args.percent / 100.0
|
||||||
|
oldest_ids = get_oldest_unsynced_projects(db, fraction=fraction)
|
||||||
|
print(f"[MODE] Oldest {args.percent}% - fetching {len(oldest_ids)} projects")
|
||||||
|
|
||||||
|
all_projects = client.list_all_projects()
|
||||||
|
projects = [p for p in all_projects if p.get("projectId", {}).get("native") in set(oldest_ids)]
|
||||||
|
|
||||||
|
elif args.mode == 'single':
|
||||||
|
print(f"[MODE] Single project - fetching project {args.project_id}")
|
||||||
|
project_detail = client.fetch_project_detail(args.project_id)
|
||||||
|
projects = [project_detail] if project_detail else []
|
||||||
|
|
||||||
|
elif args.mode == 'hybrid':
|
||||||
|
print("[MODE] Hybrid - active + oldest")
|
||||||
|
|
||||||
|
days_ago = (datetime.now() - timedelta(days=args.days)).strftime('%Y-%m-%d')
|
||||||
|
active_projects = client.list_all_projects(latest_activity_since=days_ago)
|
||||||
|
active_ids = {p.get("projectId", {}).get("native") for p in active_projects}
|
||||||
|
print(f"[SYNC] {len(active_projects)} active since {days_ago}")
|
||||||
|
|
||||||
|
fraction = args.percent / 100.0
|
||||||
|
oldest_ids = get_oldest_unsynced_projects(db, fraction=fraction)
|
||||||
|
|
||||||
|
all_ids_to_sync = active_ids.union(set(oldest_ids))
|
||||||
|
print(f"[SYNC] {len(all_ids_to_sync)} total unique to sync")
|
||||||
|
|
||||||
|
all_projects = client.list_all_projects()
|
||||||
|
projects = [p for p in all_projects if p.get("projectId", {}).get("native") in all_ids_to_sync]
|
||||||
|
|
||||||
# Process projects in parallel
|
# Process projects in parallel
|
||||||
detailed_rows = process_projects_parallel(projects, client, 9)
|
detailed_rows = process_projects_parallel(projects, client, max_workers=10)
|
||||||
|
|
||||||
|
# Batch write all results to Firestore
|
||||||
|
documents = []
|
||||||
|
for row in detailed_rows:
|
||||||
|
if row.get('ProjectId'):
|
||||||
|
row['is_archived'] = (row.get('phase_name') == 'Archived')
|
||||||
|
documents.append((row.get('ProjectId'), row))
|
||||||
|
batch_write_to_firestore(db, "projects", documents)
|
||||||
|
|
||||||
print(f"Successfully synced {len(detailed_rows)} projects to Firestore")
|
print(f"[SYNC] Complete - {len(documents)} projects saved to Firestore")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error during sync: {e}")
|
print(f"Error during sync: {e}")
|
||||||
@@ -364,13 +459,5 @@ def main():
|
|||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
def sync_single(x):
|
|
||||||
client = FilevineClient()
|
|
||||||
z = process_project(0, 1, client.fetch_project_detail(x), client)
|
|
||||||
from pprint import pprint
|
|
||||||
|
|
||||||
#pprint(z)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
@@ -167,6 +167,9 @@
|
|||||||
<th style="background-color: rgb(89, 121, 142);" class="px-4 py-3 w-32 sticky top-0 z-[40]" :class="{'hidden': !isColumnVisible('Date Possession Recovered')}">Date Possession Recovered</th>
|
<th style="background-color: rgb(89, 121, 142);" class="px-4 py-3 w-32 sticky top-0 z-[40]" :class="{'hidden': !isColumnVisible('Date Possession Recovered')}">Date Possession Recovered</th>
|
||||||
<th style="background-color: rgb(89, 121, 142);" class="px-4 py-3 w-32 sticky top-0 z-[40]" :class="{'hidden': !isColumnVisible('Attorney\'s Fees')}">Attorney's Fees</th>
|
<th style="background-color: rgb(89, 121, 142);" class="px-4 py-3 w-32 sticky top-0 z-[40]" :class="{'hidden': !isColumnVisible('Attorney\'s Fees')}">Attorney's Fees</th>
|
||||||
<th style="background-color: rgb(89, 121, 142);" class="px-4 py-3 w-32 sticky top-0 z-[40]" :class="{'hidden': !isColumnVisible('Costs')}">Costs</th>
|
<th style="background-color: rgb(89, 121, 142);" class="px-4 py-3 w-32 sticky top-0 z-[40]" :class="{'hidden': !isColumnVisible('Costs')}">Costs</th>
|
||||||
|
{% if is_admin %}
|
||||||
|
<th style="background-color: rgb(89, 121, 142);" class="px-4 py-3 w-32 sticky top-0 z-[40]" :class="{'hidden': !isColumnVisible('Last Synced')}">Last Synced</th>
|
||||||
|
{% endif %}
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody class="bg-slate-100 divide-y divide-slate-300">
|
<tbody class="bg-slate-100 divide-y divide-slate-300">
|
||||||
@@ -542,6 +545,13 @@
|
|||||||
{{ r.costs }}
|
{{ r.costs }}
|
||||||
{% endcall %}
|
{% endcall %}
|
||||||
</td>
|
</td>
|
||||||
|
{% if is_admin %}
|
||||||
|
<td class="px-4 py-3 text-sm" :class="{'hidden': !isColumnVisible('Last Synced')}">
|
||||||
|
{% call expander() %}
|
||||||
|
{% if r.last_synced_at %}{{ r.last_synced_at.split('T')[0] }}{% endif %}
|
||||||
|
{% endcall %}
|
||||||
|
</td>
|
||||||
|
{% endif %}
|
||||||
</tr>
|
</tr>
|
||||||
{% else %}
|
{% else %}
|
||||||
<tr>
|
<tr>
|
||||||
@@ -610,7 +620,8 @@
|
|||||||
'Matter Gate or Entry Code',
|
'Matter Gate or Entry Code',
|
||||||
'Date Possession Recovered',
|
'Date Possession Recovered',
|
||||||
'Attorney\'s Fees',
|
'Attorney\'s Fees',
|
||||||
'Costs'
|
'Costs',
|
||||||
|
'Last Synced'
|
||||||
],
|
],
|
||||||
selectAll: true,
|
selectAll: true,
|
||||||
visibleColumns: [],
|
visibleColumns: [],
|
||||||
|
|||||||
Reference in New Issue
Block a user