diff --git a/app.py b/app.py index e467d72..c5fc565 100644 --- a/app.py +++ b/app.py @@ -6,11 +6,12 @@ import pytz from flask import Flask, render_template, request, redirect, url_for, session, abort, jsonify from dotenv import load_dotenv +load_dotenv() import firebase_admin from firebase_admin import credentials, auth as fb_auth, firestore import requests +from filevine_client import FilevineClient -load_dotenv() app = Flask(__name__) app.secret_key = os.environ.get("FLASK_SECRET_KEY", os.urandom(32)) @@ -126,18 +127,19 @@ def fetch_all_projects(): """Fetch all projects for a user and store them in Firestore""" print("Fetching projects....") - # Get bearer token - bearer = get_filevine_bearer() + # Initialize Filevine client + client = FilevineClient() + bearer = client.get_bearer_token() # List projects (all pages) - projects = list_all_projects(bearer) + projects = client.list_all_projects() projects = projects[:] # Fetch details for each detailed_rows = [] import worker_pool - detailed_rows = worker_pool.process_projects_parallel(projects, bearer, 9) + detailed_rows = worker_pool.process_projects_parallel(projects, client, 9) # Store the results in Firestore projects_ref = db.collection("projects") @@ -208,160 +210,7 @@ def welcome(): # --- Filevine API --- - -def get_filevine_bearer(): - url = "https://identity.filevine.com/connect/token" - data = { - "client_id": FV_CLIENT_ID, - "client_secret": FV_CLIENT_SECRET, - "grant_type": "personal_access_token", - "scope": "fv.api.gateway.access tenant filevine.v2.api.* email openid fv.auth.tenant.read", - "token": FV_PAT, - } - headers = {"Accept": "application/json"} - resp = requests.post(url, data=data, headers=headers, timeout=30) - resp.raise_for_status() - js = resp.json() - token = js.get("access_token") - print(f"Got bearer js", js) - return token - - -def list_all_projects(bearer: str): - base = "https://api.filevineapp.com/fv-app/v2/Projects?limit=500" - headers = { - "Accept": "application/json", - 
"Authorization": f"Bearer {bearer}", - "x-fv-orgid": str(FV_ORG_ID), - "x-fv-userid": str(FV_USER_ID), - } - results = [] - last_count = None - tries = 0 - offset = 0 - # TODO we probably need to sync the data with fierbase - cnt = 0 - while True: - cnt = len(results) - print(f"list try {tries}, starting at {offset}, previous count {last_count}, currently at {cnt}") - tries += 1 - url = base - params = {} - if last_count is not None: - # Some deployments use LastID/Offset pagination; adapt if needed - offset = offset + last_count - params["offset"] = offset - r = requests.get(url, headers=headers, params=params, timeout=30) - r.raise_for_status() - page = r.json() - from pprint import pprint - print(f"Fetched page. Headers: {r.headers}, Offset: {offset}") - items = page.get("items", []) - results.extend(items) - has_more = page.get("hasMore") - last_count = page.get("count") - if not has_more: - break - # Safety valve - if tries > 200: - break - return results - - -def fetch_project_detail(bearer: str, project_id_native: int): - url = f"https://api.filevineapp.com/fv-app/v2/Projects/{project_id_native}" - headers = { - "Accept": "application/json", - "Authorization": f"Bearer {bearer}", - "x-fv-orgid": str(FV_ORG_ID), - "x-fv-userid": str(FV_USER_ID), - } - r = requests.get(url, headers=headers, timeout=30) - r.raise_for_status() - return r.json() - -def fetch_project_team(bearer: str, project_id_native: int): - url = f"https://api.filevineapp.com/fv-app/v2/Projects/{project_id_native}/team?limit=1000" - headers = { - "Accept": "application/json", - "Authorization": f"Bearer {bearer}", - "x-fv-orgid": str(FV_ORG_ID), - "x-fv-userid": str(FV_USER_ID), - } - r = requests.get(url, headers=headers, timeout=30) - r.raise_for_status() - from pprint import pprint - return r.json().get('items') or [] - -def fetch_project_tasks(bearer: str, project_id_native: int): - url = f"https://api.filevineapp.com/fv-app/v2/Projects/{project_id_native}/tasks" - headers = { - "Accept": 
"application/json", - "Authorization": f"Bearer {bearer}", - "x-fv-orgid": str(FV_ORG_ID), - "x-fv-userid": str(FV_USER_ID), - } - r = requests.get(url, headers=headers, timeout=30) - r.raise_for_status() - return r.json() - - -def fetch_client(bearer: str, client_id_native: int): - url = f"https://api.filevineapp.com/fv-app/v2/contacts/{client_id_native}" - headers = { - "Accept": "application/json", - "Authorization": f"Bearer {bearer}", - "x-fv-orgid": str(FV_ORG_ID), - "x-fv-userid": str(FV_USER_ID), - } - r = requests.get(url, headers=headers, timeout=30) - r.raise_for_status() - return r.json() - -def fetch_contacts(bearer: str, project_id_native: int): - url = f"https://api.filevineapp.com/fv-app/v2/projects/{project_id_native}/contacts" - headers = { - "Accept": "application/json", - "Authorization": f"Bearer {bearer}", - "x-fv-orgid": str(FV_ORG_ID), - "x-fv-userid": str(FV_USER_ID), - } - r = requests.get(url, headers=headers, timeout=30) - r.raise_for_status() - return r.json().get("items") - - -def fetch_form(bearer: str, project_id_native: int, form: str): - try: - url = f"https://api.filevineapp.com/fv-app/v2/Projects/{project_id_native}/Forms/{form}" - headers = { - "Accept": "application/json", - "Authorization": f"Bearer {bearer}", - "x-fv-orgid": str(FV_ORG_ID), - "x-fv-userid": str(FV_USER_ID), - } - r = requests.get(url, headers=headers, timeout=30) - r.raise_for_status() - return r.json() - except Exception as e: - print(e) - return {} - -def fetch_collection(bearer: str, project_id_native: int, collection: str): - try: - url = f"https://api.filevineapp.com/fv-app/v2/Projects/{project_id_native}/Collections/{collection}" - headers = { - "Accept": "application/json", - "Authorization": f"Bearer {bearer}", - "x-fv-orgid": str(FV_ORG_ID), - "x-fv-userid": str(FV_USER_ID), - } - r = requests.get(url, headers=headers, timeout=30) - r.raise_for_status() - return [x.get('dataObject') for x in r.json().get("items")] - except Exception as e: - print(e) 
class FilevineClient:
    """Small HTTP client for the Filevine v2 REST API.

    The client keeps one header dict (``self.headers``) that is sent with
    every API request. The ``Authorization`` entry is only present once a
    bearer token is known, either passed to the constructor or obtained via
    :meth:`get_bearer_token`.
    """

    # Upper bound on page requests issued by list_all_projects; guards
    # against a server that keeps reporting hasMore forever.
    _MAX_PAGES = 201

    def __init__(self, bearer_token: Optional[str] = None):
        """Create a client, optionally with an already-issued bearer token.

        Args:
            bearer_token: Existing access token. When omitted, call
                :meth:`get_bearer_token` before any API method.
        """
        self.bearer_token = bearer_token
        self.base_url = "https://api.filevineapp.com/fv-app/v2"
        self.headers = {
            "Accept": "application/json",
            "x-fv-orgid": str(FV_ORG_ID),
            "x-fv-userid": str(FV_USER_ID),
        }
        # Do not send the literal string "Bearer None" when no token exists.
        if bearer_token:
            self.headers["Authorization"] = f"Bearer {bearer_token}"

    def _get_json(self, path: str, params: Optional[Dict[str, Any]] = None) -> Any:
        """GET ``base_url + path`` with the client headers and decode the JSON body.

        Raises:
            requests.HTTPError: if the response status is an error
                (via ``raise_for_status``).
        """
        r = requests.get(
            f"{self.base_url}{path}",
            headers=self.headers,
            params=params,
            timeout=30,
        )
        r.raise_for_status()
        return r.json()

    def get_bearer_token(self) -> str:
        """Request a new bearer token and install it on this client.

        Posts the configured client id/secret and personal access token to
        the Filevine identity endpoint, stores the returned ``access_token``
        on ``self.bearer_token`` and in ``self.headers``.

        Returns:
            The access token string.

        Raises:
            requests.HTTPError: if the identity endpoint returns an error status.
            RuntimeError: if the response carries no ``access_token``.
        """
        url = "https://identity.filevine.com/connect/token"
        data = {
            "client_id": FV_CLIENT_ID,
            "client_secret": FV_CLIENT_SECRET,
            "grant_type": "personal_access_token",
            "scope": "fv.api.gateway.access tenant filevine.v2.api.* email openid fv.auth.tenant.read",
            "token": FV_PAT,
        }
        # NOTE: neither the request payload nor the response is printed:
        # they contain the client secret, the PAT and the issued token.
        resp = requests.post(
            url, data=data, headers={"Accept": "application/json"}, timeout=30
        )
        resp.raise_for_status()
        token = resp.json().get("access_token")
        if not token:
            # Fail here with a clear message instead of sending
            # "Bearer None" on every later request.
            raise RuntimeError(
                "Filevine identity response did not include an access_token"
            )
        self.bearer_token = token
        self.headers["Authorization"] = f"Bearer {token}"
        return token

    def list_all_projects(self) -> List[Dict[str, Any]]:
        """Fetch every project, following offset pagination.

        Pages of up to 500 items are requested until the response's
        ``hasMore`` flag is falsy, a page comes back empty, or
        ``_MAX_PAGES`` requests have been made.

        Returns:
            The concatenated ``items`` of all fetched pages.

        Raises:
            requests.HTTPError: if any page request returns an error status.
        """
        results: List[Dict[str, Any]] = []
        offset = 0
        last_count = None
        for tries in range(self._MAX_PAGES):
            print(f"list try {tries}, starting at {offset}, previous count {last_count}, currently at {len(results)}")
            params: Dict[str, Any] = {"limit": 500}
            if offset:
                params["offset"] = offset
            page = self._get_json("/Projects", params=params)
            items = page.get("items") or []
            results.extend(items)
            # An empty page cannot advance the offset; stop rather than
            # requesting the same page again.
            if not page.get("hasMore") or not items:
                break
            # Fall back to the number of items received when the server
            # omits "count", so the offset always moves forward.
            last_count = page.get("count") or len(items)
            offset += last_count
        return results

    def fetch_project_detail(self, project_id_native: int) -> Dict[str, Any]:
        """Fetch the detail record of one project.

        Raises:
            requests.HTTPError: on an error status.
        """
        return self._get_json(f"/Projects/{project_id_native}")

    def fetch_project_team(self, project_id_native: int) -> List[Dict[str, Any]]:
        """Fetch the team of a project (up to 1000 entries).

        Returns:
            The response's ``items`` list, or ``[]`` when it is missing/empty.

        Raises:
            requests.HTTPError: on an error status.
        """
        team = self._get_json(
            f"/Projects/{project_id_native}/team", params={"limit": 1000}
        )
        return team.get("items") or []

    def fetch_project_tasks(self, project_id_native: int) -> Dict[str, Any]:
        """Fetch the tasks response of a project (the full JSON body).

        Raises:
            requests.HTTPError: on an error status.
        """
        return self._get_json(f"/Projects/{project_id_native}/tasks")

    def fetch_client(self, client_id_native: int) -> Dict[str, Any]:
        """Fetch a contact record by its id from the ``/contacts`` endpoint.

        Raises:
            requests.HTTPError: on an error status.
        """
        return self._get_json(f"/contacts/{client_id_native}")

    def fetch_contacts(self, project_id_native: int) -> List[Dict[str, Any]]:
        """Fetch the contacts attached to a project.

        Returns:
            The response's ``items`` list, or ``[]`` when it is missing, so
            callers can iterate the result without a ``None`` check.

        Raises:
            requests.HTTPError: on an error status.
        """
        contacts = self._get_json(f"/projects/{project_id_native}/contacts")
        return contacts.get("items") or []

    def fetch_form(self, project_id_native: int, form: str) -> Dict[str, Any]:
        """Fetch one form of a project, best-effort.

        Returns:
            The decoded JSON body, or ``{}`` if the request or decoding fails.
        """
        try:
            return self._get_json(f"/Projects/{project_id_native}/Forms/{form}")
        except Exception as e:
            # Deliberately best-effort: a failing form must not abort the
            # processing of the whole project.
            print(f"[WARN] form fetch failed for {project_id_native}/{form}: {e}")
            return {}

    def fetch_collection(self, project_id_native: int, collection: str) -> List[Dict[str, Any]]:
        """Fetch one collection of a project, best-effort.

        Returns:
            The ``dataObject`` of every item in the response, or ``[]`` if
            the request fails or the response has no items.
        """
        try:
            body = self._get_json(
                f"/Projects/{project_id_native}/Collections/{collection}"
            )
            return [x.get("dataObject") for x in body.get("items") or []]
        except Exception as e:
            # Deliberately best-effort; return an empty list (not a dict) so
            # the result always matches the declared return type.
            print(f"[WARN] collection fetch failed for {project_id_native}/{collection}: {e}")
            return []
'FilevineClient') -> dict: """ Process a single project with all its API calls. This is the function that will be executed by workers in parallel. """ - # Set the bearer token for this thread - set_bearer_token(bearer_token) + # Set the FilevineClient for this thread + set_filevine_client(client) - from app import ( - fetch_client, - fetch_contacts, - fetch_project_detail, - fetch_form, - fetch_collection, - fetch_project_tasks, - fetch_project_team, - convert_to_pacific_time - ) + from app import convert_to_pacific_time p = project_data pid = (p.get("projectId") or {}).get("native") print(f"Working on {pid} ({index}/{total})") - c = fetch_client(bearer_token, (p.get("clientId") or {}).get("native")) - cs = fetch_contacts(bearer_token, pid) + client = get_filevine_client() + c = client.fetch_client((p.get("clientId") or {}).get("native")) + cs = client.fetch_contacts(pid) if pid is None: return {} try: - detail = fetch_project_detail(bearer_token, pid) + detail = client.fetch_project_detail(pid) except Exception as e: print(f"[WARN] detail fetch failed for {pid}: {e}") detail = {} defendant_one = next((c.get('orgContact', {}) for c in cs if "Defendant" in c.get('orgContact', {}).get('personTypes', [])), {}) - new_file_review = fetch_form(bearer_token, pid, "newFileReview") or {} - dates_and_deadlines = fetch_form(bearer_token, pid, "datesAndDeadlines") or {} - service_info = fetch_collection(bearer_token, pid, "serviceInfo") or [] - property_info = fetch_form(bearer_token, pid, "propertyInfo") - matter_overview = fetch_form(bearer_token, pid, "matterOverview") - fees_and_costs = fetch_form(bearer_token, pid, "feesAndCosts") or {} - property_contacts = fetch_form(bearer_token, pid, "propertyContacts") or {} - lease_info_np = fetch_form(bearer_token, pid, "leaseInfoNP") or {} + new_file_review = client.fetch_form(pid, "newFileReview") or {} + dates_and_deadlines = client.fetch_form(pid, "datesAndDeadlines") or {} + service_info = client.fetch_collection(pid, 
"serviceInfo") or [] + property_info = client.fetch_form(pid, "propertyInfo") + matter_overview = client.fetch_form(pid, "matterOverview") + fees_and_costs = client.fetch_form(pid, "feesAndCosts") or {} + property_contacts = client.fetch_form(pid, "propertyContacts") or {} + lease_info_np = client.fetch_form(pid, "leaseInfoNP") or {} completed_tasks = [{"description": x.get("body"), "completed": convert_to_pacific_time(x.get("completedDate"))} - for x in fetch_project_tasks(bearer_token, pid).get("items") + for x in client.fetch_project_tasks(pid).get("items") if x.get("isCompleted")] pending_tasks = [{"description": x.get("body"), "completed": convert_to_pacific_time(x.get("completedDate"))} - for x in fetch_project_tasks(bearer_token, pid).get("items") + for x in client.fetch_project_tasks(pid).get("items") if not x.get("isCompleted")] - team = fetch_project_team(bearer_token, pid) + team = client.fetch_project_team(pid) assigned_attorney = next((m.get('fullname') for m in team if ('Assigned Attorney' in [r.get('name') for r in m.get('teamOrgRoles')]) @@ -202,13 +194,13 @@ def process_project(index: int, total: int, project_data: dict, bearer_token: st return row -def process_projects_parallel(projects: List[dict], bearer_token: str, max_workers: int = 9) -> List[dict]: +def process_projects_parallel(projects: List[dict], client: 'FilevineClient', max_workers: int = 9) -> List[dict]: """ Process projects in parallel using a worker pool. 
Args: projects: List of project data dictionaries - bearer_token: Filevine API bearer token + client: FilevineClient instance max_workers: Number of concurrent workers (default 20) Returns: @@ -216,9 +208,9 @@ def process_projects_parallel(projects: List[dict], bearer_token: str, max_worke """ # Create a thread pool with specified number of workers total = len(projects) - with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers, initializer=worker_init, initargs=(bearer_token,)) as executor: + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers, initializer=worker_init, initargs=(client,)) as executor: # Submit all tasks to the executor - future_to_project = {executor.submit(process_project, indx, total, project, bearer_token): project for indx, project in enumerate(projects)} + future_to_project = {executor.submit(process_project, indx, total, project, client): project for indx, project in enumerate(projects)} # Collect results as they complete results = []