Files
e-filing/app/analyze_videos.py

275 lines
9.0 KiB
Python

#!/usr/bin/env python3
"""Analyze screen recordings using OpenRouter + Gemini Vision.

Sends each full video file directly to Gemini via OpenRouter for a
UX research-style analysis and saves the result as markdown in docs/research/.

Usage:
    uv run python -m app.analyze_videos                  # analyze every file in videos/
    uv run python -m app.analyze_videos videos/file.mp4  # single video
"""
import argparse
import base64
import json
import os
import re
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
import httpx
from dotenv import load_dotenv
# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
# Populate the environment from .env before any of the lookups below.
load_dotenv()

# The API key is mandatory: without it the module prints setup instructions
# to stderr and terminates the process with status 1.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
if not OPENROUTER_API_KEY:
    print(
        "ERROR: OpenRouter API key not found.\n"
        " Put OPENROUTER_API_KEY=sk-... in .env (repo root) or set the env var.\n"
        " Get one at https://openrouter.ai/keys",
        file=sys.stderr,
    )
    raise SystemExit(1)

# Root URL that the chat-completions endpoint path is appended to.
OPENROUTER_BASE = "https://openrouter.ai/api/v1"

# Model slug used for requests; the OPENROUTER_MODEL env var overrides the fallback.
DEFAULT_MODEL = os.environ.get("OPENROUTER_MODEL", "~google/gemini-flash-latest")
# Prompt sent as the text part of every request. It asks the model for an
# intern-level, click-by-click walkthrough of the recording (including
# stumbles, fixes and pitfalls), followed by a severity-ranked UX issue table
# and a numbered list of suggested improvements.
UX_PROMPT = """\
Write extremely detailed step-by-step instructions for an entry-level intern
who must execute this workflow flawlessly on their first try. No shortcuts,
no assumptions, no "the user knows to…" phrases. Every action must be explicit.
Treat every click, hover, scroll, tab switch, and window interaction as a
mandatory instruction.
Your output MUST follow this exact structure:
## Step 1: [Phase name — e.g., "Prepare and gather documents"]
For each sub-step, include:
- **What the user sees** on screen at that moment (name the page, the visible fields,
any buttons, menus, or notifications)
- **Exactly what to do** (e.g. "Click the button labeled 'File and Serve' in the left sidebar"
— never just "click the file button")
- **What should appear next** so they know they did it right
- **Where to find the next target** if it's not immediately visible (scroll down, expand menu, etc.)
- **Exact text to look for or avoid** (button labels, field names, error messages)
If the user hesitates, clicks the wrong thing, backtracks, or encounters an error,
record it as a separate sub-step labeled:
- ⚠️ **Stumble:** [what went wrong]
- 🛑 **Fix:** [how they recovered]
- Or if it's a clear mistake you'd want the intern to avoid:
- ⚡ **Pitfall:** [what not to do and why]
If the user opens another application, switches tabs, or refers to an external
reference, note this as a context switch and explain exactly how they return.
After the full walkthrough, add:
## UX Issues Found (severity-ranked)
| Severity | Issue | Where it happens | Why it's confusing |
|----------|-------|------------------|--------------------|
## Suggested Improvements
1. [Actionable improvement]
2. [Actionable improvement]
3. [etc.]
"""
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def probe_duration(video_path: Path) -> float:
    """Return the duration of *video_path* in seconds, or 0.0 if it cannot be probed.

    Runs ``ffprobe`` and parses the ``format=duration`` value it prints.

    Args:
        video_path: Path of the media file to inspect.

    Returns:
        The duration reported by ffprobe, or ``0.0`` when ffprobe cannot be
        started, exits with an error, or prints something that is not a number.
    """
    command = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        str(video_path),
    ]
    try:
        raw = subprocess.check_output(command, stderr=subprocess.DEVNULL)
        return float(raw.decode().strip())
    except (subprocess.CalledProcessError, OSError, ValueError):
        # OSError rather than just FileNotFoundError: an ffprobe binary that
        # exists but cannot be executed (PermissionError, etc.) must also be
        # reported as "no duration" instead of aborting the whole batch.
        return 0.0
def read_video(video_path: Path) -> tuple[bytes, str]:
    """Load a video from disk and derive its MIME type from the file extension.

    Returns:
        A ``(raw_bytes, mime_type)`` pair. Extensions missing from the lookup
        table fall back to ``video/<extension>``.
    """
    known_types = {
        "mp4": "video/mp4",
        "mov": "video/quicktime",
        "webm": "video/webm",
        "mkv": "video/x-matroska",
    }
    # Extension without the leading dot, case-insensitive.
    extension = video_path.suffix.lstrip(".").lower()
    if extension in known_types:
        mime_type = known_types[extension]
    else:
        mime_type = f"video/{extension}"
    return video_path.read_bytes(), mime_type
def build_payload(video_path: Path, duration: float) -> dict:
    """Assemble the chat-completion request body for one video.

    The video is embedded inline as a base64 ``data:`` URL next to the UX
    prompt, and the prompt is suffixed with the clip length.
    """
    raw_bytes, mime_type = read_video(video_path)
    data_url = f"data:{mime_type};base64,{base64.b64encode(raw_bytes).decode()}"

    minutes, seconds = divmod(duration, 60)
    prompt_part = {
        "type": "text",
        "text": f"{UX_PROMPT}\n\n(Duration: {int(minutes)}m{int(seconds):02}s)",
    }
    video_part = {"type": "video_url", "video_url": {"url": data_url}}
    user_message = {"role": "user", "content": [prompt_part, video_part]}

    return {
        "model": DEFAULT_MODEL,
        "messages": [user_message],
        "max_tokens": 8192,
        "temperature": 0.3,
    }
def call_openrouter(payload: dict) -> str:
    """Send *payload* to the OpenRouter chat-completions endpoint and return the reply text.

    Args:
        payload: Request body as produced by ``build_payload``.

    Returns:
        The non-empty text content of the first choice.

    Raises:
        httpx.HTTPStatusError: The API answered with a 4xx/5xx status (the
            start of the response body is printed to stderr first).
        ValueError: The response has no choices, or the first choice carries
            no text content.
    """
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://github.com/notid/e-filing",
        "X-Title": "eFiling Video Analyzer",
    }
    with httpx.Client(timeout=300.0) as client:
        resp = client.post(
            f"{OPENROUTER_BASE}/chat/completions",
            headers=headers,
            json=payload,
        )
        # Print full error for debugging
        if resp.status_code >= 400:
            print(f" API status {resp.status_code}: {resp.text[:1000]}", file=sys.stderr)
        resp.raise_for_status()
        data = resp.json()

    choices = data.get("choices") if isinstance(data, dict) else None
    if not choices:
        raise ValueError(f"No choices in OpenRouter response: {json.dumps(data, indent=2)[:500]}")

    first = choices[0]
    message = first.get("message") if isinstance(first, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    # A null or blank reply would otherwise travel on as "analysis" and either
    # crash the report writer or produce a report with nothing in it.
    if not isinstance(content, str) or not content.strip():
        raise ValueError(
            f"OpenRouter reply has no text content: {json.dumps(first, indent=2)[:500]}"
        )
    return content
# ---------------------------------------------------------------------------
# Output
# ---------------------------------------------------------------------------
def write_report(video_path: Path, analysis: str, model: str, duration: float) -> Path:
    """Write the analysis as a markdown report in docs/research/ and return its path.

    The report is named ``<sanitised video stem>_<YYYYMMDD>.md`` (UTC date) and
    starts with a small metadata table followed by the analysis text.

    Args:
        video_path: The analysed video; only its name and stem are used.
        analysis: Markdown text returned by the model.
        model: Model identifier to record in the metadata table.
        duration: Video length in seconds.

    Returns:
        Path of the file that was written.
    """
    output_dir = Path(__file__).resolve().parent.parent / "docs" / "research"
    output_dir.mkdir(parents=True, exist_ok=True)

    # One clock reading feeds both the filename and the header, so the two
    # cannot disagree when a run straddles midnight UTC.
    now = datetime.now(timezone.utc)
    safe_name = re.sub(r"[^\w\s\-]", "", video_path.stem)
    date_stamp = now.strftime("%Y%m%d")
    out_file = output_dir / f"{safe_name}_{date_stamp}.md"

    dur_min = int(duration // 60)
    dur_sec = int(duration % 60)
    analysis_date = now.strftime("%Y-%m-%d %H:%M UTC")

    # Blank lines separate heading, table and rule so that the "---" line is
    # read as a horizontal rule rather than as part of the table above it.
    header = "\n".join([
        f"# eFiling — UX Analysis: {video_path.name}",
        "",
        "| Field | Value |",
        "|-------|-------|",
        f"| **Source video** | `{video_path.name}` |",
        f"| **Duration** | {dur_min}m {dur_sec}s |",
        f"| **Analysis date** | {analysis_date} |",
        f"| **Model** | {model} |",
        "",
        "---",
        "",
        "",
    ])

    # Explicit UTF-8: header and analysis contain non-ASCII characters (em
    # dash, emoji) that a platform-default legacy encoding cannot represent.
    out_file.write_text(header + analysis, encoding="utf-8")
    return out_file
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
    """Command-line entry point: analyze the requested videos one by one.

    Videos come from the positional arguments or, when none are given, from
    every file in the ``videos/`` directory next to the package. Each video is
    probed, sent to the model and written out as a report; a failure on one
    video does not stop the remaining ones.

    Exits with status 1 when no video source exists or when at least one video
    failed to be analyzed, and with status 0 when there is nothing to do.
    """
    # Declared up front because DEFAULT_MODEL is read below (argparse default
    # and help text) before it is rebound to the command-line choice.
    global DEFAULT_MODEL

    parser = argparse.ArgumentParser(
        description="Analyze screen recordings with Gemini via OpenRouter",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""\
examples:
# analyze one specific video
python -m app.analyze_videos "videos/E-Filing in Filevine.mp4"
# analyze all videos in videos/
python -m app.analyze_videos
""",
    )
    parser.add_argument(
        "videos",
        nargs="*",
        default=[],
        help="Video files to analyze (defaults to every file in videos/)",
    )
    parser.add_argument(
        "--model",
        type=str,
        default=os.getenv("OPENROUTER_MODEL", DEFAULT_MODEL),
        help=f"OpenRouter model (default: {DEFAULT_MODEL})",
    )
    args = parser.parse_args()

    videos_dir = Path(__file__).resolve().parent.parent / "videos"
    if args.videos:
        video_paths = [Path(v) for v in args.videos]
    elif videos_dir.exists():
        # Sub-directories can never be analyzed, so leave them out up front.
        video_paths = sorted(p for p in videos_dir.glob("*") if p.is_file())
    else:
        print("No videos found. Pass paths explicitly or put files in videos/", file=sys.stderr)
        sys.exit(1)

    if not video_paths:
        print("No video files to analyze.", file=sys.stderr)
        sys.exit(0)

    # build_payload reads the module-level DEFAULT_MODEL, so rebind it to the
    # model selected on the command line.
    DEFAULT_MODEL = args.model

    total = len(video_paths)
    print(f"Analyzing {total} video(s) with model '{args.model}'...")
    print("Mode: full-video upload (no frame extraction)")
    print()

    failures = 0
    for i, vp in enumerate(video_paths, 1):
        print(f"[{i}/{total}] {vp.name}")

        # Report a mistyped path as such instead of the misleading
        # "could not determine duration" message.
        if not vp.is_file():
            print(f" ❌ Error: file not found: {vp}", file=sys.stderr)
            failures += 1
            continue

        duration = probe_duration(vp)
        if duration <= 0:
            print(" SKIP — could not determine duration", file=sys.stderr)
            continue

        try:
            payload = build_payload(vp, duration)
            analysis = call_openrouter(payload)
            out_file = write_report(vp, analysis, args.model, duration)
        except Exception as exc:
            # Keep going: one bad video must not abort the rest of the batch.
            print(f" ❌ Error: {exc}", file=sys.stderr)
            failures += 1
            continue

        print(f" ✅ Saved to {out_file}")
        print()

    print("Done.")
    if failures:
        # Surface failures in the exit status so scripts and CI can detect them.
        print(f"{failures} of {total} video(s) could not be analyzed.", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()