#!/usr/bin/env python3
"""Analyze screen recordings using OpenRouter + Gemini Vision.

Sends the full video file directly to Gemini via OpenRouter for a
UX research-style analysis. Saves results as markdown in docs/research/.

Usage:
    uv run python -m app.analyze_videos                  # analyze all .mp4 in videos/
    uv run python -m app.analyze_videos videos/file.mp4  # single video
"""
|
|
|
|
import argparse
|
|
import base64
|
|
import json
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
from dotenv import load_dotenv
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Config
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Load environment configuration before any os.getenv() lookups below.
# NOTE(review): presumably this picks up a .env file from the working
# directory or a parent (the error text below mentions the repo root) — confirm.
load_dotenv()

# The API key is mandatory: without it the script prints setup instructions
# to stderr and terminates with exit status 1 as soon as this module is loaded.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
if not OPENROUTER_API_KEY:
    _missing_key_help = "".join(
        (
            "ERROR: OpenRouter API key not found.\n",
            " Put OPENROUTER_API_KEY=sk-... in .env (repo root) or set the env var.\n",
            " Get one at https://openrouter.ai/keys",
        )
    )
    print(_missing_key_help, file=sys.stderr)
    sys.exit(1)

# Root URL that request paths (e.g. "/chat/completions") are appended to.
OPENROUTER_BASE = "https://openrouter.ai/api/v1"

# Model used when none is chosen explicitly; OPENROUTER_MODEL overrides it.
# NOTE(review): the leading "~" in the fallback slug is unusual — confirm it
# is a valid OpenRouter model identifier.
DEFAULT_MODEL = os.getenv("OPENROUTER_MODEL", "~google/gemini-flash-latest")
|
|
|
|
UX_PROMPT = """\
|
|
Write extremely detailed step-by-step instructions for an entry-level intern
|
|
who must execute this workflow flawlessly on their first try. No shortcuts,
|
|
no assumptions, no "the user knows to…" phrases. Every action must be explicit.
|
|
|
|
Treat every click, hover, scroll, tab switch, and window interaction as a
|
|
mandatory instruction.
|
|
|
|
Your output MUST follow this exact structure:
|
|
|
|
## Step 1: [Phase name — e.g., "Prepare and gather documents"]
|
|
|
|
For each sub-step, include:
|
|
- **What the user sees** on screen at that moment (name the page, the visible fields,
|
|
any buttons, menus, or notifications)
|
|
- **Exactly what to do** (e.g. "Click the button labeled 'File and Serve' in the left sidebar"
|
|
— never just "click the file button")
|
|
- **What should appear next** so they know they did it right
|
|
- **Where to find the next target** if it's not immediately visible (scroll down, expand menu, etc.)
|
|
- **Exact text to look for or avoid** (button labels, field names, error messages)
|
|
|
|
If the user hesitates, clicks the wrong thing, backtracks, or encounters an error,
|
|
record it as a separate sub-step labeled:
|
|
- ⚠️ **Stumble:** [what went wrong]
|
|
- 🛑 **Fix:** [how they recovered]
|
|
- Or if it's a clear mistake you'd want the intern to avoid:
|
|
- ⚡ **Pitfall:** [what not to do and why]
|
|
|
|
If the user opens another application, switches tabs, or refers to an external
|
|
reference, note this as a context switch and explain exactly how they return.
|
|
|
|
After the full walkthrough, add:
|
|
|
|
## UX Issues Found (severity-ranked)
|
|
| Severity | Issue | Where it happens | Why it's confusing |
|
|
|----------|-------|------------------|--------------------|
|
|
|
|
## Suggested Improvements
|
|
1. [Actionable improvement]
|
|
2. [Actionable improvement]
|
|
3. [etc.]
|
|
"""
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def probe_duration(video_path: Path) -> float:
    """Return the length of ``video_path`` in seconds, as reported by ffprobe.

    Args:
        video_path: Location of the media file to inspect.

    Returns:
        The duration parsed from ffprobe's output, or ``0.0`` when ffprobe is
        not installed, exits with an error, or prints something that is not
        a number.
    """
    command = [
        "ffprobe", "-v", "error", "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1", str(video_path),
    ]
    try:
        # check=True raises CalledProcessError on a non-zero exit status;
        # ffprobe's own diagnostics are discarded.
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            check=True,
        )
        seconds_text = completed.stdout.decode().strip()
        return float(seconds_text)
    except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
        # Callers treat a non-positive duration as "could not probe".
        return 0.0
|
|
|
|
|
|
def read_video(video_path: Path) -> tuple[bytes, str]:
    """Load a video from disk and pair its raw bytes with a MIME type.

    The MIME type is derived from the file extension (case-insensitive).
    Extensions without a dedicated mapping fall back to ``video/<extension>``.

    Args:
        video_path: File to read.

    Returns:
        ``(file_contents, mime_type)``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    known_types = {
        "mp4": "video/mp4",
        "mov": "video/quicktime",
        "webm": "video/webm",
        "mkv": "video/x-matroska",
    }
    suffix = video_path.suffix.lstrip(".").lower()
    if suffix in known_types:
        content_type = known_types[suffix]
    else:
        content_type = f"video/{suffix}"
    return video_path.read_bytes(), content_type
|
|
|
|
|
|
def build_payload(video_path: Path, duration: float, model: "str | None" = None) -> dict:
    """Build the OpenRouter chat completion payload with a video attachment.

    The whole file is read into memory, base64-encoded and embedded as a
    ``data:`` URL next to the analysis prompt, so memory use grows with the
    size of the video.

    Args:
        video_path: Video file to attach.
        duration: Video length in seconds; appended to the prompt as ``XmYYs``.
        model: Model identifier to request. When omitted (``None``), the
            module-level ``DEFAULT_MODEL`` is used, which keeps existing
            two-argument callers working unchanged.

    Returns:
        A dict ready to be sent as the JSON body of a chat completion request.

    Raises:
        OSError: If the video file cannot be read.
    """
    video_data, mime = read_video(video_path)
    encoded = base64.b64encode(video_data).decode()

    duration_label = f"{int(duration//60)}m{int(duration%60):02}s"
    content = [
        {"type": "text", "text": f"{UX_PROMPT}\n\n(Duration: {duration_label})"},
        {
            "type": "video_url",
            "video_url": {
                "url": f"data:{mime};base64,{encoded}",
            },
        },
    ]

    return {
        # Resolve the fallback at call time so an explicit argument always wins.
        "model": DEFAULT_MODEL if model is None else model,
        "messages": [{"role": "user", "content": content}],
        "max_tokens": 8192,
        "temperature": 0.3,
    }
|
|
|
|
|
|
def call_openrouter(payload: dict) -> str:
    """Send request to OpenRouter and return the assistant's reply.

    Args:
        payload: JSON-serialisable chat completion body (see ``build_payload``).

    Returns:
        The text content of the first choice in the response.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status. The
            first 1000 characters of the response body are printed to stderr
            beforehand to aid debugging.
        ValueError: If the response carries no choices, or the first choice
            has no non-empty text content.
    """
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://github.com/notid/e-filing",
        "X-Title": "eFiling Video Analyzer",
    }

    # Generous timeout: the request body embeds the entire video.
    with httpx.Client(timeout=300.0) as client:
        resp = client.post(
            f"{OPENROUTER_BASE}/chat/completions",
            headers=headers,
            json=payload,
        )
        # Print full error for debugging
        if resp.status_code >= 400:
            print(f" API status {resp.status_code}: {resp.text[:1000]}", file=sys.stderr)
        resp.raise_for_status()
        data = resp.json()

    # "or []" also covers an explicit `"choices": null`.
    choices = data.get("choices") or []
    if not choices:
        raise ValueError(f"No choices in OpenRouter response: {json.dumps(data, indent=2)[:500]}")

    # A choice can lack text (missing "message" or null "content"); fail here
    # with a readable error rather than handing a non-string to the caller.
    message = choices[0].get("message") or {}
    content = message.get("content")
    if not isinstance(content, str) or not content.strip():
        raise ValueError(
            f"Empty message content in OpenRouter response: {json.dumps(data, indent=2)[:500]}"
        )
    return content
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Output
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def write_report(video_path: Path, analysis: str, model: str, duration: float) -> Path:
    """Write the analysis as a markdown file in docs/research/.

    The report lands in ``<parent of this file's directory>/docs/research/``
    (created on demand) under the name ``<sanitised video stem>_<YYYYMMDD>.md``,
    using the current UTC date. A report for the same video written on the
    same UTC day is overwritten.

    Args:
        video_path: The analysed video; only its name and stem are used.
        analysis: Markdown body returned by the model.
        model: Model identifier shown in the report header.
        duration: Video length in seconds, shown as ``Xm Ys``.

    Returns:
        Path of the file that was written.

    Raises:
        OSError: If the directory cannot be created or the file not written.
    """
    output_dir = Path(__file__).resolve().parent.parent / "docs" / "research"
    output_dir.mkdir(parents=True, exist_ok=True)

    # Read the clock once so the filename date and the header date always agree.
    now = datetime.now(timezone.utc)

    safe_name = re.sub(r"[^\w\s\-]", "", video_path.stem)
    if not safe_name.strip():
        # Every character was stripped; avoid a report called "_<date>.md".
        safe_name = "video"
    out_file = output_dir / f"{safe_name}_{now.strftime('%Y%m%d')}.md"

    dur_min = int(duration // 60)
    dur_sec = int(duration % 60)
    header = "\n".join(
        [
            f"# eFiling — UX Analysis: {video_path.name}",
            "",
            "| Field | Value |",
            "|-------|-------|",
            f"| **Source video** | `{video_path.name}` |",
            f"| **Duration** | {dur_min}m {dur_sec}s |",
            f"| **Analysis date** | {now.strftime('%Y-%m-%d %H:%M UTC')} |",
            f"| **Model** | {model} |",
            "",
            "---",
            "",
            "",
        ]
    )

    # Explicit UTF-8: the header contains an em dash and the analysis usually
    # contains emoji, which the platform default encoding may not support.
    out_file.write_text(header + analysis, encoding="utf-8")
    return out_file
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def main():
    """Command-line entry point: analyze each video and save a report.

    Videos come from the positional arguments or, when none are given, from
    the ``.mp4`` files found directly inside the ``videos/`` directory next to
    this package. Each video is probed, uploaded for analysis and written out
    as a markdown report. A failure on one video is reported on stderr and
    does not stop the remaining ones.

    Exit status:
        0 if every video was analyzed (or there was nothing to analyze);
        1 if no input could be located, or at least one video was skipped
        or failed.
    """
    parser = argparse.ArgumentParser(
        description="Analyze screen recordings with Gemini via OpenRouter",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "examples:\n"
            "# analyze one specific video\n"
            'python -m app.analyze_videos "videos/E-Filing in Filevine.mp4"\n'
            "\n"
            "# analyze all videos in videos/\n"
            "python -m app.analyze_videos\n"
        ),
    )
    parser.add_argument(
        "videos",
        nargs="*",
        default=[],
        help="Video files to analyze (defaults to all .mp4 in videos/)",
    )
    parser.add_argument(
        "--model",
        type=str,
        # DEFAULT_MODEL already honours the OPENROUTER_MODEL environment variable.
        default=DEFAULT_MODEL,
        help=f"OpenRouter model (default: {DEFAULT_MODEL})",
    )
    args = parser.parse_args()
    model = args.model

    videos_dir = Path(__file__).resolve().parent.parent / "videos"
    if args.videos:
        video_paths = [Path(v) for v in args.videos]
    elif videos_dir.is_dir():
        # Only regular .mp4 files, as the help text promises; stray files and
        # sub-directories in videos/ are ignored.
        video_paths = sorted(
            p for p in videos_dir.iterdir()
            if p.is_file() and p.suffix.lower() == ".mp4"
        )
    else:
        print("No videos found. Pass paths explicitly or put files in videos/", file=sys.stderr)
        sys.exit(1)

    if not video_paths:
        print("No video files to analyze.", file=sys.stderr)
        sys.exit(0)

    print(f"Analyzing {len(video_paths)} video(s) with model '{model}'...")
    print("Mode: full-video upload (no frame extraction)")
    print()

    failures = 0
    for i, vp in enumerate(video_paths, 1):
        print(f"[{i}/{len(video_paths)}] {vp.name}")

        if not vp.is_file():
            # Report a bad path as such instead of as a probing problem.
            print(f" SKIP — not a file: {vp}", file=sys.stderr)
            failures += 1
            continue

        duration = probe_duration(vp)
        if duration <= 0:
            print(" SKIP — could not determine duration", file=sys.stderr)
            failures += 1
            continue

        try:
            payload = build_payload(vp, duration)
            # Apply the CLI-selected model to this request directly rather
            # than rewriting the module-level default.
            payload["model"] = model
            analysis = call_openrouter(payload)
            out_file = write_report(vp, analysis, model, duration)
            print(f" ✅ Saved to {out_file}")
        except Exception as exc:
            # Top-level boundary: keep going with the remaining videos.
            print(f" ❌ Error: {exc}", file=sys.stderr)
            failures += 1
            continue

        print()

    print("Done.")
    if failures:
        # Surface partial failure to shells and CI.
        sys.exit(1)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script or via `python -m`,
# not when it is imported.
if __name__ == "__main__":
    main()
|