From 8d6f361ef7befd770d1a008afa41cb4e9d880a38 Mon Sep 17 00:00:00 2001
From: Bryce
Date: Wed, 13 May 2026 11:06:14 -0700
Subject: [PATCH] Add video analysis script, project setup, and docs/research

---
 .gitignore             |  18 +++
 app/analyze_videos.py  | 361 +++++++++++++++++++++++++++++++++++++++++
 app/pyproject.toml     |   9 ++
 docs/research/.gitkeep |   0
 4 files changed, 388 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 app/analyze_videos.py
 create mode 100644 app/pyproject.toml
 create mode 100644 docs/research/.gitkeep

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6026123
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,18 @@
+# Secrets
+.env
+.env.*
+
+# Python
+__pycache__/
+*.py[cod]
+*.egg-info/
+dist/
+build/
+
+# Video analysis frames (intermediate)
+.tmp_video_frames/
+
+# IDE
+.idea/
+.vscode/
+*.swp
diff --git a/app/analyze_videos.py b/app/analyze_videos.py
new file mode 100644
index 0000000..d4405de
--- /dev/null
+++ b/app/analyze_videos.py
@@ -0,0 +1,361 @@
+#!/usr/bin/env python3
+"""Analyze screen recordings using OpenRouter + Gemini Vision.
+
+Extracts key frames from videos and sends them to Gemini via OpenRouter,
+prompting for a UX research-style analysis. Saves results as markdown
+in docs/research/.
+
+Usage:
+    uv run python -m app.analyze_videos                   # analyze all .mp4 in videos/
+    uv run python -m app.analyze_videos videos/file.mp4   # single video
+    NUM_FRAMES=8 uv run python -m app.analyze_videos      # custom frame count
+"""
+
+import argparse
+import base64
+import json
+import mimetypes
+import os
+import re
+import subprocess
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+
+import httpx
+from dotenv import load_dotenv
+
+# ---------------------------------------------------------------------------
+# Config
+# ---------------------------------------------------------------------------
+
+load_dotenv()  # loads .env in repo root or parent directories
+
+# Validated in main() rather than at import time, so importing this module
+# (or running --help) never terminates the interpreter.
+OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
+
+OPENROUTER_BASE = "https://openrouter.ai/api/v1"
+# Gemini models available on OpenRouter:
+#   google/gemini-2.0-flash-exp:free        (free, good for testing)
+#   google/gemini-2.0-flash                 (fast, multimodal)
+#   google/gemini-2.5-flash-preview-05-20   (preview; used as the default)
+DEFAULT_MODEL = os.getenv("OPENROUTER_MODEL", "google/gemini-2.5-flash-preview-05-20")
+
+UX_PROMPT = """\
+Analyze this screen recording like a UX researcher.
+
+Track:
+- user goals
+- hesitation
+- repeated actions
+- likely confusion
+- unnecessary clicks
+- context switching
+- inefficient workflow patterns
+- UI discoverability issues
+- moments where expectations appear violated
+
+Output:
+1. overall workflow summary
+2. friction timeline
+3. inferred user intent
+4. UX issues ranked by severity
+5. suggested improvements
+
+Be specific about UI elements, button labels, menu paths, and exact behaviors
+you observe in the frames provided.
+"""
+
+# ---------------------------------------------------------------------------
+# Frame extraction
+# ---------------------------------------------------------------------------
+
+def _frame_timestamps(duration: float, num_frames: int) -> list[float]:
+    """Return evenly spaced sample times, in seconds, for a video.
+
+    A margin of 2% of the duration (at least one second) is skipped at both
+    ends to avoid black frames. The margin is capped at a quarter of the
+    duration so that clips shorter than two seconds still get increasing,
+    in-range timestamps. A single frame is sampled at the midpoint. Returns
+    an empty list when num_frames < 1 or duration <= 0.
+    """
+    if num_frames < 1 or duration <= 0:
+        return []
+    if num_frames == 1:
+        return [duration / 2]
+    margin = min(max(duration * 0.02, 1.0), duration / 4)
+    step = (duration - 2 * margin) / (num_frames - 1)
+    return [margin + i * step for i in range(num_frames)]
+
+
+def extract_frames(video_path: Path, num_frames: int = 6) -> list[dict]:
+    """Extract evenly-spaced key frames from a video using ffmpeg.
+
+    Args:
+        video_path: Video file to sample.
+        num_frames: Number of frames to request; values below 1 yield none.
+
+    Returns:
+        One {"path": Path} dict per JPEG written to .tmp_video_frames/
+        (relative to the current working directory). Empty when the file is
+        missing, cannot be probed, or no frame could be extracted.
+    """
+    if not video_path.exists():
+        print(f"SKIP — file not found: {video_path}", file=sys.stderr)
+        return []
+
+    if num_frames < 1:
+        print(f"SKIP — num_frames must be >= 1 (got {num_frames})", file=sys.stderr)
+        return []
+
+    tmp_dir = Path(".tmp_video_frames")
+    tmp_dir.mkdir(exist_ok=True)
+
+    # Estimate duration
+    try:
+        dur_output = subprocess.check_output(
+            [
+                "ffprobe",
+                "-v", "error",
+                "-show_entries", "format=duration",
+                "-of", "default=noprint_wrappers=1:nokey=1",
+                str(video_path),
+            ],
+            stderr=subprocess.DEVNULL,
+        ).decode().strip()
+        duration = float(dur_output)
+    except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
+        print(f"SKIP — could not probe video: {video_path}", file=sys.stderr)
+        return []
+
+    if duration <= 0:
+        print(f"SKIP — bad duration for: {video_path}", file=sys.stderr)
+        return []
+
+    images = []
+    for i, ts in enumerate(_frame_timestamps(duration, num_frames)):
+        out_path = tmp_dir / f"{video_path.stem}_frame_{i:03d}.jpg"
+        try:
+            subprocess.run(
+                [
+                    "ffmpeg",
+                    "-y",
+                    "-ss", f"{ts:.3f}",
+                    "-i", str(video_path),
+                    "-frames:v", "1",
+                    "-q:v", "2",  # good quality JPEG
+                    "-an",
+                    str(out_path),
+                ],
+                capture_output=True,
+                check=True,
+            )
+        except (subprocess.CalledProcessError, FileNotFoundError):
+            continue  # best effort: skip frames ffmpeg could not produce
+        if out_path.exists():
+            images.append({"path": out_path})
+
+    return images
+
+
+# ---------------------------------------------------------------------------
+# OpenRouter / Gemini API
+# ---------------------------------------------------------------------------
+
+def build_payload(images: list[dict], model: str | None = None) -> dict:
+    """Build the OpenRouter chat completion payload with image content.
+
+    Args:
+        images: Frame dicts as returned by extract_frames ({"path": ...}).
+        model: OpenRouter model id; falls back to DEFAULT_MODEL when omitted.
+
+    Returns:
+        The request body: UX_PROMPT followed by every frame embedded as a
+        base64 data URL.
+    """
+    content = [{"type": "text", "text": UX_PROMPT}]
+    for img in images:
+        path = Path(img["path"])
+        encoded = base64.b64encode(path.read_bytes()).decode("ascii")
+        # ".jpg" must be sent as "image/jpeg"; "image/jpg" is not a registered
+        # media type.
+        mime = mimetypes.guess_type(path.name)[0] or "image/jpeg"
+        content.append({
+            "type": "image_url",
+            "image_url": {
+                "url": f"data:{mime};base64,{encoded}",
+            },
+        })
+
+    return {
+        "model": model or DEFAULT_MODEL,
+        "messages": [{"role": "user", "content": content}],
+        "max_tokens": 8192,
+        "temperature": 0.3,
+    }
+
+
+def call_openrouter(payload: dict) -> str:
+    """Send request to OpenRouter and return the assistant's reply.
+
+    Raises:
+        httpx.HTTPStatusError: If OpenRouter answers with a 4xx/5xx status.
+        ValueError: If the response carries no choices or no reply text.
+    """
+    headers = {
+        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+        "Content-Type": "application/json",
+        # Optional: pass-through headers for attribution / tracking
+        "HTTP-Referer": "https://github.com/notid/e-filing",
+        "X-Title": "eFiling Video Analyzer",
+    }
+
+    with httpx.Client(timeout=120.0) as client:
+        resp = client.post(
+            f"{OPENROUTER_BASE}/chat/completions",
+            headers=headers,
+            json=payload,
+        )
+        resp.raise_for_status()
+        data = resp.json()
+
+    # Extract text from the response
+    choices = data.get("choices", [])
+    if not choices:
+        raise ValueError(f"No choices in OpenRouter response: {json.dumps(data, indent=2)[:500]}")
+    reply = (choices[0].get("message") or {}).get("content")
+    if not reply:
+        raise ValueError(f"Empty reply in OpenRouter response: {json.dumps(data, indent=2)[:500]}")
+    return reply
+
+
+# ---------------------------------------------------------------------------
+# Output
+# ---------------------------------------------------------------------------
+
+def write_report(video_path: Path, analysis: str, model: str, num_frames: int) -> Path:
+    """Write the analysis as a markdown file in docs/research/.
+
+    The file is named STEM_YYYYMMDD.md (sanitized video stem, UTC date), so
+    a second run on the same day overwrites the earlier report.
+    Returns the path of the written file.
+    """
+    output_dir = Path(__file__).resolve().parent.parent / "docs" / "research"
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    now = datetime.now(timezone.utc)  # one clock read for filename and header
+
+    # Sanitize filename
+    safe_name = re.sub(r"[^\w\s\-]", "", video_path.stem) or "video"
+    timestamp = now.strftime("%Y%m%d")
+    out_file = output_dir / f"{safe_name}_{timestamp}.md"
+
+    header = f"""\
+# eFiling — UX Analysis: {video_path.name}
+
+| Field | Value |
+|-------|-------|
+| **Source video** | `{video_path.name}` |
+| **Analysis date** | {now.strftime("%Y-%m-%d %H:%M UTC")} |
+| **Model** | {model} |
+| **Frames analyzed** | {num_frames} |
+
+---
+
+"""
+    # The header and the model output contain non-ASCII text, so do not rely
+    # on the platform's default encoding.
+    out_file.write_text(header + analysis, encoding="utf-8")
+    return out_file
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+def _require_api_key() -> None:
+    """Exit with a setup hint when no OpenRouter API key is configured."""
+    if OPENROUTER_API_KEY:
+        return
+    print(
+        "ERROR: OpenRouter API key not found.\n"
+        "  Put OPENROUTER_API_KEY=sk-... in .env (repo root) or set the env var.\n"
+        "  Get one at https://openrouter.ai/keys",
+        file=sys.stderr,
+    )
+    sys.exit(1)
+
+
+def main():
+    """Parse the command line, then analyze each video and write its report."""
+    parser = argparse.ArgumentParser(description="Analyze screen recordings with Gemini via OpenRouter")
+    parser.add_argument(
+        "videos",
+        nargs="*",
+        default=[],
+        help="Video files to analyze (defaults to all .mp4 in videos/)",
+    )
+    parser.add_argument(
+        "--num-frames",
+        type=int,
+        default=int(os.getenv("NUM_FRAMES", "6")),
+        help="Number of frames to extract per video (default: 6)",
+    )
+    parser.add_argument(
+        "--model",
+        type=str,
+        default=DEFAULT_MODEL,
+        help=f"OpenRouter model (default: {DEFAULT_MODEL})",
+    )
+    args = parser.parse_args()
+
+    if args.num_frames < 1:
+        parser.error("--num-frames must be at least 1")
+
+    _require_api_key()
+
+    # Resolve video paths
+    videos_dir = Path(__file__).resolve().parent.parent / "videos"
+    if args.videos:
+        video_paths = [Path(v) for v in args.videos]
+    elif videos_dir.exists():
+        video_paths = sorted(videos_dir.glob("*.mp4"))
+    else:
+        print("No videos found. Pass paths explicitly or put .mp4 files in videos/", file=sys.stderr)
+        sys.exit(1)
+
+    if not video_paths:
+        print("No .mp4 files to analyze.", file=sys.stderr)
+        sys.exit(0)
+
+    print(f"Analyzing {len(video_paths)} video(s) with model '{args.model}'...")
+    print()
+
+    for i, vp in enumerate(video_paths, 1):
+        print(f"[{i}/{len(video_paths)}] {vp.name}")
+
+        frames = extract_frames(vp, args.num_frames)
+        if not frames:
+            continue
+
+        print(f"  Extracted {len(frames)} frame(s)")
+
+        try:
+            # The model is passed explicitly; rebinding DEFAULT_MODEL with a
+            # `global` statement after it has been read is a SyntaxError.
+            payload = build_payload(frames, args.model)
+            analysis = call_openrouter(payload)
+            out_file = write_report(vp, analysis, args.model, len(frames))
+            print(f"  ✅ Saved to {out_file}")
+        except Exception as exc:
+            print(f"  ❌ Error: {exc}", file=sys.stderr)
+            continue
+
+        print()
+
+    print("Done.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/app/pyproject.toml b/app/pyproject.toml
new file mode 100644
index 0000000..d66a565
--- /dev/null
+++ b/app/pyproject.toml
@@ -0,0 +1,9 @@
+[project]
+name = "efiling"
+version = "0.1.0"
+description = "Automate the e-filing workflow"
+requires-python = ">=3.11"
+dependencies = [
+    "httpx>=0.27",
+    "python-dotenv>=1.0",
+]
diff --git a/docs/research/.gitkeep b/docs/research/.gitkeep
new file mode 100644
index 0000000..e69de29