Use OpenRouter for model access and load the API key from the environment (.env)

This commit is contained in:
2026-05-27 09:30:18 -07:00
parent 3d9f82f1ea
commit f6f3296b0a
4 changed files with 40 additions and 13 deletions

1
.gitignore vendored
View File

@@ -5,3 +5,4 @@ layer
*.zip *.zip
.py_cache .py_cache
.venv .venv
.env

View File

@@ -2,11 +2,21 @@
import base64 import base64
import json import json
import os import os
import re
import urllib.request import urllib.request
from dotenv import load_dotenv
_env_path = os.path.join(os.path.dirname(__file__), '.env')
if os.path.exists(_env_path):
load_dotenv(_env_path)
import openai import openai
client = openai.OpenAI(api_key="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") client = openai.OpenAI(
api_key=os.environ["OPENROUTER_API_KEY"],
base_url="https://openrouter.ai/api/v1",
)
def slurp_file(filename): def slurp_file(filename):
@@ -36,7 +46,7 @@ IMPORTANT:
- Do NOT skip entries because some fields are missing. Extract what you can. - Do NOT skip entries because some fields are missing. Extract what you can.
- For statements/summaries, each row in an invoice table is a separate invoice entry. - For statements/summaries, each row in an invoice table is a separate invoice entry.
- If OCR fails completely and no text can be extracted at all, return an array with one object containing only the explanation field. - If OCR fails completely and no text can be extracted at all, return an array with one object containing only the explanation field.
- Your FINAL response to the user must be ONLY a JSON array. Do NOT wrap it in markdown code blocks. Do NOT add any prose before or after the JSON.""" - Your FINAL response must be ONLY a JSON array. Do NOT wrap it in markdown code blocks. Do NOT add any prose before or after the JSON."""
def analyze_pdf(pdf_path): def analyze_pdf(pdf_path):
@@ -45,28 +55,32 @@ def analyze_pdf(pdf_path):
base64_string = base64.b64encode(pdf_data).decode("utf-8") base64_string = base64.b64encode(pdf_data).decode("utf-8")
response = client.responses.create( response = client.chat.completions.create(
model="gpt-4o", model="openai/gpt-4o",
instructions=BASE_PROMPT, messages=[
input=[ {
"role": "system",
"content": BASE_PROMPT,
},
{ {
"role": "user", "role": "user",
"content": [ "content": [
{ {
"type": "input_file", "type": "file",
"file": {
"filename": os.path.basename(pdf_path), "filename": os.path.basename(pdf_path),
"file_data": f"data:application/pdf;base64,{base64_string}", "file_data": f"data:application/pdf;base64,{base64_string}",
}, },
},
{ {
"type": "input_text", "type": "text",
"text": "extract the invoice(s) and/or credit(s) details from this document.", "text": "extract the invoice(s) and/or credit(s) details from this document.",
}, },
], ],
} },
], ],
) )
text = response.output_text text = response.choices[0].message.content
import re
match = re.search(r'```(?:json)?\s*\n(.*?)\n```', text, re.DOTALL) match = re.search(r'```(?:json)?\s*\n(.*?)\n```', text, re.DOTALL)
if match: if match:
text = match.group(1) text = match.group(1)

View File

@@ -6,4 +6,5 @@ requires-python = ">=3.11"
dependencies = [ dependencies = [
"openai==1.109.1", "openai==1.109.1",
"pydantic==2.13.4", "pydantic==2.13.4",
"python-dotenv>=1.2.2",
] ]

11
uv.lock generated
View File

@@ -58,12 +58,14 @@ source = { virtual = "." }
dependencies = [ dependencies = [
{ name = "openai" }, { name = "openai" },
{ name = "pydantic" }, { name = "pydantic" },
{ name = "python-dotenv" },
] ]
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "openai", specifier = "==1.109.1" }, { name = "openai", specifier = "==1.109.1" },
{ name = "pydantic", specifier = "==2.13.4" }, { name = "pydantic", specifier = "==2.13.4" },
{ name = "python-dotenv", specifier = ">=1.2.2" },
] ]
[[package]] [[package]]
@@ -339,6 +341,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/4b/2d/69abac8f838090bbecd5df894befb2c2619e7996a98ddb949db9f3b93225/pydantic_core-2.46.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:d51026d73fcfd93610abc7b27789c26b313920fcfb20e27462d74a7f8b06e983", size = 2193071, upload-time = "2026-05-06T13:38:08.682Z" }, { url = "https://files.pythonhosted.org/packages/4b/2d/69abac8f838090bbecd5df894befb2c2619e7996a98ddb949db9f3b93225/pydantic_core-2.46.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:d51026d73fcfd93610abc7b27789c26b313920fcfb20e27462d74a7f8b06e983", size = 2193071, upload-time = "2026-05-06T13:38:08.682Z" },
] ]
[[package]]
name = "python-dotenv"
version = "1.2.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" },
]
[[package]] [[package]]
name = "sniffio" name = "sniffio"
version = "1.3.1" version = "1.3.1"