Switch the OpenAI client to OpenRouter

This commit is contained in:
2026-05-27 09:30:18 -07:00
parent 3d9f82f1ea
commit f6f3296b0a
4 changed files with 40 additions and 13 deletions

View File

@@ -2,11 +2,21 @@
import base64
import json
import os
import re
import urllib.request
from dotenv import load_dotenv
# Load variables from a .env file located next to this module, if one exists,
# so OPENROUTER_API_KEY can be supplied without exporting it in the shell.
_env_path = os.path.join(os.path.dirname(__file__), '.env')
if os.path.exists(_env_path):
    load_dotenv(_env_path)

import openai

# The API key is read from the environment only -- never embed a key literal
# in source. A missing OPENROUTER_API_KEY raises KeyError here, at import
# time, rather than failing later on the first request.
# The stock OpenAI client is reused and pointed at OpenRouter's endpoint.
client = openai.OpenAI(
    api_key=os.environ["OPENROUTER_API_KEY"],
    base_url="https://openrouter.ai/api/v1",
)
def slurp_file(filename):
@@ -36,7 +46,7 @@ IMPORTANT:
- Do NOT skip entries because some fields are missing. Extract what you can.
- For statements/summaries, each row in an invoice table is a separate invoice entry.
- If OCR fails completely and no text can be extracted at all, return an array with one object containing only the explanation field.
- Your FINAL response to the user must be ONLY a JSON array. Do NOT wrap it in markdown code blocks. Do NOT add any prose before or after the JSON."""
- Your FINAL response must be ONLY a JSON array. Do NOT wrap it in markdown code blocks. Do NOT add any prose before or after the JSON."""
def analyze_pdf(pdf_path):
@@ -45,28 +55,32 @@ def analyze_pdf(pdf_path):
base64_string = base64.b64encode(pdf_data).decode("utf-8")
response = client.responses.create(
model="gpt-4o",
instructions=BASE_PROMPT,
input=[
response = client.chat.completions.create(
model="openai/gpt-4o",
messages=[
{
"role": "system",
"content": BASE_PROMPT,
},
{
"role": "user",
"content": [
{
"type": "input_file",
"filename": os.path.basename(pdf_path),
"file_data": f"data:application/pdf;base64,{base64_string}",
"type": "file",
"file": {
"filename": os.path.basename(pdf_path),
"file_data": f"data:application/pdf;base64,{base64_string}",
},
},
{
"type": "input_text",
"type": "text",
"text": "extract the invoice(s) and/or credit(s) details from this document.",
},
],
}
},
],
)
text = response.output_text
import re
text = response.choices[0].message.content
match = re.search(r'```(?:json)?\s*\n(.*?)\n```', text, re.DOTALL)
if match:
text = match.group(1)