switches to uv

This commit is contained in:
2026-05-27 08:58:58 -07:00
parent 366d776493
commit 127f1486be
5 changed files with 437 additions and 48 deletions

View File

@@ -1,62 +1,56 @@
#!/usr/bin/env python3
import openai
import json
import urllib
import os
import pprint
import urllib.request
import openai
# SECURITY: the API key must never live in source control. It is read from the
# OPENAI_API_KEY environment variable instead; the key that was previously
# committed on this line should be treated as leaked and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
def slurp_file(filename) -> str:
    """Return the entire contents of the text file *filename* as one string.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the locale of the machine running the script.

    Args:
        filename: Path of the file to read.

    Returns:
        The full decoded text of the file.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(filename, "r", encoding="utf-8") as file:
        return file.read()
# Instructions given to the assistant. The `{}` placeholder is filled with the
# text of schema.json, which is looked up next to this module (not in the
# current working directory) so the script works from any directory.
BASE_PROMPT = """You extract invoice details from pdfs. Some pdfs are invoices, some are credits, and some are statements that may contain statements or credits. Numbers in parenthesis typically indicate credits. Always follow this json schema. Do not respond with anything except the raw json response. Do not respond in code blocks(```). If you don't find any invoices, make sure to fill out the explanation field at least.
```
{}
```
""".format(slurp_file(os.path.join(os.path.dirname(__file__), 'schema.json')))
import sys
import pprint
# context = sys.argv[1]
# problem = sys.argv[2]
# other = sys.argv[3]
# Single shared OpenAI client. The API key comes from the OPENAI_API_KEY
# environment variable rather than a literal in source control; the key that
# was previously committed here should be treated as leaked and revoked.
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
# NOTE(review): this analyze_url body breaks off at the unclosed
# client.beta.threads.create( call at its end, and another analyze_url is
# defined further down the file. It looks like the pre-commit version left in
# by the diff view -- confirm before relying on it.
def analyze_url(url):
# Create an assistant named "pdf-reader" that uses BASE_PROMPT as its
# instructions, the gpt-4o model and the file_search tool.
assistant = client.beta.assistants.create(
name="pdf-reader",
instructions=BASE_PROMPT,
model="gpt-4o",
tools=[{"type": "file_search"}],
)
# Download the document at `url` and save the bytes to a fixed path.
with urllib.request.urlopen(url) as response:
data = response.read()
with open("/tmp/test.pdf", "wb") as f:
f.write(data)
# The explicit close is redundant: the `with` statement already closes f.
f.close()
# Upload the user provided file to OpenAI
with open('/tmp/test.pdf', 'rb') as f:
message_file = client.files.create(
file=f, purpose="assistants"
)
# Create a thread and attach the file to the message
thread = client.beta.threads.create(
# Upload the PDF at `pdf_path` to OpenAI, run a gpt-4o assistant with the
# file_search tool over it, and return the assistant's reply parsed as JSON.
# NOTE(review): several lines below appear twice in slightly different
# formatting (the user message dict, the closing parenthesis and
# print(thread.id)); this looks like old and new diff lines interleaved --
# confirm which copy is current.
def analyze_pdf(pdf_path):
# A new assistant is created on every call, configured with BASE_PROMPT.
# NOTE(review): no deletion of the assistant or the uploaded file is visible
# in this view -- confirm whether cleanup happens elsewhere.
assistant = client.beta.assistants.create(
name="pdf-reader",
instructions=BASE_PROMPT,
model="gpt-4o",
tools=[{"type": "file_search"}],
)
# Upload the PDF so it can be attached to the thread message.
with open(pdf_path, 'rb') as f:
message_file = client.files.create(file=f, purpose="assistants")
# Start a thread whose user message carries the uploaded file as an
# attachment searchable through file_search.
thread = client.beta.threads.create(
messages=[
{
"role": "user",
"content": "extract the invoice(s) and/or credit(s) details from this invoice or statement",
# Attach the new file to the message.
"attachments": [
{ "file_id": message_file.id, "tools": [{"type": "file_search"}] }
],
}
{
"role": "user",
"content": "extract the invoice(s) and/or credit(s) details from this invoice or statement",
"attachments": [
{"file_id": message_file.id, "tools": [{"type": "file_search"}]}
],
}
]
)
print(thread.id)
)
print(thread.id)
# Start a run of the assistant on the thread and poll it via create_and_poll.
run = client.beta.threads.runs.create_and_poll(
thread_id=thread.id, assistant_id=assistant.id
)
# NOTE(review): the line below is a diff hunk header, not Python. `messages`
# is used after it but never assigned in the visible lines, so the statement
# that fetches the thread messages is presumably in the elided part of the
# file -- confirm.
@@ -66,17 +60,22 @@ def analyze_url(url):
# Debug output: the messages, the run status and the full run object.
pprint.pprint(messages)
print("\n\n")
print("status", run.status)
print ("\n\n")
print("\n\n")
print("full run")
pprint.pprint(run)
# Parse the text of the first content item of the first message as JSON.
# json.loads raises json.JSONDecodeError if that text is not valid JSON.
return json.loads(messages[0].content[0].text.value)
def analyze_url(url):
    """Download the PDF at *url* and return the result of analyze_pdf on it.

    The download is written into a private temporary directory instead of a
    fixed shared path, so overlapping invocations cannot overwrite each
    other's file, and the file is removed once the analysis has finished.
    The file keeps the name "test.pdf" so analyze_pdf sees the same basename
    and extension as before.

    Args:
        url: Location of the PDF; passed directly to urllib.request.urlopen.

    Returns:
        Whatever analyze_pdf returns for the downloaded file.

    Raises:
        urllib.error.URLError: If the download fails.
        OSError: If the temporary file cannot be written.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    with urllib.request.urlopen(url) as response:
        data = response.read()

    # The directory and the PDF inside it are deleted when this block exits,
    # whether analyze_pdf returns normally or raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        pdf_path = os.path.join(tmp_dir, "test.pdf")
        with open(pdf_path, "wb") as f:
            f.write(data)
        return analyze_pdf(pdf_path)
def handler(event, context):
    """Analyze the PDF referenced by ``event['url']`` and return the result.

    The URL is analyzed exactly once per invocation; every analyze_url call
    uploads the file and starts a new run, so a second call would duplicate
    that work.

    Args:
        event: Mapping that must contain a 'url' key.
        context: Not used by this function.

    Returns:
        The value returned by analyze_url for the URL.

    Raises:
        KeyError: If *event* has no 'url' entry.
    """
    print(event)
    import urllib.request
    url = event['url']
    print("URL IS", url)
    return analyze_url(url)