Files
glimpse2/code/main.py
2024-06-07 10:51:38 -07:00

67 lines
2.1 KiB
Python
Executable File

#!/usr/bin/env python3
import json
import os

import openai
# Take the API key from the OPENAI_API_KEY environment variable rather than
# embedding it in the source, so that no secret is committed with the code.
openai.api_key = os.environ.get("OPENAI_API_KEY")
def slurp_file(filename, encoding="utf-8"):
    """Return the entire contents of a text file as a single string.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale settings.

    Returns:
        The decoded contents of the file.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the contents are not valid in ``encoding``.
    """
    with open(filename, 'r', encoding=encoding) as file:
        return file.read()
# Instruction prompt: a fixed preamble followed by the contents of
# schema.json wrapped in a Markdown code fence.
BASE_PROMPT = (
    "You extract invoice details from pdfs. "
    "Some pdfs are invoices, some are credits, and some are statements that may contain statements or credits. "
    "Numbers in parenthesis typically indicate credits. "
    "Always follow this json schema. "
    "Do not respond with anything except the raw json response. "
    "Do not respond in code blocks(```).\n"
    "```\n"
    + slurp_file('schema.json')
    + "\n```\n"
)
import sys
# context = sys.argv[1]
# problem = sys.argv[2]
# other = sys.argv[3]
# Shared OpenAI client. The key is taken from the OPENAI_API_KEY environment
# variable so that no secret is embedded in the source; the client is given
# its key once, at construction, so no later reassignment is needed.
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if present."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    # Drop the opening fence line (it may carry a language tag such as "json"),
    # then everything from the last closing fence onwards.
    _, _, body = stripped.partition("\n")
    return body.rsplit("```", 1)[0].strip()


def _delete_quietly(delete, resource_id):
    """Call ``delete(resource_id)``, reporting API errors instead of raising."""
    try:
        delete(resource_id)
    except openai.OpenAIError as exc:
        # Cleanup is best-effort: it must not mask the handler's own result.
        print("cleanup failed for", resource_id, exc)


def handler(event, context):
    """Extract invoice/credit details from the PDF at ``event['url']``.

    Downloads the PDF, uploads it to OpenAI, runs a one-off "pdf-reader"
    assistant (instructed by ``BASE_PROMPT``, with the ``file_search`` tool)
    against it, and parses the assistant's reply as JSON. The assistant and
    the uploaded file are deleted again before returning.

    Args:
        event: Mapping with a ``'url'`` key naming the PDF to download.
        context: Not used by this function (presumably the AWS Lambda
            context object -- confirm against the deployment).

    Returns:
        The value decoded from the assistant's JSON reply.

    Raises:
        KeyError: If ``event`` has no ``'url'`` key.
        RuntimeError: If the run does not complete or yields no message.
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    import contextlib
    import urllib.request

    print(event)
    url = event['url']
    print("URL IS", url)

    # Download before creating any remote resources, so a bad URL cannot
    # leave an orphaned assistant behind.
    # NOTE(review): `url` comes straight from the event and urlopen() also
    # accepts non-HTTP schemes such as file:// -- confirm callers are trusted
    # or restrict the allowed schemes.
    with urllib.request.urlopen(url) as response:
        data = response.read()

    with contextlib.ExitStack() as cleanup:
        assistant = client.beta.assistants.create(
            name="pdf-reader",
            instructions=BASE_PROMPT,
            model="gpt-4o",
            tools=[{"type": "file_search"}],
        )
        cleanup.callback(
            _delete_quietly, client.beta.assistants.delete, assistant.id
        )

        # Upload the user provided file to OpenAI. Passing the bytes with an
        # explicit file name avoids a shared scratch file on disk and an
        # unclosed file handle.
        message_file = client.files.create(
            file=("test.pdf", data, "application/pdf"),
            purpose="assistants",
        )
        cleanup.callback(_delete_quietly, client.files.delete, message_file.id)

        # Create a thread and attach the file to the message.
        thread = client.beta.threads.create(
            messages=[
                {
                    "role": "user",
                    "content": "extract the invoice(s) and/or credit(s) details from this invoice or statement",
                    "attachments": [
                        {
                            "file_id": message_file.id,
                            "tools": [{"type": "file_search"}],
                        }
                    ],
                }
            ]
        )

        run = client.beta.threads.runs.create_and_poll(
            thread_id=thread.id, assistant_id=assistant.id
        )
        print(run.usage)

        # A run that failed, expired or was cancelled produces no reply;
        # report that explicitly rather than failing on an empty list below.
        if run.status != "completed":
            raise RuntimeError(
                f"OpenAI run {run.id} ended with status {run.status!r}: "
                f"{run.last_error}"
            )

        messages = list(
            client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id)
        )
        if not messages:
            raise RuntimeError(f"OpenAI run {run.id} produced no messages")

        # The prompt forbids code fences, but tolerate them if the model
        # adds one anyway.
        reply = messages[0].content[0].text.value
        return json.loads(_strip_code_fence(reply))