65 lines
1.9 KiB
Python
Executable File
65 lines
1.9 KiB
Python
Executable File
|
|
#!/usr/bin/env python3
|
|
import openai
|
|
import json
|
|
|
|
import os

# Credentials come from the environment, never from source control.
# NOTE: the key that used to be hard-coded on this line has been exposed and
# should be revoked and rotated.
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
|
def slurp_file(filename, encoding="utf-8"):
    """Return the entire text content of a file.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        The decoded file content as a single string.
    """
    with open(filename, 'r', encoding=encoding) as file:
        return file.read()
|
|
|
|
# Template for the assistant's instructions; the single ``{}`` placeholder is
# filled with the text of ``schema.json``.
_PROMPT_TEMPLATE = (
    "You extract invoice details from pdfs. "
    "Always follow this json schema. "
    "Do not respond with anything except the raw json response. "
    "Do not respond in code blocks(```).\n"
    "```\n"
    "{}\n"
    "```\n"
)

# Instructions handed to the assistant, built once at import time by reading
# ``schema.json`` relative to the current working directory.
BASE_PROMPT = _PROMPT_TEMPLATE.format(slurp_file('schema.json'))
|
|
|
|
import sys
|
|
# context = sys.argv[1]
|
|
# problem = sys.argv[2]
|
|
# other = sys.argv[3]
|
|
# Shared OpenAI client. With no explicit ``api_key`` the SDK reads the
# ``OPENAI_API_KEY`` environment variable and raises at construction time if
# it is missing, so no secret lives in this file.
# NOTE: the key that used to be hard-coded here has been exposed and should
# be revoked and rotated.
client = openai.OpenAI()
|
|
def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence, if any."""
    stripped = text.strip()
    if stripped.startswith("```") and stripped.endswith("```"):
        # Drop the whole opening fence line (it may carry a language tag such
        # as ``json``) together with the closing fence.
        first_newline = stripped.find("\n")
        if first_newline != -1:
            stripped = stripped[first_newline + 1:-3].strip()
    return stripped


def handler(event, context):
    """Extract invoice details from the PDF referenced by ``event``.

    Downloads the PDF, stores it at ``/tmp/test.pdf``, uploads it to OpenAI,
    runs a ``file_search`` assistant configured with ``BASE_PROMPT`` against
    it and decodes the assistant's reply as JSON.

    Args:
        event: Invocation payload holding the PDF location under ``url``;
            either a dict (``event["url"]``) or an object exposing a ``url``
            attribute.
        context: Invocation context; not used.

    Returns:
        The assistant's reply decoded with ``json.loads``.

    Raises:
        RuntimeError: If the run does not reach the ``completed`` status or
            produces no reply.
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    import urllib.request

    # Dict payloads carry the URL as a key; attribute access is kept for
    # callers that pass an object instead.
    url = event["url"] if isinstance(event, dict) else event.url

    # Download before creating any remote resource so a bad URL fails early.
    # NOTE(review): the URL is caller-supplied and ``urlopen`` also accepts
    # non-HTTP schemes such as ``file://``; restrict it if events can come
    # from untrusted sources.
    with urllib.request.urlopen(url) as response:
        pdf_bytes = response.read()

    pdf_path = "/tmp/test.pdf"
    with open(pdf_path, "wb") as pdf_file:
        pdf_file.write(pdf_bytes)

    # NOTE(review): a new assistant and uploaded file are created on every
    # call and nothing here deletes them; consider reusing or cleaning up.
    assistant = client.beta.assistants.create(
        name="pdf-reader",
        instructions=BASE_PROMPT,
        model="gpt-4o",
        tools=[{"type": "file_search"}],
    )

    # Upload the user provided file to OpenAI; the handle is closed as soon
    # as the upload call returns.
    with open(pdf_path, "rb") as pdf_file:
        message_file = client.files.create(file=pdf_file, purpose="assistants")

    # Create a thread and attach the file to the message.
    thread = client.beta.threads.create(
        messages=[
            {
                "role": "user",
                "content": "extract the details from this invoice",
                "attachments": [
                    {"file_id": message_file.id, "tools": [{"type": "file_search"}]},
                ],
            },
        ],
    )

    run = client.beta.threads.runs.create_and_poll(
        thread_id=thread.id, assistant_id=assistant.id
    )
    # Polling also returns for failed, cancelled or expired runs; report that
    # explicitly instead of failing later with an opaque IndexError.
    if run.status != "completed":
        raise RuntimeError(f"assistant run ended with status {run.status!r}")

    messages = list(
        client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id)
    )

    print(run.usage)

    if not messages or not messages[0].content:
        raise RuntimeError("assistant run produced no reply")

    # The model sometimes wraps its answer in a code fence despite the
    # instructions, so strip one before decoding.
    reply = messages[0].content[0].text.value
    return json.loads(_strip_code_fence(reply))