This commit is contained in:
2024-05-30 21:13:47 -07:00
commit 6ea21f7d87
8 changed files with 848 additions and 0 deletions

65
code/main.py Executable file
View File

@@ -0,0 +1,65 @@
#!/usr/bin/env python3
import json
import os

import openai
# Module-level default key for the openai package, read from the environment
# so that no credential is stored in source control.
# NOTE: the key that used to be hard-coded on this line is exposed in the
# commit history and should be revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
def slurp_file(filename):
    """Return the entire contents of the text file *filename* as a string.

    The file is decoded as UTF-8 explicitly instead of with the
    platform-dependent default encoding, so the result is the same on every
    host.
    """
    with open(filename, 'r', encoding='utf-8') as file:
        return file.read()
# Instructions given to the assistant: a fixed preamble followed by the
# contents of schema.json (read once, at import time) inside a fenced block.
BASE_PROMPT = (
    "You extract invoice details from pdfs. Always follow this json schema. "
    "Do not respond with anything except the raw json response. "
    "Do not respond in code blocks(```).\n"
    "```\n"
    + slurp_file('schema.json')
    + "\n```\n"
)
import sys
# context = sys.argv[1]
# problem = sys.argv[2]
# other = sys.argv[3]
# Shared OpenAI client, created once at import time and used by handler().
# The key is read from the OPENAI_API_KEY environment variable so that no
# credential is stored in source control.
# NOTE: the key that used to be hard-coded here is exposed in the commit
# history and should be revoked.
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
# Upper bound, in seconds, on how long the PDF download may block.
_DOWNLOAD_TIMEOUT_SECONDS = 30


def _delete_quietly(delete, resource_id):
    """Call ``delete(resource_id)``, reporting but not raising API errors.

    Cleanup is best-effort: a failed delete must not replace the handler's
    result or mask an exception that is already propagating.
    """
    try:
        delete(resource_id)
    except openai.OpenAIError as exc:
        print(f"cleanup of {resource_id} failed: {exc}")


def handler(event, context):
    """Extract invoice details from the PDF found at the event's URL.

    Downloads the PDF, uploads it to OpenAI, runs a temporary "pdf-reader"
    assistant (``gpt-4o`` with file search, instructed by ``BASE_PROMPT``)
    over it, and parses the assistant's reply as JSON. The assistant, the
    uploaded file and the thread are deleted again before returning.

    Args:
        event: Carries the PDF location, either as a dict with a ``"url"``
            key or as an object with a ``url`` attribute.
        context: Unused.

    Returns:
        The value decoded from the assistant's JSON reply.

    Raises:
        RuntimeError: If the run does not complete or produces no message.
        json.JSONDecodeError: If the assistant's reply is not valid JSON.
    """
    import urllib.request

    url = event["url"] if isinstance(event, dict) else event.url

    # Download first so that a bad URL fails before anything is created
    # on the OpenAI side.
    # NOTE(review): urlopen also accepts non-HTTP schemes such as file://; if
    # the URL can come from an untrusted caller, restrict the scheme here.
    with urllib.request.urlopen(url, timeout=_DOWNLOAD_TIMEOUT_SECONDS) as response:
        pdf_bytes = response.read()

    assistant = None
    message_file = None
    thread = None
    try:
        assistant = client.beta.assistants.create(
            name="pdf-reader",
            instructions=BASE_PROMPT,
            model="gpt-4o",
            tools=[{"type": "file_search"}],
        )

        # Upload the bytes directly as (filename, contents): no shared
        # /tmp path to race on and no file handle left open.
        message_file = client.files.create(
            file=("test.pdf", pdf_bytes), purpose="assistants"
        )

        # Create a thread whose single user message has the file attached.
        thread = client.beta.threads.create(
            messages=[
                {
                    "role": "user",
                    "content": "extract the details from this invoice",
                    "attachments": [
                        {
                            "file_id": message_file.id,
                            "tools": [{"type": "file_search"}],
                        }
                    ],
                }
            ]
        )

        run = client.beta.threads.runs.create_and_poll(
            thread_id=thread.id, assistant_id=assistant.id
        )
        print(run.usage)

        # A run that failed, expired or was cancelled has no usable reply;
        # report that instead of failing later on an empty message list.
        if run.status != "completed":
            raise RuntimeError(f"assistant run ended with status {run.status!r}")

        messages = list(
            client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id)
        )
        if not messages:
            raise RuntimeError("assistant run completed without a reply message")

        return json.loads(messages[0].content[0].text.value)
    finally:
        # Everything created above is single-use; remove it so that
        # assistants, files and threads do not pile up in the account.
        if thread is not None:
            _delete_quietly(client.beta.threads.delete, thread.id)
        if message_file is not None:
            _delete_quietly(client.files.delete, message_file.id)
        if assistant is not None:
            _delete_quietly(client.beta.assistants.delete, assistant.id)