This commit is contained in:
2024-05-30 21:13:47 -07:00
commit 6ea21f7d87
8 changed files with 848 additions and 0 deletions

65
code/main.py Executable file
View File

@@ -0,0 +1,65 @@
#!/usr/bin/env python3
import json
import os

import openai
# SECURITY: the API key must never live in source control. It is read from the
# OPENAI_API_KEY environment variable instead (None when the variable is unset).
# NOTE(review): the key that was previously committed on this line is exposed in
# repository history and should be revoked and rotated.
openai.api_key = os.environ.get("OPENAI_API_KEY")
def slurp_file(filename, encoding="utf-8"):
    """Return the entire text content of *filename* as a single string.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale settings.

    Returns:
        The decoded file content.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the content is not valid in *encoding*.
    """
    with open(filename, "r", encoding=encoding) as file:
        return file.read()
# Locate schema.json next to this module rather than relative to the current
# working directory, so the module can be imported from any directory.
_SCHEMA_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'schema.json')

# System instructions for the assistant: a fixed preamble followed by the JSON
# schema (read once at import time) that every response must conform to.
BASE_PROMPT = """You extract invoice details from pdfs. Always follow this json schema. Do not respond with anything except the raw json response. Do not respond in code blocks(```).
```
{}
```
""".format(slurp_file(_SCHEMA_PATH))
import sys
# context = sys.argv[1]
# problem = sys.argv[2]
# other = sys.argv[3]
# SECURITY: no API key in source. When api_key is not passed, the OpenAI client
# reads it from the OPENAI_API_KEY environment variable and raises at
# construction time if it is missing, so a misconfigured deployment fails fast.
# NOTE(review): the key previously committed here is exposed in repository
# history and should be revoked and rotated.
client = openai.OpenAI()
# Only plain web URLs are fetched; urllib would otherwise also honour schemes
# such as file://, letting a caller exfiltrate local files to a third party.
_ALLOWED_URL_SCHEMES = ("http", "https")
_DOWNLOAD_TIMEOUT_SECONDS = 30


def _event_url(event):
    """Return the invoice URL carried by *event* (``event["url"]`` or ``event.url``)."""
    if isinstance(event, dict):
        return event["url"]
    return event.url


def _download(url):
    """Fetch *url* over HTTP(S) and return the raw response body as bytes."""
    import urllib.parse
    import urllib.request

    scheme = urllib.parse.urlsplit(url).scheme.lower()
    if scheme not in _ALLOWED_URL_SCHEMES:
        raise ValueError(
            f"unsupported URL scheme {scheme!r}; expected http or https"
        )
    with urllib.request.urlopen(url, timeout=_DOWNLOAD_TIMEOUT_SECONDS) as response:
        return response.read()


def _discard(delete, resource_id):
    """Best-effort removal of a remote OpenAI resource; failures are reported, not raised."""
    try:
        delete(resource_id)
    except openai.OpenAIError as exc:
        # A cleanup hiccup must not mask the extraction result or the real error.
        print(f"cleanup of {resource_id} failed: {exc}")


def handler(event, context):
    """Download the invoice PDF named by *event* and extract its details as JSON.

    The PDF is fetched first, so no remote resources are created when the
    download fails. A temporary assistant, uploaded file and thread are then
    created, the run is polled to completion, and all three resources are
    deleted again before returning, whether or not extraction succeeded.

    Args:
        event: Object carrying the PDF location, either as a mapping with a
            ``"url"`` key or as an object with a ``url`` attribute.
        context: Unused by this function.

    Returns:
        The assistant's reply parsed with ``json.loads``. The assistant is
        instructed to follow the schema embedded in ``BASE_PROMPT``, but the
        reply is not validated against it here.

    Raises:
        ValueError: If the URL scheme is not http or https.
        RuntimeError: If the run does not finish with status ``"completed"``
            or produces no message.
        json.JSONDecodeError: If the assistant's reply is not valid JSON.
    """
    from contextlib import ExitStack

    pdf_bytes = _download(_event_url(event))

    with ExitStack() as cleanup:
        assistant = client.beta.assistants.create(
            name="pdf-reader",
            instructions=BASE_PROMPT,
            model="gpt-4o",
            tools=[{"type": "file_search"}],
        )
        cleanup.callback(_discard, client.beta.assistants.delete, assistant.id)

        # Upload straight from memory as (filename, content, media type): no
        # shared /tmp path to collide on and no file handle left open.
        message_file = client.files.create(
            file=("test.pdf", pdf_bytes, "application/pdf"),
            purpose="assistants",
        )
        cleanup.callback(_discard, client.files.delete, message_file.id)

        # Create a thread whose single user message has the file attached.
        thread = client.beta.threads.create(
            messages=[
                {
                    "role": "user",
                    "content": "extract the details from this invoice",
                    "attachments": [
                        {
                            "file_id": message_file.id,
                            "tools": [{"type": "file_search"}],
                        }
                    ],
                }
            ]
        )
        cleanup.callback(_discard, client.beta.threads.delete, thread.id)

        run = client.beta.threads.runs.create_and_poll(
            thread_id=thread.id, assistant_id=assistant.id
        )
        if run.status != "completed":
            raise RuntimeError(
                f"assistant run ended with status {run.status!r}: {run.last_error}"
            )

        messages = list(
            client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id)
        )
        if not messages:
            raise RuntimeError("assistant run completed without producing a message")

        print(run.usage)
        return json.loads(messages[0].content[0].text.value)

32
code/schema.json Normal file
View File

@@ -0,0 +1,32 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Invoice Data Schema",
"description": "A schema to validate invoice data",
"type": "object",
"properties": {
"customer_identifier": {
"description": "The customer's name. e.g., ABC Corporation, Microsoft, etc.",
"type": "string"
},
"date": {
"description": "Invoice date in ISO 8601 format (YYYY-MM-DD).",
"type": "string",
"format": "date"
},
"invoice_number": {
"description": "Unique invoice number for the transaction.",
"type": "string"
},
"account_number": {
"description": "Customer's account number associated with the invoice.",
"type": "string"
},
"total": {
"description": "Total amount of the invoice, including taxes and fees. It should be a decimal number as a string.",
"type": "string",
"pattern": "^\\d+(\\.\\d{1,2})?$"
}
},
"required": ["customer_identifier", "date", "invoice_number", "account_number", "total"],
"additionalProperties": false
}