initial
This commit is contained in:
65
code/main.py
Executable file
65
code/main.py
Executable file
@@ -0,0 +1,65 @@
|
||||
|
||||
#!/usr/bin/env python3
|
||||
import json
import os

import openai
|
||||
|
||||
# The API key is a secret and must never be committed to source control; it is
# read from the OPENAI_API_KEY environment variable instead.
# NOTE(review): the key that was previously hard-coded on this line is exposed
# in the repository history and should be revoked and replaced.
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
||||
def slurp_file(filename):
    """Return the entire contents of ``filename`` as text.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default locale encoding.

    Args:
        filename: Path of the file to read.

    Returns:
        The file contents as a single ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(filename, 'r', encoding='utf-8') as file:
        return file.read()
|
||||
|
||||
# schema.json is located next to this module. Resolving it relative to
# __file__ (instead of the process's current working directory) keeps the
# import working no matter where the program is started from.
_SCHEMA_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'schema.json')

# Instructions given to the assistant: the fixed guidance text followed by the
# JSON schema the reply must follow. The schema text is supplied as a format
# argument, so braces inside it are not treated as placeholders.
BASE_PROMPT = """You extract invoice details from pdfs. Always follow this json schema. Do not respond with anything except the raw json response. Do not respond in code blocks(```).
```
{}
```
""".format(slurp_file(_SCHEMA_PATH))
|
||||
|
||||
import sys
|
||||
# context = sys.argv[1]
|
||||
# problem = sys.argv[2]
|
||||
# other = sys.argv[3]
|
||||
# Shared OpenAI client used by handler(). The API key comes from the
# OPENAI_API_KEY environment variable rather than being embedded in source.
# NOTE(review): the key that was previously hard-coded here is exposed in the
# repository history and should be revoked and replaced.
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
|
||||
def handler(event, context):
    """Extract structured invoice details from the PDF referenced by ``event``.

    Creates a ``file_search`` assistant primed with ``BASE_PROMPT``, downloads
    the PDF from the event's URL, uploads it to OpenAI, runs the assistant on
    a thread that has the file attached, and decodes the reply as JSON.

    Args:
        event: Invocation payload carrying the PDF location under ``url``.
            Both a dict (``event["url"]``) and an object exposing a ``url``
            attribute are accepted.
        context: Invocation context; not used.

    Returns:
        The value obtained by JSON-decoding the assistant's reply text.

    Raises:
        KeyError / AttributeError: If ``event`` carries no ``url``.
        urllib.error.URLError: If the PDF cannot be downloaded.
        RuntimeError: If the run does not end with status ``completed`` or
            produces no message.
        json.JSONDecodeError: If the assistant's reply is not valid JSON.
    """
    import urllib.request

    # NOTE(review): a new assistant, uploaded file and thread are created on
    # every call and none of them is deleted afterwards — consider reusing one
    # assistant and cleaning up per-call resources.
    assistant = client.beta.assistants.create(
        name="pdf-reader",
        instructions=BASE_PROMPT,
        model="gpt-4o",
        tools=[{"type": "file_search"}],
    )

    # Dict-style events have no ``.url`` attribute; fall back to attribute
    # access for object-style events.
    url = event["url"] if isinstance(event, dict) else event.url

    # NOTE(review): ``url`` comes from the caller and urlopen() also accepts
    # non-HTTP schemes such as file:// — consider validating the scheme if the
    # event can come from an untrusted source.
    with urllib.request.urlopen(url) as response:
        data = response.read()

    with open("/tmp/test.pdf", "wb") as f:
        f.write(data)

    # Upload the user provided file to OpenAI. The context manager ensures the
    # read handle is closed once the upload has finished.
    with open("/tmp/test.pdf", "rb") as pdf_file:
        message_file = client.files.create(file=pdf_file, purpose="assistants")

    # Create a thread and attach the file to the message
    thread = client.beta.threads.create(
        messages=[
            {
                "role": "user",
                "content": "extract the details from this invoice",
                # Attach the new file to the message.
                "attachments": [
                    {"file_id": message_file.id, "tools": [{"type": "file_search"}]}
                ],
            }
        ]
    )

    run = client.beta.threads.runs.create_and_poll(
        thread_id=thread.id, assistant_id=assistant.id
    )

    # A run that did not complete leaves no reply to parse; fail with an
    # explicit error instead of an IndexError on the empty message list.
    if run.status != "completed":
        raise RuntimeError(
            "assistant run ended with status {!r}: {}".format(run.status, run.last_error)
        )

    messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))

    print(run.usage)

    if not messages:
        raise RuntimeError("assistant run completed without producing a message")

    return json.loads(messages[0].content[0].text.value)
|
||||
32
code/schema.json
Normal file
32
code/schema.json
Normal file
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "Invoice Data Schema",
|
||||
"description": "A schema to validate invoice data",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"customer_identifier": {
|
||||
"description": "The customer's name. e.g., ABC Corporation, Microsoft, etc.",
|
||||
"type": "string"
|
||||
},
|
||||
"date": {
|
||||
"description": "Invoice date in ISO 8601 format (YYYY-MM-DD).",
|
||||
"type": "string",
|
||||
"format": "date"
|
||||
},
|
||||
"invoice_number": {
|
||||
"description": "Unique invoice number for the transaction.",
|
||||
"type": "string"
|
||||
},
|
||||
"account_number": {
|
||||
"description": "Customer's account number associated with the invoice.",
|
||||
"type": "string"
|
||||
},
|
||||
"total": {
|
||||
"description": "Total amount of the invoice, including taxes and fees. It should be a decimal number as a string.",
|
||||
"type": "string",
|
||||
"pattern": "^\\d+(\\.\\d{1,2})?$"
|
||||
}
|
||||
},
|
||||
"required": ["customer_identifier", "date", "invoice_number", "account_number", "total"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
Reference in New Issue
Block a user