#!/usr/bin/env python3
"""Extract structured invoice details from a PDF using the OpenAI Assistants API.

The module exposes ``handler(event, context)``. It downloads the PDF found at
the event's ``url``, uploads it to OpenAI, runs a file-search assistant whose
instructions embed the JSON schema stored in ``schema.json``, and returns the
assistant's reply parsed as JSON.

Configuration:
    OPENAI_API_KEY: environment variable holding the OpenAI API key.
"""
import json
import logging
import os
import sys  # noqa: F401  (kept: other parts of the project may rely on it)
import urllib.request
from collections.abc import Mapping
from typing import Any

import openai

logger = logging.getLogger(__name__)

# SECURITY: the API key used to be hard-coded in this file. Any key that was
# ever committed must be treated as compromised and revoked; the key is now
# supplied through the environment only.
openai.api_key = os.environ.get("OPENAI_API_KEY")


def slurp_file(filename: str) -> str:
    """Return the entire contents of *filename* as text."""
    with open(filename, "r", encoding="utf-8") as file:
        return file.read()


# System instructions for the assistant; the JSON schema is embedded verbatim.
BASE_PROMPT = """You extract invoice details from pdfs. Always follow this json schema. Do not respond with anything except the raw json response. Do not respond in code blocks(```).
```
{}
```
""".format(slurp_file('schema.json'))

# With api_key=None the SDK falls back to OPENAI_API_KEY itself and raises a
# descriptive error when no key is configured.
client = openai.OpenAI(api_key=openai.api_key)


def _event_url(event: Any) -> str:
    """Return the PDF URL carried by *event* (mapping key or attribute)."""
    # NOTE(review): the handler(event, context) signature suggests AWS Lambda,
    # where the event is normally a dict - confirm. Attribute access is kept
    # as a fallback for callers that pass an object exposing ``.url``.
    if isinstance(event, Mapping):
        return event["url"]
    return event.url


def _cleanup(assistant_id: str, file_id: "str | None") -> None:
    """Best-effort removal of the per-invocation assistant and uploaded file."""
    try:
        client.beta.assistants.delete(assistant_id)
    except openai.OpenAIError:
        # Cleanup must never mask the handler's own result or exception.
        logger.warning("could not delete assistant %s", assistant_id, exc_info=True)
    if file_id is None:
        return
    try:
        client.files.delete(file_id)
    except openai.OpenAIError:
        logger.warning("could not delete file %s", file_id, exc_info=True)


def handler(event: Any, context: Any) -> Any:
    """Download the invoice PDF named by *event* and return its extracted details.

    Args:
        event: Mapping with a ``"url"`` key, or an object with a ``url``
            attribute, pointing at the PDF to process.
        context: Invocation context; unused.

    Returns:
        The assistant's reply decoded with ``json.loads``.

    Raises:
        RuntimeError: If the run does not complete or yields no message.
        json.JSONDecodeError: If the assistant's reply is not valid JSON.
    """
    url = _event_url(event)

    # NOTE(review): ``url`` is caller-supplied and urlopen also accepts
    # non-HTTP schemes such as file://. If events can come from untrusted
    # sources, restrict the scheme/host before fetching.
    with urllib.request.urlopen(url) as response:
        pdf_bytes = response.read()

    assistant = client.beta.assistants.create(
        name="pdf-reader",
        instructions=BASE_PROMPT,
        model="gpt-4o",
        tools=[{"type": "file_search"}],
    )
    message_file = None
    try:
        # Upload straight from memory as (filename, content): no temp file to
        # collide on between invocations and no file handle left open.
        message_file = client.files.create(
            file=("test.pdf", pdf_bytes),
            purpose="assistants",
        )

        # Create a thread whose first message carries the uploaded file.
        thread = client.beta.threads.create(
            messages=[
                {
                    "role": "user",
                    "content": "extract the details from this invoice",
                    "attachments": [
                        {
                            "file_id": message_file.id,
                            "tools": [{"type": "file_search"}],
                        }
                    ],
                }
            ]
        )

        run = client.beta.threads.runs.create_and_poll(
            thread_id=thread.id,
            assistant_id=assistant.id,
        )
        print(run.usage)

        if run.status != "completed":
            raise RuntimeError(
                f"assistant run ended with status {run.status!r}: {run.last_error}"
            )

        messages = list(
            client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id)
        )
        if not messages:
            raise RuntimeError("assistant run completed without producing a message")

        return json.loads(messages[0].content[0].text.value)
    finally:
        _cleanup(assistant.id, message_file.id if message_file is not None else None)