#!/usr/bin/env python3
"""Extract invoice/credit details from a PDF using an OpenAI assistant.

The module downloads a PDF from a URL, uploads it to OpenAI, asks a
``file_search``-enabled assistant to extract the invoice data following the
JSON schema stored in ``schema.json``, and returns the parsed JSON.

Configuration:
    OPENAI_API_KEY: environment variable holding the OpenAI API key.
"""
import json
import os
import sys
import tempfile
import urllib
import urllib.request

import openai

# SECURITY: the key must come from the environment, never from source control.
# Any key that was previously committed in this file must be revoked.
_API_KEY = os.environ.get("OPENAI_API_KEY")
openai.api_key = _API_KEY


def slurp_file(filename):
    """Return the full text content of ``filename``."""
    with open(filename, 'r') as file:
        return file.read()


# System instructions for the assistant; the JSON schema is embedded verbatim.
BASE_PROMPT = """You extract invoice details from pdfs. Some pdfs are invoices, some are credits, and some are statements that may contain statements or credits. Numbers in parenthesis typically indicate credits.
Always follow this json schema. Do not respond with anything except the raw json response. Do not respond in code blocks(```). If you don't find any invoices, make sure to fill out the explanation field at least.
```
{}
```
""".format(slurp_file('schema.json'))

# With api_key=None the SDK falls back to OPENAI_API_KEY itself and raises at
# import time if no key is configured, so a misconfiguration fails fast.
client = openai.OpenAI(api_key=_API_KEY)


def _delete_quietly(delete, resource_id):
    """Call ``delete(resource_id)``; report (but do not raise) API failures."""
    try:
        delete(resource_id)
    except openai.OpenAIError as exc:
        # Cleanup is best-effort: it must never mask the real result or error.
        print("cleanup failed for", resource_id, exc)


def analyze_url(url):
    """Download the PDF at ``url`` and return the extracted details.

    Args:
        url: Location of the PDF (invoice, credit, or statement).

    Returns:
        The assistant's reply parsed from JSON.

    Raises:
        urllib.error.URLError: the PDF could not be downloaded.
        RuntimeError: the run produced no message (e.g. it failed or expired).
        json.JSONDecodeError: the assistant's reply was not valid JSON.
    """
    # NOTE(review): urlopen also accepts file:// and ftp:// URLs. If ``url``
    # can come from an untrusted caller, restrict the scheme to http(s).
    # Download first so a bad URL does not leave an orphaned assistant behind.
    with urllib.request.urlopen(url) as response:
        data = response.read()

    assistant = client.beta.assistants.create(
        name="pdf-reader",
        instructions=BASE_PROMPT,
        model="gpt-4o",
        tools=[{"type": "file_search"}],
    )
    message_file = None
    try:
        # A private temp directory avoids clashes between concurrent runs
        # while keeping the uploaded file name (and .pdf extension) stable.
        with tempfile.TemporaryDirectory() as tmp_dir:
            pdf_path = os.path.join(tmp_dir, "test.pdf")
            with open(pdf_path, "wb") as f:
                f.write(data)
            # Upload the user provided file to OpenAI
            with open(pdf_path, "rb") as f:
                message_file = client.files.create(file=f, purpose="assistants")

        # Create a thread and attach the file to the message
        thread = client.beta.threads.create(
            messages=[
                {
                    "role": "user",
                    "content": "extract the invoice(s) and/or credit(s) details from this invoice or statement",
                    # Attach the new file to the message.
                    "attachments": [
                        {
                            "file_id": message_file.id,
                            "tools": [{"type": "file_search"}],
                        }
                    ],
                }
            ]
        )
        print(thread.id)

        run = client.beta.threads.runs.create_and_poll(
            thread_id=thread.id,
            assistant_id=assistant.id,
        )
        messages = list(
            client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id)
        )
        print("messages", messages)
        print("status", run.status)
        print("full run", run)

        if not messages:
            # A failed/cancelled/expired run yields no messages; surface the
            # run status instead of an opaque IndexError.
            raise RuntimeError(
                "assistant run produced no messages (status={!r}, last_error={!r})".format(
                    run.status, getattr(run, "last_error", None)
                )
            )
        return json.loads(messages[0].content[0].text.value)
    finally:
        # Each call creates a fresh assistant and upload; remove them so they
        # do not accumulate in the account.
        if message_file is not None:
            _delete_quietly(client.files.delete, message_file.id)
        _delete_quietly(client.beta.assistants.delete, assistant.id)


def handler(event, context):
    """Lambda-style entry point: analyze the PDF at ``event['url']``.

    Args:
        event: Mapping with a ``'url'`` key pointing at the PDF.
        context: Runtime context object (unused).

    Returns:
        The parsed JSON produced by :func:`analyze_url`.
    """
    print(event)
    url = event['url']
    print("URL IS", url)
    # Analyze exactly once: every call is a paid, slow API round-trip.
    return analyze_url(url)