From edb6b8327e6f235a2f6bcd45ed78ef34b3420815 Mon Sep 17 00:00:00 2001 From: Bryce Date: Thu, 24 Oct 2024 23:26:29 -0700 Subject: [PATCH] glimpse logging updates and testing updates --- code/main.py | 64 +++++++++++++++++++++++----------------- code/schema.json | 4 +++ terraform.tfstate | 32 ++++++++++---------- terraform.tfstate.backup | 32 ++++++++++---------- 4 files changed, 75 insertions(+), 57 deletions(-) diff --git a/code/main.py b/code/main.py index 8231df0..c0cf752 100755 --- a/code/main.py +++ b/code/main.py @@ -2,6 +2,7 @@ #!/usr/bin/env python3 import openai import json +import urllib openai.api_key = "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" def slurp_file(filename): @@ -9,7 +10,7 @@ def slurp_file(filename): data = file.read() return data -BASE_PROMPT="""You extract invoice details from pdfs. Some pdfs are invoices, some are credits, and some are statements that may contain statements or credits. Numbers in parenthesis typically indicate credits. Always follow this json schema. Do not respond with anything except the raw json response. Do not respond in code blocks(```). +BASE_PROMPT="""You extract invoice details from pdfs. Some pdfs are invoices, some are credits, and some are statements that may contain statements or credits. Numbers in parenthesis typically indicate credits. Always follow this json schema. Do not respond with anything except the raw json response. Do not respond in code blocks(```). If you don't find any invoices, make sure to fill out the explanation field at least. ``` {} ``` @@ -21,47 +22,56 @@ import sys # other = sys.argv[3] client = openai.OpenAI(api_key= "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") client.api_key = "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" -def handler(event, context): - print(event) + +def analyze_url(url): assistant = client.beta.assistants.create( name="pdf-reader", instructions=BASE_PROMPT, model="gpt-4o", tools=[{"type": "file_search"}], ) - import urllib.request - - url = event['url'] - print ("URL IS", url) - with urllib.request.urlopen(event['url']) as response: + with urllib.request.urlopen(url) as response: data = response.read() with open("/tmp/test.pdf", "wb") as f: f.write(data) + f.close() # Upload the user provided file to OpenAI - message_file = client.files.create( - file=open("/tmp/test.pdf", "rb"), purpose="assistants" - ) - - # Create a thread and attach the file to the message - thread = client.beta.threads.create( - messages=[ - { - "role": "user", - "content": "extract the invoice(s) and/or credit(s) details from this invoice or statement", - # Attach the new file to the message. - "attachments": [ - { "file_id": message_file.id, "tools": [{"type": "file_search"}] } - ], - } - ] - ) + with open('/tmp/test.pdf', 'rb') as f: + message_file = client.files.create( + file=f, purpose="assistants" + ) + + # Create a thread and attach the file to the message + thread = client.beta.threads.create( + messages=[ + { + "role": "user", + "content": "extract the invoice(s) and/or credit(s) details from this invoice or statement", + # Attach the new file to the message. + "attachments": [ + { "file_id": message_file.id, "tools": [{"type": "file_search"}] } + ], + } + ] + ) + print(thread.id) run = client.beta.threads.runs.create_and_poll( thread_id=thread.id, assistant_id=assistant.id ) messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id)) + print("messages", messages) + print("status", run.status) + print("full run", run) + return json.loads(messages[0].content[0].text.value) - print(run.usage) +def handler(event, context): + print(event) + import urllib.request - return json.loads(messages[0].content[0].text.value) \ No newline at end of file + url = event['url'] + print ("URL IS", url) + messages = analyze_url(url) + + return analyze_url(url) \ No newline at end of file diff --git a/code/schema.json b/code/schema.json index 2fe45e3..6b6e73b 100644 --- a/code/schema.json +++ b/code/schema.json @@ -6,6 +6,10 @@ "items": { "type": "object", "properties": { + "explanation": { + "description": "In the case of an error or no invoice, this field should always have a detailed explanation as to why.", + "type": "string" + }, "customer_identifier": { "description": "The customer's name. e.g., ABC Corporation, Microsoft, etc.", "type": "string" diff --git a/terraform.tfstate b/terraform.tfstate index 55df540..1260a7e 100644 --- a/terraform.tfstate +++ b/terraform.tfstate @@ -1,7 +1,7 @@ { "version": 4, - "terraform_version": "1.8.3", - "serial": 50, + "terraform_version": "1.9.2", + "serial": 56, "lineage": "3d9e9e5b-e59a-3f03-49c9-906d67028b61", "outputs": {}, "resources": [ @@ -16,16 +16,16 @@ "attributes": { "exclude_symlink_directories": null, "excludes": null, - "id": "cfdf624da45558bff9a94f88d4d77966b7171f2f", - "output_base64sha256": "ObwHNFJSs3OzPP8ZLVn0U0aRifejNlRtnr9SWB7rbGs=", - "output_base64sha512": "rh0XpxdZ4fLJsAfz5xVY5FRvsBwxJk8uH0BNB8QapEJr7G2OJMKgPaTvk5Kzkx0+hLRddICaZuLzno0smXr7Bw==", + "id": "b017e188ffb4ac90572fc0b0908332005a7cd1f1", + "output_base64sha256": "Q+QOG/IfIeef3tp45NMom0kxMIl5ZotY2hHGU6qawZ4=", + "output_base64sha512": "aCaHbU927cJtQLNtqUvbVhZE9tPZu8Wpc7JCZkhPs8IF6xpljNa379vKcHcLMprDJ+oN77E6QAN9Ob5F/TAjvA==", "output_file_mode": null, - "output_md5": "d152694d3476fff84ff4979df86e95e9", + "output_md5": "927e485721c9b9022fa51a7ad012b2bc", "output_path": "lambda_function_payload.zip", - "output_sha": "cfdf624da45558bff9a94f88d4d77966b7171f2f", - "output_sha256": "39bc07345252b373b33cff192d59f453469189f7a336546d9ebf52581eeb6c6b", - "output_sha512": "ae1d17a71759e1f2c9b007f3e71558e4546fb01c31264f2e1f404d07c41aa4426bec6d8e24c2a03da4ef9392b3931d3e84b45d74809a66e2f39e8d2c997afb07", - "output_size": 1714, + "output_sha": "b017e188ffb4ac90572fc0b0908332005a7cd1f1", + "output_sha256": "43e40e1bf21f21e79fdeda78e4d3289b4931308979668b58da11c653aa9ac19e", + "output_sha512": "6826876d4f76edc26d40b36da94bdb561644f6d3d9bbc5a973b24266484fb3c205eb1a658cd6b7efdbca70770b329ac327ea0defb13a40037d39be45fd3023bc", + "output_size": 3940, "source": [], "source_content": null, "source_content_filename": null, @@ -130,7 +130,9 @@ "force_detach_policies": false, "id": "glimpse2", "inline_policy": [], - "managed_policy_arns": [], + "managed_policy_arns": [ + "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + ], "max_session_duration": 3600, "name": "glimpse2", "name_prefix": "", @@ -161,7 +163,7 @@ "x86_64" ], "arn": "arn:aws:lambda:us-east-1:679918342773:function:glimpse2", - "code_sha256": "ObwHNFJSs3OzPP8ZLVn0U0aRifejNlRtnr9SWB7rbGs=", + "code_sha256": "Q+QOG/IfIeef3tp45NMom0kxMIl5ZotY2hHGU6qawZ4=", "code_signing_config_arn": "", "dead_letter_config": [], "description": "", @@ -186,7 +188,7 @@ "image_uri": "", "invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:679918342773:function:glimpse2/invocations", "kms_key_arn": "", - "last_modified": "2024-06-07T17:43:33.000+0000", + "last_modified": "2024-10-25T06:25:22.000+0000", "layers": [ "arn:aws:lambda:us-east-1:679918342773:layer:openai-layer:3" ], @@ -215,8 +217,8 @@ "signing_profile_version_arn": "", "skip_destroy": false, "snap_start": [], - "source_code_hash": "ObwHNFJSs3OzPP8ZLVn0U0aRifejNlRtnr9SWB7rbGs=", - "source_code_size": 1714, + "source_code_hash": "Q+QOG/IfIeef3tp45NMom0kxMIl5ZotY2hHGU6qawZ4=", + "source_code_size": 3940, "tags": {}, "tags_all": {}, "timeout": 30, diff --git a/terraform.tfstate.backup b/terraform.tfstate.backup index 6ed53b8..f2f3c34 100644 --- a/terraform.tfstate.backup +++ b/terraform.tfstate.backup @@ -1,7 +1,7 @@ { "version": 4, - "terraform_version": "1.8.3", - "serial": 48, + "terraform_version": "1.9.2", + "serial": 54, "lineage": "3d9e9e5b-e59a-3f03-49c9-906d67028b61", "outputs": {}, "resources": [ @@ -16,16 +16,16 @@ "attributes": { "exclude_symlink_directories": null, "excludes": null, - "id": "83bc11e065e027bfc2509b3ddcec47741d40f993", - "output_base64sha256": "e6wS99VKL4X2uGlzhnV4a++pz9Ra8db/ZRmtWqKkNAw=", - "output_base64sha512": "JWqbfYXtz7VDOdGpdJnCpU9cFR2CT3rxkcArQghTlSskTdAMM6Y2XZ63sNiu8oCdAVSEGoxK7K0umQyLXeZS3w==", + "id": "591123473fcc148c2eb55683ecc13950d7fb9604", + "output_base64sha256": "eO1ckJE5H3wuZAK+8TOLzw4amZ4l2TfsWrAz/DGwtUM=", + "output_base64sha512": "W1s8lbpmh0bqBXEYZH4xOAC8urV+rRVYjDumTPz4KfhfniDRADgDOKYtDg9x5bsQHIxoQ7P9EG9ZyV/PjFDv9Q==", "output_file_mode": null, - "output_md5": "98ad48dafff07cbe7f7adc6ae65cce00", + "output_md5": "42025c95847dced03a50a9eaa1d76fd2", "output_path": "lambda_function_payload.zip", - "output_sha": "83bc11e065e027bfc2509b3ddcec47741d40f993", - "output_sha256": "7bac12f7d54a2f85f6b869738675786befa9cfd45af1d6ff6519ad5aa2a4340c", - "output_sha512": "256a9b7d85edcfb54339d1a97499c2a54f5c151d824f7af191c02b420853952b244dd00c33a6365d9eb7b0d8aef2809d0154841a8c4aecad2e990c8b5de652df", - "output_size": 1656, + "output_sha": "591123473fcc148c2eb55683ecc13950d7fb9604", + "output_sha256": "78ed5c9091391f7c2e6402bef1338bcf0e1a999e25d937ec5ab033fc31b0b543", + "output_sha512": "5b5b3c95ba668746ea057118647e313800bcbab57ead15588c3ba64cfcf829f85f9e20d100380338a62d0e0f71e5bb101c8c6843b3fd106f59c95fcf8c50eff5", + "output_size": 1824, "source": [], "source_content": null, "source_content_filename": null, @@ -130,7 +130,9 @@ "force_detach_policies": false, "id": "glimpse2", "inline_policy": [], - "managed_policy_arns": [], + "managed_policy_arns": [ + "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + ], "max_session_duration": 3600, "name": "glimpse2", "name_prefix": "", @@ -161,7 +163,7 @@ "x86_64" ], "arn": "arn:aws:lambda:us-east-1:679918342773:function:glimpse2", - "code_sha256": "e6wS99VKL4X2uGlzhnV4a++pz9Ra8db/ZRmtWqKkNAw=", + "code_sha256": "eO1ckJE5H3wuZAK+8TOLzw4amZ4l2TfsWrAz/DGwtUM=", "code_signing_config_arn": "", "dead_letter_config": [], "description": "", @@ -186,7 +188,7 @@ "image_uri": "", "invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:679918342773:function:glimpse2/invocations", "kms_key_arn": "", - "last_modified": "2024-06-07T17:38:19.000+0000", + "last_modified": "2024-10-25T06:03:21.000+0000", "layers": [ "arn:aws:lambda:us-east-1:679918342773:layer:openai-layer:3" ], @@ -215,8 +217,8 @@ "signing_profile_version_arn": "", "skip_destroy": false, "snap_start": [], - "source_code_hash": "e6wS99VKL4X2uGlzhnV4a++pz9Ra8db/ZRmtWqKkNAw=", - "source_code_size": 1656, + "source_code_hash": "eO1ckJE5H3wuZAK+8TOLzw4amZ4l2TfsWrAz/DGwtUM=", + "source_code_size": 1824, "tags": {}, "tags_all": {}, "timeout": 30,