makes statements work.
This commit is contained in:
10
code/main.py
10
code/main.py
@@ -9,7 +9,7 @@ def slurp_file(filename):
|
|||||||
data = file.read()
|
data = file.read()
|
||||||
return data
|
return data
|
||||||
|
|
||||||
BASE_PROMPT="""You extract invoice details from pdfs. Always follow this json schema. Do not respond with anything except the raw json response. Do not respond in code blocks(```).
|
BASE_PROMPT="""You extract invoice details from pdfs. Some pdfs are invoices, some are credits, and some are statements that may contain statements or credits. Numbers in parenthesis typically indicate credits. Always follow this json schema. Do not respond with anything except the raw json response. Do not respond in code blocks(```).
|
||||||
```
|
```
|
||||||
{}
|
{}
|
||||||
```
|
```
|
||||||
@@ -22,6 +22,7 @@ import sys
|
|||||||
client = openai.OpenAI(api_key= "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")
|
client = openai.OpenAI(api_key= "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")
|
||||||
client.api_key = "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"
|
client.api_key = "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"
|
||||||
def handler(event, context):
|
def handler(event, context):
|
||||||
|
print(event)
|
||||||
assistant = client.beta.assistants.create(
|
assistant = client.beta.assistants.create(
|
||||||
name="pdf-reader",
|
name="pdf-reader",
|
||||||
instructions=BASE_PROMPT,
|
instructions=BASE_PROMPT,
|
||||||
@@ -30,8 +31,9 @@ def handler(event, context):
|
|||||||
)
|
)
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
|
||||||
url = event.url
|
url = event['url']
|
||||||
with urllib.request.urlopen(url) as response:
|
print ("URL IS", url)
|
||||||
|
with urllib.request.urlopen(event['url']) as response:
|
||||||
data = response.read()
|
data = response.read()
|
||||||
with open("/tmp/test.pdf", "wb") as f:
|
with open("/tmp/test.pdf", "wb") as f:
|
||||||
f.write(data)
|
f.write(data)
|
||||||
@@ -46,7 +48,7 @@ def handler(event, context):
|
|||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": "extract the details from this invoice",
|
"content": "extract the invoice(s) and/or credit(s) details from this invoice or statement",
|
||||||
# Attach the new file to the message.
|
# Attach the new file to the message.
|
||||||
"attachments": [
|
"attachments": [
|
||||||
{ "file_id": message_file.id, "tools": [{"type": "file_search"}] }
|
{ "file_id": message_file.id, "tools": [{"type": "file_search"}] }
|
||||||
|
|||||||
@@ -1,32 +1,39 @@
|
|||||||
{
|
{
|
||||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
"title": "Invoice Data Schema",
|
"title": "Invoice schema",
|
||||||
"description": "A schema to validate invoice data",
|
"description": "A schema to validate invoice data",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"customer_identifier": {
|
"customer_identifier": {
|
||||||
"description": "The customer's name. e.g., ABC Corporation, Microsoft, etc.",
|
"description": "The customer's name. e.g., ABC Corporation, Microsoft, etc.",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
},
|
||||||
|
"vendor_identifier": {
|
||||||
|
"description": "The vendor's name",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"date": {
|
||||||
|
"description": "Invoice date in ISO 8601 format (YYYY-MM-DD).",
|
||||||
|
"type": "string",
|
||||||
|
"format": "date"
|
||||||
|
},
|
||||||
|
"invoice_number": {
|
||||||
|
"description": "Unique invoice number for the transaction.",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"account_number": {
|
||||||
|
"description": "Customer's account number associated with the invoice. Not always present on the invoice.",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"total": {
|
||||||
|
"description": "Total amount of the invoice, including taxes and fees. It should be a decimal number as a string.",
|
||||||
|
"type": "string",
|
||||||
|
"pattern": "^\\d+(\\.\\d{1,2})?$"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"date": {
|
"required": ["customer_identifier", "vendor_identifier", "date", "invoice_number", "total"],
|
||||||
"description": "Invoice date in ISO 8601 format (YYYY-MM-DD).",
|
"additionalProperties": false
|
||||||
"type": "string",
|
}
|
||||||
"format": "date"
|
|
||||||
},
|
|
||||||
"invoice_number": {
|
|
||||||
"description": "Unique invoice number for the transaction.",
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"account_number": {
|
|
||||||
"description": "Customer's account number associated with the invoice.",
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"total": {
|
|
||||||
"description": "Total amount of the invoice, including taxes and fees. It should be a decimal number as a string.",
|
|
||||||
"type": "string",
|
|
||||||
"pattern": "^\\d+(\\.\\d{1,2})?$"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"required": ["customer_identifier", "date", "invoice_number", "account_number", "total"],
|
|
||||||
"additionalProperties": false
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"version": 4,
|
"version": 4,
|
||||||
"terraform_version": "1.8.3",
|
"terraform_version": "1.8.3",
|
||||||
"serial": 36,
|
"serial": 50,
|
||||||
"lineage": "3d9e9e5b-e59a-3f03-49c9-906d67028b61",
|
"lineage": "3d9e9e5b-e59a-3f03-49c9-906d67028b61",
|
||||||
"outputs": {},
|
"outputs": {},
|
||||||
"resources": [
|
"resources": [
|
||||||
@@ -16,16 +16,16 @@
|
|||||||
"attributes": {
|
"attributes": {
|
||||||
"exclude_symlink_directories": null,
|
"exclude_symlink_directories": null,
|
||||||
"excludes": null,
|
"excludes": null,
|
||||||
"id": "e0f5cbbaf15ba9f6bd489db9345bb5583713008d",
|
"id": "cfdf624da45558bff9a94f88d4d77966b7171f2f",
|
||||||
"output_base64sha256": "kAea/gClyQLrEU+ooG5R0S18OT/nheJiIrxvecC1LrA=",
|
"output_base64sha256": "ObwHNFJSs3OzPP8ZLVn0U0aRifejNlRtnr9SWB7rbGs=",
|
||||||
"output_base64sha512": "dC70fiIcas64eAphZGf2ggIF0VyqhSfz1yELssjFL096AnlN5RwRuL44uq5ovdxWME/I9zLTal4IpD4lDqf+FA==",
|
"output_base64sha512": "rh0XpxdZ4fLJsAfz5xVY5FRvsBwxJk8uH0BNB8QapEJr7G2OJMKgPaTvk5Kzkx0+hLRddICaZuLzno0smXr7Bw==",
|
||||||
"output_file_mode": null,
|
"output_file_mode": null,
|
||||||
"output_md5": "3ef9dc50a088c997910c16c62d690a03",
|
"output_md5": "d152694d3476fff84ff4979df86e95e9",
|
||||||
"output_path": "lambda_function_payload.zip",
|
"output_path": "lambda_function_payload.zip",
|
||||||
"output_sha": "e0f5cbbaf15ba9f6bd489db9345bb5583713008d",
|
"output_sha": "cfdf624da45558bff9a94f88d4d77966b7171f2f",
|
||||||
"output_sha256": "90079afe00a5c902eb114fa8a06e51d12d7c393fe785e26222bc6f79c0b52eb0",
|
"output_sha256": "39bc07345252b373b33cff192d59f453469189f7a336546d9ebf52581eeb6c6b",
|
||||||
"output_sha512": "742ef47e221c6aceb8780a616467f6820205d15caa8527f3d7210bb2c8c52f4f7a02794de51c11b8be38baae68bddc56304fc8f732d36a5e08a43e250ea7fe14",
|
"output_sha512": "ae1d17a71759e1f2c9b007f3e71558e4546fb01c31264f2e1f404d07c41aa4426bec6d8e24c2a03da4ef9392b3931d3e84b45d74809a66e2f39e8d2c997afb07",
|
||||||
"output_size": 1817,
|
"output_size": 1714,
|
||||||
"source": [],
|
"source": [],
|
||||||
"source_content": null,
|
"source_content": null,
|
||||||
"source_content_filename": null,
|
"source_content_filename": null,
|
||||||
@@ -161,7 +161,7 @@
|
|||||||
"x86_64"
|
"x86_64"
|
||||||
],
|
],
|
||||||
"arn": "arn:aws:lambda:us-east-1:679918342773:function:glimpse2",
|
"arn": "arn:aws:lambda:us-east-1:679918342773:function:glimpse2",
|
||||||
"code_sha256": "kAea/gClyQLrEU+ooG5R0S18OT/nheJiIrxvecC1LrA=",
|
"code_sha256": "ObwHNFJSs3OzPP8ZLVn0U0aRifejNlRtnr9SWB7rbGs=",
|
||||||
"code_signing_config_arn": "",
|
"code_signing_config_arn": "",
|
||||||
"dead_letter_config": [],
|
"dead_letter_config": [],
|
||||||
"description": "",
|
"description": "",
|
||||||
@@ -186,7 +186,7 @@
|
|||||||
"image_uri": "",
|
"image_uri": "",
|
||||||
"invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:679918342773:function:glimpse2/invocations",
|
"invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:679918342773:function:glimpse2/invocations",
|
||||||
"kms_key_arn": "",
|
"kms_key_arn": "",
|
||||||
"last_modified": "2024-05-31T04:11:26.000+0000",
|
"last_modified": "2024-06-07T17:43:33.000+0000",
|
||||||
"layers": [
|
"layers": [
|
||||||
"arn:aws:lambda:us-east-1:679918342773:layer:openai-layer:3"
|
"arn:aws:lambda:us-east-1:679918342773:layer:openai-layer:3"
|
||||||
],
|
],
|
||||||
@@ -215,8 +215,8 @@
|
|||||||
"signing_profile_version_arn": "",
|
"signing_profile_version_arn": "",
|
||||||
"skip_destroy": false,
|
"skip_destroy": false,
|
||||||
"snap_start": [],
|
"snap_start": [],
|
||||||
"source_code_hash": "kAea/gClyQLrEU+ooG5R0S18OT/nheJiIrxvecC1LrA=",
|
"source_code_hash": "ObwHNFJSs3OzPP8ZLVn0U0aRifejNlRtnr9SWB7rbGs=",
|
||||||
"source_code_size": 1817,
|
"source_code_size": 1714,
|
||||||
"tags": {},
|
"tags": {},
|
||||||
"tags_all": {},
|
"tags_all": {},
|
||||||
"timeout": 30,
|
"timeout": 30,
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"version": 4,
|
"version": 4,
|
||||||
"terraform_version": "1.8.3",
|
"terraform_version": "1.8.3",
|
||||||
"serial": 34,
|
"serial": 48,
|
||||||
"lineage": "3d9e9e5b-e59a-3f03-49c9-906d67028b61",
|
"lineage": "3d9e9e5b-e59a-3f03-49c9-906d67028b61",
|
||||||
"outputs": {},
|
"outputs": {},
|
||||||
"resources": [
|
"resources": [
|
||||||
@@ -16,16 +16,16 @@
|
|||||||
"attributes": {
|
"attributes": {
|
||||||
"exclude_symlink_directories": null,
|
"exclude_symlink_directories": null,
|
||||||
"excludes": null,
|
"excludes": null,
|
||||||
"id": "cd2da52ee06f5a5ebcb7268242d15663c75bb601",
|
"id": "83bc11e065e027bfc2509b3ddcec47741d40f993",
|
||||||
"output_base64sha256": "Jks5sSegvcGcJtmhgSv+46jgP/Z5htIgnKRonNQyNHM=",
|
"output_base64sha256": "e6wS99VKL4X2uGlzhnV4a++pz9Ra8db/ZRmtWqKkNAw=",
|
||||||
"output_base64sha512": "L8ytVDZYU9x9C6kchibgDbJF81HDp5xRqJB029ISzsqHtvqGMdCI6Isy+pBnx0x0CVdstv88OyHXjVci/J2v+w==",
|
"output_base64sha512": "JWqbfYXtz7VDOdGpdJnCpU9cFR2CT3rxkcArQghTlSskTdAMM6Y2XZ63sNiu8oCdAVSEGoxK7K0umQyLXeZS3w==",
|
||||||
"output_file_mode": null,
|
"output_file_mode": null,
|
||||||
"output_md5": "7ead91cdc01b34ae233fc6c269422a51",
|
"output_md5": "98ad48dafff07cbe7f7adc6ae65cce00",
|
||||||
"output_path": "lambda_function_payload.zip",
|
"output_path": "lambda_function_payload.zip",
|
||||||
"output_sha": "cd2da52ee06f5a5ebcb7268242d15663c75bb601",
|
"output_sha": "83bc11e065e027bfc2509b3ddcec47741d40f993",
|
||||||
"output_sha256": "264b39b127a0bdc19c26d9a1812bfee3a8e03ff67986d2209ca4689cd4323473",
|
"output_sha256": "7bac12f7d54a2f85f6b869738675786befa9cfd45af1d6ff6519ad5aa2a4340c",
|
||||||
"output_sha512": "2fccad54365853dc7d0ba91c8626e00db245f351c3a79c51a89074dbd212ceca87b6fa8631d088e88b32fa9067c74c7409576cb6ff3c3b21d78d5722fc9daffb",
|
"output_sha512": "256a9b7d85edcfb54339d1a97499c2a54f5c151d824f7af191c02b420853952b244dd00c33a6365d9eb7b0d8aef2809d0154841a8c4aecad2e990c8b5de652df",
|
||||||
"output_size": 1810,
|
"output_size": 1656,
|
||||||
"source": [],
|
"source": [],
|
||||||
"source_content": null,
|
"source_content": null,
|
||||||
"source_content_filename": null,
|
"source_content_filename": null,
|
||||||
@@ -161,7 +161,7 @@
|
|||||||
"x86_64"
|
"x86_64"
|
||||||
],
|
],
|
||||||
"arn": "arn:aws:lambda:us-east-1:679918342773:function:glimpse2",
|
"arn": "arn:aws:lambda:us-east-1:679918342773:function:glimpse2",
|
||||||
"code_sha256": "Jks5sSegvcGcJtmhgSv+46jgP/Z5htIgnKRonNQyNHM=",
|
"code_sha256": "e6wS99VKL4X2uGlzhnV4a++pz9Ra8db/ZRmtWqKkNAw=",
|
||||||
"code_signing_config_arn": "",
|
"code_signing_config_arn": "",
|
||||||
"dead_letter_config": [],
|
"dead_letter_config": [],
|
||||||
"description": "",
|
"description": "",
|
||||||
@@ -186,7 +186,7 @@
|
|||||||
"image_uri": "",
|
"image_uri": "",
|
||||||
"invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:679918342773:function:glimpse2/invocations",
|
"invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:679918342773:function:glimpse2/invocations",
|
||||||
"kms_key_arn": "",
|
"kms_key_arn": "",
|
||||||
"last_modified": "2024-05-31T04:10:40.000+0000",
|
"last_modified": "2024-06-07T17:38:19.000+0000",
|
||||||
"layers": [
|
"layers": [
|
||||||
"arn:aws:lambda:us-east-1:679918342773:layer:openai-layer:3"
|
"arn:aws:lambda:us-east-1:679918342773:layer:openai-layer:3"
|
||||||
],
|
],
|
||||||
@@ -215,8 +215,8 @@
|
|||||||
"signing_profile_version_arn": "",
|
"signing_profile_version_arn": "",
|
||||||
"skip_destroy": false,
|
"skip_destroy": false,
|
||||||
"snap_start": [],
|
"snap_start": [],
|
||||||
"source_code_hash": "Jks5sSegvcGcJtmhgSv+46jgP/Z5htIgnKRonNQyNHM=",
|
"source_code_hash": "e6wS99VKL4X2uGlzhnV4a++pz9Ra8db/ZRmtWqKkNAw=",
|
||||||
"source_code_size": 1810,
|
"source_code_size": 1656,
|
||||||
"tags": {},
|
"tags": {},
|
||||||
"tags_all": {},
|
"tags_all": {},
|
||||||
"timeout": 30,
|
"timeout": 30,
|
||||||
|
|||||||
Reference in New Issue
Block a user