makes statements work.
This commit is contained in:
10
code/main.py
10
code/main.py
@@ -9,7 +9,7 @@ def slurp_file(filename):
|
|||||||
data = file.read()
|
data = file.read()
|
||||||
return data
|
return data
|
||||||
|
|
||||||
BASE_PROMPT="""You extract invoice details from pdfs. Always follow this json schema. Do not respond with anything except the raw json response. Do not respond in code blocks(```).
|
BASE_PROMPT="""You extract invoice details from pdfs. Some pdfs are invoices, some are credits, and some are statements that may contain statements or credits. Numbers in parenthesis typically indicate credits. Always follow this json schema. Do not respond with anything except the raw json response. Do not respond in code blocks(```).
|
||||||
```
|
```
|
||||||
{}
|
{}
|
||||||
```
|
```
|
||||||
@@ -22,6 +22,7 @@ import sys
|
|||||||
client = openai.OpenAI(api_key= "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")
|
client = openai.OpenAI(api_key= "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")
|
||||||
client.api_key = "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"
|
client.api_key = "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"
|
||||||
def handler(event, context):
|
def handler(event, context):
|
||||||
|
print(event)
|
||||||
assistant = client.beta.assistants.create(
|
assistant = client.beta.assistants.create(
|
||||||
name="pdf-reader",
|
name="pdf-reader",
|
||||||
instructions=BASE_PROMPT,
|
instructions=BASE_PROMPT,
|
||||||
@@ -30,8 +31,9 @@ def handler(event, context):
|
|||||||
)
|
)
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
|
||||||
url = event.url
|
url = event['url']
|
||||||
with urllib.request.urlopen(url) as response:
|
print ("URL IS", url)
|
||||||
|
with urllib.request.urlopen(event['url']) as response:
|
||||||
data = response.read()
|
data = response.read()
|
||||||
with open("/tmp/test.pdf", "wb") as f:
|
with open("/tmp/test.pdf", "wb") as f:
|
||||||
f.write(data)
|
f.write(data)
|
||||||
@@ -46,7 +48,7 @@ def handler(event, context):
|
|||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": "extract the details from this invoice",
|
"content": "extract the invoice(s) and/or credit(s) details from this invoice or statement",
|
||||||
# Attach the new file to the message.
|
# Attach the new file to the message.
|
||||||
"attachments": [
|
"attachments": [
|
||||||
{ "file_id": message_file.id, "tools": [{"type": "file_search"}] }
|
{ "file_id": message_file.id, "tools": [{"type": "file_search"}] }
|
||||||
|
|||||||
@@ -1,13 +1,19 @@
|
|||||||
{
|
{
|
||||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
"title": "Invoice Data Schema",
|
"title": "Invoice schema",
|
||||||
"description": "A schema to validate invoice data",
|
"description": "A schema to validate invoice data",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"customer_identifier": {
|
"customer_identifier": {
|
||||||
"description": "The customer's name. e.g., ABC Corporation, Microsoft, etc.",
|
"description": "The customer's name. e.g., ABC Corporation, Microsoft, etc.",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
|
"vendor_identifier": {
|
||||||
|
"description": "The vendor's name",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
"date": {
|
"date": {
|
||||||
"description": "Invoice date in ISO 8601 format (YYYY-MM-DD).",
|
"description": "Invoice date in ISO 8601 format (YYYY-MM-DD).",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
@@ -18,7 +24,7 @@
|
|||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
"account_number": {
|
"account_number": {
|
||||||
"description": "Customer's account number associated with the invoice.",
|
"description": "Customer's account number associated with the invoice. Not always present on the invoice.",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
"total": {
|
"total": {
|
||||||
@@ -27,6 +33,7 @@
|
|||||||
"pattern": "^\\d+(\\.\\d{1,2})?$"
|
"pattern": "^\\d+(\\.\\d{1,2})?$"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": ["customer_identifier", "date", "invoice_number", "account_number", "total"],
|
"required": ["customer_identifier", "vendor_identifier", "date", "invoice_number", "total"],
|
||||||
"additionalProperties": false
|
"additionalProperties": false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"version": 4,
|
"version": 4,
|
||||||
"terraform_version": "1.8.3",
|
"terraform_version": "1.8.3",
|
||||||
"serial": 36,
|
"serial": 50,
|
||||||
"lineage": "3d9e9e5b-e59a-3f03-49c9-906d67028b61",
|
"lineage": "3d9e9e5b-e59a-3f03-49c9-906d67028b61",
|
||||||
"outputs": {},
|
"outputs": {},
|
||||||
"resources": [
|
"resources": [
|
||||||
@@ -16,16 +16,16 @@
|
|||||||
"attributes": {
|
"attributes": {
|
||||||
"exclude_symlink_directories": null,
|
"exclude_symlink_directories": null,
|
||||||
"excludes": null,
|
"excludes": null,
|
||||||
"id": "e0f5cbbaf15ba9f6bd489db9345bb5583713008d",
|
"id": "cfdf624da45558bff9a94f88d4d77966b7171f2f",
|
||||||
"output_base64sha256": "kAea/gClyQLrEU+ooG5R0S18OT/nheJiIrxvecC1LrA=",
|
"output_base64sha256": "ObwHNFJSs3OzPP8ZLVn0U0aRifejNlRtnr9SWB7rbGs=",
|
||||||
"output_base64sha512": "dC70fiIcas64eAphZGf2ggIF0VyqhSfz1yELssjFL096AnlN5RwRuL44uq5ovdxWME/I9zLTal4IpD4lDqf+FA==",
|
"output_base64sha512": "rh0XpxdZ4fLJsAfz5xVY5FRvsBwxJk8uH0BNB8QapEJr7G2OJMKgPaTvk5Kzkx0+hLRddICaZuLzno0smXr7Bw==",
|
||||||
"output_file_mode": null,
|
"output_file_mode": null,
|
||||||
"output_md5": "3ef9dc50a088c997910c16c62d690a03",
|
"output_md5": "d152694d3476fff84ff4979df86e95e9",
|
||||||
"output_path": "lambda_function_payload.zip",
|
"output_path": "lambda_function_payload.zip",
|
||||||
"output_sha": "e0f5cbbaf15ba9f6bd489db9345bb5583713008d",
|
"output_sha": "cfdf624da45558bff9a94f88d4d77966b7171f2f",
|
||||||
"output_sha256": "90079afe00a5c902eb114fa8a06e51d12d7c393fe785e26222bc6f79c0b52eb0",
|
"output_sha256": "39bc07345252b373b33cff192d59f453469189f7a336546d9ebf52581eeb6c6b",
|
||||||
"output_sha512": "742ef47e221c6aceb8780a616467f6820205d15caa8527f3d7210bb2c8c52f4f7a02794de51c11b8be38baae68bddc56304fc8f732d36a5e08a43e250ea7fe14",
|
"output_sha512": "ae1d17a71759e1f2c9b007f3e71558e4546fb01c31264f2e1f404d07c41aa4426bec6d8e24c2a03da4ef9392b3931d3e84b45d74809a66e2f39e8d2c997afb07",
|
||||||
"output_size": 1817,
|
"output_size": 1714,
|
||||||
"source": [],
|
"source": [],
|
||||||
"source_content": null,
|
"source_content": null,
|
||||||
"source_content_filename": null,
|
"source_content_filename": null,
|
||||||
@@ -161,7 +161,7 @@
|
|||||||
"x86_64"
|
"x86_64"
|
||||||
],
|
],
|
||||||
"arn": "arn:aws:lambda:us-east-1:679918342773:function:glimpse2",
|
"arn": "arn:aws:lambda:us-east-1:679918342773:function:glimpse2",
|
||||||
"code_sha256": "kAea/gClyQLrEU+ooG5R0S18OT/nheJiIrxvecC1LrA=",
|
"code_sha256": "ObwHNFJSs3OzPP8ZLVn0U0aRifejNlRtnr9SWB7rbGs=",
|
||||||
"code_signing_config_arn": "",
|
"code_signing_config_arn": "",
|
||||||
"dead_letter_config": [],
|
"dead_letter_config": [],
|
||||||
"description": "",
|
"description": "",
|
||||||
@@ -186,7 +186,7 @@
|
|||||||
"image_uri": "",
|
"image_uri": "",
|
||||||
"invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:679918342773:function:glimpse2/invocations",
|
"invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:679918342773:function:glimpse2/invocations",
|
||||||
"kms_key_arn": "",
|
"kms_key_arn": "",
|
||||||
"last_modified": "2024-05-31T04:11:26.000+0000",
|
"last_modified": "2024-06-07T17:43:33.000+0000",
|
||||||
"layers": [
|
"layers": [
|
||||||
"arn:aws:lambda:us-east-1:679918342773:layer:openai-layer:3"
|
"arn:aws:lambda:us-east-1:679918342773:layer:openai-layer:3"
|
||||||
],
|
],
|
||||||
@@ -215,8 +215,8 @@
|
|||||||
"signing_profile_version_arn": "",
|
"signing_profile_version_arn": "",
|
||||||
"skip_destroy": false,
|
"skip_destroy": false,
|
||||||
"snap_start": [],
|
"snap_start": [],
|
||||||
"source_code_hash": "kAea/gClyQLrEU+ooG5R0S18OT/nheJiIrxvecC1LrA=",
|
"source_code_hash": "ObwHNFJSs3OzPP8ZLVn0U0aRifejNlRtnr9SWB7rbGs=",
|
||||||
"source_code_size": 1817,
|
"source_code_size": 1714,
|
||||||
"tags": {},
|
"tags": {},
|
||||||
"tags_all": {},
|
"tags_all": {},
|
||||||
"timeout": 30,
|
"timeout": 30,
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"version": 4,
|
"version": 4,
|
||||||
"terraform_version": "1.8.3",
|
"terraform_version": "1.8.3",
|
||||||
"serial": 34,
|
"serial": 48,
|
||||||
"lineage": "3d9e9e5b-e59a-3f03-49c9-906d67028b61",
|
"lineage": "3d9e9e5b-e59a-3f03-49c9-906d67028b61",
|
||||||
"outputs": {},
|
"outputs": {},
|
||||||
"resources": [
|
"resources": [
|
||||||
@@ -16,16 +16,16 @@
|
|||||||
"attributes": {
|
"attributes": {
|
||||||
"exclude_symlink_directories": null,
|
"exclude_symlink_directories": null,
|
||||||
"excludes": null,
|
"excludes": null,
|
||||||
"id": "cd2da52ee06f5a5ebcb7268242d15663c75bb601",
|
"id": "83bc11e065e027bfc2509b3ddcec47741d40f993",
|
||||||
"output_base64sha256": "Jks5sSegvcGcJtmhgSv+46jgP/Z5htIgnKRonNQyNHM=",
|
"output_base64sha256": "e6wS99VKL4X2uGlzhnV4a++pz9Ra8db/ZRmtWqKkNAw=",
|
||||||
"output_base64sha512": "L8ytVDZYU9x9C6kchibgDbJF81HDp5xRqJB029ISzsqHtvqGMdCI6Isy+pBnx0x0CVdstv88OyHXjVci/J2v+w==",
|
"output_base64sha512": "JWqbfYXtz7VDOdGpdJnCpU9cFR2CT3rxkcArQghTlSskTdAMM6Y2XZ63sNiu8oCdAVSEGoxK7K0umQyLXeZS3w==",
|
||||||
"output_file_mode": null,
|
"output_file_mode": null,
|
||||||
"output_md5": "7ead91cdc01b34ae233fc6c269422a51",
|
"output_md5": "98ad48dafff07cbe7f7adc6ae65cce00",
|
||||||
"output_path": "lambda_function_payload.zip",
|
"output_path": "lambda_function_payload.zip",
|
||||||
"output_sha": "cd2da52ee06f5a5ebcb7268242d15663c75bb601",
|
"output_sha": "83bc11e065e027bfc2509b3ddcec47741d40f993",
|
||||||
"output_sha256": "264b39b127a0bdc19c26d9a1812bfee3a8e03ff67986d2209ca4689cd4323473",
|
"output_sha256": "7bac12f7d54a2f85f6b869738675786befa9cfd45af1d6ff6519ad5aa2a4340c",
|
||||||
"output_sha512": "2fccad54365853dc7d0ba91c8626e00db245f351c3a79c51a89074dbd212ceca87b6fa8631d088e88b32fa9067c74c7409576cb6ff3c3b21d78d5722fc9daffb",
|
"output_sha512": "256a9b7d85edcfb54339d1a97499c2a54f5c151d824f7af191c02b420853952b244dd00c33a6365d9eb7b0d8aef2809d0154841a8c4aecad2e990c8b5de652df",
|
||||||
"output_size": 1810,
|
"output_size": 1656,
|
||||||
"source": [],
|
"source": [],
|
||||||
"source_content": null,
|
"source_content": null,
|
||||||
"source_content_filename": null,
|
"source_content_filename": null,
|
||||||
@@ -161,7 +161,7 @@
|
|||||||
"x86_64"
|
"x86_64"
|
||||||
],
|
],
|
||||||
"arn": "arn:aws:lambda:us-east-1:679918342773:function:glimpse2",
|
"arn": "arn:aws:lambda:us-east-1:679918342773:function:glimpse2",
|
||||||
"code_sha256": "Jks5sSegvcGcJtmhgSv+46jgP/Z5htIgnKRonNQyNHM=",
|
"code_sha256": "e6wS99VKL4X2uGlzhnV4a++pz9Ra8db/ZRmtWqKkNAw=",
|
||||||
"code_signing_config_arn": "",
|
"code_signing_config_arn": "",
|
||||||
"dead_letter_config": [],
|
"dead_letter_config": [],
|
||||||
"description": "",
|
"description": "",
|
||||||
@@ -186,7 +186,7 @@
|
|||||||
"image_uri": "",
|
"image_uri": "",
|
||||||
"invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:679918342773:function:glimpse2/invocations",
|
"invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:679918342773:function:glimpse2/invocations",
|
||||||
"kms_key_arn": "",
|
"kms_key_arn": "",
|
||||||
"last_modified": "2024-05-31T04:10:40.000+0000",
|
"last_modified": "2024-06-07T17:38:19.000+0000",
|
||||||
"layers": [
|
"layers": [
|
||||||
"arn:aws:lambda:us-east-1:679918342773:layer:openai-layer:3"
|
"arn:aws:lambda:us-east-1:679918342773:layer:openai-layer:3"
|
||||||
],
|
],
|
||||||
@@ -215,8 +215,8 @@
|
|||||||
"signing_profile_version_arn": "",
|
"signing_profile_version_arn": "",
|
||||||
"skip_destroy": false,
|
"skip_destroy": false,
|
||||||
"snap_start": [],
|
"snap_start": [],
|
||||||
"source_code_hash": "Jks5sSegvcGcJtmhgSv+46jgP/Z5htIgnKRonNQyNHM=",
|
"source_code_hash": "e6wS99VKL4X2uGlzhnV4a++pz9Ra8db/ZRmtWqKkNAw=",
|
||||||
"source_code_size": 1810,
|
"source_code_size": 1656,
|
||||||
"tags": {},
|
"tags": {},
|
||||||
"tags_all": {},
|
"tags_all": {},
|
||||||
"timeout": 30,
|
"timeout": 30,
|
||||||
|
|||||||
Reference in New Issue
Block a user