Begins the process of AI-powered invoices

This commit is contained in:
Bryce
2023-08-02 22:34:56 -07:00
parent 7ed6a11ffd
commit 460e2077b9
9 changed files with 599 additions and 88 deletions

12
package-lock.json generated
View File

@@ -2827,9 +2827,9 @@
"peer": true "peer": true
}, },
"node_modules/yaml": { "node_modules/yaml": {
"version": "2.2.2", "version": "2.3.1",
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.2.2.tgz", "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.1.tgz",
"integrity": "sha512-CBKFWExMn46Foo4cldiChEzn7S7SRV+wqiluAb6xmueD/fGyRHIhX8m14vVGgeFWjN540nKCNVj6P21eQjgTuA==", "integrity": "sha512-2eHWfjaoXgTBC2jNM1LRef62VQa0umtvRiDSk6HSzW7RvS5YtkabJrwYLLEKWBc8a5U2PTSCs+dJjUTJdlHsWQ==",
"dev": true, "dev": true,
"peer": true, "peer": true,
"engines": { "engines": {
@@ -4976,9 +4976,9 @@
"peer": true "peer": true
}, },
"yaml": { "yaml": {
"version": "2.2.2", "version": "2.3.1",
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.2.2.tgz", "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.1.tgz",
"integrity": "sha512-CBKFWExMn46Foo4cldiChEzn7S7SRV+wqiluAb6xmueD/fGyRHIhX8m14vVGgeFWjN540nKCNVj6P21eQjgTuA==", "integrity": "sha512-2eHWfjaoXgTBC2jNM1LRef62VQa0umtvRiDSk6HSzW7RvS5YtkabJrwYLLEKWBc8a5U2PTSCs+dJjUTJdlHsWQ==",
"dev": true, "dev": true,
"peer": true "peer": true
} }

View File

@@ -1131,6 +1131,10 @@ input:checked + .toggle-bg {
grid-column: span 6 / span 6; grid-column: span 6 / span 6;
} }
.col-start-1 {
grid-column-start: 1;
}
.m-4 { .m-4 {
margin: 1rem; margin: 1rem;
} }
@@ -1376,12 +1380,20 @@ input:checked + .toggle-bg {
max-width: 1024px; max-width: 1024px;
} }
.flex-1 { .max-w-sm {
flex: 1 1 0%; max-width: 24rem;
} }
.flex-none { .max-w-md {
flex: none; max-width: 28rem;
}
.max-w-xl {
max-width: 36rem;
}
.flex-1 {
flex: 1 1 0%;
} }
.flex-shrink { .flex-shrink {
@@ -1392,6 +1404,14 @@ input:checked + .toggle-bg {
flex-shrink: 0; flex-shrink: 0;
} }
.basis-1\/2 {
flex-basis: 50%;
}
.basis-1\/4 {
flex-basis: 25%;
}
.-translate-x-full { .-translate-x-full {
--tw-translate-x: -100%; --tw-translate-x: -100%;
transform: translate(var(--tw-translate-x), var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y)); transform: translate(var(--tw-translate-x), var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y));
@@ -1476,6 +1496,10 @@ input:checked + .toggle-bg {
grid-template-columns: repeat(7, minmax(0, 1fr)); grid-template-columns: repeat(7, minmax(0, 1fr));
} }
.flex-row {
flex-direction: row;
}
.flex-row-reverse { .flex-row-reverse {
flex-direction: row-reverse; flex-direction: row-reverse;
} }
@@ -1488,10 +1512,6 @@ input:checked + .toggle-bg {
flex-wrap: wrap; flex-wrap: wrap;
} }
.content-start {
align-content: flex-start;
}
.items-start { .items-start {
align-items: flex-start; align-items: flex-start;
} }
@@ -1524,10 +1544,6 @@ input:checked + .toggle-bg {
justify-content: space-between; justify-content: space-between;
} }
.justify-items-start {
justify-items: start;
}
.gap-1 { .gap-1 {
gap: 0.25rem; gap: 0.25rem;
} }
@@ -1603,10 +1619,6 @@ input:checked + .toggle-bg {
border-color: rgb(243 244 246 / var(--tw-divide-opacity)); border-color: rgb(243 244 246 / var(--tw-divide-opacity));
} }
.justify-self-start {
justify-self: start;
}
.overflow-auto { .overflow-auto {
overflow: auto; overflow: auto;
} }
@@ -1676,6 +1688,10 @@ input:checked + .toggle-bg {
border-width: 2px; border-width: 2px;
} }
.border-4 {
border-width: 4px;
}
.border-b { .border-b {
border-bottom-width: 1px; border-bottom-width: 1px;
} }
@@ -1688,6 +1704,15 @@ input:checked + .toggle-bg {
border-top-width: 1px; border-top-width: 1px;
} }
.border-dashed {
border-style: dashed;
}
.border-blue-300 {
--tw-border-opacity: 1;
border-color: rgb(102 196 242 / var(--tw-border-opacity));
}
.border-blue-600 { .border-blue-600 {
--tw-border-opacity: 1; --tw-border-opacity: 1;
border-color: rgb(0 125 187 / var(--tw-border-opacity)); border-color: rgb(0 125 187 / var(--tw-border-opacity));
@@ -1733,6 +1758,11 @@ input:checked + .toggle-bg {
background-color: rgb(204 235 251 / var(--tw-bg-opacity)); background-color: rgb(204 235 251 / var(--tw-bg-opacity));
} }
.bg-blue-200 {
--tw-bg-opacity: 1;
background-color: rgb(153 215 247 / var(--tw-bg-opacity));
}
.bg-blue-300 { .bg-blue-300 {
--tw-bg-opacity: 1; --tw-bg-opacity: 1;
background-color: rgb(102 196 242 / var(--tw-bg-opacity)); background-color: rgb(102 196 242 / var(--tw-bg-opacity));
@@ -1783,6 +1813,11 @@ input:checked + .toggle-bg {
background-color: rgb(228 240 213 / var(--tw-bg-opacity)); background-color: rgb(228 240 213 / var(--tw-bg-opacity));
} }
.bg-green-200 {
--tw-bg-opacity: 1;
background-color: rgb(201 225 171 / var(--tw-bg-opacity));
}
.bg-green-500 { .bg-green-500 {
--tw-bg-opacity: 1; --tw-bg-opacity: 1;
background-color: rgb(121 181 46 / var(--tw-bg-opacity)); background-color: rgb(121 181 46 / var(--tw-bg-opacity));
@@ -1798,6 +1833,11 @@ input:checked + .toggle-bg {
background-color: rgb(255 205 205 / var(--tw-bg-opacity)); background-color: rgb(255 205 205 / var(--tw-bg-opacity));
} }
.bg-red-200 {
--tw-bg-opacity: 1;
background-color: rgb(255 154 154 / var(--tw-bg-opacity));
}
.bg-red-50 { .bg-red-50 {
--tw-bg-opacity: 1; --tw-bg-opacity: 1;
background-color: rgb(255 230 230 / var(--tw-bg-opacity)); background-color: rgb(255 230 230 / var(--tw-bg-opacity));
@@ -1817,21 +1857,6 @@ input:checked + .toggle-bg {
background-color: rgb(253 246 178 / var(--tw-bg-opacity)); background-color: rgb(253 246 178 / var(--tw-bg-opacity));
} }
.bg-green-200 {
--tw-bg-opacity: 1;
background-color: rgb(201 225 171 / var(--tw-bg-opacity));
}
.bg-blue-200 {
--tw-bg-opacity: 1;
background-color: rgb(153 215 247 / var(--tw-bg-opacity));
}
.bg-red-200 {
--tw-bg-opacity: 1;
background-color: rgb(255 154 154 / var(--tw-bg-opacity));
}
.bg-opacity-50 { .bg-opacity-50 {
--tw-bg-opacity: 0.5; --tw-bg-opacity: 0.5;
} }
@@ -2336,6 +2361,11 @@ input:checked + .toggle-bg {
border-color: rgb(209 213 219 / var(--tw-border-opacity)); border-color: rgb(209 213 219 / var(--tw-border-opacity));
} }
.hover\:bg-blue-300:hover {
--tw-bg-opacity: 1;
background-color: rgb(102 196 242 / var(--tw-bg-opacity));
}
.hover\:bg-blue-600:hover { .hover\:bg-blue-600:hover {
--tw-bg-opacity: 1; --tw-bg-opacity: 1;
background-color: rgb(0 125 187 / var(--tw-bg-opacity)); background-color: rgb(0 125 187 / var(--tw-bg-opacity));
@@ -2361,6 +2391,11 @@ input:checked + .toggle-bg {
background-color: rgb(228 240 213 / var(--tw-bg-opacity)); background-color: rgb(228 240 213 / var(--tw-bg-opacity));
} }
.hover\:bg-green-300:hover {
--tw-bg-opacity: 1;
background-color: rgb(175 211 130 / var(--tw-bg-opacity));
}
.hover\:bg-green-600:hover { .hover\:bg-green-600:hover {
--tw-bg-opacity: 1; --tw-bg-opacity: 1;
background-color: rgb(97 145 37 / var(--tw-bg-opacity)); background-color: rgb(97 145 37 / var(--tw-bg-opacity));
@@ -2376,26 +2411,16 @@ input:checked + .toggle-bg {
background-color: rgb(228 240 213 / var(--tw-bg-opacity)); background-color: rgb(228 240 213 / var(--tw-bg-opacity));
} }
.hover\:bg-white:hover {
--tw-bg-opacity: 1;
background-color: rgb(255 255 255 / var(--tw-bg-opacity));
}
.hover\:bg-green-300:hover {
--tw-bg-opacity: 1;
background-color: rgb(175 211 130 / var(--tw-bg-opacity));
}
.hover\:bg-blue-300:hover {
--tw-bg-opacity: 1;
background-color: rgb(102 196 242 / var(--tw-bg-opacity));
}
.hover\:bg-red-300:hover { .hover\:bg-red-300:hover {
--tw-bg-opacity: 1; --tw-bg-opacity: 1;
background-color: rgb(255 104 104 / var(--tw-bg-opacity)); background-color: rgb(255 104 104 / var(--tw-bg-opacity));
} }
.hover\:bg-white:hover {
--tw-bg-opacity: 1;
background-color: rgb(255 255 255 / var(--tw-bg-opacity));
}
.hover\:text-blue-600:hover { .hover\:text-blue-600:hover {
--tw-text-opacity: 1; --tw-text-opacity: 1;
color: rgb(0 125 187 / var(--tw-text-opacity)); color: rgb(0 125 187 / var(--tw-text-opacity));
@@ -2457,6 +2482,11 @@ input:checked + .toggle-bg {
box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000); box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
} }
.focus\:ring-blue-200:focus {
--tw-ring-opacity: 1;
--tw-ring-color: rgb(153 215 247 / var(--tw-ring-opacity));
}
.focus\:ring-blue-300:focus { .focus\:ring-blue-300:focus {
--tw-ring-opacity: 1; --tw-ring-opacity: 1;
--tw-ring-color: rgb(102 196 242 / var(--tw-ring-opacity)); --tw-ring-color: rgb(102 196 242 / var(--tw-ring-opacity));
@@ -2477,6 +2507,11 @@ input:checked + .toggle-bg {
--tw-ring-color: rgb(209 213 219 / var(--tw-ring-opacity)); --tw-ring-color: rgb(209 213 219 / var(--tw-ring-opacity));
} }
.focus\:ring-green-200:focus {
--tw-ring-opacity: 1;
--tw-ring-color: rgb(201 225 171 / var(--tw-ring-opacity));
}
.focus\:ring-green-300:focus { .focus\:ring-green-300:focus {
--tw-ring-opacity: 1; --tw-ring-opacity: 1;
--tw-ring-color: rgb(175 211 130 / var(--tw-ring-opacity)); --tw-ring-color: rgb(175 211 130 / var(--tw-ring-opacity));
@@ -2487,16 +2522,6 @@ input:checked + .toggle-bg {
--tw-ring-color: rgb(121 181 46 / var(--tw-ring-opacity)); --tw-ring-color: rgb(121 181 46 / var(--tw-ring-opacity));
} }
.focus\:ring-green-200:focus {
--tw-ring-opacity: 1;
--tw-ring-color: rgb(201 225 171 / var(--tw-ring-opacity));
}
.focus\:ring-blue-200:focus {
--tw-ring-opacity: 1;
--tw-ring-color: rgb(153 215 247 / var(--tw-ring-opacity));
}
.focus\:ring-red-200:focus { .focus\:ring-red-200:focus {
--tw-ring-opacity: 1; --tw-ring-opacity: 1;
--tw-ring-color: rgb(255 154 154 / var(--tw-ring-opacity)); --tw-ring-color: rgb(255 154 154 / var(--tw-ring-opacity));
@@ -2565,6 +2590,11 @@ input:checked + .toggle-bg {
background-color: rgb(0 125 187 / var(--tw-bg-opacity)); background-color: rgb(0 125 187 / var(--tw-bg-opacity));
} }
:is(.dark .dark\:bg-blue-700) {
--tw-bg-opacity: 1;
background-color: rgb(0 94 140 / var(--tw-bg-opacity));
}
:is(.dark .dark\:bg-blue-900) { :is(.dark .dark\:bg-blue-900) {
--tw-bg-opacity: 1; --tw-bg-opacity: 1;
background-color: rgb(0 31 47 / var(--tw-bg-opacity)); background-color: rgb(0 31 47 / var(--tw-bg-opacity));
@@ -2599,11 +2629,21 @@ input:checked + .toggle-bg {
background-color: rgb(97 145 37 / var(--tw-bg-opacity)); background-color: rgb(97 145 37 / var(--tw-bg-opacity));
} }
:is(.dark .dark\:bg-green-700) {
--tw-bg-opacity: 1;
background-color: rgb(73 109 28 / var(--tw-bg-opacity));
}
:is(.dark .dark\:bg-green-900) { :is(.dark .dark\:bg-green-900) {
--tw-bg-opacity: 1; --tw-bg-opacity: 1;
background-color: rgb(24 36 9 / var(--tw-bg-opacity)); background-color: rgb(24 36 9 / var(--tw-bg-opacity));
} }
:is(.dark .dark\:bg-red-700) {
--tw-bg-opacity: 1;
background-color: rgb(153 2 2 / var(--tw-bg-opacity));
}
:is(.dark .dark\:bg-red-900) { :is(.dark .dark\:bg-red-900) {
--tw-bg-opacity: 1; --tw-bg-opacity: 1;
background-color: rgb(51 1 1 / var(--tw-bg-opacity)); background-color: rgb(51 1 1 / var(--tw-bg-opacity));
@@ -2614,26 +2654,6 @@ input:checked + .toggle-bg {
background-color: rgb(99 49 18 / var(--tw-bg-opacity)); background-color: rgb(99 49 18 / var(--tw-bg-opacity));
} }
:is(.dark .dark\:bg-green-500) {
--tw-bg-opacity: 1;
background-color: rgb(121 181 46 / var(--tw-bg-opacity));
}
:is(.dark .dark\:bg-green-700) {
--tw-bg-opacity: 1;
background-color: rgb(73 109 28 / var(--tw-bg-opacity));
}
:is(.dark .dark\:bg-blue-700) {
--tw-bg-opacity: 1;
background-color: rgb(0 94 140 / var(--tw-bg-opacity));
}
:is(.dark .dark\:bg-red-700) {
--tw-bg-opacity: 1;
background-color: rgb(153 2 2 / var(--tw-bg-opacity));
}
:is(.dark .dark\:bg-opacity-80) { :is(.dark .dark\:bg-opacity-80) {
--tw-bg-opacity: 0.8; --tw-bg-opacity: 0.8;
} }
@@ -2717,6 +2737,11 @@ input:checked + .toggle-bg {
--tw-ring-offset-color: #1F2937; --tw-ring-offset-color: #1F2937;
} }
:is(.dark .dark\:hover\:bg-blue-600:hover) {
--tw-bg-opacity: 1;
background-color: rgb(0 125 187 / var(--tw-bg-opacity));
}
:is(.dark .dark\:hover\:bg-blue-700:hover) { :is(.dark .dark\:hover\:bg-blue-700:hover) {
--tw-bg-opacity: 1; --tw-bg-opacity: 1;
background-color: rgb(0 94 140 / var(--tw-bg-opacity)); background-color: rgb(0 94 140 / var(--tw-bg-opacity));
@@ -2747,11 +2772,6 @@ input:checked + .toggle-bg {
background-color: rgb(73 109 28 / var(--tw-bg-opacity)); background-color: rgb(73 109 28 / var(--tw-bg-opacity));
} }
:is(.dark .dark\:hover\:bg-blue-600:hover) {
--tw-bg-opacity: 1;
background-color: rgb(0 125 187 / var(--tw-bg-opacity));
}
:is(.dark .dark\:hover\:bg-red-600:hover) { :is(.dark .dark\:hover\:bg-red-600:hover) {
--tw-bg-opacity: 1; --tw-bg-opacity: 1;
background-color: rgb(204 2 2 / var(--tw-bg-opacity)); background-color: rgb(204 2 2 / var(--tw-bg-opacity));

View File

@@ -2112,4 +2112,19 @@
:db/cardinality :db.cardinality/one :db/cardinality :db.cardinality/one
:db/unique :db.unique/identity} :db/unique :db.unique/identity}
{:db/ident :textract-invoice/job-id
:db/doc "The Textract job id used to parse the pdf"
:db/valueType :db.type/string
:db/cardinality :db.cardinality/one
:db/unique :db.unique/identity}
{:db/ident :textract-invoice/textract-status
:db/doc "The raw textract status, e.g., SUCCEEDED, IN_PROGRESS"
:db/valueType :db.type/string
:db/cardinality :db.cardinality/one}
{:db/ident :textract-invoice/pdf-url
:db/doc "A url to the pdf on s3"
:db/valueType :db.type/string
:db/cardinality :db.cardinality/one}
] ]

View File

@@ -0,0 +1,162 @@
(ns amazonica.aws.textract
(:require
[auto-ap.solr :as solr]
[unilog.context :as lc]))
(require '[amazonica.core :as amz])
(import '[com.amazonaws.services.textract AmazonTextractClient ])
(import '[com.amazonaws.services.textract AmazonTextractClient ])
(import '[com.amazonaws.services.textract.model S3Object ])
(import '[com.amazonaws.services.textract.model StartExpenseAnalysisRequest ])
(import '[com.amazonaws.services.textract.model GetExpenseAnalysisRequest ])
(import '[com.amazonaws.services.textract.model DocumentLocation])
;; Register the Textract client with amazonica for the current namespace.
;; NOTE(review): presumably this interns one wrapper function per client
;; operation (later code calls txtract/start-expense-analysis and
;; txtract/get-expense-analysis) -- confirm against the amazonica docs.
(amz/set-client AmazonTextractClient *ns*)
;; Switch to the `user` namespace; every form below is evaluated there.
(in-ns 'user)
(require '[clojure.java.io :as io])
(require '[cheshire.core :as cheshire])
(require '[amazonica.aws.s3 :as s3])
(require '[auto-ap.graphql.utils :refer [cleanse-query]])
(require '[iol-ion.tx :as itx])
(require '[config.core :refer [env]])
(require '[amazonica.aws.textract :as txtract])
(import '[com.amazonaws.services.textract AmazonTextractClient ])
(import '[com.amazonaws.services.textract.model S3Object ])
(import '[com.amazonaws.services.textract.model StartExpenseAnalysisRequest ])
(import '[com.amazonaws.services.textract.model GetExpenseAnalysisRequest ])
(import '[com.amazonaws.services.textract.model DocumentLocation])
(import '[java.util UUID])
(defn textract-file
  "Starts a Textract expense analysis for the object `s3-location` in the
  `:data-bucket` bucket and blocks until the job leaves the IN_PROGRESS state.

  Polls every two seconds and prints each observed status. Returns the final
  get-expense-analysis response. There is no upper bound on the wait: the call
  blocks for as long as Textract keeps reporting IN_PROGRESS."
  [s3-location]
  (let [job-id (:job-id (txtract/start-expense-analysis
                         {:document-location {:s3-object {:bucket (:data-bucket env)
                                                          :name s3-location}}}))]
    (loop [result (txtract/get-expense-analysis {:job-id job-id})]
      (println "checking..." (:job-status result))
      (if (= "IN_PROGRESS" (:job-status result))
        (do
          ;; Only wait when another poll is actually needed; a finished job is
          ;; returned immediately instead of after a pointless 2s sleep.
          (Thread/sleep 2000)
          (recur (txtract/get-expense-analysis {:job-id job-id})))
        result))))
(defn lookup
  "Flattens a Textract expense response into one sequence of nodes.

  The sequence holds the expense documents themselves followed by all of their
  `:summary-fields`. On every node the `:geometry` key is removed from
  `:label-detection` and `:value-detection` (nodes lacking those keys end up
  with the key present and a nil value)."
  [tx]
  (let [documents      (:expense-documents tx)
        summary-fields (mapcat :summary-fields documents)
        strip-geometry (fn [node]
                         (-> node
                             (update :label-detection dissoc :geometry)
                             (update :value-detection dissoc :geometry)))]
    (map strip-geometry (concat documents summary-fields))))
(defn find-best
  "Picks the most trustworthy value among `field-descriptors`.

  Each descriptor is scored by multiplying its `:type` confidence with its
  `:value-detection` confidence. Returns a map with the untouched input under
  `:raw` and the `:value-detection` text of the highest-scoring descriptor
  under `:best` (nil when the input is empty)."
  [field-descriptors]
  (let [score  (fn [descriptor]
                 (* (get-in descriptor [:type :confidence])
                    (get-in descriptor [:value-detection :confidence])))
        winner (last (sort-by score field-descriptors))]
    {:raw  field-descriptors
     :best (get-in winner [:value-detection :text])}))
(require '[auto-ap.solr :as solr])
(require '[auto-ap.logging :as alog])
(require '[com.brunobonacci.mulog :as mu])
(require '[auto-ap.datomic.clients :as d-clients])
(require '[auto-ap.time :as atime])
(defn textract->coalesced
  "Reduces a Textract expense response to the handful of fields an invoice
  needs. Every entry is the result of `find-best` over the nodes whose
  `:type` text equals the given Textract field type."
  [tx]
  (let [nodes    (lookup tx)
        best-for (fn [type-text]
                   (find-best (filter (fn [node] (= type-text (:text (:type node))))
                                      nodes)))]
    {:total               (best-for "TOTAL")
     :account-number      (best-for "CUSTOMER_NUMBER")
     :customer-identifier (best-for "RECEIVER_NAME")
     :vendor-name         (best-for "VENDOR_NAME")
     :date                (best-for "ORDER_DATE")
     :invoice-number      (best-for "INVOICE_RECEIPT_ID")}))
(defn clean-customer
  "Returns `c` with every run of non-word characters (regex `\\W+`) collapsed
  into a single space."
  [c]
  (let [non-word-run #"\W+"]
    (clojure.string/replace c non-word-run " ")))
(require '[datomic.api :as dc])
(require '[auto-ap.datomic :refer [conn]])
(defn- parse-total
  "Parses the first numeric run (digits, dots, minus signs) found in `s` as a
  double. Thousands separators (commas) are dropped first so \"1,234.56\"
  yields 1234.56 instead of 1.0. Returns nil when `s` is nil, contains no
  numeric run, or the run is not a valid number."
  [s]
  (when s
    (when-let [[_ number] (re-find #"([0-9.\-]+)" (clojure.string/replace s "," ""))]
      (try
        (Double/parseDouble number)
        (catch NumberFormatException _ nil)))))

(defn coalesced->invoice
  "Turns the output of `textract->coalesced` into a pending invoice entity map.

  Resolves the vendor through the \"vendors\" Solr index (score above 4.0), the
  client through an exact account-number match or, failing that, the first hit
  in the \"clients\" Solr index, and takes the client's first location.
  Returns nil -- after logging a warning for a missing vendor or client --
  unless client, date, invoice number, vendor and total were all resolved."
  [i]
  (mu/with-context {:inference i}
    (let [vendor-name         (:best (:vendor-name i))
          ;; Skip the search when no vendor name was detected (otherwise the
          ;; query would literally search for \"null\") and strip query-syntax
          ;; characters the same way the customer search does.
          vendor-results      (when (not-empty vendor-name)
                                (solr/query solr/impl "vendors"
                                            {"query"  (format "name:(%s) " (clean-customer vendor-name))
                                             "fields" "score, *"}))
          vendor-id           (->> vendor-results
                                   (filter (fn [d] (> (:score d) 4.0)))
                                   (map (comp #(Long/parseLong %) :id))
                                   first)
          account-number      (:best (:account-number i))
          customer-identifier (:best (:customer-identifier i))
          client-id           (or
                               (when (not-empty account-number)
                                 (:db/id (d-clients/exact-match account-number)))
                               ;; Unlike the vendor search, no score threshold
                               ;; is applied here: the first hit wins.
                               (when (not-empty customer-identifier)
                                 (->> (solr/query solr/impl "clients"
                                                  {"query"  (format "name:(%s) " (clean-customer customer-identifier))
                                                   "fields" "score, *"})
                                      (map (comp #(Long/parseLong %) :id))
                                      first)))
          location            (when client-id
                                (->> (dc/pull (dc/db conn) '[:client/locations] client-id)
                                     :client/locations
                                     first))
          invoice-number      (:best (:invoice-number i))
          ;; nil-safe: a document without a detected total must fall through to
          ;; the `when` below instead of throwing from Double/parseDouble.
          total               (parse-total (:best (:total i)))
          date-text           (:best (:date i))
          ;; NOTE(review): assumes atime/parse returns nil for text that does
          ;; not match the pattern -- confirm in auto-ap.time.
          date                (when date-text
                                (or (atime/parse date-text "MM/dd/yyyy")
                                    (atime/parse date-text "MM/dd/yy")))]
      (when-not vendor-id
        ;; Reuse the results fetched above rather than querying Solr again.
        (alog/warn ::cant-find-vendor
                   :search-results vendor-results
                   :vendor-name (:vendor-name i)))
      (when-not client-id
        (alog/warn ::cant-find-customer))
      (when (and client-id date invoice-number vendor-id total)
        {:db/id                       (itx/random-tempid)
         :invoice/client              client-id
         :invoice/client-identifier   (or account-number customer-identifier)
         :invoice/vendor              vendor-id
         :invoice/invoice-number      invoice-number
         :invoice/total               total
         :invoice/date                date
         :invoice/location            location
         :invoice/import-status       :import-status/pending
         :invoice/outstanding-balance total
         :invoice/status              :invoice-status/unpaid}))))
(defn file->textract->invoice
  "Uploads the local file at path `f` to the `:data-bucket` bucket under
  textract-files/<random-uuid>.<extension>, runs it through Textract and
  returns the candidate invoice map (or nil) from `coalesced->invoice`.

  The object is always stored with content type application/pdf."
  [f]
  (let [;; Fully qualified: no `str` alias is set up for clojure.string in the
        ;; namespace this form is evaluated in.
        extension   (last (clojure.string/split f #"[\\.]"))
        s3-location (str "textract-files/" (UUID/randomUUID) "." extension)
        file        (io/file f)]
    (with-open [stream (io/input-stream f)]
      (s3/put-object (:data-bucket env)
                     s3-location
                     stream
                     {:content-type   "application/pdf"
                      :content-length (.length file)}))
    (-> (textract-file s3-location)
        (textract->coalesced)
        (coalesced->invoice))))
#_(def result (with-open [x (io/reader "batch.json")]
(json/parse-stream x)))

View File

@@ -0,0 +1,12 @@
(ns amazonica.aws.textract
(:require [amazonica.core :as amz])
(:import [com.amazonaws.services.textract AmazonTextractClient ]))
#_
(import '[com.amazonaws.services.textract AmazonTextractClient ])
#_(import '[com.amazonaws.services.textract.model S3Object ])
#_(import '[com.amazonaws.services.textract.model StartExpenseAnalysisRequest ])
#_(import '[com.amazonaws.services.textract.model GetExpenseAnalysisRequest ])
#_(import '[com.amazonaws.services.textract.model DocumentLocation])
;; Register the Textract client with amazonica for this namespace.
;; NOTE(review): presumably this interns one wrapper function per client
;; operation (callers use textract/start-expense-analysis and
;; textract/get-expense-analysis) -- confirm against the amazonica docs.
(amz/set-client AmazonTextractClient *ns*)

View File

@@ -207,5 +207,5 @@
"EZCater XLS Import"]) "EZCater XLS Import"])
(page*)) (page*))
"EZCater upload"))) "Invoice Glimpse")))

View File

@@ -11,6 +11,7 @@
[auto-ap.ssr.search :as search] [auto-ap.ssr.search :as search]
[auto-ap.ssr.company-dropdown :as company-dropdown] [auto-ap.ssr.company-dropdown :as company-dropdown]
[auto-ap.ssr.company.reports :as company-reports] [auto-ap.ssr.company.reports :as company-reports]
[auto-ap.ssr.invoice.glimpse :as invoice-glimpse]
[auto-ap.routes.ezcater-xls :as ezcater-xls] [auto-ap.routes.ezcater-xls :as ezcater-xls]
[auto-ap.ssr.company :as company])) [auto-ap.ssr.company :as company]))
@@ -39,6 +40,9 @@
:company-reports (wrap-client-redirect-unauthenticated (wrap-secure company-reports/page)) :company-reports (wrap-client-redirect-unauthenticated (wrap-secure company-reports/page))
:company-reports-table (wrap-client-redirect-unauthenticated (wrap-secure company-reports/table)) :company-reports-table (wrap-client-redirect-unauthenticated (wrap-secure company-reports/table))
:company-reports-delete (wrap-client-redirect-unauthenticated (wrap-admin company-reports/delete-report)) :company-reports-delete (wrap-client-redirect-unauthenticated (wrap-admin company-reports/delete-report))
:invoice-glimpse (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/page))
:invoice-glimpse-upload (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/upload))
:invoice-glimpse-job (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/job-progress))
:transaction-insights (wrap-client-redirect-unauthenticated (wrap-admin insights/page)) :transaction-insights (wrap-client-redirect-unauthenticated (wrap-admin insights/page))
:transaction-insight-table (wrap-client-redirect-unauthenticated (wrap-admin insights/insight-table)) :transaction-insight-table (wrap-client-redirect-unauthenticated (wrap-admin insights/insight-table))
:transaction-insight-rows (wrap-client-redirect-unauthenticated (wrap-admin insights/transaction-rows)) :transaction-insight-rows (wrap-client-redirect-unauthenticated (wrap-admin insights/transaction-rows))

View File

@@ -0,0 +1,295 @@
(ns auto-ap.ssr.invoice.glimpse
(:require
[amazonica.aws.s3 :as s3]
[amazonica.aws.textract :as textract]
[auto-ap.datomic :refer [conn pull-attr pull-id]]
[auto-ap.datomic.clients :as d-clients]
[auto-ap.logging :as alog]
[auto-ap.solr :as solr]
[auto-ap.ssr-routes :as ssr-routes]
[auto-ap.ssr.components :as com]
[auto-ap.ssr.ui :refer [base-page]]
[auto-ap.ssr.utils :refer [html-response path->name]]
[auto-ap.time :as atime]
[bidi.bidi :as bidi]
[cemerick.url :as url]
[clojure.java.io :as io]
[clojure.string :as str]
[com.brunobonacci.mulog :as mu]
[config.core :refer [env]]
[datomic.api :as dc]
[hiccup2.core :as hiccup]
[iol-ion.tx :refer [random-tempid]])
(:import
(java.util UUID)))
;; Bucket name taken from the `:data-bucket` config entry; used below to build
;; the URL under which an uploaded PDF is displayed.
(def bucket-name (:data-bucket env))
(defn lookup
  "Flattens a Textract expense response into one sequence of nodes.

  The sequence holds the expense documents themselves followed by all of their
  `:summary-fields`. On every node the `:geometry` key is removed from
  `:label-detection` and `:value-detection` (nodes lacking those keys end up
  with the key present and a nil value)."
  [tx]
  (let [documents      (:expense-documents tx)
        summary-fields (mapcat :summary-fields documents)
        strip-geometry (fn [node]
                         (-> node
                             (update :label-detection dissoc :geometry)
                             (update :value-detection dissoc :geometry)))]
    (map strip-geometry (concat documents summary-fields))))
(defn find-best
  "Picks the most trustworthy value among `field-descriptors`.

  Each descriptor is scored by multiplying its `:type` confidence with its
  `:value-detection` confidence. Returns a map with the untouched input under
  `:raw` and the `:value-detection` text of the highest-scoring descriptor
  under `:best` (nil when the input is empty)."
  [field-descriptors]
  (let [score  (fn [descriptor]
                 (* (get-in descriptor [:type :confidence])
                    (get-in descriptor [:value-detection :confidence])))
        winner (last (sort-by score field-descriptors))]
    {:raw  field-descriptors
     :best (get-in winner [:value-detection :text])}))
(defn textract->coalesced
  "Reduces a Textract expense response to the handful of fields an invoice
  needs. Every entry is the result of `find-best` over the nodes whose
  `:type` text equals the given Textract field type."
  [tx]
  (let [nodes    (lookup tx)
        best-for (fn [type-text]
                   (find-best (filter (fn [node] (= type-text (:text (:type node))))
                                      nodes)))]
    {:total               (best-for "TOTAL")
     :account-number      (best-for "CUSTOMER_NUMBER")
     :customer-identifier (best-for "RECEIVER_NAME")
     :vendor-name         (best-for "VENDOR_NAME")
     :date                (best-for "ORDER_DATE")
     :invoice-number      (best-for "INVOICE_RECEIPT_ID")}))
(defn clean-customer
  "Returns `c` with every run of non-word characters (regex `\\W+`) collapsed
  into a single space."
  [c]
  (let [non-word-run #"\W+"]
    (clojure.string/replace c non-word-run " ")))
(defn- parse-total
  "Parses the first numeric run (digits, dots, minus signs) found in `s` as a
  double. Thousands separators (commas) are dropped first so \"1,234.56\"
  yields 1234.56 instead of 1.0. Returns nil when `s` is nil, contains no
  numeric run, or the run is not a valid number."
  [s]
  (when s
    (when-let [[_ number] (re-find #"([0-9.\-]+)" (clojure.string/replace s "," ""))]
      (try
        (Double/parseDouble number)
        (catch NumberFormatException _ nil)))))

(defn coalesced->invoice
  "Turns the output of `textract->coalesced` into a pending invoice entity map.

  Resolves the vendor through the \"vendors\" Solr index (score above 4.0), the
  client through an exact account-number match or, failing that, the first hit
  in the \"clients\" Solr index, and takes the client's first location.
  Returns nil -- after logging a warning for a missing vendor or client --
  unless client, date, invoice number, vendor and total were all resolved."
  [i]
  (mu/with-context {:inference i}
    (let [vendor-name         (:best (:vendor-name i))
          ;; Skip the search when no vendor name was detected (otherwise the
          ;; query would literally search for \"null\") and strip query-syntax
          ;; characters the same way the customer search does.
          vendor-results      (when (not-empty vendor-name)
                                (solr/query solr/impl "vendors"
                                            {"query"  (format "name:(%s) " (clean-customer vendor-name))
                                             "fields" "score, *"}))
          vendor-id           (->> vendor-results
                                   (filter (fn [d] (> (:score d) 4.0)))
                                   (map (comp #(Long/parseLong %) :id))
                                   first)
          account-number      (:best (:account-number i))
          customer-identifier (:best (:customer-identifier i))
          client-id           (or
                               (when (not-empty account-number)
                                 (:db/id (d-clients/exact-match account-number)))
                               ;; Unlike the vendor search, no score threshold
                               ;; is applied here: the first hit wins.
                               (when (not-empty customer-identifier)
                                 (->> (solr/query solr/impl "clients"
                                                  {"query"  (format "name:(%s) " (clean-customer customer-identifier))
                                                   "fields" "score, *"})
                                      (map (comp #(Long/parseLong %) :id))
                                      first)))
          location            (when client-id
                                (->> (dc/pull (dc/db conn) '[:client/locations] client-id)
                                     :client/locations
                                     first))
          invoice-number      (:best (:invoice-number i))
          ;; nil-safe: a document without a detected total must fall through to
          ;; the `when` below instead of throwing from Double/parseDouble.
          total               (parse-total (:best (:total i)))
          date-text           (:best (:date i))
          ;; NOTE(review): assumes atime/parse returns nil for text that does
          ;; not match the pattern -- confirm in auto-ap.time.
          date                (when date-text
                                (or (atime/parse date-text "MM/dd/yyyy")
                                    (atime/parse date-text "MM/dd/yy")))]
      (when-not vendor-id
        ;; Reuse the results fetched above rather than querying Solr again.
        (alog/warn ::cant-find-vendor
                   :search-results vendor-results
                   :vendor-name (:vendor-name i)))
      (when-not client-id
        (alog/warn ::cant-find-customer))
      (when (and client-id date invoice-number vendor-id total)
        {:db/id                       (random-tempid)
         :invoice/client              client-id
         :invoice/client-identifier   (or account-number customer-identifier)
         :invoice/vendor              vendor-id
         :invoice/invoice-number      invoice-number
         :invoice/total               total
         :invoice/date                date
         :invoice/location            location
         :invoice/import-status       :import-status/pending
         :invoice/outstanding-balance total
         :invoice/status              :invoice-status/unpaid}))))
;; Renders the drop target for a new invoice: a form that POSTs to the
;; :invoice-glimpse-upload route, plus an inline script that turns the form
;; into a Dropzone.
;; NOTE(review): `Dropzone` is assumed to be loaded globally by the page
;; layout -- it is not referenced anywhere else in this namespace.
(defn upload-form* []
[:div
[:form.bg-blue-100.border-2.border-dashed.rounded-lg.border-blue-300.p-4.max-w-md.w-md.text-center.cursor-pointer
{:action (bidi/path-for ssr-routes/only-routes
:invoice-glimpse-upload)
:method "POST"
:id "invoice"}
"Drop an invoice here"]
;; The script is emitted unescaped. On a successful upload the browser is sent
;; to the URL the XHR ended up at; the upload handler answers with a 302 to the
;; glimpse page carrying the new job id.
[:script
(hiccup/raw
"
invoice_dropzone = new Dropzone(\"#invoice\", {
success: function(file, response) {
window.location.href = file.xhr.responseURL;
},
disablePreviews: true
}); ")]])
(defn refresh-job
  "Brings the stored Textract status for `job-id` up to date and returns the
  full textract-invoice entity.

  Textract is only consulted while the stored status is still IN_PROGRESS; the
  status it reports is then transacted before the entity is pulled again."
  [job-id]
  (let [job-ref [:textract-invoice/job-id job-id]
        stored  (dc/pull (dc/db conn)
                         '[:db/id :textract-invoice/textract-status]
                         job-ref)]
    (when (= "IN_PROGRESS" (:textract-invoice/textract-status stored))
      (let [latest-status (:job-status (textract/get-expense-analysis {:job-id job-id}))]
        @(dc/transact conn [{:db/id                            (:db/id stored)
                             :textract-invoice/textract-status latest-status}])))
    (dc/pull (dc/db conn) '[*] job-ref)))
(defn- alternates*
  "Renders every distinct raw text Textract detected for `field` (one entry of
  the `textract->coalesced` map) as a comma-separated row of links."
  [field]
  [:div.col-span-3.col-start-1.text-xs
   "Alternates: "
   (interpose ", "
              (map (fn [x] (com/link {:href "#"} (pr-str x)))
                   (set (map (comp :text :value-detection) (:raw field)))))])

(defn textract->invoice-form*
  "Renders a read-only form showing the invoice inferred from the finished
  Textract job `job-id`, with the alternative detections for date and total.

  `coalesced->invoice` returns nil when any required field could not be
  resolved; in that case the inputs are rendered empty instead of failing on
  the missing client, vendor or date."
  [job-id]
  (let [coalesced         (-> (textract/get-expense-analysis {:job-id job-id})
                              (textract->coalesced))
        candidate-invoice (coalesced->invoice coalesced)
        db                (dc/db conn)]
    [:form
     [:div.grid.grid-cols-6.gap-4
      [:div.col-span-6
       (com/field {:label "Client"}
                  (com/text-input {:name        (path->name [:invoice/client])
                                   ;; Only look the name up when a client was resolved.
                                   :value       (some->> (:invoice/client candidate-invoice)
                                                         (pull-attr db :client/name))
                                   :placeholder "Client"
                                   :disabled    true
                                   :autofocus   true}))]
      [:div.col-span-6
       (com/field {:label "Vendor"}
                  (com/text-input {:name        (path->name [:invoice/vendor])
                                   :value       (some->> (:invoice/vendor candidate-invoice)
                                                         (pull-attr db :vendor/name))
                                   :placeholder "Vendor"
                                   :disabled    true
                                   :autofocus   true}))]
      [:div.col-span-3
       (com/field {:label "Date"}
                  (com/text-input {:name        (path->name [:invoice/date])
                                   :value       (some-> (:invoice/date candidate-invoice)
                                                        (atime/unparse-local atime/normal-date))
                                   :placeholder "Date"
                                   :disabled    true
                                   :autofocus   true}))]
      (alternates* (:date coalesced))
      [:div.col-span-2.col-start-1
       (com/field {:label "Total"}
                  (com/text-input {:name        (path->name [:invoice/total])
                                   :value       (:invoice/total candidate-invoice)
                                   :placeholder "Total"
                                   :disabled    true
                                   :autofocus   true}))]
      (alternates* (:total coalesced))
      [:div.col-span-2.col-start-1
       (com/field {:label "Invoice Number"}
                  (com/text-input {:name        (path->name [:invoice/invoice-number])
                                   :value       (:invoice/invoice-number candidate-invoice)
                                   :placeholder "Invoice Number"
                                   :disabled    true
                                   :autofocus   true}))]]]))
(defn job-progress*
  "Renders the current state of the Textract job `job-id`.

  Returns nil when no textract-invoice with that job id is stored. Otherwise
  the stored status is refreshed and one of three views is produced:
  - IN_PROGRESS: a placeholder that re-requests this fragment via htmx after
    five seconds;
  - SUCCEEDED: the uploaded PDF next to the inferred invoice form;
  - anything else (e.g. a failed job): a message with the reported status, so
    the user is not left with an empty page."
  [job-id]
  (when (pull-id (dc/db conn) [:textract-invoice/job-id job-id])
    (let [textract-invoice (refresh-job job-id)
          status           (:textract-invoice/textract-status textract-invoice)
          new-import-link  [:a.mb-2 {:href (bidi/path-for ssr-routes/only-routes
                                                          :invoice-glimpse)}
                            (com/button {:color :secondary} "New import")]]
      (cond
        (= "IN_PROGRESS" status)
        [:div.bg-blue-100.border-2.border-dashed.rounded-lg.border-blue-300.p-4.max-w-md.w-md.text-center.cursor-pointer
         {:hx-get     (str (bidi/path-for ssr-routes/only-routes
                                          :invoice-glimpse-job)
                           "?" (url/map->query {:job-id job-id}))
          :hx-trigger "load delay:5s"
          :hx-swap    "outerHTML"}
         "Analyzing job " (subs (:textract-invoice/job-id textract-invoice) 0 8) "..."]

        (= "SUCCEEDED" status)
        [:div.px-4
         new-import-link
         [:div.flex.flex-row.space-x-4
          ;; A CSS length needs a unit; a bare "805" is ignored by browsers.
          [:div {:style {:width "805px"}}
           (com/card {}
                     [:iframe.p-4 {:src    (:textract-invoice/pdf-url textract-invoice)
                                   :width  791
                                   :height 1024}])]
          [:div {:class "basis-1/4"}
           (com/card {}
                     [:div.p-4
                      (textract->invoice-form* job-id)])]]]

        ;; Terminal status other than success: tell the user instead of
        ;; rendering nothing.
        :else
        [:div.px-4
         new-import-link
         [:div.bg-red-50.border-2.border-dashed.rounded-lg.p-4.max-w-md.text-center
          "Textract could not analyze this file (status: " (str status) "). Please try another PDF."]]))))
(defn job-progress
  "Ring handler: renders the progress fragment for the job named by the
  \"job-id\" query parameter and wraps it in an HTML response."
  [request]
  (let [job-id (get-in request [:query-params "job-id"])]
    (html-response (job-progress* job-id))))
(defn page*
  "Renders the body of the Invoice Glimpse page: heading, beta notice and
  either the progress view for `job-id` or, when `job-id` is nil, the upload
  form."
  [job-id]
  [:div.mt-4
   (com/card {}
             [:div.px-4.py-3.space-y-4.flex.flex-col
              [:h1.text-2xl.mb-3.font-bold "Invoice Glimpse"]
              [:p.text-sm.italic "Import your invoices with the power of AI."]
              [:div.flex.flex-row.space-x-4 (com/pill {:color :primary} "Beta")
               ;; Spelling of the user-facing notice corrected ("experimental").
               [:span "Note: This upload is experimental. Please only use PDFs with a single invoice in them."]]
              ;; The two views are mutually exclusive.
              (if job-id
                (job-progress* job-id)
                (upload-form*))])])
(defn begin-textract-file
  "Starts a Textract expense analysis for the object `s3-location` in the
  `:data-bucket` bucket, stores a textract-invoice entity (job id, status
  IN_PROGRESS, PDF url) and returns the map that was transacted."
  [s3-location]
  (let [document         {:document-location {:s3-object {:bucket (:data-bucket env)
                                                          :name   s3-location}}}
        job-id           (:job-id (textract/start-expense-analysis document))
        ;; NOTE(review): plain-http S3 website endpoint; presumably the bucket
        ;; is served as a public website -- confirm, and check this still
        ;; loads in the iframe when the app itself is served over https.
        pdf-url          (str "http://" bucket-name ".s3-website-us-east-1.amazonaws.com/" s3-location)
        textract-invoice {:textract-invoice/job-id          job-id
                          :textract-invoice/textract-status "IN_PROGRESS"
                          :textract-invoice/pdf-url         pdf-url}]
    @(dc/transact conn [textract-invoice])
    textract-invoice))
(defn upload
  "Ring handler for an invoice upload.

  Reads the multipart `file` param (keyword or string key), stores its temp
  file in the `:data-bucket` bucket under
  textract-files/<random-uuid>.<extension>, starts a Textract job for it and
  answers with a 302 to the glimpse page carrying the job id.

  Any failure -- including a request without a file -- is logged and answered
  with an HTML response containing the exception message."
  [{:keys [identity] :as request}]
  (let [file (or (get (:params request) :file)
                 (get (:params request) "file"))]
    (mu/log ::uploading-file
            :file file)
    (try
      ;; Checked inside the try so a missing upload takes the same error path
      ;; as every other failure instead of escaping the handler.
      (when-not (:tempfile file)
        (throw (ex-info "No file was uploaded" {:params (keys (:params request))})))
      (let [extension   (last (str/split (:filename file) #"[\\.]"))
            s3-location (str "textract-files/" (UUID/randomUUID) "." extension)]
        ;; The temp file is opened exactly once, for the duration of the put.
        (with-open [stream (io/input-stream (:tempfile file))]
          (s3/put-object (:data-bucket env)
                         s3-location
                         stream
                         {:content-type   "application/pdf"
                          :content-length (.length (:tempfile file))}))
        (let [textract-invoice (begin-textract-file s3-location)]
          {:headers {"Location"
                     (str (bidi/path-for ssr-routes/only-routes
                                         :invoice-glimpse)
                          "?" (url/map->query {:job-id (:textract-invoice/job-id textract-invoice)}))}
           :status  302}))
      (catch Exception e
        (alog/error ::cant-begin-textract
                    :error e)
        (html-response [:div (.getMessage e)])))))
(defn page
  "Ring handler for the full Invoice Glimpse page. Logs the request method and
  renders `page*` (for the optional \"job-id\" query parameter) inside the
  admin layout with breadcrumbs."
  [{:keys [matched-route request-method] :as request}]
  (mu/log ::method
          :method request-method)
  (let [glimpse-path (bidi/path-for ssr-routes/only-routes :invoice-glimpse)
        admin-path   (bidi/path-for ssr-routes/only-routes :admin)
        job-id       (get (:query-params request) "job-id")
        layout-opts  {:nav           (com/admin-aside-nav)
                      :active-client (:client (:session request))
                      :identity      (:identity request)
                      :app-params    {:hx-get     glimpse-path
                                      :hx-trigger "clientSelected from:body"
                                      :hx-select  "#app-contents"
                                      :hx-swap    "outerHTML swap:300ms"}}]
    (base-page
     request
     (com/page layout-opts
               (com/breadcrumbs {}
                                [:a {:href admin-path} "Invoice"]
                                [:a {:href glimpse-path} "Glimpse"])
               (page* job-id))
     "Invoice Glimpse")))

View File

@@ -2,6 +2,9 @@
(def routes {"logout" :logout (def routes {"logout" :logout
"search" :search "search" :search
"invoice" {"/glimpse" {"" {:get :invoice-glimpse
:post :invoice-glimpse-upload}
"/job" {:get :invoice-glimpse-job}}}
"admin" {"/history" {"" :admin-history "admin" {"/history" {"" :admin-history
"/" :admin-history "/" :admin-history
#"/search/?" :admin-history-search #"/search/?" :admin-history-search