Begins the process of AI-powered invoices
This commit is contained in:
12
src/clj/amazonica/aws/textract.clj
Normal file
12
src/clj/amazonica/aws/textract.clj
Normal file
@@ -0,0 +1,12 @@
|
||||
(ns amazonica.aws.textract
|
||||
(:require [amazonica.core :as amz])
|
||||
(:import [com.amazonaws.services.textract AmazonTextractClient ]))
|
||||
|
||||
#_
|
||||
(import '[com.amazonaws.services.textract AmazonTextractClient ])
|
||||
#_(import '[com.amazonaws.services.textract.model S3Object ])
|
||||
#_(import '[com.amazonaws.services.textract.model StartExpenseAnalysisRequest ])
|
||||
#_(import '[com.amazonaws.services.textract.model GetExpenseAnalysisRequest ])
|
||||
|
||||
#_(import '[com.amazonaws.services.textract.model DocumentLocation])
|
||||
;; Calls amazonica.core/set-client with the Textract client class and the
;; current namespace.
;; NOTE(review): presumably this is what makes wrappers such as
;; `start-expense-analysis` / `get-expense-analysis` available in this
;; namespace -- confirm against amazonica.core/set-client.
(amz/set-client AmazonTextractClient *ns*)
|
||||
@@ -207,5 +207,5 @@
|
||||
"EZCater XLS Import"])
|
||||
(page*))
|
||||
|
||||
"EZCater upload")))
|
||||
"Invoice Glimpse")))
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
[auto-ap.ssr.search :as search]
|
||||
[auto-ap.ssr.company-dropdown :as company-dropdown]
|
||||
[auto-ap.ssr.company.reports :as company-reports]
|
||||
[auto-ap.ssr.invoice.glimpse :as invoice-glimpse]
|
||||
[auto-ap.routes.ezcater-xls :as ezcater-xls]
|
||||
[auto-ap.ssr.company :as company]))
|
||||
|
||||
@@ -39,6 +40,9 @@
|
||||
:company-reports (wrap-client-redirect-unauthenticated (wrap-secure company-reports/page))
|
||||
:company-reports-table (wrap-client-redirect-unauthenticated (wrap-secure company-reports/table))
|
||||
:company-reports-delete (wrap-client-redirect-unauthenticated (wrap-admin company-reports/delete-report))
|
||||
:invoice-glimpse (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/page))
|
||||
:invoice-glimpse-upload (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/upload))
|
||||
:invoice-glimpse-job (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/job-progress))
|
||||
:transaction-insights (wrap-client-redirect-unauthenticated (wrap-admin insights/page))
|
||||
:transaction-insight-table (wrap-client-redirect-unauthenticated (wrap-admin insights/insight-table))
|
||||
:transaction-insight-rows (wrap-client-redirect-unauthenticated (wrap-admin insights/transaction-rows))
|
||||
|
||||
295
src/clj/auto_ap/ssr/invoice/glimpse.clj
Normal file
295
src/clj/auto_ap/ssr/invoice/glimpse.clj
Normal file
@@ -0,0 +1,295 @@
|
||||
|
||||
(ns auto-ap.ssr.invoice.glimpse
|
||||
(:require
|
||||
[amazonica.aws.s3 :as s3]
|
||||
[amazonica.aws.textract :as textract]
|
||||
[auto-ap.datomic :refer [conn pull-attr pull-id]]
|
||||
[auto-ap.datomic.clients :as d-clients]
|
||||
[auto-ap.logging :as alog]
|
||||
[auto-ap.solr :as solr]
|
||||
[auto-ap.ssr-routes :as ssr-routes]
|
||||
[auto-ap.ssr.components :as com]
|
||||
[auto-ap.ssr.ui :refer [base-page]]
|
||||
[auto-ap.ssr.utils :refer [html-response path->name]]
|
||||
[auto-ap.time :as atime]
|
||||
[bidi.bidi :as bidi]
|
||||
[cemerick.url :as url]
|
||||
[clojure.java.io :as io]
|
||||
[clojure.string :as str]
|
||||
[com.brunobonacci.mulog :as mu]
|
||||
[config.core :refer [env]]
|
||||
[datomic.api :as dc]
|
||||
[hiccup2.core :as hiccup]
|
||||
[iol-ion.tx :refer [random-tempid]])
|
||||
(:import
|
||||
(java.util UUID)))
|
||||
|
||||
;; Bucket name, read from the :data-bucket key of the application config (`env`).
(def bucket-name (:data-bucket env))
|
||||
|
||||
(defn lookup
  "Flattens a Textract expense-analysis result into one lazy sequence of nodes.

  The sequence holds every entry of `(:expense-documents tx)` followed by every
  `:summary-fields` entry of those documents. Each node has `:geometry` removed
  from its `:label-detection` and `:value-detection` values (a node without
  those keys ends up with them set to nil)."
  [tx]
  (let [documents      (:expense-documents tx)
        summary-fields (mapcat :summary-fields documents)
        strip-geometry (fn [node]
                         (reduce (fn [acc k] (update acc k dissoc :geometry))
                                 node
                                 [:label-detection :value-detection]))]
    ;; Documents come first, then their summary fields, matching the order
    ;; produced by the threaded `concat` this replaces.
    (map strip-geometry (concat documents summary-fields))))
|
||||
(defn find-best
  "Picks the most confident value among `field-descriptors`.

  Returns a map with:
    :raw  - `field-descriptors`, unchanged
    :best - the `:text` of the `:value-detection` of the descriptor whose
            (type confidence * value confidence) product is highest, or nil
            when there are no descriptors.

  A missing confidence counts as 0 so that one incomplete descriptor cannot
  make the whole comparison throw."
  [field-descriptors]
  (let [score (fn [descriptor]
                (* (or (-> descriptor :type :confidence) 0)
                   (or (-> descriptor :value-detection :confidence) 0)))]
    {:raw  field-descriptors
     :best (->> field-descriptors
                (sort-by score)
                ;; ascending sort, so the strongest candidate is last
                last
                :value-detection
                :text)}))
|
||||
|
||||
(defn textract->coalesced
  "Reduces a Textract expense-analysis result to one `find-best` result per
  invoice attribute.

  Nodes come from `lookup`; each attribute is selected by comparing the node's
  `(:text (:type node))` with a fixed Textract type name."
  [tx]
  (let [nodes   (lookup tx)
        of-type (fn [type-name]
                  (find-best
                   (filter (fn [node] (= type-name (:text (:type node))))
                           nodes)))]
    {:total               (of-type "TOTAL")
     :account-number      (of-type "CUSTOMER_NUMBER")
     :customer-identifier (of-type "RECEIVER_NAME")
     :vendor-name         (of-type "VENDOR_NAME")
     :date                (of-type "ORDER_DATE")
     :invoice-number      (of-type "INVOICE_RECEIPT_ID")}))
|
||||
|
||||
(defn clean-customer
  "Returns `c` with every run of non-word characters collapsed to one space."
  [c]
  (let [non-word-run #"\W+"]
    (clojure.string/replace c non-word-run " ")))
|
||||
|
||||
(defn- parse-total
  "Returns the first decimal number found in `s` as a double, or nil when `s`
  is nil or contains no number. Commas are removed first so that \"1,234.56\"
  is read as 1234.56 instead of stopping at the thousands separator."
  [s]
  (when s
    (when-let [number (re-find #"-?\d*\.?\d+" (str/replace s "," ""))]
      (Double/parseDouble number))))

(defn coalesced->invoice
  "Builds a candidate invoice map from the output of `textract->coalesced`.

  Resolution steps:
    - vendor: Solr search on the best vendor name, keeping hits scoring above 4.0
    - client: exact match on the account number, otherwise a Solr search on the
      cleaned customer identifier (no score threshold is applied there)
    - location: first entry of the client's :client/locations
    - total: first number found in the best total text
    - date: best date text parsed as MM/dd/yyyy, then MM/dd/yy

  Returns the invoice map only when client, date, invoice number, vendor and
  total were all resolved; otherwise returns nil. Logs a warning when the
  vendor or the client cannot be found."
  [i]
  (mu/with-context {:inference i}
    (let [vendor-name         (:best (:vendor-name i))
          ;; Query once and reuse the hits for the warning below. Skipped when
          ;; there is no vendor name, so the literal "null" is never searched.
          ;; NOTE(review): the vendor name is interpolated into the query as-is,
          ;; unlike the customer identifier which goes through `clean-customer`
          ;; -- confirm whether it needs the same cleaning.
          vendor-results      (when (not-empty vendor-name)
                                (solr/query solr/impl "vendors"
                                            {"query"  (format "name:(%s) " vendor-name)
                                             "fields" "score, *"}))
          vendor-id           (->> vendor-results
                                   (filter (fn [d] (> (:score d) 4.0)))
                                   (map (comp #(Long/parseLong %) :id))
                                   first)
          account-number      (:best (:account-number i))
          customer-identifier (:best (:customer-identifier i))
          client-id           (or
                               (when (not-empty account-number)
                                 (:db/id (d-clients/exact-match account-number)))
                               (when customer-identifier
                                 (->> (solr/query solr/impl "clients"
                                                  {"query"  (format "name:(%s) " (clean-customer customer-identifier))
                                                   "fields" "score, *"})
                                      (map (comp #(Long/parseLong %) :id))
                                      first)))
          location            (when client-id
                                (->> (dc/pull (dc/db conn) '[:client/locations] client-id)
                                     :client/locations
                                     first))
          invoice-number      (:best (:invoice-number i))
          ;; nil (not an exception) when no total was recognised, so the
          ;; completeness check below can do its job
          total               (parse-total (:best (:total i)))
          date-text           (:best (:date i))
          date                (when date-text
                                (or (atime/parse date-text "MM/dd/yyyy")
                                    (atime/parse date-text "MM/dd/yy")))]
      (when-not vendor-id
        (alog/warn ::cant-find-vendor
                   :search-results vendor-results
                   :vendor-name (:vendor-name i)))
      (when-not client-id
        (alog/warn ::cant-find-customer))
      (when (and client-id date invoice-number vendor-id total)
        {:db/id                       (random-tempid)
         :invoice/client              client-id
         :invoice/client-identifier   (or account-number customer-identifier)
         :invoice/vendor              vendor-id
         :invoice/invoice-number      invoice-number
         :invoice/total               total
         :invoice/date                date
         :invoice/location            location
         :invoice/import-status       :import-status/pending
         :invoice/outstanding-balance total
         :invoice/status              :invoice-status/unpaid}))))
|
||||
|
||||
(defn upload-form*
  "Renders the drop target for a new invoice: a form posting to the
  :invoice-glimpse-upload route plus the script that turns it into a Dropzone.
  On a successful upload the script navigates to the response URL."
  []
  (let [upload-path   (bidi/path-for ssr-routes/only-routes
                                     :invoice-glimpse-upload)
        dropzone-init "
invoice_dropzone = new Dropzone(\"#invoice\", {
success: function(file, response) {
window.location.href = file.xhr.responseURL;
},
disablePreviews: true
}); "]
    [:div
     [:form.bg-blue-100.border-2.border-dashed.rounded-lg.border-blue-300.p-4.max-w-md.w-md.text-center.cursor-pointer
      {:action upload-path
       :method "POST"
       :id     "invoice"}
      "Drop an invoice here"]
     [:script (hiccup/raw dropzone-init)]]))
|
||||
|
||||
(defn refresh-job
  "Synchronises the stored status of Textract job `job-id` and returns the
  full stored entity.

  When the stored status is \"IN_PROGRESS\", the current job status is fetched
  from Textract and written back before the entity is pulled again."
  [job-id]
  (let [job-ref [:textract-invoice/job-id job-id]
        stored  (dc/pull (dc/db conn)
                         '[:db/id :textract-invoice/textract-status]
                         job-ref)]
    (when (= "IN_PROGRESS" (:textract-invoice/textract-status stored))
      (let [latest-status (:job-status (textract/get-expense-analysis {:job-id job-id}))]
        ;; deref so the write has completed before the pull below
        @(dc/transact conn [{:db/id                            (:db/id stored)
                             :textract-invoice/textract-status latest-status}])))
    (dc/pull (dc/db conn) '[*] job-ref)))
|
||||
|
||||
(defn- read-only-field*
  "Renders a labelled, disabled text input named after `attr` and holding
  `value`; `label` is also used as the placeholder."
  [label attr value]
  (com/field {:label label}
             (com/text-input {:name        (path->name [attr])
                              :value       value
                              :placeholder label
                              :disabled    true
                              :autofocus   true})))

(defn- alternates*
  "Renders the distinct candidate texts found for one coalesced field as a
  comma-separated list of links."
  [coalesced-field]
  [:div.col-span-3.col-start-1.text-xs
   "Alternates: "
   (->> (:raw coalesced-field)
        (map (comp :text :value-detection))
        set
        (map (fn [x] (com/link {:href "#"} (pr-str x))))
        (interpose ", "))])

(defn textract->invoice-form*
  "Renders a read-only form showing the invoice inferred from Textract job
  `job-id`, with the alternate candidates for the date and the total.

  `coalesced->invoice` returns nil when a required field could not be resolved;
  in that case the name and date lookups are skipped and the inputs render
  empty instead of being called with nil ids or dates."
  [job-id]
  (let [coalesced         (textract->coalesced (textract/get-expense-analysis {:job-id job-id}))
        candidate-invoice (coalesced->invoice coalesced)
        db                (dc/db conn)]
    [:form
     [:div.grid.grid-cols-6.gap-4
      [:div.col-span-6
       (read-only-field* "Client" :invoice/client
                         (some->> (:invoice/client candidate-invoice)
                                  (pull-attr db :client/name)))]
      [:div.col-span-6
       (read-only-field* "Vendor" :invoice/vendor
                         (some->> (:invoice/vendor candidate-invoice)
                                  (pull-attr db :vendor/name)))]
      [:div.col-span-3
       (read-only-field* "Date" :invoice/date
                         (some-> (:invoice/date candidate-invoice)
                                 (atime/unparse-local atime/normal-date)))]
      (alternates* (:date coalesced))
      [:div.col-span-2.col-start-1
       (read-only-field* "Total" :invoice/total
                         (:invoice/total candidate-invoice))]
      (alternates* (:total coalesced))
      [:div.col-span-2.col-start-1
       (read-only-field* "Invoice Number" :invoice/invoice-number
                         (:invoice/invoice-number candidate-invoice))]]]))
|
||||
|
||||
(defn job-progress*
  "Renders the current state of Textract job `job-id`, or nil when no stored
  job has that id.

    IN_PROGRESS  - a placeholder that re-requests this fragment after 5 seconds
    SUCCEEDED    - the uploaded PDF next to the inferred invoice form
    anything else (e.g. FAILED, PARTIAL_SUCCESS) - the final status and a link
                   to start a new import, so the page is never left blank"
  [job-id]
  (when (pull-id (dc/db conn) [:textract-invoice/job-id job-id])
    (let [textract-invoice (refresh-job job-id)
          status           (:textract-invoice/textract-status textract-invoice)
          new-import-link  (fn []
                             [:a.mb-2 {:href (bidi/path-for ssr-routes/only-routes
                                                            :invoice-glimpse)}
                              (com/button {:color :secondary} "New import")])]
      (case status
        "IN_PROGRESS"
        (let [stored-id (:textract-invoice/job-id textract-invoice)]
          [:div.bg-blue-100.border-2.border-dashed.rounded-lg.border-blue-300.p-4.max-w-md.w-md.text-center.cursor-pointer
           {:hx-get     (str (bidi/path-for ssr-routes/only-routes
                                            :invoice-glimpse-job)
                             "?" (url/map->query {:job-id job-id}))
            :hx-trigger "load delay:5s"
            :hx-swap    "outerHTML"}
           ;; show at most the first 8 characters; shorter ids are shown whole
           "Analyzing job " (subs stored-id 0 (min 8 (count stored-id))) "..."])

        "SUCCEEDED"
        [:div.px-4
         (new-import-link)
         [:div.flex.flex-row.space-x-4
          ;; NOTE(review): width has no CSS unit -- confirm it has the intended effect
          [:div {:style {:width "805"}}
           (com/card {}
                     [:iframe.p-4 {:src    (:textract-invoice/pdf-url textract-invoice)
                                   :width  791
                                   :height 1024}])]
          [:div {:class "basis-1/4"}
           (com/card {}
                     [:div.p-4
                      (textract->invoice-form* job-id)])]]]

        ;; any other terminal status
        [:div.px-4
         (new-import-link)
         [:p.text-sm (str "The analysis ended with status " (pr-str status) ".")]]))))
|
||||
|
||||
(defn job-progress
  "Handler returning the progress fragment for the job named by the
  \"job-id\" query parameter of `request`."
  [request]
  (let [job-id (get-in request [:query-params "job-id"])]
    (html-response (job-progress* job-id))))
|
||||
|
||||
(defn page*
  "Renders the body of the Invoice Glimpse page.

  With a `job-id` the progress/result of that job is shown; without one the
  upload form is shown."
  [job-id]
  [:div.mt-4
   (com/card {}
             [:div.px-4.py-3.space-y-4.flex.flex-col
              [:h1.text-2xl.mb-3.font-bold "Invoice Glimpse"]
              [:p.text-sm.italic "Import your invoices with the power of AI."]
              [:div.flex.flex-row.space-x-4 (com/pill {:color :primary} "Beta")
               [:span "Note: This upload is experimental. Please only use PDFs with a single invoice in them."]]
              (when job-id
                (job-progress* job-id))

              (when-not job-id
                (upload-form*))])])
|
||||
|
||||
(defn begin-textract-file
  "Starts a Textract expense analysis for the object at `s3-location` in the
  data bucket, stores a textract-invoice entity for it with status
  \"IN_PROGRESS\", and returns the map that was transacted."
  [s3-location]
  (let [document         {:s3-object {:bucket (:data-bucket env)
                                      :name   s3-location}}
        job-id           (:job-id (textract/start-expense-analysis {:document-location document}))
        pdf-url          (str "http://" bucket-name ".s3-website-us-east-1.amazonaws.com/" s3-location)
        textract-invoice {:textract-invoice/job-id          job-id
                          :textract-invoice/textract-status "IN_PROGRESS"
                          :textract-invoice/pdf-url         pdf-url}]
    @(dc/transact conn [textract-invoice])
    textract-invoice))
|
||||
|
||||
(defn upload
  "Handler for an invoice upload.

  Reads the uploaded file from the `:file` (or \"file\") request param, stores
  it in the data bucket under textract-files/<uuid>.<extension>, starts the
  Textract analysis and redirects (302) to the Invoice Glimpse page for the
  new job. Any failure, including a request without a file, is logged and
  returned as an HTML fragment containing the error message."
  [request]
  (let [file (or (get (:params request) :file)
                 (get (:params request) "file"))]
    (mu/log ::uploading-file
            :file file)
    (try
      ;; fail inside the try so a missing upload takes the same error path
      (when-not (:tempfile file)
        (throw (ex-info "No file was uploaded." {})))
      (let [extension   (last (str/split (:filename file) #"[\\.]"))
            s3-location (str "textract-files/" (UUID/randomUUID) "." extension)]
        ;; the only stream opened on the temp file; closed once the put returns
        (with-open [stream (io/input-stream (:tempfile file))]
          (s3/put-object (:data-bucket env)
                         s3-location
                         stream
                         {:content-type   "application/pdf"
                          :content-length (.length (:tempfile file))}))
        (let [textract-invoice (begin-textract-file s3-location)]
          {:headers {"Location"
                     (str (bidi/path-for ssr-routes/only-routes
                                         :invoice-glimpse)
                          "?" (url/map->query {:job-id (:textract-invoice/job-id textract-invoice)}))}
           :status  302}))
      (catch Exception e
        (alog/error ::cant-begin-textract
                    :error e)
        (html-response [:div (.getMessage e)])))))
|
||||
|
||||
(defn page
  "Handler for the full Invoice Glimpse page: logs the request method and
  wraps `page*` (for the \"job-id\" query parameter, if any) in the admin
  layout with breadcrumbs."
  [{:keys [request-method] :as request}]
  (mu/log ::method
          :method request-method)
  (let [glimpse-path (bidi/path-for ssr-routes/only-routes :invoice-glimpse)
        admin-path   (bidi/path-for ssr-routes/only-routes :admin)
        job-id       (get (:query-params request) "job-id")]
    (base-page
     request
     (com/page {:nav           (com/admin-aside-nav)
                :active-client (:client (:session request))
                :identity      (:identity request)
                :app-params    {:hx-get     glimpse-path
                                :hx-trigger "clientSelected from:body"
                                :hx-select  "#app-contents"
                                :hx-swap    "outerHTML swap:300ms"}}
               (com/breadcrumbs {}
                                [:a {:href admin-path} "Invoice"]
                                [:a {:href glimpse-path} "Glimpse"])
               (page* job-id))
     "Invoice Glimpse")))
|
||||
|
||||
Reference in New Issue
Block a user