452 lines
24 KiB
Clojure
452 lines
24 KiB
Clojure
|
|
(ns auto-ap.ssr.invoice.glimpse
|
|
(:require
|
|
[amazonica.aws.s3 :as s3]
|
|
[amazonica.aws.textract :as textract]
|
|
[auto-ap.datomic :refer [conn pull-attr pull-id]]
|
|
[auto-ap.datomic.clients :as d-clients]
|
|
[auto-ap.logging :as alog]
|
|
[auto-ap.solr :as solr]
|
|
[auto-ap.ssr-routes :as ssr-routes]
|
|
[auto-ap.ssr.components :as com]
|
|
[auto-ap.ssr.ui :refer [base-page]]
|
|
[auto-ap.ssr.utils :refer [html-response path->name]]
|
|
[auto-ap.time :as atime]
|
|
[bidi.bidi :as bidi]
|
|
[cemerick.url :as url]
|
|
[clj-time.coerce :as coerce]
|
|
[cheshire.core :as cheshire]
|
|
[clojure.java.io :as io]
|
|
[clojure.string :as str]
|
|
[com.brunobonacci.mulog :as mu]
|
|
[config.core :refer [env]]
|
|
[datomic.api :as dc]
|
|
[hiccup2.core :as hiccup]
|
|
[iol-ion.tx :refer [random-tempid]]
|
|
[auto-ap.client-routes :as client-routes]
|
|
[auto-ap.datomic.vendors :as d-vendors]
|
|
[clj-time.core :as time])
|
|
(:import
|
|
(java.util UUID)))
|
|
|
|
;; S3 bucket name taken from the `:data-bucket` entry of the environment
;; config; used in this namespace when building the stored PDF URL.
(def bucket-name
  (get env :data-bucket))
|
|
|
|
(defn lookup
  "Flattens a Textract expense result `tx` into one lazy sequence of maps.

  The sequence holds every map under `:expense-documents` first, followed by
  every entry of each document's `:summary-fields`. Each item has `:geometry`
  removed from its `:label-detection` and `:value-detection` values; an item
  lacking those keys ends up with them set to nil."
  [tx]
  (let [documents      (:expense-documents tx)
        summary-fields (mapcat :summary-fields documents)
        strip-geometry (fn [item k] (update item k dissoc :geometry))]
    (for [item (concat documents summary-fields)]
      (-> item
          (strip-geometry :label-detection)
          (strip-geometry :value-detection)))))
|
|
|
|
(defn stack-rank
  "Returns the non-blank value texts of the descriptors whose type text is
  accepted by `valid-values`, highest score first.

  `valid-values` is invoked as a predicate on each descriptor's
  `[:type :text]` (a set works). A descriptor's score is its type confidence
  multiplied by its value-detection confidence."
  [valid-values field-descriptors]
  (let [wanted?   (fn [fd] (valid-values (get-in fd [:type :text])))
        score     (fn [fd] (* (get-in fd [:type :confidence])
                              (get-in fd [:value-detection :confidence])))
        ;; Sort ascending and then reverse (rather than sorting descending) so
        ;; that descriptors with equal scores keep the same relative order.
        ascending (sort-by score (filter wanted? field-descriptors))]
    (->> (reverse ascending)
         (map #(get-in % [:value-detection :text]))
         (remove str/blank?))))
|
|
|
|
(defn clean-customer
  "Returns `c` with every run of non-word characters collapsed to one space."
  [c]
  (-> c
      (str/replace #"\W+" " ")))
|
|
|
|
(defn deduplicate
  "Returns a vector of the `[raw parsed]` pairs in `xs`, keeping only the first
  pair seen for each parsed value and preserving input order.

  Membership is tested by invoking the set of seen values, so a pair whose
  parsed value is nil or false is never treated as a duplicate."
  [xs]
  (loop [pending (seq xs)
         kept    []
         seen    #{}]
    (if-not pending
      kept
      (let [[raw parsed] (first pending)
            remaining    (next pending)]
        (if (seen parsed)
          (recur remaining kept seen)
          (recur remaining
                 (conj kept [raw parsed])
                 (conj seen parsed)))))))
|
|
|
|
(defn- parse-amount
  "Parses the first numeric run in `text` as a double, or returns nil.

  Commas are removed first so a thousands-separated amount such as
  \"1,234.56\" yields 1234.56 instead of stopping at the first comma. A run
  that is not a valid number (for example a lone \"-\" or \"1.2.3\") yields
  nil instead of throwing, so one odd field cannot abort the whole analysis."
  [text]
  (when text
    (when-let [[_ number] (re-find #"([0-9.\-]+)" (str/replace text "," ""))]
      (try
        (Double/parseDouble number)
        (catch NumberFormatException _
          nil)))))

(defn textract->textract-invoice
  "Builds the `:textract-invoice/*` entity map for entity `id` from a Textract
  expense-analysis result `tx`.

  For each of total, customer identifier, vendor name, date and invoice number
  it collects ranked `[raw-text parsed-value]` candidates, de-duplicated by
  parsed value. The first candidate is stored under the singular key and the
  full list (nil when empty) under the matching `-options` key. The status is
  set to \"SUCCEEDED\".

  Candidate sources, in priority order:
  - total: TOTAL fields, then AMOUNT_DUE fields
  - customer: RECEIVER_NAME fields matched through the Solr \"clients\" index,
    then CUSTOMER_NUMBER fields with an exact client match
  - vendor: VENDOR_NAME fields matched through the Solr \"vendors\" index,
    keeping only hits scoring above 4.0
  - date: INVOICE_RECEIPT_DATE / ORDER_DATE / DELIVERY_DATE fields
  - invoice number: INVOICE_RECEIPT_ID / PO_NUMBER fields"
  [id tx]
  (let [fields        (lookup tx)
        ranked        (fn [types] (stack-rank types fields))
        with-parsed   (fn [parse texts] (map (fn [t] [t (parse t)]) texts))
        first-solr-id (fn [index text keep-hit?]
                        (->> (solr/query solr/impl index {"query" (format "name:(%s) " text) "fields" "score, *"})
                             (filter keep-hit?)
                             (map (comp #(Long/parseLong %) :id))
                             first))
        parse-date    (fn [t]
                        (or (some-> (and (re-find #"\d{1,2}\/\d{1,2}/\d{4,4}" t) (atime/parse t "MM/dd/yyyy"))
                                    (coerce/to-date))
                            (some-> (and (re-find #"\d{1,2}\/\d{1,2}/\d{2,2}" t) (atime/parse t "MM/dd/yy"))
                                    (coerce/to-date))))

        total-options
        (deduplicate
         (concat (with-parsed parse-amount (ranked #{"TOTAL"}))
                 (with-parsed parse-amount (ranked #{"AMOUNT_DUE"}))))

        customer-identifier-options
        (deduplicate
         (concat
          ;; Receiver names go through Solr with no score threshold.
          (with-parsed #(first-solr-id "clients" (clean-customer %) (constantly true))
                       (ranked #{"RECEIVER_NAME"}))
          ;; Customer numbers are kept only when they match a client exactly.
          (->> (ranked #{"CUSTOMER_NUMBER"})
               (with-parsed #(:db/id (d-clients/exact-match %)))
               (filter second))))

        ;; NOTE(review): unlike receiver names, the vendor text is sent to Solr
        ;; without `clean-customer` — confirm that query syntax characters in
        ;; a vendor name cannot break the query.
        vendor-name-options
        (deduplicate
         (with-parsed #(first-solr-id "vendors" % (fn [d] (> (:score d) 4.0)))
                      (ranked #{"VENDOR_NAME"})))

        date-options
        (deduplicate
         (with-parsed parse-date
                      (ranked #{"INVOICE_RECEIPT_DATE" "ORDER_DATE" "DELIVERY_DATE"})))

        invoice-number-options
        (deduplicate
         (with-parsed identity (ranked #{"INVOICE_RECEIPT_ID" "PO_NUMBER"})))]
    #:textract-invoice
     {:db/id id
      :textract-status "SUCCEEDED"
      :total (first total-options)
      :total-options (seq total-options)
      :customer-identifier (first customer-identifier-options)
      :customer-identifier-options (seq customer-identifier-options)
      :vendor-name (first vendor-name-options)
      :vendor-name-options (seq vendor-name-options)
      :date (first date-options)
      :date-options (seq date-options)
      :invoice-number (first invoice-number-options)
      :invoice-number-options (seq invoice-number-options)}))
|
|
|
|
(defn upload-form*
  "Renders the drop-target form for uploading an invoice together with the
  inline script that attaches Dropzone to it. On a successful upload the
  script navigates the browser to the URL the upload request ended on."
  []
  (let [form-attrs  {:action (bidi/path-for ssr-routes/only-routes
                                            :invoice-glimpse-upload)
                     :method "POST"
                     :id "invoice"}
        dropzone-js "
invoice_dropzone = new Dropzone(\"#invoice\", {
success: function(file, response) {
window.location.href = file.xhr.responseURL;
},
acceptedFiles: 'application/pdf,.pdf',
disablePreviews: true
}); "]
    [:div
     [:form.bg-blue-100.border-2.border-dashed.rounded-lg.border-blue-300.p-4.max-w-md.w-md.text-center.cursor-pointer
      form-attrs
      "Drop an invoice here"]
     [:script
      (hiccup/raw dropzone-js)]]))
|
|
|
|
(defn customer-identifier-id->customer-identifier-client
  "Expands a stored `[customer-identifier client-id]` tuple into
  `[customer-identifier [client-id client-name]]` by pulling the client.
  Returns nil when the tuple has no client id."
  [[ci client]]
  (when client
    (let [{client-id   :db/id
           client-name :client/name} (dc/pull (dc/db conn)
                                              [:client/name :db/id]
                                              client)]
      [ci [client-id client-name]])))
|
|
|
|
(defn vendor-name-tuple->vendor-tuple
  "Expands a stored `[vendor-name vendor-id]` tuple into
  `[vendor-name [vendor-id vendor-name-in-db]]` by pulling the vendor.
  Returns nil when the tuple has no vendor id."
  [[vn vendor]]
  (when vendor
    (let [{vendor-id   :db/id
           vendor-name :vendor/name} (dc/pull (dc/db conn)
                                              [:vendor/name :db/id]
                                              vendor)]
      [vn [vendor-id vendor-name]])))
|
|
|
|
(defn get-job
  "Pulls the textract-invoice entity `id` and expands its client and vendor
  tuples for display.

  The selected customer identifier and vendor name become
  `[raw [entity-id entity-name]]` (or nil when no entity is attached). In the
  `-options` lists, candidates with no attached entity are dropped rather than
  kept as nil placeholders, because the option renderer would otherwise show
  them as \"null (null)\" alternates."
  [id]
  (-> (dc/pull (dc/db conn) '[*] id)
      (update :textract-invoice/customer-identifier customer-identifier-id->customer-identifier-client)
      ;; `keep` (not `map`) so unresolved candidates do not leave nil entries.
      (update :textract-invoice/customer-identifier-options #(keep customer-identifier-id->customer-identifier-client %))
      (update :textract-invoice/vendor-name vendor-name-tuple->vendor-tuple)
      (update :textract-invoice/vendor-name-options #(keep vendor-name-tuple->vendor-tuple %))))
|
|
|
|
(defn refresh-job
  "Polls Textract for the job attached to textract-invoice `id` and returns the
  freshly pulled job.

  Nothing is polled unless the stored job has a job id and is still
  \"IN_PROGRESS\". When Textract reports \"SUCCEEDED\" the parsed invoice
  fields are upserted; any other reported status is simply stored."
  [id]
  (let [job        (get-job id)
        ;; From here on the entity id of the pulled job is used, as before.
        entity-id  (:db/id job)
        job-id     (:textract-invoice/job-id job)
        status     (:textract-invoice/textract-status job)]
    (when (and job-id (= "IN_PROGRESS" status))
      (let [result     (textract/get-expense-analysis {:job-id job-id})
            new-status (:job-status result)]
        (if (= "SUCCEEDED" new-status)
          @(dc/transact conn [[:upsert-entity (textract->textract-invoice entity-id result)]])
          @(dc/transact conn [{:db/id entity-id :textract-invoice/textract-status new-status}]))))
    (get-job entity-id)))
|
|
|
|
|
|
(defn pill-list*
  "Renders the alternates for a form field as a row of pills, or nil when there
  are no alternates besides `selected`.

  Each pill is a link that PATCHes the textract-invoice `id` with `field` set
  to `(->value option)` when `->value` is given and to `(->text option)`
  otherwise. `->text` also supplies the pill label; `class` is applied to the
  wrapping flex container."
  [{:keys [selected options class ->text ->value id field]}]
  (let [patch-url (fn [x]
                    (str (bidi/path-for ssr-routes/only-routes :invoice-glimpse-update-textract-invoice :textract-invoice-id id)
                         "?"
                         (url/map->query {field (if ->value (->value x) (->text x))})))
        pills     (for [x (remove #{selected} options)]
                    [:div.shrink
                     (com/pill {:color :secondary}
                               (com/link {:hx-patch (patch-url x)
                                          :hx-target "closest form"
                                          :href "#"}
                                         (->text x)))])]
    (when (seq pills)
      [:div.col-span-6.col-start-1.text-xs
       "Alternates: "
       [:div.flex.gap-2.flex-wrap {:class class}
        pills]])))
|
|
|
|
(defn textract->invoice-form*
  "Renders the invoice form for a textract-invoice as returned by `get-job`.

  Each field shows the currently selected candidate and is followed by a pill
  list of the remaining candidates. The client and vendor inputs are disabled;
  they change only through their pills. Submitting posts to the
  create-invoice route for this textract-invoice."
  [textract-invoice]
  (let [glimpse-id     (:db/id textract-invoice)
        customer       (:textract-invoice/customer-identifier textract-invoice)
        vendor         (:textract-invoice/vendor-name textract-invoice)
        date           (:textract-invoice/date textract-invoice)
        total          (:textract-invoice/total textract-invoice)
        invoice-number (:textract-invoice/invoice-number textract-invoice)
        ;; Client and vendor candidates share the [raw [entity-id entity-name]] shape.
        entity-text    (fn [[raw [_ entity-name]]]
                         (format "%s (%s)" entity-name raw))
        entity-value   (fn [[_ [entity-id _]]]
                         entity-id)
        iso-date-text  (fn [d]
                         (atime/unparse-local (coerce/to-date-time d) atime/iso-date))]
    [:form {:hx-post (bidi/path-for ssr-routes/only-routes
                                    :invoice-glimpse-create-invoice
                                    :textract-invoice-id glimpse-id)}
     [:div.grid.grid-cols-6.gap-4.mb-4
      ;; Client
      [:div.col-span-6
       (com/field {:label "Client"}
                  (com/text-input {:name (path->name [:invoice/client])
                                   :value (second (second customer))
                                   :placeholder "Client"
                                   :disabled true
                                   :autofocus true}))]
      (pill-list* {:selected customer
                   :options (:textract-invoice/customer-identifier-options textract-invoice)
                   :id glimpse-id
                   :class "flex-col"
                   :field "client"
                   :->text entity-text
                   :->value entity-value})
      ;; Vendor
      [:div.col-span-6
       (com/field {:label "Vendor"}
                  (com/text-input {:name (path->name [:invoice/vendor])
                                   :value (second (second vendor))
                                   :disabled true
                                   :placeholder "Vendor"}))]
      (pill-list* {:selected vendor
                   :options (:textract-invoice/vendor-name-options textract-invoice)
                   :id glimpse-id
                   :class "flex-row"
                   :field "vendor"
                   :->text entity-text
                   :->value entity-value})
      ;; Date
      [:div.col-span-3
       (com/field {:label "Date"}
                  (com/date-input {:name "date"
                                   :value (iso-date-text (second date))
                                   :placeholder "Date"}))]
      (pill-list* {:selected date
                   :options (:textract-invoice/date-options textract-invoice)
                   :id glimpse-id
                   :field "date"
                   :->text (fn [[_ candidate-date]]
                             (iso-date-text candidate-date))})
      ;; Total
      [:div.col-span-2.col-start-1
       (com/field {:label "Total"}
                  (com/money-input {:name "total"
                                    :value (second total)
                                    :placeholder "Total"}))]
      (pill-list* {:selected total
                   :options (:textract-invoice/total-options textract-invoice)
                   :id glimpse-id
                   :field "total"
                   :->text (fn [[_ amount]]
                             (str amount))})
      ;; Invoice number
      [:div.col-span-2.col-start-1
       (com/field {:label "Invoice Number"}
                  (com/text-input {:name "invoice-number"
                                   :value (first invoice-number)
                                   :placeholder "Invoice Number"}))]
      (pill-list* {:selected invoice-number
                   :field "invoice-number"
                   :id glimpse-id
                   :options (:textract-invoice/invoice-number-options textract-invoice)
                   :->text (fn [[_ candidate-number]]
                             (str candidate-number))})]
     (com/button {:color :primary} "Save")]))
|
|
|
|
(defn job-progress*
  "Refreshes textract-invoice `id` and renders its current state.

  While the job is \"IN_PROGRESS\" this is a placeholder that re-requests
  itself after five seconds; once \"SUCCEEDED\" it is the PDF preview next to
  the invoice form. Any other status renders nothing."
  [id]
  (let [textract-invoice (refresh-job id)
        status           (:textract-invoice/textract-status textract-invoice)]
    (case status
      "IN_PROGRESS"
      [:div.bg-blue-100.border-2.border-dashed.rounded-lg.border-blue-300.p-4.max-w-md.w-md.text-center.cursor-pointer
       {:hx-get (str (bidi/path-for ssr-routes/only-routes
                                    :invoice-glimpse-textract-invoice
                                    :textract-invoice-id (:db/id textract-invoice)))
        :hx-trigger "load delay:5s"
        :hx-swap "outerHTML"}
       "Analyzing job "
       (some-> (:textract-invoice/job-id textract-invoice)
               (subs 0 8))
       "..."]

      "SUCCEEDED"
      (let [pdf-card  (com/card {}
                                [:iframe.p-4 {:src (:textract-invoice/pdf-url textract-invoice) :width 791 :height 700}])
            form-card (com/card {}
                                [:div.p-4
                                 (textract->invoice-form* textract-invoice)])]
        [:div.px-4
         [:div.flex.flex-row.space-x-4
          [:div {:style {:width "805"}}
           pdf-card]
          [:div {:class "basis-1/4"}
           form-card]]])

      nil)))
|
|
|
|
(defn page*
  "Renders the Invoice Glimpse content area.

  With a textract-invoice `id` it shows that job's progress plus a
  \"New glimpse\" link; without one it shows the upload form."
  [id]
  (let [heading [:div.flex.gap-x-4
                 [:h1.text-2xl.font-bold "Invoice Glimpse"]
                 [:div (com/pill {:color :primary} "Beta")]
                 (when id
                   [:div.ml-auto
                    [:a.mb-2 {:href (bidi/path-for ssr-routes/only-routes
                                                   :invoice-glimpse)}
                     (com/button {:color :secondary} "New glimpse")]])]
        blurb   [:p.text-sm.italic "Import your invoices with the power of AI. Please only use PDFs with a single invoice in them."]]
    [:div#invoice-glimpse-content.mt-4
     (com/card {}
               [:div.px-4.py-3.space-y-4.flex.flex-col
                heading
                blurb
                (when id
                  (job-progress* id))
                (when-not id
                  (upload-form*))])]))
|
|
|
|
(defn begin-textract-file
  "Creates a textract-invoice entity for the PDF at `s3-location` and starts a
  Textract expense analysis for it in the background. Returns the new job as
  pulled by `get-job`.

  The entity is created with status \"IN_PROGRESS\"; the Textract job id is
  attached once the analysis request returns. If starting the analysis fails,
  the error is logged and the status is set to \"FAILED\". The future is never
  dereferenced, so without this handling the failure would be lost and the job
  would stay \"IN_PROGRESS\" with no job id."
  [s3-location]
  (let [tempid (random-tempid)
        id     (get-in @(dc/transact conn [{:db/id tempid
                                            :textract-invoice/textract-status "IN_PROGRESS"
                                            :textract-invoice/pdf-url (str "https://" bucket-name "/" s3-location)}])
                       [:tempids tempid])]
    (future
      (try
        (let [analysis (textract/start-expense-analysis {:document-location {:s3-object {:bucket bucket-name :name s3-location}}
                                                         :client-request-token (str id)})]
          @(dc/transact conn [{:db/id id
                               :textract-invoice/job-id (:job-id analysis)}]))
        (catch Exception e
          (alog/error ::cant-start-expense-analysis
                      :error e)
          @(dc/transact conn [{:db/id id
                               :textract-invoice/textract-status "FAILED"}]))))
    (get-job id)))
|
|
|
|
(defn textract-invoice->invoice
  "Converts a textract-invoice (as returned by `get-job`) into an invoice
  transaction map, or returns nil when the client, vendor, date, total or
  invoice number is missing.

  The required fields are checked before anything else, so an incomplete
  textract-invoice yields nil instead of failing while computing the due date
  from a missing date or pulling a missing vendor.

  `:invoice/due` is added when the vendor has terms, and
  `:invoice/scheduled-payment` when the vendor is additionally paid
  automatically for this client."
  [textract-invoice]
  (mu/with-context {:textract-invoice textract-invoice}
    (let [[_ [vendor-id]]                 (:textract-invoice/vendor-name textract-invoice)
          [client-identifier [client-id]] (:textract-invoice/customer-identifier textract-invoice)
          [_ total]                       (:textract-invoice/total textract-invoice)
          [_ date]                        (:textract-invoice/date textract-invoice)
          [_ invoice-number]              (:textract-invoice/invoice-number textract-invoice)]
      (when (and client-id date invoice-number vendor-id total)
        (let [db                (dc/db conn)
              vendor            (dc/pull db d-vendors/default-read vendor-id)
              location          (->> (dc/pull db '[:client/locations] client-id)
                                     :client/locations
                                     first)
              due               (and (:vendor/terms vendor)
                                     (time/plus (coerce/to-date-time date)
                                                (time/days (d-vendors/terms-for-client-id vendor client-id))))
              scheduled-payment (and (d-vendors/automatically-paid-for-client-id? vendor client-id)
                                     due)]
          (cond-> {:db/id (random-tempid)
                   :invoice/client client-id
                   :invoice/client-identifier client-identifier
                   :invoice/vendor vendor-id
                   :invoice/invoice-number invoice-number
                   :invoice/total total
                   :invoice/date date
                   :invoice/location location
                   :invoice/import-status :import-status/imported
                   :invoice/outstanding-balance total
                   :invoice/status :invoice-status/unpaid}
            (instance? org.joda.time.DateTime due)
            (assoc :invoice/due (coerce/to-date due))

            (instance? org.joda.time.DateTime scheduled-payment)
            (assoc :invoice/scheduled-payment (coerce/to-date scheduled-payment))))))))
|
|
|
|
(defn update-textract-invoice-
  "Applies user-supplied field values to textract-invoice `id` and returns the
  refreshed job.

  The second argument is a string-keyed params map; the keys \"date\"
  (ISO date), \"total\", \"invoice-number\", \"client\" (entity id) and
  \"vendor\" (entity id) are recognised. Missing or blank values are ignored,
  so submitting a form with an empty input leaves that field unchanged instead
  of failing while parsing an empty string."
  [id {:strs [date total invoice-number client vendor]}]
  (let [given          (fn [v]
                         ;; Blank strings count as \"not supplied\".
                         (if (string? v)
                           (when-not (str/blank? v) v)
                           v))
        date           (given date)
        total          (given total)
        invoice-number (given invoice-number)
        client-id      (some-> (given client) Long/parseLong)
        vendor-id      (some-> (given vendor) Long/parseLong)]
    @(dc/transact-async conn [[:upsert-entity (cond-> {:db/id id}
                                                date (assoc :textract-invoice/date [date (coerce/to-date (atime/parse date atime/iso-date))])
                                                total (assoc :textract-invoice/total [total (Double/parseDouble total)])
                                                invoice-number (assoc :textract-invoice/invoice-number [invoice-number invoice-number])
                                                client-id (assoc :textract-invoice/customer-identifier [(pull-attr (dc/db conn) :client/name client-id) client-id])
                                                vendor-id (assoc :textract-invoice/vendor-name [(pull-attr (dc/db conn) :vendor/name vendor-id) vendor-id]))]])
    (get-job id)))
|
|
|
|
(defn upload
  "Ring handler for an uploaded invoice PDF.

  Stores the file in the data bucket under `textract-files/`, starts a
  Textract job for it and answers with a 302 redirect to that job's page.

  Failures are answered with an error status (400 when the request carries no
  file, 500 when storing or starting the analysis fails) so the client-side
  uploader does not treat them as a successful upload."
  [request]
  (let [file (or (get (:params request) :file)
                 (get (:params request) "file"))]
    (mu/log ::uploading-file
            :file file)
    (if-not file
      (html-response [:div "No file was uploaded."]
                     :status 400)
      (try
        (let [s3-location (str "textract-files/" (UUID/randomUUID) "." (last (str/split (:filename file) #"[\\.]")))]
          (with-open [stream (io/input-stream (:tempfile file))]
            (s3/put-object (:data-bucket env)
                           s3-location
                           stream
                           {:content-type "application/pdf"
                            :content-length (.length (:tempfile file))}))
          (let [textract-invoice (begin-textract-file s3-location)]
            {:headers {"Location"
                       (str (bidi/path-for ssr-routes/only-routes
                                           :invoice-glimpse-textract-invoice
                                           :textract-invoice-id (:db/id textract-invoice)))}
             :status 302}))
        (catch Exception e
          (alog/error ::cant-begin-textract
                      :error e)
          (html-response [:div (.getMessage e)]
                         :status 500))))))
|
|
|
|
(defn update-textract-invoice
  "Ring handler that applies the request's query params to the textract-invoice
  named in the route and responds with the re-rendered invoice form."
  [{:as request}]
  (let [textract-invoice-id (-> request :route-params :textract-invoice-id Long/parseLong)
        current-job         (update-textract-invoice- textract-invoice-id (:query-params request))]
    (-> current-job
        textract->invoice-form*
        html-response)))
|
|
|
|
(defn create-invoice
  "Ring handler that saves the submitted form values onto the textract-invoice
  and proposes a real invoice from it.

  Responds with:
  - a fresh Glimpse page plus a success notification when the invoice is
    created (the textract-invoice is then linked to the new invoice);
  - 400 when a required field is still missing, without attempting a
    transaction;
  - 400 \"This invoice already exists.\" when proposing the invoice yields no
    new entity."
  [request]
  (let [textract-invoice-id (Long/parseLong (:textract-invoice-id (:route-params request)))
        current-job         (update-textract-invoice- textract-invoice-id (:form-params request))
        new-invoice         (textract-invoice->invoice current-job)]
    (if-not new-invoice
      ;; Nothing to propose: report the real reason instead of transacting nil
      ;; and falling through to the \"already exists\" message.
      (html-response [:div "A client, vendor, date, total and invoice number are all required to create an invoice."]
                     :status 400)
      (let [new-invoice-id (get-in @(dc/transact conn [[:propose-invoice new-invoice]])
                                   [:tempids (:db/id new-invoice)])]
        (if new-invoice-id
          (do
            @(dc/transact conn [{:db/id (:db/id current-job)
                                 :textract-invoice/invoice new-invoice-id}])
            (html-response (page* nil)
                           :headers {"hx-push-url" (bidi/path-for ssr-routes/only-routes :invoice-glimpse)
                                     "hx-retarget" "#invoice-glimpse-content"
                                     "hx-trigger" (cheshire/generate-string
                                                   {"notification" (str (hiccup/html [:div "Successfully created "
                                                                                      (com/link {:href (str (bidi/path-for client-routes/routes
                                                                                                                           :invoices)
                                                                                                            "?exact-match-id="
                                                                                                            new-invoice-id)}
                                                                                                (format "invoice %s" (:invoice/invoice-number new-invoice)))
                                                                                      "."]))})}))
          (html-response [:div "This invoice already exists."]
                         :status 400))))))
|
|
|
|
(defn page
  "Ring handler for the full Invoice Glimpse page.

  Logs the request method, then renders the base page with navigation,
  breadcrumbs and the Glimpse content for the textract-invoice id in the route
  params (the upload form when there is none)."
  [{:keys [request-method] :as request}]
  (mu/log ::method
          :method request-method)
  (let [glimpse-path        (bidi/path-for ssr-routes/only-routes
                                           :invoice-glimpse)
        textract-invoice-id (some-> request
                                    :route-params
                                    :textract-invoice-id
                                    Long/parseLong)
        page-options        {:nav (com/admin-aside-nav)
                             :client-selection (:client-selection (:session request))
                             :client (:client request)
                             :identity (:identity request)
                             :app-params {:hx-get glimpse-path
                                          :hx-trigger "clientSelected from:body"
                                          :hx-select "#app-contents"
                                          :hx-swap "outerHTML swap:300ms"}}
        crumbs              (com/breadcrumbs {}
                                             [:a {:href (bidi/path-for ssr-routes/only-routes
                                                                       :admin)}
                                              "Invoice"]
                                             [:a {:href glimpse-path}
                                              "Glimpse"])]
    (base-page
     request
     (com/page page-options
               crumbs
               (page* textract-invoice-id))
     "Invoice Glimpse")))
|
|
|
|
(defn textract-invoice
  "Ring handler for a single textract-invoice.

  Requests carrying an `hx-request` header get only the job-progress fragment;
  all other requests get the full page."
  [request]
  (if-not (get-in request [:headers "hx-request"])
    (page request)
    (let [textract-invoice-id (some-> request
                                      :route-params
                                      :textract-invoice-id
                                      Long/parseLong)]
      (html-response (job-progress* textract-invoice-id)))))
|