Files
integreat/src/clj/auto_ap/ssr/invoice/glimpse.clj
2023-09-14 12:59:32 -07:00

452 lines
24 KiB
Clojure

(ns auto-ap.ssr.invoice.glimpse
(:require
[amazonica.aws.s3 :as s3]
[amazonica.aws.textract :as textract]
[auto-ap.datomic :refer [conn pull-attr pull-id]]
[auto-ap.datomic.clients :as d-clients]
[auto-ap.logging :as alog]
[auto-ap.solr :as solr]
[auto-ap.ssr-routes :as ssr-routes]
[auto-ap.ssr.components :as com]
[auto-ap.ssr.ui :refer [base-page]]
[auto-ap.ssr.utils :refer [html-response path->name]]
[auto-ap.time :as atime]
[bidi.bidi :as bidi]
[cemerick.url :as url]
[clj-time.coerce :as coerce]
[cheshire.core :as cheshire]
[clojure.java.io :as io]
[clojure.string :as str]
[com.brunobonacci.mulog :as mu]
[config.core :refer [env]]
[datomic.api :as dc]
[hiccup2.core :as hiccup]
[iol-ion.tx :refer [random-tempid]]
[auto-ap.client-routes :as client-routes]
[auto-ap.datomic.vendors :as d-vendors]
[clj-time.core :as time])
(:import
(java.util UUID)))
(def bucket-name
  "Name of the data bucket, read from the `:data-bucket` key of the
  application config."
  (get env :data-bucket))
(defn lookup
  "Flattens a Textract expense result into a single sequence of field maps
  with the `:geometry` entry removed from `:label-detection` and
  `:value-detection`.

  The sequence holds the expense documents themselves first, followed by
  every entry of their `:summary-fields`."
  [tx]
  (let [documents      (:expense-documents tx)
        strip-geometry (fn [field]
                         (-> field
                             (update :label-detection dissoc :geometry)
                             (update :value-detection dissoc :geometry)))]
    (map strip-geometry
         (concat documents
                 (mapcat :summary-fields documents)))))
(defn stack-rank
  "Returns the non-blank value texts of the `field-descriptors` whose type
  text satisfies `valid-values`, ordered from highest to lowest combined
  confidence (type confidence multiplied by value confidence)."
  [valid-values field-descriptors]
  (let [wanted? (fn [descriptor]
                  (valid-values (get-in descriptor [:type :text])))
        score   (fn [descriptor]
                  (* (get-in descriptor [:type :confidence])
                     (get-in descriptor [:value-detection :confidence])))]
    (->> field-descriptors
         (filter wanted?)
         ;; ascending sort followed by reverse, so ties come out in reverse input order
         (sort-by score)
         reverse
         (map #(get-in % [:value-detection :text]))
         (remove str/blank?))))
(defn clean-customer
  "Replaces every run of non-word characters in `c` with a single space."
  [c]
  (str/replace c #"\W+" " "))
(defn deduplicate
  "Takes a sequence of `[raw parsed]` pairs and returns a vector keeping only
  the first pair for each distinct `parsed` value, in input order.

  Membership is tested by calling the seen-set as a function, so pairs whose
  parsed value is nil (or false) are never treated as already seen and are
  all kept."
  [xs]
  (loop [pending (seq xs)
         kept    []
         seen    #{}]
    (if-not pending
      kept
      (let [[raw parsed] (first pending)]
        (if (seen parsed)
          (recur (next pending) kept seen)
          (recur (next pending)
                 (conj kept [raw parsed])
                 (conj seen parsed)))))))
(defn- parse-amount
  "Parses the first numeric run found in `text` as a double.

  Commas are stripped first so a grouped amount such as \"1,234.56\" parses
  as 1234.56 instead of stopping at the comma (assumes comma is a thousands
  separator, not a decimal mark -- TODO confirm for non-US invoices).
  Returns nil when nothing matches or the match is not a valid number
  (e.g. \".\" or \"12-34\"), rather than throwing."
  [text]
  (try
    (some->> (str/replace text "," "")
             (re-find #"([0-9.\-]+)")
             second
             Double/parseDouble)
    (catch NumberFormatException _
      nil)))

(defn- amount-options
  "Returns `[raw-text parsed-amount]` pairs for the ranked values of the
  fields of type `field-type`."
  [field-type fields]
  (map (fn [t] [t (parse-amount t)])
       (stack-rank #{field-type} fields)))

(defn textract->textract-invoice
  "Builds the `:textract-invoice/*` entity map for entity `id` from the
  Textract expense result `tx`.

  For each of total, customer, vendor, date and invoice number it collects
  ranked `[raw-text parsed-value]` candidates, removes duplicates, stores the
  whole list under `*-options` and the first candidate as the chosen value.
  The status is set to \"SUCCEEDED\"."
  [id tx]
  (let [lookup (lookup tx)
        ;; NOTE(review): TOTAL candidates are listed before AMOUNT_DUE ones.
        ;; That is the order the previous ->>/concat form actually produced;
        ;; confirm it is the intended precedence.
        total-options (deduplicate
                       (concat (amount-options "TOTAL" lookup)
                               (amount-options "AMOUNT_DUE" lookup)))
        ;; NOTE(review): likewise, RECEIVER_NAME search hits (which may carry
        ;; a nil client id) come before exact CUSTOMER_NUMBER matches, as the
        ;; previous form produced; confirm this precedence.
        customer-identifier-options (deduplicate
                                     (concat
                                      (map (fn [t]
                                             [t (->> (solr/query solr/impl "clients" {"query" (format "name:(%s) " (clean-customer t)) "fields" "score, *"})
                                                     #_(filter (fn [d] (> (:score d) 4.0)))
                                                     (map (comp #(Long/parseLong %) :id))
                                                     first)])
                                           (stack-rank #{"RECEIVER_NAME"} lookup))
                                      (->> (stack-rank #{"CUSTOMER_NUMBER"} lookup)
                                           (map (fn [t]
                                                  [t (:db/id (d-clients/exact-match t))]))
                                           (filter second))))
        vendor-name-options (->> (stack-rank #{"VENDOR_NAME"} lookup)
                                 (map (fn [t]
                                        ;; OCR text is sanitized the same way as the client
                                        ;; query so query-syntax characters cannot break
                                        ;; or alter the Solr query.
                                        [t (->> (solr/query solr/impl "vendors" {"query" (format "name:(%s) " (clean-customer t)) "fields" "score, *"})
                                                (filter (fn [d] (> (:score d) 4.0)))
                                                (map (comp #(Long/parseLong %) :id))
                                                first)]))
                                 (deduplicate))
        date-options (->> (stack-rank #{"INVOICE_RECEIPT_DATE" "ORDER_DATE" "DELIVERY_DATE"} lookup)
                          (map (fn [t]
                                 ;; try a four-digit year first, then a two-digit year
                                 [t (or (some-> (and (re-find #"\d{1,2}\/\d{1,2}/\d{4,4}" t) (atime/parse t "MM/dd/yyyy"))
                                                (coerce/to-date))
                                        (some-> (and (re-find #"\d{1,2}\/\d{1,2}/\d{2,2}" t) (atime/parse t "MM/dd/yy"))
                                                (coerce/to-date)))]))
                          (deduplicate))
        invoice-number-options (->> (stack-rank #{"INVOICE_RECEIPT_ID" "PO_NUMBER"} lookup)
                                    (map (fn [t]
                                           [t t]))
                                    (deduplicate))]
    #:textract-invoice{:db/id id
                       :textract-status "SUCCEEDED"
                       :total (first total-options)
                       :total-options (seq total-options)
                       :customer-identifier (first customer-identifier-options)
                       :customer-identifier-options (seq customer-identifier-options)
                       :vendor-name (first vendor-name-options)
                       :vendor-name-options (seq vendor-name-options)
                       :date (first date-options)
                       :date-options (seq date-options)
                       :invoice-number (first invoice-number-options)
                       :invoice-number-options (seq invoice-number-options)}))
(defn upload-form*
  "Hiccup for the invoice drop area: a form that POSTs to the
  `:invoice-glimpse-upload` route plus the script that attaches a Dropzone
  to it and navigates to the response URL after a successful upload."
  []
  (let [upload-path (bidi/path-for ssr-routes/only-routes
                                   :invoice-glimpse-upload)
        dropzone-js "
invoice_dropzone = new Dropzone(\"#invoice\", {
success: function(file, response) {
window.location.href = file.xhr.responseURL;
},
acceptedFiles: 'application/pdf,.pdf',
disablePreviews: true
}); "]
    [:div
     [:form.bg-blue-100.border-2.border-dashed.rounded-lg.border-blue-300.p-4.max-w-md.w-md.text-center.cursor-pointer
      {:action upload-path
       :method "POST"
       :id "invoice"}
      "Drop an invoice here"]
     [:script
      (hiccup/raw dropzone-js)]]))
(defn customer-identifier-id->customer-identifier-client
  "Turns a `[customer-identifier client-id]` tuple into
  `[customer-identifier [client-id client-name]]` by pulling the client from
  the current database. Returns nil when the tuple has no client id."
  [[ci client]]
  (when client
    (let [{client-id   :db/id
           client-name :client/name} (dc/pull (dc/db conn)
                                              [:client/name :db/id]
                                              client)]
      [ci [client-id client-name]])))
(defn vendor-name-tuple->vendor-tuple
  "Turns a `[vendor-name vendor-id]` tuple into
  `[vendor-name [vendor-id stored-vendor-name]]` by pulling the vendor from
  the current database. Returns nil when the tuple has no vendor id."
  [[vn vendor]]
  (when vendor
    (let [{vendor-id   :db/id
           vendor-name :vendor/name} (dc/pull (dc/db conn)
                                              [:vendor/name :db/id]
                                              vendor)]
      [vn [vendor-id vendor-name]])))
(defn get-job
  "Pulls the textract-invoice entity `id` with all attributes and expands the
  customer and vendor tuples (selected value and options) so that each id is
  replaced by an `[id name]` pair.

  Options that have no resolved client/vendor id are dropped: the expansion
  helpers return nil for them, and a nil option would otherwise be rendered
  as an empty alternate in the form."
  [id]
  (-> (dc/pull (dc/db conn) '[*] id)
      (update :textract-invoice/customer-identifier customer-identifier-id->customer-identifier-client)
      (update :textract-invoice/customer-identifier-options #(keep customer-identifier-id->customer-identifier-client %))
      (update :textract-invoice/vendor-name vendor-name-tuple->vendor-tuple)
      (update :textract-invoice/vendor-name-options #(keep vendor-name-tuple->vendor-tuple %))))
(defn refresh-job
  "Checks Textract for the outcome of job `id` when it is still
  \"IN_PROGRESS\" and has a job id, stores the result, and returns the
  re-read job.

  On \"SUCCEEDED\" the extracted invoice fields are upserted. Any other
  status is written only when it is present and differs from the stored one,
  so repeated polling of a running job does not issue a transaction per poll
  and a missing status is never transacted as nil."
  [id]
  (let [{entity-id      :db/id
         job-id         :textract-invoice/job-id
         current-status :textract-invoice/textract-status} (get-job id)]
    (when (and job-id (= "IN_PROGRESS" current-status))
      (let [result     (textract/get-expense-analysis {:job-id job-id})
            new-status (:job-status result)]
        (cond
          (= "SUCCEEDED" new-status)
          @(dc/transact conn [[:upsert-entity (textract->textract-invoice entity-id result)]])

          (and new-status (not= new-status current-status))
          @(dc/transact conn [{:db/id entity-id :textract-invoice/textract-status new-status}]))))
    (get-job entity-id)))
(defn pill-list*
  "Renders the \"Alternates\" row for one form field: one pill per entry of
  `options` other than `selected`. Each pill issues an hx-patch to the
  update route of textract invoice `id`, passing `field` with the option's
  value (`->value` when supplied, otherwise `->text`), and is labelled with
  `->text`. Returns nil when there are no alternates."
  [{:keys [selected options class ->text ->value id field]}]
  (let [patch-url (fn [option]
                    (str (bidi/path-for ssr-routes/only-routes :invoice-glimpse-update-textract-invoice :textract-invoice-id id)
                         "?"
                         (url/map->query {field (if ->value (->value option) (->text option))})))
        pills     (for [option (remove #{selected} options)]
                    [:div.shrink
                     (com/pill {:color :secondary}
                               (com/link {:hx-patch (patch-url option)
                                          :hx-target "closest form"
                                          :href "#"}
                                         (->text option)))])]
    (when (seq pills)
      [:div.col-span-6.col-start-1.text-xs
       "Alternates: "
       [:div.flex.gap-2.flex-wrap {:class class}
        pills]])))
(defn textract->invoice-form*
  "Hiccup for the invoice form of an expanded textract invoice (as returned
  by `get-job`). Shows the chosen client, vendor, date, total and invoice
  number, each followed by its list of alternates, and posts to the
  `:invoice-glimpse-create-invoice` route."
  [textract-invoice]
  (let [{job-id                 :db/id
         customer               :textract-invoice/customer-identifier
         customer-options       :textract-invoice/customer-identifier-options
         vendor                 :textract-invoice/vendor-name
         vendor-options         :textract-invoice/vendor-name-options
         date                   :textract-invoice/date
         date-options           :textract-invoice/date-options
         total                  :textract-invoice/total
         total-options          :textract-invoice/total-options
         invoice-number         :textract-invoice/invoice-number
         invoice-number-options :textract-invoice/invoice-number-options} textract-invoice
        ;; formats a date value as an ISO local date string
        iso-day (fn [d]
                  (-> d
                      (coerce/to-date-time)
                      (atime/unparse-local atime/iso-date)))]
    [:form {:hx-post (bidi/path-for ssr-routes/only-routes
                                    :invoice-glimpse-create-invoice
                                    :textract-invoice-id job-id)}
     [:div.grid.grid-cols-6.gap-4.mb-4
      ;; client (read-only; changed through the alternates below)
      [:div.col-span-6
       (com/field {:label "Client"}
                  (com/text-input {:name (path->name [:invoice/client])
                                   :value (-> customer second second)
                                   :placeholder "Client"
                                   :disabled true
                                   :autofocus true}))]
      (pill-list* {:selected customer
                   :options customer-options
                   :id job-id
                   :class "flex-col"
                   :field "client"
                   :->text (fn [[customer-identifier [_ client-name]]]
                             (format "%s (%s)" client-name customer-identifier))
                   :->value (fn [[_ [client-id _]]]
                              client-id)})
      ;; vendor (read-only; changed through the alternates below)
      [:div.col-span-6
       (com/field {:label "Vendor"}
                  (com/text-input {:name (path->name [:invoice/vendor])
                                   :value (-> vendor second second)
                                   :disabled true
                                   :placeholder "Vendor"}))]
      (pill-list* {:selected vendor
                   :options vendor-options
                   :id job-id
                   :class "flex-row"
                   :field "vendor"
                   :->text (fn [[vendor-identifier [_ vendor-name]]]
                             (format "%s (%s)" vendor-name vendor-identifier))
                   :->value (fn [[_ [vendor-id _]]]
                              vendor-id)})
      ;; date
      [:div.col-span-3
       (com/field {:label "Date"}
                  (com/date-input {:name "date"
                                   :value (iso-day (second date))
                                   :placeholder "Date"}))]
      (pill-list* {:selected date
                   :options date-options
                   :id job-id
                   :field "date"
                   :->text (fn [[_ option-date]]
                             (iso-day option-date))})
      ;; total
      [:div.col-span-2.col-start-1
       (com/field {:label "Total"}
                  (com/money-input {:name "total"
                                    :value (second total)
                                    :placeholder "Total"}))]
      (pill-list* {:selected total
                   :options total-options
                   :id job-id
                   :field "total"
                   :->text (fn [[_ amount]]
                             (str amount))})
      ;; invoice number
      [:div.col-span-2.col-start-1
       (com/field {:label "Invoice Number"}
                  (com/text-input {:name "invoice-number"
                                   :value (first invoice-number)
                                   :placeholder "Invoice Number"}))]
      (pill-list* {:selected invoice-number
                   :field "invoice-number"
                   :id job-id
                   :options invoice-number-options
                   :->text (fn [[_ number]]
                             (str number))})]
     (com/button {:color :primary} "Save")]))
(defn job-progress*
  "Refreshes textract job `id` and renders its current state.

  - \"IN_PROGRESS\": a placeholder that re-requests this fragment after five
    seconds and replaces itself with the response.
  - \"SUCCEEDED\": the uploaded PDF next to the invoice form.
  - anything else (for example a failed analysis): a short message carrying
    the status, so the page is not left blank."
  [id]
  (let [textract-invoice (refresh-job id)
        status           (:textract-invoice/textract-status textract-invoice)]
    (cond
      (= "IN_PROGRESS" status)
      [:div.bg-blue-100.border-2.border-dashed.rounded-lg.border-blue-300.p-4.max-w-md.w-md.text-center.cursor-pointer
       {:hx-get (str (bidi/path-for ssr-routes/only-routes
                                    :invoice-glimpse-textract-invoice
                                    :textract-invoice-id (:db/id textract-invoice)))
        :hx-trigger "load delay:5s"
        :hx-swap "outerHTML"}
       ;; the job id may not be stored yet right after upload, hence some->
       "Analyzing job " (some-> textract-invoice
                                :textract-invoice/job-id
                                (subs 0 8)) "..."]

      (= "SUCCEEDED" status)
      [:div.px-4
       [:div.flex.flex-row.space-x-4
        [:div {:style {:width "805"}}
         (com/card {}
                   [:iframe.p-4 {:src (:textract-invoice/pdf-url textract-invoice) :width 791 :height 700}])]
        [:div {:class "basis-1/4"}
         (com/card {}
                   [:div.p-4
                    (textract->invoice-form* textract-invoice)])]]]

      :else
      [:div.bg-red-100.border-2.rounded-lg.border-red-300.p-4.max-w-md.w-md.text-center
       "Unable to analyze this invoice (status: " (or status "unknown") "). Please try uploading it again."])))
(defn page*
  "Hiccup for the glimpse content area. With an `id` it shows the progress or
  result of that textract job plus a \"New glimpse\" link; without one it
  shows the upload form."
  [id]
  (let [new-glimpse-link (when id
                           [:div.ml-auto [:a.mb-2 {:href (bidi/path-for ssr-routes/only-routes
                                                                        :invoice-glimpse)}
                                          (com/button {:color :secondary} "New glimpse")]])
        heading          [:div.flex.gap-x-4
                          [:h1.text-2xl.font-bold "Invoice Glimpse"]
                          [:div (com/pill {:color :primary} "Beta")]
                          new-glimpse-link]
        job-panel        (when id
                           (job-progress* id))
        upload-panel     (when-not id
                           (upload-form*))]
    [:div#invoice-glimpse-content.mt-4
     (com/card {}
               [:div.px-4.py-3.space-y-4.flex.flex-col
                heading
                [:p.text-sm.italic "Import your invoices with the power of AI. Please only use PDFs with a single invoice in them."]
                job-panel
                upload-panel])]))
(defn begin-textract-file
  "Creates a textract-invoice entity in status \"IN_PROGRESS\" for the file at
  `s3-location` in the data bucket, starts the Textract expense analysis on
  a background thread, and returns the new job as read by `get-job`.

  The Textract job id is stored once the analysis has been started. If
  starting it fails, the error is logged and the job is marked \"FAILED\";
  otherwise the exception would be lost inside the future and the job would
  stay \"IN_PROGRESS\" indefinitely."
  [s3-location]
  (let [tempid (random-tempid)
        id     (get-in @(dc/transact conn [{:db/id tempid
                                            :textract-invoice/textract-status "IN_PROGRESS"
                                            :textract-invoice/pdf-url (str "https://" bucket-name "/" s3-location)}])
                       [:tempids tempid])]
    (future
      (try
        (let [analysis (textract/start-expense-analysis {:document-location {:s3-object {:bucket bucket-name :name s3-location}}
                                                         :client-request-token (str id)})]
          @(dc/transact conn [{:db/id id
                               :textract-invoice/job-id (:job-id analysis)}]))
        (catch Exception e
          (alog/error ::cant-start-expense-analysis
                      :error e)
          @(dc/transact conn [{:db/id id
                               :textract-invoice/textract-status "FAILED"}]))))
    (get-job id)))
(defn textract-invoice->invoice
  "Builds a new invoice entity map from an expanded textract invoice (as
  returned by `get-job`).

  Returns nil unless client, vendor, date, invoice number and total are all
  present. That check runs before any database lookup or date arithmetic so
  that a missing vendor or date yields nil instead of being passed on to
  `dc/pull` / `time/plus`.

  `:invoice/due` is added when the vendor has terms, and
  `:invoice/scheduled-payment` (equal to the due date) when the vendor is
  automatically paid for the client."
  [textract-invoice]
  (mu/with-context {:textract-invoice textract-invoice}
    (let [[_ [vendor-id]]                 (:textract-invoice/vendor-name textract-invoice)
          [client-identifier [client-id]] (:textract-invoice/customer-identifier textract-invoice)
          [_ total]                       (:textract-invoice/total textract-invoice)
          [_ date]                        (:textract-invoice/date textract-invoice)
          [_ invoice-number]              (:textract-invoice/invoice-number textract-invoice)]
      (when (and client-id date invoice-number vendor-id total)
        (let [db                (dc/db conn)
              vendor            (dc/pull db d-vendors/default-read vendor-id)
              ;; the client's first location is used for the invoice
              location          (->> (dc/pull db '[:client/locations] client-id)
                                     :client/locations
                                     first)
              due               (and (:vendor/terms vendor)
                                     (time/plus (coerce/to-date-time date) (time/days (d-vendors/terms-for-client-id vendor client-id))))
              scheduled-payment (and (d-vendors/automatically-paid-for-client-id? vendor client-id)
                                     due)]
          (cond-> {:db/id (random-tempid)
                   :invoice/client client-id
                   :invoice/client-identifier client-identifier
                   :invoice/vendor vendor-id
                   :invoice/invoice-number invoice-number
                   :invoice/total total
                   :invoice/date date
                   :invoice/location location
                   :invoice/import-status :import-status/imported
                   :invoice/outstanding-balance total
                   :invoice/status :invoice-status/unpaid}
            (instance? org.joda.time.DateTime due) (assoc :invoice/due (coerce/to-date due))
            ;; scheduled-payment is either falsy or the due DateTime
            scheduled-payment (assoc :invoice/scheduled-payment (coerce/to-date scheduled-payment))))))))
(defn update-textract-invoice-
  "Upserts onto textract invoice `id` whichever of the string-keyed params
  \"date\", \"total\", \"invoice-number\", \"client\" and \"vendor\" are
  present, then returns the re-read job.

  Date, total and invoice number are stored as `[raw parsed]` tuples; client
  and vendor arrive as entity ids and are stored as `[name id]` tuples."
  [id {:strs [date total invoice-number client vendor]}]
  (let [changes (cond-> {:db/id id}
                  date
                  (assoc :textract-invoice/date
                         [date (coerce/to-date (atime/parse date atime/iso-date))])

                  total
                  (assoc :textract-invoice/total
                         [total (Double/parseDouble total)])

                  invoice-number
                  (assoc :textract-invoice/invoice-number
                         [invoice-number invoice-number])

                  client
                  (assoc :textract-invoice/customer-identifier
                         (let [client-id (Long/parseLong client)]
                           [(pull-attr (dc/db conn) :client/name client-id) client-id]))

                  vendor
                  (assoc :textract-invoice/vendor-name
                         (let [vendor-id (Long/parseLong vendor)]
                           [(pull-attr (dc/db conn) :vendor/name vendor-id) vendor-id])))]
    @(dc/transact-async conn [[:upsert-entity changes]])
    (get-job id)))
(defn upload
  "Handler for an uploaded invoice file. Stores the file in the data bucket
  under `textract-files/<uuid>.<extension>`, begins a textract job for it and
  responds with a 302 redirect to that job's page. On any exception the
  error is logged and its message is returned as HTML."
  [request]
  (let [params (:params request)
        file   (or (get params :file)
                   (get params "file"))]
    (mu/log ::uploading-file
            :file file)
    (try
      (let [;; extension = text after the last dot (or backslash) of the file name
            extension   (last (str/split (:filename file) #"[\\.]"))
            s3-location (str "textract-files/" (UUID/randomUUID) "." extension)]
        (with-open [stream (io/input-stream (:tempfile file))]
          (s3/put-object (:data-bucket env)
                         s3-location
                         stream
                         {:content-type "application/pdf"
                          :content-length (.length (:tempfile file))}))
        (let [textract-invoice (begin-textract-file s3-location)]
          {:status 302
           :headers {"Location"
                     (str (bidi/path-for ssr-routes/only-routes
                                         :invoice-glimpse-textract-invoice
                                         :textract-invoice-id (:db/id textract-invoice)))}}))
      (catch Exception e
        (alog/error ::cant-begin-textract
                    :error e)
        (html-response [:div (.getMessage e)])))))
(defn update-textract-invoice
  "Handler that applies the request's query params to the textract invoice
  named by the `:textract-invoice-id` route param and responds with the
  re-rendered invoice form."
  [request]
  (let [job-id      (-> request :route-params :textract-invoice-id Long/parseLong)
        current-job (update-textract-invoice- job-id (:query-params request))]
    (html-response (textract->invoice-form* current-job))))
(defn create-invoice
  "Handler for the invoice form. Applies the submitted form params to the
  textract invoice, proposes a new invoice built from it and, when one was
  created, links it to the textract invoice.

  Responds with:
  - 400 and an explanatory message when a required field is missing
    (`textract-invoice->invoice` returned nil), without touching the database;
  - 400 \"This invoice already exists.\" when the proposal produced no new
    entity;
  - otherwise a fresh glimpse page plus a notification linking to the new
    invoice."
  [request]
  (let [current-job (update-textract-invoice- (Long/parseLong (:textract-invoice-id (:route-params request))) (:form-params request))
        new-invoice (textract-invoice->invoice current-job)]
    (if-not new-invoice
      (html-response [:div "Client, vendor, date, invoice number and total are all required to create an invoice."]
                     :status 400)
      (let [new-invoice-id (get-in @(dc/transact conn [[:propose-invoice new-invoice]])
                                   [:tempids (:db/id new-invoice)])]
        (if new-invoice-id
          (do
            @(dc/transact conn [{:db/id (:db/id current-job)
                                 :textract-invoice/invoice new-invoice-id}])
            (html-response (page* nil)
                           :headers {"hx-push-url" (bidi/path-for ssr-routes/only-routes :invoice-glimpse)
                                     "hx-retarget" "#invoice-glimpse-content"
                                     "hx-trigger" (cheshire/generate-string {"notification" (str (hiccup/html [:div "Successfully created "
                                                                                                               (com/link {:href (str (bidi/path-for client-routes/routes
                                                                                                                                                    :invoices)
                                                                                                                                     "?exact-match-id="
                                                                                                                                     new-invoice-id)}
                                                                                                                         (format "invoice %s" (:invoice/invoice-number new-invoice)))
                                                                                                               "."]))})}))
          (html-response [:div "This invoice already exists."]
                         :status 400))))))
(defn page
  "Full-page handler for Invoice Glimpse. Logs the request method and renders
  the base page with navigation, breadcrumbs and the glimpse content for the
  optional `:textract-invoice-id` route param."
  [request]
  (mu/log ::method
          :method (:request-method request))
  (let [glimpse-path (bidi/path-for ssr-routes/only-routes
                                    :invoice-glimpse)
        admin-path   (bidi/path-for ssr-routes/only-routes
                                    :admin)
        job-id       (some-> request
                             :route-params
                             :textract-invoice-id
                             Long/parseLong)]
    (base-page
     request
     (com/page {:nav (com/admin-aside-nav)
                :client-selection (:client-selection (:session request))
                :client (:client request)
                :identity (:identity request)
                ;; reload the app contents when a client is selected
                :app-params {:hx-get glimpse-path
                             :hx-trigger "clientSelected from:body"
                             :hx-select "#app-contents"
                             :hx-swap "outerHTML swap:300ms"}}
               (com/breadcrumbs {}
                                [:a {:href admin-path}
                                 "Invoice"]
                                [:a {:href glimpse-path}
                                 "Glimpse"])
               (page* job-id))
     "Invoice Glimpse")))
(defn textract-invoice
  "Handler for a single textract job. Requests carrying an `hx-request`
  header get only the job-progress fragment; all others get the full page."
  [request]
  (if-not (get-in request [:headers "hx-request"])
    (page request)
    (let [job-id (some-> request
                         :route-params
                         :textract-invoice-id
                         Long/parseLong)]
      (html-response (job-progress* job-id)))))