(ns auto-ap.ssr.invoice.glimpse
  (:require [amazonica.aws.s3 :as s3]
            [amazonica.aws.textract :as textract]
            [auto-ap.datomic :refer [conn pull-attr pull-id]]
            [auto-ap.datomic.clients :as d-clients]
            [auto-ap.logging :as alog]
            [auto-ap.solr :as solr]
            [auto-ap.ssr-routes :as ssr-routes]
            [auto-ap.ssr.components :as com]
            [auto-ap.ssr.ui :refer [base-page]]
            [auto-ap.ssr.utils :refer [html-response path->name]]
            [auto-ap.time :as atime]
            [bidi.bidi :as bidi]
            [cemerick.url :as url]
            [clj-time.coerce :as coerce]
            [cheshire.core :as cheshire]
            [clojure.java.io :as io]
            [clojure.string :as str]
            [com.brunobonacci.mulog :as mu]
            [config.core :refer [env]]
            [datomic.api :as dc]
            [hiccup2.core :as hiccup]
            [iol-ion.tx :refer [random-tempid]]
            [auto-ap.client-routes :as client-routes]
            [auto-ap.datomic.vendors :as d-vendors]
            [clj-time.core :as time])
  (:import (java.util UUID)))

(def bucket-name
  "Value of the `:data-bucket` entry of the application config."
  (:data-bucket env))

(defn lookup
  "Flattens a Textract expense-analysis result `tx` into one seq of maps:
  the expense documents themselves followed by every document's summary
  fields. The `:geometry` key is stripped from each entry's
  `:label-detection` and `:value-detection`."
  [tx]
  (->> (:expense-documents tx)
       (mapcat :summary-fields)
       ;; thread-last: the documents end up *before* the summary fields
       (concat (->> tx :expense-documents))
       (map (fn [sf]
              (-> sf
                  (update :label-detection dissoc :geometry)
                  (update :value-detection dissoc :geometry))))))

(defn stack-rank
  "Returns the non-blank `:value-detection` texts of the `field-descriptors`
  whose `[:type :text]` is a member of the set `valid-values`, ordered from
  highest to lowest (type confidence * value confidence)."
  [valid-values field-descriptors]
  (->> field-descriptors
       (filter (comp valid-values :text :type))
       (sort-by #(* (-> % :type :confidence)
                    (-> % :value-detection :confidence)))
       (reverse)
       (map (comp :text :value-detection))
       (filter #(not (str/blank? %)))))

(defn clean-customer
  "Replaces every run of non-word characters in `c` with a single space."
  [c]
  (str/replace c #"\W+" " "))

(defn deduplicate
  "Takes a seq of `[raw parsed]` pairs and returns a vector of the pairs
  whose `parsed` value has not been seen earlier in the seq, preserving
  order."
  [xs]
  (first
   (reduce (fn [[so-far seen] [raw parsed]]
             ;; `contains?` rather than calling the set: `(seen nil)` is
             ;; always nil, so nil parsed values would never be treated
             ;; as already seen.
             (if (contains? seen parsed)
               [so-far seen]
               [(conj so-far [raw parsed]) (conj seen parsed)]))
           [[] #{}]
           xs)))

(defn- parse-amount
  "Parses the first run of digits, dots and dashes in `text` as a double.
  Commas that sit between two digits (thousands separators) are removed
  first, so \"$1,234.56\" yields 1234.56. Returns nil when `text` is nil,
  contains no such run, or the run is not a valid number (e.g. \"-\")."
  [text]
  (when text
    (let [without-separators (str/replace text #"(?<=\d),(?=\d)" "")
          token (second (re-find #"([0-9.\-]+)" without-separators))]
      (when token
        (try
          (Double/parseDouble token)
          (catch NumberFormatException _
            nil))))))

(defn textract->textract-invoice
  "Builds the `:textract-invoice/*` entity map for entity `id` from the
  Textract expense-analysis result `tx`.

  For each of total, customer identifier, vendor name, date and invoice
  number it collects de-duplicated `[raw-text parsed-value]` candidates,
  stores all of them under the `*-options` key (nil when there are none)
  and the first one under the singular key. The status is set to
  \"SUCCEEDED\"."
  [id tx]
  (let [fields (lookup tx)
        ;; NOTE(review): because of thread-last, the TOTAL candidates are
        ;; concatenated *in front of* the AMOUNT_DUE candidates - confirm
        ;; that this priority is intended.
        total-options (->> (stack-rank #{"AMOUNT_DUE"} fields)
                           (map (fn [t] [t (parse-amount t)]))
                           (concat (->> (stack-rank #{"TOTAL"} fields)
                                        (map (fn [t] [t (parse-amount t)]))))
                           (deduplicate))
        ;; Same ordering caveat: RECEIVER_NAME (Solr) candidates precede the
        ;; CUSTOMER_NUMBER exact matches. RECEIVER_NAME candidates without a
        ;; Solr hit are kept with a nil client id.
        customer-identifier-options (->> (stack-rank #{"CUSTOMER_NUMBER"} fields)
                                         (map (fn [t] [t (:db/id (d-clients/exact-match t))]))
                                         (filter second)
                                         (concat (->> (stack-rank #{"RECEIVER_NAME"} fields)
                                                      (map (fn [t]
                                                             [t (->> (solr/query solr/impl "clients" {"query" (format "name:(%s) ", (clean-customer t))
                                                                                                      "fields" "score, *"})
                                                                     #_(filter (fn [d] (> (:score d) 4.0)))
                                                                     (map (comp #(Long/parseLong %) :id))
                                                                     first)]))))
                                         deduplicate)
        ;; One candidate per Solr hit scoring above 2.0.
        vendor-name-options (->> (stack-rank #{"VENDOR_NAME"} fields)
                                 (mapcat (fn [t]
                                           (for [m (->> (solr/query solr/impl "vendors" {"query" (format "name:(%s) ", t)
                                                                                         "fields" "score, *"})
                                                        (filter (fn [d] (> (:score d) 2.0)))
                                                        (map (comp #(Long/parseLong %) :id)))]
                                             [t m])))
                                 (deduplicate))
        ;; Four-digit years are tried before two-digit years.
        date-options (->> (stack-rank #{"INVOICE_RECEIPT_DATE" "ORDER_DATE" "DELIVERY_DATE"} fields)
                          (map (fn [t]
                                 [t (or (some-> (and (re-find #"\d{1,2}\/\d{1,2}/\d{4,4}" t)
                                                     (atime/parse t "MM/dd/yyyy"))
                                                (coerce/to-date))
                                        (some-> (and (re-find #"\d{1,2}\/\d{1,2}/\d{2,2}" t)
                                                     (atime/parse t "MM/dd/yy"))
                                                (coerce/to-date)))]))
                          (deduplicate))
        invoice-number-options (->> (stack-rank #{"INVOICE_RECEIPT_ID" "PO_NUMBER"} fields)
                                    (map (fn [t] [t t]))
                                    (deduplicate))]
    #:textract-invoice{:db/id id
                       :textract-status "SUCCEEDED"
                       :total (first total-options)
                       :total-options (seq total-options)
                       :customer-identifier (first customer-identifier-options)
                       :customer-identifier-options (seq customer-identifier-options)
                       :vendor-name (first vendor-name-options)
                       :vendor-name-options (seq vendor-name-options)
                       :date (first date-options)
                       :date-options (seq date-options)
                       :invoice-number (first invoice-number-options)
                       :invoice-number-options (seq invoice-number-options)}))

(defn upload-form*
  "Hiccup for the upload drop target: a form that POSTs to the
  `:invoice-glimpse-upload` route plus the script that attaches Dropzone
  to it."
  []
  [:div
   [:form.bg-blue-100.border-2.border-dashed.rounded-lg.border-blue-300.p-4.max-w-md.w-md.text-center.cursor-pointer
    {:action (bidi/path-for ssr-routes/only-routes :invoice-glimpse-upload)
     :method "POST"
     :id "invoice"}
    "Drop an invoice here"]
   [:script
    (hiccup/raw " invoice_dropzone = new Dropzone(\"#invoice\", { success: function(file, response) { window.location.href = file.xhr.responseURL; }, acceptedFiles: 'application/pdf,.pdf', disablePreviews: true }); ")]])

(defn customer-identifier-id->customer-identifier-client
  "Turns a stored `[identifier client-id]` tuple into
  `[identifier [client-id client-name]]`. Returns nil when the client id
  is nil."
  [[ci client]]
  (when client
    (let [real-client (dc/pull (dc/db conn) [:client/name :db/id] client)]
      [ci [(:db/id real-client) (:client/name real-client)]])))

(defn vendor-name-tuple->vendor-tuple
  "Turns a stored `[vendor-text vendor-id]` tuple into
  `[vendor-text [vendor-id vendor-name]]`. Returns nil when the vendor id
  is nil."
  [[vn vendor]]
  (when vendor
    (let [real-vendor (dc/pull (dc/db conn) [:vendor/name :db/id] vendor)]
      [vn [(:db/id real-vendor) (:vendor/name real-vendor)]])))

(defn get-job
  "Pulls every attribute of entity `id` and expands the customer-identifier
  and vendor-name tuples (selected value and options) so that the entity id
  in each is replaced by `[id name]`. Tuples without an entity id become
  nil."
  [id]
  (-> (dc/pull (dc/db conn) '[*] id)
      (update :textract-invoice/customer-identifier customer-identifier-id->customer-identifier-client)
      (update :textract-invoice/customer-identifier-options #(map customer-identifier-id->customer-identifier-client %))
      (update :textract-invoice/vendor-name vendor-name-tuple->vendor-tuple)
      (update :textract-invoice/vendor-name-options #(map vendor-name-tuple->vendor-tuple %))))

(defn refresh-job
  "When job `id` has a Textract job id and is still \"IN_PROGRESS\", fetches
  the expense analysis: on \"SUCCEEDED\" the parsed result is upserted,
  otherwise only the new status is stored. Always returns the job as read
  by `get-job` afterwards."
  [id]
  (let [{:keys [:db/id :textract-invoice/job-id :textract-invoice/textract-status]} (get-job id)]
    (when (and job-id (= "IN_PROGRESS" textract-status))
      (let [result (textract/get-expense-analysis {:job-id job-id})
            new-status (:job-status result)]
        (if (= "SUCCEEDED" new-status)
          @(dc/transact conn [[:upsert-entity (textract->textract-invoice id result)]])
          @(dc/transact conn [{:db/id id
                               :textract-invoice/textract-status new-status}]))))
    (get-job id)))

(defn pill-list*
  "Renders the \"Alternates\" row for one form field: one pill per entry of
  `options` other than `selected`. Clicking a pill PATCHes the
  `:invoice-glimpse-update-textract-invoice` route for job `id` with the
  query parameter `field` set to `(->value option)`, or `(->text option)`
  when no `->value` is given. Returns nil when nothing is left to show."
  [{:keys [selected options class ->text ->value id field]}]
  (let [pills (->> options
                   ;; `get-job` yields nil for tuples without an entity id;
                   ;; there is nothing meaningful to render for those.
                   (remove nil?)
                   (remove #(= selected %))
                   (map (fn [x]
                          [:div.shrink
                           (com/pill {:color :secondary}
                                     (com/link {:hx-patch (str (bidi/path-for ssr-routes/only-routes
                                                                              :invoice-glimpse-update-textract-invoice
                                                                              :textract-invoice-id id)
                                                               "?"
                                                               (url/map->query {field (if ->value
                                                                                        (->value x)
                                                                                        (->text x))}))
                                                :hx-target "closest form"
                                                :href "#"}
                                               (->text x)))])))]
    (when (seq pills)
      [:div.col-span-6.col-start-1.text-xs "Alternates: "
       [:div.flex.gap-2.flex-wrap {:class class} pills]])))

(defn textract->invoice-form*
  "Renders the review form for `textract-invoice` (as returned by
  `get-job`): client, vendor, date, total and invoice number, each followed
  by its list of alternates. Submitting POSTs to the
  `:invoice-glimpse-create-invoice` route."
  [textract-invoice]
  (alog/info ::textract-invoiceform-rendering :textract-invoice textract-invoice)
  (let [job-id (:db/id textract-invoice)]
    [:form {:hx-post (bidi/path-for ssr-routes/only-routes
                                    :invoice-glimpse-create-invoice
                                    :textract-invoice-id job-id)}
     [:div.grid.grid-cols-6.gap-4.mb-4
      [:div.col-span-6
       (com/field {:label "Client"}
                  (com/text-input {:name (path->name [:invoice/client])
                                   :value (-> textract-invoice :textract-invoice/customer-identifier second second)
                                   :placeholder "Client"
                                   :disabled true
                                   :autofocus true}))]
      (pill-list* {:selected (:textract-invoice/customer-identifier textract-invoice)
                   :options (:textract-invoice/customer-identifier-options textract-invoice)
                   :id job-id
                   :class "flex-col"
                   :field "client"
                   :->text (fn [[customer-identifier [_ client-name]]]
                             (format "%s (%s)" client-name customer-identifier))
                   :->value (fn [[_ [id _]]] id)})

      [:div.col-span-6
       (com/field {:label "Vendor"}
                  (com/text-input {:name (path->name [:invoice/vendor])
                                   :value (-> textract-invoice :textract-invoice/vendor-name second second)
                                   :disabled true
                                   :placeholder "Vendor"}))]
      (pill-list* {:selected (:textract-invoice/vendor-name textract-invoice)
                   :options (:textract-invoice/vendor-name-options textract-invoice)
                   :id job-id
                   :class "flex-row"
                   :field "vendor"
                   :->text (fn [[vendor-identifier [_ vendor-name]]]
                             (format "%s (%s)" vendor-name vendor-identifier))
                   :->value (fn [[_ [id _]]] id)})

      [:div.col-span-3
       (com/field {:label "Date"}
                  (com/date-input {:name "date"
                                   ;; some->: no date may have been detected
                                   :value (some-> textract-invoice
                                                  :textract-invoice/date
                                                  second
                                                  (coerce/to-date-time)
                                                  (atime/unparse-local atime/normal-date))
                                   :placeholder "Date"}))]
      (pill-list* {:selected (:textract-invoice/date textract-invoice)
                   :options (:textract-invoice/date-options textract-invoice)
                   :id job-id
                   :field "date"
                   ;; some->: candidates that failed to parse carry a nil date
                   :->text (fn [[_ date]]
                             (some-> date
                                     (coerce/to-date-time)
                                     (atime/unparse-local atime/normal-date)))})

      [:div.col-span-2.col-start-1
       (com/field {:label "Total"}
                  (com/money-input {:name "total"
                                    :value (-> textract-invoice :textract-invoice/total second)
                                    :placeholder "Total"}))]
      (pill-list* {:selected (:textract-invoice/total textract-invoice)
                   :options (:textract-invoice/total-options textract-invoice)
                   :id job-id
                   :field "total"
                   :->text (fn [[_ amount]] (str amount))})

      [:div.col-span-2.col-start-1
       (com/field {:label "Invoice Number"}
                  (com/text-input {:name "invoice-number"
                                   :value (-> textract-invoice :textract-invoice/invoice-number first)
                                   :placeholder "Invoice Number"}))]
      (pill-list* {:selected (:textract-invoice/invoice-number textract-invoice)
                   :field "invoice-number"
                   :id job-id
                   :options (:textract-invoice/invoice-number-options textract-invoice)
                   :->text (fn [[_ invoice-number]] (str invoice-number))})]
     (com/button {:color :primary} "Save")]))

(defn job-progress*
  "Refreshes job `id` and renders its state: while \"IN_PROGRESS\", a
  placeholder that re-requests itself after 5s; once \"SUCCEEDED\", the PDF
  next to the review form. Any other status renders nothing."
  [id]
  (let [textract-invoice (refresh-job id)
        status (:textract-invoice/textract-status textract-invoice)]
    (cond
      (= "IN_PROGRESS" status)
      [:div.bg-blue-100.border-2.border-dashed.rounded-lg.border-blue-300.p-4.max-w-md.w-md.text-center.cursor-pointer
       {:hx-get (str (bidi/path-for ssr-routes/only-routes
                                    :invoice-glimpse-textract-invoice
                                    :textract-invoice-id (:db/id textract-invoice)))
        :hx-trigger "load delay:5s"
        :hx-swap "outerHTML"}
       "Analyzing job "
       (some-> textract-invoice :textract-invoice/job-id (subs 0 8))
       "..."]

      (= "SUCCEEDED" status)
      [:div.px-4
       [:div.flex.flex-row.space-x-4
        [:div {:style {:width "805"}}
         (com/card {}
                   [:iframe.p-4 {:src (:textract-invoice/pdf-url textract-invoice)
                                 :width 791
                                 :height 700}])]
        [:div {:class "basis-1/4"}
         (com/card {}
                   [:div.p-4
                    (textract->invoice-form* textract-invoice)])]]])))

(defn page*
  "Main content of the Invoice Glimpse page: the job view when `id` is
  given (with a \"New glimpse\" link), otherwise the upload form."
  [id]
  [:div#invoice-glimpse-content.mt-4
   (com/card {}
             [:div.px-4.py-3.space-y-4.flex.flex-col
              [:div.flex.gap-x-4
               [:h1.text-2xl.font-bold "Invoice Glimpse"]
               [:div (com/pill {:color :primary} "Beta")]
               (when id
                 [:div.ml-auto
                  [:a.mb-2 {:href (bidi/path-for ssr-routes/only-routes :invoice-glimpse)}
                   (com/button {:color :secondary} "New glimpse")]])]
              [:p.text-sm.italic "Import your invoices with the power of AI. Please only use PDFs with a single invoice in them."]
              (when id
                (job-progress* id))
              (when-not id
                (upload-form*))])])

(defn begin-textract-file
  "Creates an \"IN_PROGRESS\" textract-invoice entity for the uploaded file at
  `s3-location`, starts the Textract expense analysis in the background and
  returns the new job (via `get-job`) without waiting for it.

  The background task stores the Textract job id on the entity. If starting
  the analysis fails, the error is logged and the entity is marked
  \"FAILED\" so the page does not keep polling a job that never started."
  [s3-location]
  (let [tempid (random-tempid)
        id (get-in @(dc/transact conn [{:db/id tempid
                                        :textract-invoice/textract-status "IN_PROGRESS"
                                        :textract-invoice/pdf-url (str "https://" bucket-name "/" s3-location)}])
                   [:tempids tempid])]
    (future
      (try
        (let [analysis (textract/start-expense-analysis
                        {:document-location {:s3-object {:bucket (:data-bucket env)
                                                         :name s3-location}}
                         :client-request-token (str id)})]
          @(dc/transact conn [{:db/id id
                               :textract-invoice/job-id (:job-id analysis)}]))
        (catch Exception e
          ;; Nobody derefs this future, so an exception would otherwise vanish.
          (alog/error ::cant-start-expense-analysis :error e)
          @(dc/transact conn [{:db/id id
                               :textract-invoice/textract-status "FAILED"}]))))
    (get-job id)))

(defn textract-invoice->invoice
  "Builds the invoice entity map for `textract-invoice` (as returned by
  `get-job`). Returns nil unless client, vendor, date, total and invoice
  number are all present.

  `:invoice/due` is added when the vendor has terms; in that case
  `:invoice/scheduled-payment` is set to the same date when
  `d-vendors/automatically-paid-for-client-id?` is truthy."
  [textract-invoice]
  (mu/with-context {:textract-invoice textract-invoice}
    (let [[_ [vendor-id]] (:textract-invoice/vendor-name textract-invoice)
          [_ [client-id]] (:textract-invoice/customer-identifier textract-invoice)
          [_ total] (:textract-invoice/total textract-invoice)
          [_ date] (:textract-invoice/date textract-invoice)
          [_ invoice-number] (:textract-invoice/invoice-number textract-invoice)]
      ;; Check required fields before touching the database or doing date
      ;; arithmetic, both of which need non-nil inputs.
      (when (and client-id date invoice-number vendor-id total)
        (let [vendor (dc/pull (dc/db conn) d-vendors/default-read vendor-id)
              location (->> (dc/pull (dc/db conn) '[:client/locations] client-id)
                            :client/locations
                            first)
              due (and (:vendor/terms vendor)
                       (time/plus (coerce/to-date-time date)
                                  (time/days (d-vendors/terms-for-client-id vendor client-id))))
              scheduled-payment (and (d-vendors/automatically-paid-for-client-id? vendor client-id)
                                     due)]
          (cond-> {:db/id (random-tempid)
                   :invoice/client client-id
                   :invoice/client-identifier (first (:textract-invoice/customer-identifier textract-invoice))
                   :invoice/vendor vendor-id
                   :invoice/invoice-number invoice-number
                   :invoice/total total
                   :invoice/date date
                   :invoice/location location
                   :invoice/import-status :import-status/imported
                   :invoice/outstanding-balance total
                   :invoice/status :invoice-status/unpaid}
            (instance? org.joda.time.DateTime due)
            (assoc :invoice/due (coerce/to-date due))

            (instance? org.joda.time.DateTime scheduled-payment)
            (assoc :invoice/scheduled-payment (coerce/to-date scheduled-payment))))))))

(defn update-textract-invoice-
  "Applies the string-keyed overrides (\"date\", \"total\", \"invoice-number\",
  \"client\", \"vendor\") that are present to textract-invoice `id`, then
  returns the job via `get-job`. \"client\" and \"vendor\" are entity ids as
  strings; their names are looked up and stored as the raw half of the
  tuple."
  [id {:strs [date total invoice-number client vendor]}]
  @(dc/transact-async
    conn
    [[:upsert-entity
      (cond-> {:db/id id}
        date
        (assoc :textract-invoice/date
               [date (coerce/to-date (atime/parse date atime/normal-date))])

        total
        (assoc :textract-invoice/total
               [total (Double/parseDouble total)])

        invoice-number
        (assoc :textract-invoice/invoice-number
               [invoice-number invoice-number])

        client
        (assoc :textract-invoice/customer-identifier
               (let [client-id (Long/parseLong client)]
                 [(pull-attr (dc/db conn) :client/name client-id) client-id]))

        vendor
        (assoc :textract-invoice/vendor-name
               (let [vendor-id (Long/parseLong vendor)]
                 [(pull-attr (dc/db conn) :vendor/name vendor-id) vendor-id])))]])
  (get-job id))

(defn upload
  "Ring handler for the Dropzone upload. Stores the uploaded file in S3
  under `textract-files/<uuid>.<extension>`, starts the Textract job and
  responds with a 302 to the job page. Any exception is logged and its
  message returned as an HTML response."
  [request]
  (let [file (or (get (:params request) :file)
                 (get (:params request) "file"))]
    (mu/log ::uploading-file :file file)
    (try
      (let [s3-location (str "textract-files/"
                             (UUID/randomUUID)
                             "."
                             (last (str/split (:filename file) #"[\\.]")))
            _ (with-open [stream (io/input-stream (:tempfile file))]
                (s3/put-object (:data-bucket env)
                               s3-location
                               stream
                               {:content-type "application/pdf"
                                :content-length (.length (:tempfile file))}))
            textract-invoice (begin-textract-file s3-location)]
        {:headers {"Location" (str (bidi/path-for ssr-routes/only-routes
                                                  :invoice-glimpse-textract-invoice
                                                  :textract-invoice-id (:db/id textract-invoice)))}
         :status 302})
      (catch Exception e
        (alog/error ::cant-begin-textract :error e)
        (html-response [:div (.getMessage e)])))))

(defn update-textract-invoice
  "Ring handler for the alternate pills: applies the query parameters to
  the textract-invoice named in the route and re-renders the review form."
  [request]
  (let [current-job (update-textract-invoice- (Long/parseLong (:textract-invoice-id (:route-params request)))
                                              (:query-params request))]
    (html-response (textract->invoice-form* current-job))))

(defn create-invoice
  "Ring handler for the review form's Save button. Applies the submitted
  form parameters to the textract-invoice, builds an invoice from it and
  proposes it via the `:propose-invoice` transaction function.

  Responds with a fresh upload page and a success notification when the
  transaction yields an id for the new invoice, and with a 400 when it
  does not or when required fields are missing."
  [request]
  (let [current-job (update-textract-invoice- (Long/parseLong (:textract-invoice-id (:route-params request)))
                                              (:form-params request))
        new-invoice (textract-invoice->invoice current-job)
        ;; Nothing to propose when required fields are missing.
        new-invoice-id (when new-invoice
                         (get-in @(dc/transact conn [[:propose-invoice new-invoice]])
                                 [:tempids (:db/id new-invoice)]))]
    (when new-invoice-id
      @(dc/transact conn [{:db/id (:db/id current-job)
                           :textract-invoice/invoice new-invoice-id}]))
    (cond
      (nil? new-invoice)
      (html-response [:div "This invoice is missing required information."]
                     :status 400)

      new-invoice-id
      (html-response (page* nil)
                     :headers {"hx-push-url" (bidi/path-for ssr-routes/only-routes :invoice-glimpse)
                               "hx-retarget" "#invoice-glimpse-content"
                               "hx-trigger" (cheshire/generate-string
                                             {"notification" (str (hiccup/html
                                                                   [:div "Successfully created "
                                                                    (com/link {:href (str (bidi/path-for client-routes/routes :invoices)
                                                                                          "?exact-match-id="
                                                                                          new-invoice-id)}
                                                                              (format "invoice %s" (:invoice/invoice-number new-invoice)))
                                                                    "."]))})})

      :else
      (html-response [:div "This invoice already exists."]
                     :status 400))))

(defn page
  "Ring handler rendering the full Invoice Glimpse page. Shows the job named
  by the `:textract-invoice-id` route parameter when present, otherwise the
  upload form."
  [{:keys [request-method] :as request}]
  (mu/log ::method :method request-method)
  (base-page request
             (com/page {:nav com/admin-aside-nav
                        :client-selection (:client-selection (:session request))
                        :client (:client request)
                        :identity (:identity request)
                        :app-params {:hx-get (bidi/path-for ssr-routes/only-routes :invoice-glimpse)
                                     :hx-trigger "clientSelected from:body"
                                     :hx-select "#app-contents"
                                     :hx-swap "outerHTML swap:300ms"}}
                       (com/breadcrumbs {}
                                        [:a {:href (bidi/path-for ssr-routes/only-routes :admin)}
                                         "Invoice"]
                                        [:a {:href (bidi/path-for ssr-routes/only-routes :invoice-glimpse)}
                                         "Glimpse"])
                       (page* (some-> request :route-params :textract-invoice-id Long/parseLong)))
             "Invoice Glimpse"))

(defn textract-invoice
  "Ring handler for a single job: htmx requests (those carrying an
  `hx-request` header) get only the job-progress fragment, everything else
  gets the full page."
  [request]
  (if (get-in request [:headers "hx-request"])
    (html-response (job-progress* (some-> request :route-params :textract-invoice-id Long/parseLong)))
    (page request)))