Invoices can now be imported with the POWER OF AI

This commit is contained in:
Bryce
2023-08-03 21:33:15 -07:00
parent 88eda03e7f
commit ea4ccf731e
3 changed files with 106 additions and 97 deletions

View File

@@ -42,9 +42,9 @@
:company-reports-delete (wrap-client-redirect-unauthenticated (wrap-admin company-reports/delete-report)) :company-reports-delete (wrap-client-redirect-unauthenticated (wrap-admin company-reports/delete-report))
:invoice-glimpse (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/page)) :invoice-glimpse (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/page))
:invoice-glimpse-upload (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/upload)) :invoice-glimpse-upload (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/upload))
:invoice-glimpse-job (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/job-progress)) :invoice-glimpse-textract-invoice (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/textract-invoice))
:invoice-glimpse-create (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/create)) :invoice-glimpse-create-invoice (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/create-invoice))
:invoice-glimpse-update-job (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/update-job)) :invoice-glimpse-update-textract-invoice (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/update-textract-invoice))
:transaction-insights (wrap-client-redirect-unauthenticated (wrap-admin insights/page)) :transaction-insights (wrap-client-redirect-unauthenticated (wrap-admin insights/page))
:transaction-insight-table (wrap-client-redirect-unauthenticated (wrap-admin insights/insight-table)) :transaction-insight-table (wrap-client-redirect-unauthenticated (wrap-admin insights/insight-table))
:transaction-insight-rows (wrap-client-redirect-unauthenticated (wrap-admin insights/transaction-rows)) :transaction-insight-rows (wrap-client-redirect-unauthenticated (wrap-admin insights/transaction-rows))

View File

@@ -44,7 +44,8 @@
(sort-by #(* (-> % :type :confidence) (sort-by #(* (-> % :type :confidence)
(-> % :value-detection :confidence))) (-> % :value-detection :confidence)))
(reverse) (reverse)
(map (comp :text :value-detection)))) (map (comp :text :value-detection))
(filter #(not (str/blank? %)))))
(defn clean-customer [c] (defn clean-customer [c]
(clojure.string/replace c #"\W+" " ")) (clojure.string/replace c #"\W+" " "))
@@ -60,7 +61,7 @@
[[] #{}] [[] #{}]
xs))) xs)))
(defn textract->textract-invoice [job-id tx] (defn textract->textract-invoice [id tx]
(let [lookup (lookup tx) (let [lookup (lookup tx)
total-options (->> (stack-rank #{"AMOUNT_DUE"} lookup) total-options (->> (stack-rank #{"AMOUNT_DUE"} lookup)
(map (fn [t] (map (fn [t]
@@ -93,7 +94,7 @@
(map (comp #(Long/parseLong %) :id)) (map (comp #(Long/parseLong %) :id))
first)])) first)]))
(deduplicate)) (deduplicate))
date-options (->> (stack-rank #{"ORDER_DATE" "DELIVERY_DATE"} lookup) date-options (->> (stack-rank #{"INVOICE_RECEIPT_DATE" "ORDER_DATE" "DELIVERY_DATE"} lookup)
(map (fn [t] (map (fn [t]
[t (or (some-> (and (re-find #"\d{1,2}\/\d{1,2}/\d{4,4}" t) (atime/parse t "MM/dd/yyyy")) [t (or (some-> (and (re-find #"\d{1,2}\/\d{1,2}/\d{4,4}" t) (atime/parse t "MM/dd/yyyy"))
(coerce/to-date)) (coerce/to-date))
@@ -105,7 +106,7 @@
[t t])) [t t]))
(deduplicate))] (deduplicate))]
#:textract-invoice #:textract-invoice
{:db/id [:textract-invoice/job-id job-id] {:db/id id
:textract-status "SUCCEEDED" :textract-status "SUCCEEDED"
:total (first total-options) :total (first total-options)
:total-options (seq total-options) :total-options (seq total-options)
@@ -135,6 +136,7 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
success: function(file, response) { success: function(file, response) {
window.location.href = file.xhr.responseURL; window.location.href = file.xhr.responseURL;
}, },
acceptedFiles: 'application/pdf,.pdf',
disablePreviews: true disablePreviews: true
}); ")]]) }); ")]])
@@ -152,38 +154,34 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
vendor)] vendor)]
[vn [(:db/id real-vendor) (:vendor/name real-vendor)]]))) [vn [(:db/id real-vendor) (:vendor/name real-vendor)]])))
(defn get-job [job-id] (defn get-job [id]
(-> (dc/pull (dc/db conn) '[*] [:textract-invoice/job-id job-id]) (-> (dc/pull (dc/db conn) '[*] id)
(update :textract-invoice/customer-identifier customer-identifier-id->customer-identifier-client) (update :textract-invoice/customer-identifier customer-identifier-id->customer-identifier-client)
(update :textract-invoice/customer-identifier-options #(map customer-identifier-id->customer-identifier-client %) ) (update :textract-invoice/customer-identifier-options #(map customer-identifier-id->customer-identifier-client %) )
(update :textract-invoice/vendor-name vendor-name-tuple->vendor-tuple) (update :textract-invoice/vendor-name vendor-name-tuple->vendor-tuple)
(update :textract-invoice/vendor-name-options #(map vendor-name-tuple->vendor-tuple %) ))) (update :textract-invoice/vendor-name-options #(map vendor-name-tuple->vendor-tuple %) )))
(defn refresh-job [job-id] (defn refresh-job [id]
(let [{:keys [:db/id :textract-invoice/textract-status]} (dc/pull (dc/db conn) '[:db/id :textract-invoice/textract-status] [:textract-invoice/job-id job-id])] (let [{:keys [:db/id :textract-invoice/job-id :textract-invoice/textract-status]} (get-job id)]
(when (= "IN_PROGRESS" textract-status) (when (and job-id (= "IN_PROGRESS" textract-status))
(let [result (textract/get-expense-analysis {:job-id job-id}) (let [result (textract/get-expense-analysis {:job-id job-id})
new-status (:job-status result)] new-status (:job-status result)]
(cond (= "SUCCEEDED" new-status) (cond (= "SUCCEEDED" new-status)
@(dc/transact conn [[:upsert-entity (textract->textract-invoice job-id result)]]) @(dc/transact conn [[:upsert-entity (textract->textract-invoice id result)]])
:else :else
@(dc/transact conn [{:db/id id :textract-invoice/textract-status new-status}])))) @(dc/transact conn [{:db/id id :textract-invoice/textract-status new-status}]))))
(get-job job-id))) (get-job id)))
(defn pill-list* [{:keys [selected options class ->text ->value job-id field]}] (defn pill-list* [{:keys [selected options class ->text ->value id field]}]
(let [options (->> options (let [options (->> options
(filter (complement #{selected})) (filter (complement #{selected}))
(map (fn [x] (map (fn [x]
[:div.shrink (com/pill {:color :secondary} (com/link {:hx-patch (str (bidi/path-for ssr-routes/only-routes [:div.shrink
:invoice-glimpse-update-job (com/pill {:color :secondary}
:job-id job-id) (com/link {:hx-patch (str (bidi/path-for ssr-routes/only-routes :invoice-glimpse-update-textract-invoice :textract-invoice-id id) "?" (url/map->query {field (if ->value (->value x) (->text x))}))
"?" :hx-target "closest form"
(url/map->query {field (if ->value :href "#"} (->text x)))]) ))]
(->value x)
(->text x))}))
:hx-target "closest form"
:href "#"} (->text x)))]) ))]
(when (seq options) (when (seq options)
[:div.col-span-6.col-start-1.text-xs [:div.col-span-6.col-start-1.text-xs
"Alternates: " "Alternates: "
@@ -192,8 +190,8 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
(defn textract->invoice-form* [textract-invoice] (defn textract->invoice-form* [textract-invoice]
[:form {:hx-post (bidi/path-for ssr-routes/only-routes [:form {:hx-post (bidi/path-for ssr-routes/only-routes
:invoice-glimpse-create :invoice-glimpse-create-invoice
:job-id (:textract-invoice/job-id textract-invoice))} :textract-invoice-id (:db/id textract-invoice))}
[:div.grid.grid-cols-6.gap-4.mb-4 [:div.grid.grid-cols-6.gap-4.mb-4
[:div.col-span-6 [:div.col-span-6
(com/field {:label "Client"} (com/field {:label "Client"}
@@ -204,7 +202,7 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
:autofocus true}))] :autofocus true}))]
(pill-list* {:selected (:textract-invoice/customer-identifier textract-invoice) (pill-list* {:selected (:textract-invoice/customer-identifier textract-invoice)
:options (:textract-invoice/customer-identifier-options textract-invoice) :options (:textract-invoice/customer-identifier-options textract-invoice)
:job-id (:textract-invoice/job-id textract-invoice) :id (:db/id textract-invoice)
:class "flex-col" :class "flex-col"
:field "client" :field "client"
:->text (fn [[customer-identifier [id client-name]]] :->text (fn [[customer-identifier [id client-name]]]
@@ -219,7 +217,7 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
:placeholder "Vendor"}))] :placeholder "Vendor"}))]
(pill-list* {:selected (:textract-invoice/vendor-name textract-invoice) (pill-list* {:selected (:textract-invoice/vendor-name textract-invoice)
:options (:textract-invoice/vendor-name-options textract-invoice) :options (:textract-invoice/vendor-name-options textract-invoice)
:job-id (:textract-invoice/job-id textract-invoice) :id (:db/id textract-invoice)
:class "flex-row" :class "flex-row"
:field "vendor" :field "vendor"
:->text (fn [[vendor-identifier [id vendor-name]]] :->text (fn [[vendor-identifier [id vendor-name]]]
@@ -237,7 +235,7 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
:placeholder "Date"}))] :placeholder "Date"}))]
(pill-list* {:selected (:textract-invoice/date textract-invoice) (pill-list* {:selected (:textract-invoice/date textract-invoice)
:options (:textract-invoice/date-options textract-invoice) :options (:textract-invoice/date-options textract-invoice)
:job-id (:textract-invoice/job-id textract-invoice) :id (:db/id textract-invoice)
:field "date" :field "date"
:->text (fn [[_ date]] :->text (fn [[_ date]]
(-> date (-> date
@@ -252,7 +250,7 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
:placeholder "Total"}))] :placeholder "Total"}))]
(pill-list* {:selected (:textract-invoice/total textract-invoice) (pill-list* {:selected (:textract-invoice/total textract-invoice)
:options (:textract-invoice/total-options textract-invoice) :options (:textract-invoice/total-options textract-invoice)
:job-id (:textract-invoice/job-id textract-invoice) :id (:db/id textract-invoice)
:field "total" :field "total"
:->text (fn [[_ amount]] :->text (fn [[_ amount]]
(str amount))}) (str amount))})
@@ -265,59 +263,65 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
:placeholder "Invoice Number"}))] :placeholder "Invoice Number"}))]
(pill-list* {:selected (:textract-invoice/invoice-number textract-invoice) (pill-list* {:selected (:textract-invoice/invoice-number textract-invoice)
:field "invoice-number" :field "invoice-number"
:job-id (:textract-invoice/job-id textract-invoice) :id (:db/id textract-invoice)
:options (:textract-invoice/invoice-number-options textract-invoice) :options (:textract-invoice/invoice-number-options textract-invoice)
:->text (fn [[_ invoice-number]] :->text (fn [[_ invoice-number]]
(str invoice-number))})] (str invoice-number))})]
(com/button {:color :primary} "Save")]) (com/button {:color :primary} "Save")])
(defn job-progress* [job-id] (defn job-progress* [id]
(when (pull-id (dc/db conn) [:textract-invoice/job-id job-id]) (let [textract-invoice (refresh-job id)]
(let [textract-invoice (refresh-job job-id)] (cond
(cond (= "IN_PROGRESS" (:textract-invoice/textract-status textract-invoice))
(= "IN_PROGRESS" (:textract-invoice/textract-status textract-invoice)) [:div.bg-blue-100.border-2.border-dashed.rounded-lg.border-blue-300.p-4.max-w-md.w-md.text-center.cursor-pointer
[:div.bg-blue-100.border-2.border-dashed.rounded-lg.border-blue-300.p-4.max-w-md.w-md.text-center.cursor-pointer {:hx-get (str (bidi/path-for ssr-routes/only-routes
{:hx-get (str (bidi/path-for ssr-routes/only-routes :invoice-glimpse-textract-invoice
:invoice-glimpse-job :textract-invoice-id (:db/id textract-invoice)))
:job-id (:textract-invoice/job-id textract-invoice))) :hx-trigger "load delay:5s"
:hx-trigger "load delay:5s" :hx-swap "outerHTML"}
:hx-swap "outerHTML"} "Analyzing job " (some-> textract-invoice
"Analyzing job " (subs (:textract-invoice/job-id textract-invoice) 0 8) "..."] :textract-invoice/job-id
(= "SUCCEEDED" (:textract-invoice/textract-status textract-invoice)) (subs 0 8)) "..."]
[:div.px-4 (= "SUCCEEDED" (:textract-invoice/textract-status textract-invoice))
[:div.px-4
[:div.flex.flex-row.space-x-4
[:div {:style {:width "805"}} [:div.flex.flex-row.space-x-4
(com/card {} [:div {:style {:width "805"}}
[:iframe.p-4 {:src (:textract-invoice/pdf-url textract-invoice) :width 791 :height 700}])] (com/card {}
[:div {:class "basis-1/4"} [:iframe.p-4 {:src (:textract-invoice/pdf-url textract-invoice) :width 791 :height 700}])]
(com/card {} [:div {:class "basis-1/4"}
[:div.p-4 (com/card {}
(textract->invoice-form* textract-invoice)])]]])))) [:div.p-4
(textract->invoice-form* textract-invoice)])]]])))
(defn page* [job-id] (defn page* [id]
[:div#invoice-glimpse-content.mt-4 [:div#invoice-glimpse-content.mt-4
(com/card {} (com/card {}
[:div.px-4.py-3.space-y-4.flex.flex-col [:div.px-4.py-3.space-y-4.flex.flex-col
[:div.flex.gap-x-4 [:h1.text-2xl.font-bold "Invoice Glimpse"] [:div (com/pill {:color :primary} "Beta")] [:div.flex.gap-x-4 [:h1.text-2xl.font-bold "Invoice Glimpse"] [:div (com/pill {:color :primary} "Beta")]
(when job-id (when id
[:div.ml-auto [:a.mb-2 {:href (bidi/path-for ssr-routes/only-routes [:div.ml-auto [:a.mb-2 {:href (bidi/path-for ssr-routes/only-routes
:invoice-glimpse)} :invoice-glimpse)}
(com/button {:color :secondary} "New glimpse")]])] (com/button {:color :secondary} "New glimpse")]])]
[:p.text-sm.italic "Import your invoices with the power of AI. Please only use PDFs with a single invoice in them."] [:p.text-sm.italic "Import your invoices with the power of AI. Please only use PDFs with a single invoice in them."]
(when job-id (when id
(job-progress* job-id)) (job-progress* id))
(when-not job-id (when-not id
(upload-form*))])]) (upload-form*))])])
(defn begin-textract-file [s3-location] (defn begin-textract-file [s3-location]
(let [analysis (textract/start-expense-analysis {:document-location {:s3-object {:bucket (:data-bucket env) :name s3-location}}}) (let [tempid (random-tempid)
textract-invoice {:textract-invoice/job-id (:job-id analysis)
:textract-invoice/textract-status "IN_PROGRESS" id (get-in @(dc/transact conn [{:db/id tempid
:textract-invoice/pdf-url (str "http://" bucket-name ".s3-website-us-east-1.amazonaws.com/" s3-location)}] :textract-invoice/textract-status "IN_PROGRESS"
@(dc/transact conn [textract-invoice]) :textract-invoice/pdf-url (str "http://" bucket-name ".s3-website-us-east-1.amazonaws.com/" s3-location)}])
textract-invoice)) [:tempids tempid])]
(future (let [analysis (textract/start-expense-analysis {:document-location {:s3-object {:bucket (:data-bucket env) :name s3-location}}
:client-request-token (str id)})]
@(dc/transact conn [{:db/id id
:textract-invoice/job-id (:job-id analysis)}])))
(get-job id)))
(defn textract-invoice->invoice [textract-invoice] (defn textract-invoice->invoice [textract-invoice]
(mu/with-context {:textract-invoice textract-invoice} (mu/with-context {:textract-invoice textract-invoice}
@@ -343,46 +347,45 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
:invoice/outstanding-balance total :invoice/outstanding-balance total
:invoice/status :invoice-status/unpaid})))) :invoice/status :invoice-status/unpaid}))))
(defn update-textract-invoice [job-id {:strs [date total invoice-number client vendor]}] (defn update-textract-invoice- [id {:strs [date total invoice-number client vendor]}]
@(dc/transact-async conn [[:upsert-entity (cond-> {:db/id [:textract-invoice/job-id job-id]} @(dc/transact-async conn [[:upsert-entity (cond-> {:db/id id}
date (assoc :textract-invoice/date [date (coerce/to-date (atime/parse date atime/iso-date))]) date (assoc :textract-invoice/date [date (coerce/to-date (atime/parse date atime/iso-date))])
total (assoc :textract-invoice/total [total (Double/parseDouble total)]) total (assoc :textract-invoice/total [total (Double/parseDouble total)])
invoice-number (assoc :textract-invoice/invoice-number [invoice-number invoice-number]) invoice-number (assoc :textract-invoice/invoice-number [invoice-number invoice-number])
client (assoc :textract-invoice/customer-identifier [(pull-attr (dc/db conn) :client/name (Long/parseLong client)) (Long/parseLong client)]) client (assoc :textract-invoice/customer-identifier [(pull-attr (dc/db conn) :client/name (Long/parseLong client)) (Long/parseLong client)])
vendor (assoc :textract-invoice/vendor-name [(pull-attr (dc/db conn) :vendor/name (Long/parseLong vendor)) (Long/parseLong vendor)]))]]) vendor (assoc :textract-invoice/vendor-name [(pull-attr (dc/db conn) :vendor/name (Long/parseLong vendor)) (Long/parseLong vendor)]))]])
(get-job job-id)) (get-job id))
(defn upload [{:keys [identity] :as request}] (defn upload [{:keys [identity] :as request}]
(let [file (or (get (:params request) :file) (let [file (or (get (:params request) :file)
(get (:params request) "file"))] (get (:params request) "file"))]
(mu/log ::uploading-file (mu/log ::uploading-file
:file file) :file file)
(with-open [s (io/input-stream (:tempfile file))] (try
(try (let [s3-location (str "textract-files/" (UUID/randomUUID) "." (last (str/split (:filename file) #"[\\.]")))
(let [s3-location (str "textract-files/" (UUID/randomUUID) "." (last (str/split (:filename file) #"[\\.]"))) _ (with-open [stream (io/input-stream (:tempfile file))]
_ (with-open [stream (io/input-stream (:tempfile file))] (s3/put-object (:data-bucket env)
(s3/put-object (:data-bucket env) s3-location
s3-location stream
stream {:content-type "application/pdf"
{:content-type "application/pdf" :content-length (.length (:tempfile file))}))
:content-length (.length (:tempfile file))})) textract-invoice (begin-textract-file s3-location)]
textract-invoice (begin-textract-file s3-location)] {:headers {"Location"
{:headers {"Location" (str (bidi/path-for ssr-routes/only-routes
(str (bidi/path-for ssr-routes/only-routes :invoice-glimpse-textract-invoice
:invoice-glimpse-job :textract-invoice-id (:db/id textract-invoice)))}
:job-id (:textract-invoice/job-id textract-invoice)))} :status 302})
:status 302}) (catch Exception e
(catch Exception e (alog/error ::cant-begin-textract
(alog/error ::cant-begin-textract
:error e) :error e)
(html-response [:div (.getMessage e)])))))) (html-response [:div (.getMessage e)])))))
(defn update-job [{:as request}] (defn update-textract-invoice [{:as request}]
(let [current-job (update-textract-invoice (:job-id (:route-params request)) (:query-params request))] (let [current-job (update-textract-invoice- (Long/parseLong (:textract-invoice-id (:route-params request))) (:query-params request))]
(html-response (textract->invoice-form* current-job)))) (html-response (textract->invoice-form* current-job))))
(defn create [request] (defn create-invoice [request]
(let [current-job (update-textract-invoice (:job-id (:route-params request)) (:form-params request)) (let [current-job (update-textract-invoice- (Long/parseLong (:textract-invoice-id (:route-params request))) (:form-params request))
new-invoice (textract-invoice->invoice current-job) new-invoice (textract-invoice->invoice current-job)
new-invoice-id (get-in @(dc/transact conn [[:propose-invoice new-invoice]]) new-invoice-id (get-in @(dc/transact conn [[:propose-invoice new-invoice]])
[:tempids (:db/id new-invoice)]) [:tempids (:db/id new-invoice)])
@@ -422,11 +425,17 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
[:a {:href (bidi/path-for ssr-routes/only-routes [:a {:href (bidi/path-for ssr-routes/only-routes
:invoice-glimpse)} :invoice-glimpse)}
"Glimpse"]) "Glimpse"])
(page* (:job-id (:route-params request)))) (page* (some-> request
:route-params
:textract-invoice-id
Long/parseLong)))
"Invoice Glimpse")) "Invoice Glimpse"))
(defn job-progress [request] (defn textract-invoice [request]
(if (get-in request [:headers "hx-request"]) (if (get-in request [:headers "hx-request"])
(html-response (job-progress* (:job-id (:route-params request)))) (html-response (job-progress* (some-> request
:route-params
:textract-invoice-id
Long/parseLong)))
(page request))) (page request)))

View File

@@ -3,10 +3,10 @@
(def routes {"logout" :logout (def routes {"logout" :logout
"search" :search "search" :search
"invoice" {"/glimpse" {"" {:get :invoice-glimpse "invoice" {"/glimpse" {"" {:get :invoice-glimpse
:post :invoice-glimpse-upload} :post :invoice-glimpse-upload
"/job" {["/" [#"\w+" :job-id]] {:get :invoice-glimpse-job ["/" [#"\w+" :textract-invoice-id]] {:get :invoice-glimpse-textract-invoice
"/create" {:post :invoice-glimpse-create} "/create" {:post :invoice-glimpse-create-invoice}
"/update" {:patch :invoice-glimpse-update-job}}}}} "/update" {:patch :invoice-glimpse-update-textract-invoice}}}}}
"admin" {"/history" {"" :admin-history "admin" {"/history" {"" :admin-history
"/" :admin-history "/" :admin-history
#"/search/?" :admin-history-search #"/search/?" :admin-history-search