Invoices can now be imported with the POWER OF AI

This commit is contained in:
Bryce
2023-08-03 21:33:15 -07:00
parent 88eda03e7f
commit ea4ccf731e
3 changed files with 106 additions and 97 deletions

View File

@@ -42,9 +42,9 @@
:company-reports-delete (wrap-client-redirect-unauthenticated (wrap-admin company-reports/delete-report))
:invoice-glimpse (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/page))
:invoice-glimpse-upload (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/upload))
:invoice-glimpse-job (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/job-progress))
:invoice-glimpse-create (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/create))
:invoice-glimpse-update-job (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/update-job))
:invoice-glimpse-textract-invoice (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/textract-invoice))
:invoice-glimpse-create-invoice (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/create-invoice))
:invoice-glimpse-update-textract-invoice (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/update-textract-invoice))
:transaction-insights (wrap-client-redirect-unauthenticated (wrap-admin insights/page))
:transaction-insight-table (wrap-client-redirect-unauthenticated (wrap-admin insights/insight-table))
:transaction-insight-rows (wrap-client-redirect-unauthenticated (wrap-admin insights/transaction-rows))

View File

@@ -44,7 +44,8 @@
(sort-by #(* (-> % :type :confidence)
(-> % :value-detection :confidence)))
(reverse)
(map (comp :text :value-detection))))
(map (comp :text :value-detection))
(filter #(not (str/blank? %)))))
(defn clean-customer
  "Returns `c` with every run of one or more non-word characters
  (anything matched by the regex \\W+) replaced by a single space.
  No trimming is performed, so leading/trailing punctuation becomes a
  leading/trailing space."
  [c]
  (-> c
      (clojure.string/replace #"\W+" " ")))
@@ -60,7 +61,7 @@
[[] #{}]
xs)))
(defn textract->textract-invoice [job-id tx]
(defn textract->textract-invoice [id tx]
(let [lookup (lookup tx)
total-options (->> (stack-rank #{"AMOUNT_DUE"} lookup)
(map (fn [t]
@@ -93,7 +94,7 @@
(map (comp #(Long/parseLong %) :id))
first)]))
(deduplicate))
date-options (->> (stack-rank #{"ORDER_DATE" "DELIVERY_DATE"} lookup)
date-options (->> (stack-rank #{"INVOICE_RECEIPT_DATE" "ORDER_DATE" "DELIVERY_DATE"} lookup)
(map (fn [t]
[t (or (some-> (and (re-find #"\d{1,2}\/\d{1,2}/\d{4,4}" t) (atime/parse t "MM/dd/yyyy"))
(coerce/to-date))
@@ -105,7 +106,7 @@
[t t]))
(deduplicate))]
#:textract-invoice
{:db/id [:textract-invoice/job-id job-id]
{:db/id id
:textract-status "SUCCEEDED"
:total (first total-options)
:total-options (seq total-options)
@@ -135,6 +136,7 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
success: function(file, response) {
window.location.href = file.xhr.responseURL;
},
acceptedFiles: 'application/pdf,.pdf',
disablePreviews: true
}); ")]])
@@ -152,38 +154,34 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
vendor)]
[vn [(:db/id real-vendor) (:vendor/name real-vendor)]])))
(defn get-job [job-id]
(-> (dc/pull (dc/db conn) '[*] [:textract-invoice/job-id job-id])
(defn get-job
  "Pulls every attribute of the entity identified by `id` from the current
  database value and rewrites four textract-invoice attributes for display:

  - :textract-invoice/customer-identifier and each element of
    :textract-invoice/customer-identifier-options are passed through
    `customer-identifier-id->customer-identifier-client`;
  - :textract-invoice/vendor-name and each element of
    :textract-invoice/vendor-name-options are passed through
    `vendor-name-tuple->vendor-tuple`.

  `update` is applied unconditionally, so the helpers receive nil for any
  attribute the entity does not have."
  [id]
  (let [entity          (dc/pull (dc/db conn) '[*] id)
        ->client-tuples #(map customer-identifier-id->customer-identifier-client %)
        ->vendor-tuples #(map vendor-name-tuple->vendor-tuple %)]
    (-> entity
        (update :textract-invoice/customer-identifier
                customer-identifier-id->customer-identifier-client)
        (update :textract-invoice/customer-identifier-options ->client-tuples)
        (update :textract-invoice/vendor-name
                vendor-name-tuple->vendor-tuple)
        (update :textract-invoice/vendor-name-options ->vendor-tuples))))
(defn refresh-job [job-id]
(let [{:keys [:db/id :textract-invoice/textract-status]} (dc/pull (dc/db conn) '[:db/id :textract-invoice/textract-status] [:textract-invoice/job-id job-id])]
(when (= "IN_PROGRESS" textract-status)
(defn refresh-job [id]
(let [{:keys [:db/id :textract-invoice/job-id :textract-invoice/textract-status]} (get-job id)]
(when (and job-id (= "IN_PROGRESS" textract-status))
(let [result (textract/get-expense-analysis {:job-id job-id})
new-status (:job-status result)]
(cond (= "SUCCEEDED" new-status)
@(dc/transact conn [[:upsert-entity (textract->textract-invoice job-id result)]])
@(dc/transact conn [[:upsert-entity (textract->textract-invoice id result)]])
:else
@(dc/transact conn [{:db/id id :textract-invoice/textract-status new-status}]))))
(get-job job-id)))
(get-job id)))
(defn pill-list* [{:keys [selected options class ->text ->value job-id field]}]
(defn pill-list* [{:keys [selected options class ->text ->value id field]}]
(let [options (->> options
(filter (complement #{selected}))
(map (fn [x]
[:div.shrink (com/pill {:color :secondary} (com/link {:hx-patch (str (bidi/path-for ssr-routes/only-routes
:invoice-glimpse-update-job
:job-id job-id)
"?"
(url/map->query {field (if ->value
(->value x)
(->text x))}))
:hx-target "closest form"
:href "#"} (->text x)))]) ))]
[:div.shrink
(com/pill {:color :secondary}
(com/link {:hx-patch (str (bidi/path-for ssr-routes/only-routes :invoice-glimpse-update-textract-invoice :textract-invoice-id id) "?" (url/map->query {field (if ->value (->value x) (->text x))}))
:hx-target "closest form"
:href "#"} (->text x)))]) ))]
(when (seq options)
[:div.col-span-6.col-start-1.text-xs
"Alternates: "
@@ -192,8 +190,8 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
(defn textract->invoice-form* [textract-invoice]
[:form {:hx-post (bidi/path-for ssr-routes/only-routes
:invoice-glimpse-create
:job-id (:textract-invoice/job-id textract-invoice))}
:invoice-glimpse-create-invoice
:textract-invoice-id (:db/id textract-invoice))}
[:div.grid.grid-cols-6.gap-4.mb-4
[:div.col-span-6
(com/field {:label "Client"}
@@ -204,7 +202,7 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
:autofocus true}))]
(pill-list* {:selected (:textract-invoice/customer-identifier textract-invoice)
:options (:textract-invoice/customer-identifier-options textract-invoice)
:job-id (:textract-invoice/job-id textract-invoice)
:id (:db/id textract-invoice)
:class "flex-col"
:field "client"
:->text (fn [[customer-identifier [id client-name]]]
@@ -219,7 +217,7 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
:placeholder "Vendor"}))]
(pill-list* {:selected (:textract-invoice/vendor-name textract-invoice)
:options (:textract-invoice/vendor-name-options textract-invoice)
:job-id (:textract-invoice/job-id textract-invoice)
:id (:db/id textract-invoice)
:class "flex-row"
:field "vendor"
:->text (fn [[vendor-identifier [id vendor-name]]]
@@ -237,7 +235,7 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
:placeholder "Date"}))]
(pill-list* {:selected (:textract-invoice/date textract-invoice)
:options (:textract-invoice/date-options textract-invoice)
:job-id (:textract-invoice/job-id textract-invoice)
:id (:db/id textract-invoice)
:field "date"
:->text (fn [[_ date]]
(-> date
@@ -252,7 +250,7 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
:placeholder "Total"}))]
(pill-list* {:selected (:textract-invoice/total textract-invoice)
:options (:textract-invoice/total-options textract-invoice)
:job-id (:textract-invoice/job-id textract-invoice)
:id (:db/id textract-invoice)
:field "total"
:->text (fn [[_ amount]]
(str amount))})
@@ -265,59 +263,65 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
:placeholder "Invoice Number"}))]
(pill-list* {:selected (:textract-invoice/invoice-number textract-invoice)
:field "invoice-number"
:job-id (:textract-invoice/job-id textract-invoice)
:id (:db/id textract-invoice)
:options (:textract-invoice/invoice-number-options textract-invoice)
:->text (fn [[_ invoice-number]]
(str invoice-number))})]
(com/button {:color :primary} "Save")])
(defn job-progress* [job-id]
(when (pull-id (dc/db conn) [:textract-invoice/job-id job-id])
(let [textract-invoice (refresh-job job-id)]
(cond
(= "IN_PROGRESS" (:textract-invoice/textract-status textract-invoice))
[:div.bg-blue-100.border-2.border-dashed.rounded-lg.border-blue-300.p-4.max-w-md.w-md.text-center.cursor-pointer
{:hx-get (str (bidi/path-for ssr-routes/only-routes
:invoice-glimpse-job
:job-id (:textract-invoice/job-id textract-invoice)))
:hx-trigger "load delay:5s"
:hx-swap "outerHTML"}
"Analyzing job " (subs (:textract-invoice/job-id textract-invoice) 0 8) "..."]
(= "SUCCEEDED" (:textract-invoice/textract-status textract-invoice))
[:div.px-4
[:div.flex.flex-row.space-x-4
[:div {:style {:width "805"}}
(com/card {}
[:iframe.p-4 {:src (:textract-invoice/pdf-url textract-invoice) :width 791 :height 700}])]
[:div {:class "basis-1/4"}
(com/card {}
[:div.p-4
(textract->invoice-form* textract-invoice)])]]]))))
(defn job-progress*
  "Renders the hiccup fragment describing the state of textract-invoice `id`.

  The entity is refreshed through `refresh-job` first, then one of three
  fragments is returned depending on :textract-invoice/textract-status:

  - \"IN_PROGRESS\": a placeholder that re-requests this fragment after five
    seconds and swaps itself out (htmx `load delay:5s` / `outerHTML`);
  - \"SUCCEEDED\": the uploaded PDF in an iframe next to the editable
    invoice form;
  - anything else (including a missing status): an explicit failure notice.
    Without this branch the function returned nil, so once `refresh-job`
    recorded a non-successful status the polling placeholder was swapped for
    an empty response and the page silently went blank."
  [id]
  (let [textract-invoice (refresh-job id)
        status           (:textract-invoice/textract-status textract-invoice)]
    (cond
      (= "IN_PROGRESS" status)
      [:div.bg-blue-100.border-2.border-dashed.rounded-lg.border-blue-300.p-4.max-w-md.w-md.text-center.cursor-pointer
       {:hx-get (str (bidi/path-for ssr-routes/only-routes
                                    :invoice-glimpse-textract-invoice
                                    :textract-invoice-id (:db/id textract-invoice)))
        :hx-trigger "load delay:5s"
        :hx-swap "outerHTML"}
       ;; The job id may not be recorded yet, hence the nil-safe threading.
       "Analyzing job " (some-> textract-invoice
                                :textract-invoice/job-id
                                (subs 0 8)) "..."]

      (= "SUCCEEDED" status)
      [:div.px-4
       [:div.flex.flex-row.space-x-4
        ;; NOTE(review): "805" carries no CSS unit — confirm whether "805px" was intended.
        [:div {:style {:width "805"}}
         (com/card {}
                   [:iframe.p-4 {:src (:textract-invoice/pdf-url textract-invoice) :width 791 :height 700}])]
        [:div {:class "basis-1/4"}
         (com/card {}
                   [:div.p-4
                    (textract->invoice-form* textract-invoice)])]]]

      ;; Any other stored status, or no status at all: tell the user instead
      ;; of rendering nothing.
      :else
      [:div.bg-red-100.border-2.rounded-lg.border-red-300.p-4.max-w-md.w-md.text-center
       "This invoice could not be analyzed"
       (when status
         (str " (status: " status ")"))
       ". Please start a new glimpse."])))
(defn page* [job-id]
(defn page* [id]
[:div#invoice-glimpse-content.mt-4
(com/card {}
[:div.px-4.py-3.space-y-4.flex.flex-col
[:div.flex.gap-x-4 [:h1.text-2xl.font-bold "Invoice Glimpse"] [:div (com/pill {:color :primary} "Beta")]
(when job-id
(when id
[:div.ml-auto [:a.mb-2 {:href (bidi/path-for ssr-routes/only-routes
:invoice-glimpse)}
(com/button {:color :secondary} "New glimpse")]])]
[:p.text-sm.italic "Import your invoices with the power of AI. Please only use PDFs with a single invoice in them."]
(when job-id
(job-progress* job-id))
(when-not job-id
(when id
(job-progress* id))
(when-not id
(upload-form*))])])
(defn begin-textract-file [s3-location]
(let [analysis (textract/start-expense-analysis {:document-location {:s3-object {:bucket (:data-bucket env) :name s3-location}}})
textract-invoice {:textract-invoice/job-id (:job-id analysis)
:textract-invoice/textract-status "IN_PROGRESS"
:textract-invoice/pdf-url (str "http://" bucket-name ".s3-website-us-east-1.amazonaws.com/" s3-location)}]
@(dc/transact conn [textract-invoice])
textract-invoice))
(let [tempid (random-tempid)
id (get-in @(dc/transact conn [{:db/id tempid
:textract-invoice/textract-status "IN_PROGRESS"
:textract-invoice/pdf-url (str "http://" bucket-name ".s3-website-us-east-1.amazonaws.com/" s3-location)}])
[:tempids tempid])]
(future (let [analysis (textract/start-expense-analysis {:document-location {:s3-object {:bucket (:data-bucket env) :name s3-location}}
:client-request-token (str id)})]
@(dc/transact conn [{:db/id id
:textract-invoice/job-id (:job-id analysis)}])))
(get-job id)))
(defn textract-invoice->invoice [textract-invoice]
(mu/with-context {:textract-invoice textract-invoice}
@@ -343,46 +347,45 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
:invoice/outstanding-balance total
:invoice/status :invoice-status/unpaid}))))
(defn update-textract-invoice [job-id {:strs [date total invoice-number client vendor]}]
@(dc/transact-async conn [[:upsert-entity (cond-> {:db/id [:textract-invoice/job-id job-id]}
(defn update-textract-invoice- [id {:strs [date total invoice-number client vendor]}]
@(dc/transact-async conn [[:upsert-entity (cond-> {:db/id id}
date (assoc :textract-invoice/date [date (coerce/to-date (atime/parse date atime/iso-date))])
total (assoc :textract-invoice/total [total (Double/parseDouble total)])
invoice-number (assoc :textract-invoice/invoice-number [invoice-number invoice-number])
client (assoc :textract-invoice/customer-identifier [(pull-attr (dc/db conn) :client/name (Long/parseLong client)) (Long/parseLong client)])
vendor (assoc :textract-invoice/vendor-name [(pull-attr (dc/db conn) :vendor/name (Long/parseLong vendor)) (Long/parseLong vendor)]))]])
(get-job job-id))
(get-job id))
(defn upload [{:keys [identity] :as request}]
(let [file (or (get (:params request) :file)
(get (:params request) "file"))]
(mu/log ::uploading-file
:file file)
(with-open [s (io/input-stream (:tempfile file))]
(try
(let [s3-location (str "textract-files/" (UUID/randomUUID) "." (last (str/split (:filename file) #"[\\.]")))
_ (with-open [stream (io/input-stream (:tempfile file))]
(s3/put-object (:data-bucket env)
s3-location
stream
{:content-type "application/pdf"
:content-length (.length (:tempfile file))}))
textract-invoice (begin-textract-file s3-location)]
{:headers {"Location"
(str (bidi/path-for ssr-routes/only-routes
:invoice-glimpse-job
:job-id (:textract-invoice/job-id textract-invoice)))}
:status 302})
(catch Exception e
(alog/error ::cant-begin-textract
(try
(let [s3-location (str "textract-files/" (UUID/randomUUID) "." (last (str/split (:filename file) #"[\\.]")))
_ (with-open [stream (io/input-stream (:tempfile file))]
(s3/put-object (:data-bucket env)
s3-location
stream
{:content-type "application/pdf"
:content-length (.length (:tempfile file))}))
textract-invoice (begin-textract-file s3-location)]
{:headers {"Location"
(str (bidi/path-for ssr-routes/only-routes
:invoice-glimpse-textract-invoice
:textract-invoice-id (:db/id textract-invoice)))}
:status 302})
(catch Exception e
(alog/error ::cant-begin-textract
:error e)
(html-response [:div (.getMessage e)]))))))
(html-response [:div (.getMessage e)])))))
(defn update-job [{:as request}]
(let [current-job (update-textract-invoice (:job-id (:route-params request)) (:query-params request))]
(defn update-textract-invoice
  "Request handler: parses the :textract-invoice-id route parameter as a
  long, applies the request's query parameters to that textract invoice via
  `update-textract-invoice-`, and responds with the re-rendered invoice form
  wrapped by `html-response`."
  [{:as request}]
  (let [id          (-> request :route-params :textract-invoice-id Long/parseLong)
        current-job (update-textract-invoice- id (:query-params request))]
    (-> current-job
        textract->invoice-form*
        html-response)))
(defn create [request]
(let [current-job (update-textract-invoice (:job-id (:route-params request)) (:form-params request))
(defn create-invoice [request]
(let [current-job (update-textract-invoice- (Long/parseLong (:textract-invoice-id (:route-params request))) (:form-params request))
new-invoice (textract-invoice->invoice current-job)
new-invoice-id (get-in @(dc/transact conn [[:propose-invoice new-invoice]])
[:tempids (:db/id new-invoice)])
@@ -422,11 +425,17 @@ invoice_dropzone = new Dropzone(\"#invoice\", {
[:a {:href (bidi/path-for ssr-routes/only-routes
:invoice-glimpse)}
"Glimpse"])
(page* (:job-id (:route-params request))))
(page* (some-> request
:route-params
:textract-invoice-id
Long/parseLong)))
"Invoice Glimpse"))
(defn job-progress [request]
(defn textract-invoice [request]
(if (get-in request [:headers "hx-request"])
(html-response (job-progress* (:job-id (:route-params request))))
(html-response (job-progress* (some-> request
:route-params
:textract-invoice-id
Long/parseLong)))
(page request)))

View File

@@ -3,10 +3,10 @@
(def routes {"logout" :logout
"search" :search
"invoice" {"/glimpse" {"" {:get :invoice-glimpse
:post :invoice-glimpse-upload}
"/job" {["/" [#"\w+" :job-id]] {:get :invoice-glimpse-job
"/create" {:post :invoice-glimpse-create}
"/update" {:patch :invoice-glimpse-update-job}}}}}
:post :invoice-glimpse-upload
["/" [#"\w+" :textract-invoice-id]] {:get :invoice-glimpse-textract-invoice
"/create" {:post :invoice-glimpse-create-invoice}
"/update" {:patch :invoice-glimpse-update-textract-invoice}}}}}
"admin" {"/history" {"" :admin-history
"/" :admin-history
#"/search/?" :admin-history-search