diff --git a/package-lock.json b/package-lock.json index f0367b9e..4018dafc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -2827,9 +2827,9 @@ "peer": true }, "node_modules/yaml": { - "version": "2.2.2", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.2.2.tgz", - "integrity": "sha512-CBKFWExMn46Foo4cldiChEzn7S7SRV+wqiluAb6xmueD/fGyRHIhX8m14vVGgeFWjN540nKCNVj6P21eQjgTuA==", + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.1.tgz", + "integrity": "sha512-2eHWfjaoXgTBC2jNM1LRef62VQa0umtvRiDSk6HSzW7RvS5YtkabJrwYLLEKWBc8a5U2PTSCs+dJjUTJdlHsWQ==", "dev": true, "peer": true, "engines": { @@ -4976,9 +4976,9 @@ "peer": true }, "yaml": { - "version": "2.2.2", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.2.2.tgz", - "integrity": "sha512-CBKFWExMn46Foo4cldiChEzn7S7SRV+wqiluAb6xmueD/fGyRHIhX8m14vVGgeFWjN540nKCNVj6P21eQjgTuA==", + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.1.tgz", + "integrity": "sha512-2eHWfjaoXgTBC2jNM1LRef62VQa0umtvRiDSk6HSzW7RvS5YtkabJrwYLLEKWBc8a5U2PTSCs+dJjUTJdlHsWQ==", "dev": true, "peer": true } diff --git a/resources/public/output.css b/resources/public/output.css index 251d5b1f..4da0b717 100644 --- a/resources/public/output.css +++ b/resources/public/output.css @@ -1131,6 +1131,10 @@ input:checked + .toggle-bg { grid-column: span 6 / span 6; } +.col-start-1 { + grid-column-start: 1; +} + .m-4 { margin: 1rem; } @@ -1376,12 +1380,20 @@ input:checked + .toggle-bg { max-width: 1024px; } -.flex-1 { - flex: 1 1 0%; +.max-w-sm { + max-width: 24rem; } -.flex-none { - flex: none; +.max-w-md { + max-width: 28rem; +} + +.max-w-xl { + max-width: 36rem; +} + +.flex-1 { + flex: 1 1 0%; } .flex-shrink { @@ -1392,6 +1404,14 @@ input:checked + .toggle-bg { flex-shrink: 0; } +.basis-1\/2 { + flex-basis: 50%; +} + +.basis-1\/4 { + flex-basis: 25%; +} + .-translate-x-full { --tw-translate-x: -100%; transform: translate(var(--tw-translate-x), var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y)); @@ -1476,6 +1496,10 @@ input:checked + .toggle-bg { grid-template-columns: repeat(7, minmax(0, 1fr)); } +.flex-row { + flex-direction: row; +} + .flex-row-reverse { flex-direction: row-reverse; } @@ -1488,10 +1512,6 @@ input:checked + .toggle-bg { flex-wrap: wrap; } -.content-start { - align-content: flex-start; -} - .items-start { align-items: flex-start; } @@ -1524,10 +1544,6 @@ input:checked + .toggle-bg { justify-content: space-between; } -.justify-items-start { - justify-items: start; -} - .gap-1 { gap: 0.25rem; } @@ -1603,10 +1619,6 @@ input:checked + .toggle-bg { border-color: rgb(243 244 246 / var(--tw-divide-opacity)); } -.justify-self-start { - justify-self: start; -} - .overflow-auto { overflow: auto; } @@ -1676,6 +1688,10 @@ input:checked + .toggle-bg { border-width: 2px; } +.border-4 { + border-width: 4px; +} + .border-b { border-bottom-width: 1px; } @@ -1688,6 +1704,15 @@ input:checked + .toggle-bg { border-top-width: 1px; } +.border-dashed { + border-style: dashed; +} + +.border-blue-300 { + --tw-border-opacity: 1; + border-color: rgb(102 196 242 / var(--tw-border-opacity)); +} + .border-blue-600 { --tw-border-opacity: 1; border-color: rgb(0 125 187 / var(--tw-border-opacity)); @@ -1733,6 +1758,11 @@ input:checked + .toggle-bg { background-color: rgb(204 235 251 / var(--tw-bg-opacity)); } +.bg-blue-200 { + --tw-bg-opacity: 1; + background-color: rgb(153 215 247 / var(--tw-bg-opacity)); +} + .bg-blue-300 { --tw-bg-opacity: 1; background-color: rgb(102 196 242 / var(--tw-bg-opacity)); @@ -1783,6 +1813,11 @@ input:checked + .toggle-bg { background-color: rgb(228 240 213 / var(--tw-bg-opacity)); } +.bg-green-200 { + --tw-bg-opacity: 1; + background-color: rgb(201 225 171 / var(--tw-bg-opacity)); +} + .bg-green-500 { --tw-bg-opacity: 1; background-color: rgb(121 181 46 / var(--tw-bg-opacity)); @@ -1798,6 +1833,11 @@ input:checked + .toggle-bg { background-color: rgb(255 205 205 / var(--tw-bg-opacity)); } +.bg-red-200 { + --tw-bg-opacity: 1; + background-color: rgb(255 154 154 / var(--tw-bg-opacity)); +} + .bg-red-50 { --tw-bg-opacity: 1; background-color: rgb(255 230 230 / var(--tw-bg-opacity)); @@ -1817,21 +1857,6 @@ input:checked + .toggle-bg { background-color: rgb(253 246 178 / var(--tw-bg-opacity)); } -.bg-green-200 { - --tw-bg-opacity: 1; - background-color: rgb(201 225 171 / var(--tw-bg-opacity)); -} - -.bg-blue-200 { - --tw-bg-opacity: 1; - background-color: rgb(153 215 247 / var(--tw-bg-opacity)); -} - -.bg-red-200 { - --tw-bg-opacity: 1; - background-color: rgb(255 154 154 / var(--tw-bg-opacity)); -} - .bg-opacity-50 { --tw-bg-opacity: 0.5; } @@ -2336,6 +2361,11 @@ input:checked + .toggle-bg { border-color: rgb(209 213 219 / var(--tw-border-opacity)); } +.hover\:bg-blue-300:hover { + --tw-bg-opacity: 1; + background-color: rgb(102 196 242 / var(--tw-bg-opacity)); +} + .hover\:bg-blue-600:hover { --tw-bg-opacity: 1; background-color: rgb(0 125 187 / var(--tw-bg-opacity)); @@ -2361,6 +2391,11 @@ input:checked + .toggle-bg { background-color: rgb(228 240 213 / var(--tw-bg-opacity)); } +.hover\:bg-green-300:hover { + --tw-bg-opacity: 1; + background-color: rgb(175 211 130 / var(--tw-bg-opacity)); +} + .hover\:bg-green-600:hover { --tw-bg-opacity: 1; background-color: rgb(97 145 37 / var(--tw-bg-opacity)); @@ -2376,26 +2411,16 @@ input:checked + .toggle-bg { background-color: rgb(228 240 213 / var(--tw-bg-opacity)); } -.hover\:bg-white:hover { - --tw-bg-opacity: 1; - background-color: rgb(255 255 255 / var(--tw-bg-opacity)); -} - -.hover\:bg-green-300:hover { - --tw-bg-opacity: 1; - background-color: rgb(175 211 130 / var(--tw-bg-opacity)); -} - -.hover\:bg-blue-300:hover { - --tw-bg-opacity: 1; - background-color: rgb(102 196 242 / var(--tw-bg-opacity)); -} - .hover\:bg-red-300:hover { --tw-bg-opacity: 1; background-color: rgb(255 104 104 / var(--tw-bg-opacity)); } +.hover\:bg-white:hover { + --tw-bg-opacity: 1; + background-color: rgb(255 255 255 / var(--tw-bg-opacity)); +} + .hover\:text-blue-600:hover { --tw-text-opacity: 1; color: rgb(0 125 187 / var(--tw-text-opacity)); @@ -2457,6 +2482,11 @@ input:checked + .toggle-bg { box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000); } +.focus\:ring-blue-200:focus { + --tw-ring-opacity: 1; + --tw-ring-color: rgb(153 215 247 / var(--tw-ring-opacity)); +} + .focus\:ring-blue-300:focus { --tw-ring-opacity: 1; --tw-ring-color: rgb(102 196 242 / var(--tw-ring-opacity)); @@ -2477,6 +2507,11 @@ input:checked + .toggle-bg { --tw-ring-color: rgb(209 213 219 / var(--tw-ring-opacity)); } +.focus\:ring-green-200:focus { + --tw-ring-opacity: 1; + --tw-ring-color: rgb(201 225 171 / var(--tw-ring-opacity)); +} + .focus\:ring-green-300:focus { --tw-ring-opacity: 1; --tw-ring-color: rgb(175 211 130 / var(--tw-ring-opacity)); @@ -2487,16 +2522,6 @@ input:checked + .toggle-bg { --tw-ring-color: rgb(121 181 46 / var(--tw-ring-opacity)); } -.focus\:ring-green-200:focus { - --tw-ring-opacity: 1; - --tw-ring-color: rgb(201 225 171 / var(--tw-ring-opacity)); -} - -.focus\:ring-blue-200:focus { - --tw-ring-opacity: 1; - --tw-ring-color: rgb(153 215 247 / var(--tw-ring-opacity)); -} - .focus\:ring-red-200:focus { --tw-ring-opacity: 1; --tw-ring-color: rgb(255 154 154 / var(--tw-ring-opacity)); @@ -2565,6 +2590,11 @@ input:checked + .toggle-bg { background-color: rgb(0 125 187 / var(--tw-bg-opacity)); } +:is(.dark .dark\:bg-blue-700) { + --tw-bg-opacity: 1; + background-color: rgb(0 94 140 / var(--tw-bg-opacity)); +} + :is(.dark .dark\:bg-blue-900) { --tw-bg-opacity: 1; background-color: rgb(0 31 47 / var(--tw-bg-opacity)); @@ -2599,11 +2629,21 @@ input:checked + .toggle-bg { background-color: rgb(97 145 37 / var(--tw-bg-opacity)); } +:is(.dark .dark\:bg-green-700) { + --tw-bg-opacity: 1; + background-color: rgb(73 109 28 / var(--tw-bg-opacity)); +} + :is(.dark .dark\:bg-green-900) { --tw-bg-opacity: 1; background-color: rgb(24 36 9 / var(--tw-bg-opacity)); } +:is(.dark .dark\:bg-red-700) { + --tw-bg-opacity: 1; + background-color: rgb(153 2 2 / var(--tw-bg-opacity)); +} + :is(.dark .dark\:bg-red-900) { --tw-bg-opacity: 1; background-color: rgb(51 1 1 / var(--tw-bg-opacity)); @@ -2614,26 +2654,6 @@ input:checked + .toggle-bg { background-color: rgb(99 49 18 / var(--tw-bg-opacity)); } -:is(.dark .dark\:bg-green-500) { - --tw-bg-opacity: 1; - background-color: rgb(121 181 46 / var(--tw-bg-opacity)); -} - -:is(.dark .dark\:bg-green-700) { - --tw-bg-opacity: 1; - background-color: rgb(73 109 28 / var(--tw-bg-opacity)); -} - -:is(.dark .dark\:bg-blue-700) { - --tw-bg-opacity: 1; - background-color: rgb(0 94 140 / var(--tw-bg-opacity)); -} - -:is(.dark .dark\:bg-red-700) { - --tw-bg-opacity: 1; - background-color: rgb(153 2 2 / var(--tw-bg-opacity)); -} - :is(.dark .dark\:bg-opacity-80) { --tw-bg-opacity: 0.8; } @@ -2717,6 +2737,11 @@ input:checked + .toggle-bg { --tw-ring-offset-color: #1F2937; } +:is(.dark .dark\:hover\:bg-blue-600:hover) { + --tw-bg-opacity: 1; + background-color: rgb(0 125 187 / var(--tw-bg-opacity)); +} + :is(.dark .dark\:hover\:bg-blue-700:hover) { --tw-bg-opacity: 1; background-color: rgb(0 94 140 / var(--tw-bg-opacity)); @@ -2747,11 +2772,6 @@ input:checked + .toggle-bg { background-color: rgb(73 109 28 / var(--tw-bg-opacity)); } -:is(.dark .dark\:hover\:bg-blue-600:hover) { - --tw-bg-opacity: 1; - background-color: rgb(0 125 187 / var(--tw-bg-opacity)); -} - :is(.dark .dark\:hover\:bg-red-600:hover) { --tw-bg-opacity: 1; background-color: rgb(204 2 2 / var(--tw-bg-opacity)); diff --git a/resources/schema.edn b/resources/schema.edn index 9b430fc1..d912ae3f 100644 --- a/resources/schema.edn +++ b/resources/schema.edn @@ -2112,4 +2112,19 @@ :db/cardinality :db.cardinality/one :db/unique :db.unique/identity} + {:db/ident :textract-invoice/job-id + :db/doc "The Textract job id used to parse the pdf" + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + :db/unique :db.unique/identity} + + {:db/ident :textract-invoice/textract-status + :db/doc "The raw textract status, e.g., SUCCEEDED, IN_PROGRESS" + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one} + + {:db/ident :textract-invoice/pdf-url + :db/doc "A url to the pdf on s3" + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one} ] diff --git a/scratch-sessions/textract.repl b/scratch-sessions/textract.repl new file mode 100644 index 00000000..da6f75cf --- /dev/null +++ b/scratch-sessions/textract.repl @@ -0,0 +1,162 @@ + +(ns amazonica.aws.textract + (:require + [auto-ap.solr :as solr] + [unilog.context :as lc])) +(require '[amazonica.core :as amz]) +(import '[com.amazonaws.services.textract AmazonTextractClient ]) + +(import '[com.amazonaws.services.textract AmazonTextractClient ]) +(import '[com.amazonaws.services.textract.model S3Object ]) +(import '[com.amazonaws.services.textract.model StartExpenseAnalysisRequest ]) +(import '[com.amazonaws.services.textract.model GetExpenseAnalysisRequest ]) + +(import '[com.amazonaws.services.textract.model DocumentLocation]) +(amz/set-client AmazonTextractClient *ns*) + +(in-ns 'user) +(require '[clojure.java.io :as io]) +(require '[cheshire.core :as cheshire]) +(require '[amazonica.aws.s3 :as s3]) +(require '[auto-ap.graphql.utils :refer [cleanse-query]]) +(require '[iol-ion.tx :as itx]) + +(require '[config.core :refer [env]]) +(require '[amazonica.aws.textract :as txtract]) + + +(import '[com.amazonaws.services.textract AmazonTextractClient ]) +(import '[com.amazonaws.services.textract.model S3Object ]) +(import '[com.amazonaws.services.textract.model StartExpenseAnalysisRequest ]) +(import '[com.amazonaws.services.textract.model GetExpenseAnalysisRequest ]) + +(import '[com.amazonaws.services.textract.model DocumentLocation]) +(import '[java.util UUID]) + + + +(defn textract-file [s3-location] + (let [job-id (:job-id (txtract/start-expense-analysis {:document-location {:s3-object {:bucket (:data-bucket env) :name s3-location}}})) + result (loop [result (txtract/get-expense-analysis {:job-id job-id})] + (println "checking..." (:job-status result)) + (Thread/sleep 2000) + (if (= "IN_PROGRESS" (:job-status result)) + (recur (txtract/get-expense-analysis {:job-id job-id})) + result))] result)) + +(defn lookup [tx] + (->> (:expense-documents tx) + (mapcat :summary-fields) + (concat (->> tx :expense-documents )) + (map (fn [sf] + (-> sf + (update :label-detection dissoc :geometry) + (update :value-detection dissoc :geometry)))) + #_(group-by (fn [sf] + [(get-in sf ["Type" "Text"]) + (get-in sf ["LabelDetection" "Text"])] + )))) + +(defn find-best [field-descriptors] + {:raw field-descriptors + :best + (->> field-descriptors + (sort-by #(* (-> % :type :confidence) + (-> % :value-detection :confidence))) + last + :value-detection + :text)}) + +(require '[auto-ap.solr :as solr]) +(require '[auto-ap.logging :as alog]) + +(require '[com.brunobonacci.mulog :as mu]) +(require '[auto-ap.datomic.clients :as d-clients]) +(require '[auto-ap.time :as atime]) + +(defn textract->coalesced [tx] + (let [lookup (lookup tx) + ] + {:total (find-best (filter (fn [node] (= "TOTAL" (:text (:type node)))) lookup)) + :account-number (find-best (filter (fn [node] (= "CUSTOMER_NUMBER" (:text (:type node)))) lookup)) + :customer-identifier (find-best (filter (fn [node] (= "RECEIVER_NAME" (:text (:type node)))) lookup)) + :vendor-name (find-best (filter (fn [node] (= "VENDOR_NAME" (:text (:type node)))) lookup)) + :date (find-best (filter (fn [node] (= "ORDER_DATE" (:text (:type node)))) lookup)) + :invoice-number (find-best (filter (fn [node] (= "INVOICE_RECEIPT_ID" (:text (:type node)))) lookup)) + })) + +(defn clean-customer [c] + (clojure.string/replace c #"\W+" " ")) + +(require '[datomic.api :as dc]) + +(require '[auto-ap.datomic :refer [conn]]) + +(defn coalesced->invoice [i] + (mu/with-context {:inference i} + (let [vendor-id (->> (solr/query solr/impl "vendors" {"query" (format "name:(%s) ", (:best (:vendor-name i))) "fields" "score, *"}) + (filter (fn [d] (> (:score d) 4.0))) + (map (comp #(Long/parseLong %) :id)) + first) + account-number (:best (:account-number i)) + customer-identifier (:best (:customer-identifier i)) + client-id (or + (when (not-empty account-number) + (:db/id (d-clients/exact-match (:best (:account-number i))))) + (when (:best (:customer-identifier i)) + (->> (solr/query solr/impl "clients" {"query" (format "name:(%s) ", (clean-customer customer-identifier)) "fields" "score, *"}) + #_(filter (fn [d] (> (:score d) 4.0))) + (map (comp #(Long/parseLong %) :id)) + first))) + location (when client-id + (->> (dc/pull (dc/db conn) '[:client/locations] client-id) + :client/locations + first)) + invoice-number (:best (:invoice-number i)) + total (Double/parseDouble (some->> i + :total + :best + (re-find #"([0-9.\-]+)") + second) ) + date (or (atime/parse (:best (:date i)) "MM/dd/yyyy") + (atime/parse (:best (:date i)) "MM/dd/yy"))] + (when-not vendor-id + (alog/warn ::cant-find-vendor + :search-results (solr/query solr/impl "vendors" {"query" (format "name:(%s) ", (:best (:vendor-name i))) "fields" "score, *"}) + :vendor-name (:vendor-name i))) + (when-not client-id + (alog/warn ::cant-find-customer)) + (when (and client-id date invoice-number vendor-id total) + {:db/id (itx/random-tempid) + :invoice/client client-id + :invoice/client-identifier (or account-number customer-identifier) + :invoice/vendor vendor-id + :invoice/invoice-number invoice-number + :invoice/total total + :invoice/date date + :invoice/location location + :invoice/import-status :import-status/pending + :invoice/outstanding-balance total + :invoice/status :invoice-status/unpaid})))) + +(defn file->textract->invoice [f] + (let [s3-location (str "textract-files/" (UUID/randomUUID) "." (last (str/split f #"[\\.]"))) + file (io/file f)] + (with-open [stream (io/input-stream f)] + (s3/put-object (:data-bucket env) + s3-location + stream + {:content-type "application/pdf" + :content-length (.length file)})) + (-> (textract-file s3-location) + (textract->coalesced) + (coalesced->invoice)))) + + + + +#_(def result (with-open [x (io/reader "batch.json")] + (json/parse-stream x))) + + + diff --git a/src/clj/amazonica/aws/textract.clj b/src/clj/amazonica/aws/textract.clj new file mode 100644 index 00000000..3278a264 --- /dev/null +++ b/src/clj/amazonica/aws/textract.clj @@ -0,0 +1,12 @@ +(ns amazonica.aws.textract + (:require [amazonica.core :as amz]) + (:import [com.amazonaws.services.textract AmazonTextractClient ])) + +#_ +(import '[com.amazonaws.services.textract AmazonTextractClient ]) +#_(import '[com.amazonaws.services.textract.model S3Object ]) +#_(import '[com.amazonaws.services.textract.model StartExpenseAnalysisRequest ]) +#_(import '[com.amazonaws.services.textract.model GetExpenseAnalysisRequest ]) + +#_(import '[com.amazonaws.services.textract.model DocumentLocation]) +(amz/set-client AmazonTextractClient *ns*) diff --git a/src/clj/auto_ap/routes/ezcater_xls.clj b/src/clj/auto_ap/routes/ezcater_xls.clj index a9fdb31a..1e52cba2 100644 --- a/src/clj/auto_ap/routes/ezcater_xls.clj +++ b/src/clj/auto_ap/routes/ezcater_xls.clj @@ -207,5 +207,5 @@ "EZCater XLS Import"]) (page*)) - "EZCater upload"))) + "Invoice Glimpse"))) diff --git a/src/clj/auto_ap/ssr/core.clj b/src/clj/auto_ap/ssr/core.clj index 0b483364..7313810b 100644 --- a/src/clj/auto_ap/ssr/core.clj +++ b/src/clj/auto_ap/ssr/core.clj @@ -11,6 +11,7 @@ [auto-ap.ssr.search :as search] [auto-ap.ssr.company-dropdown :as company-dropdown] [auto-ap.ssr.company.reports :as company-reports] + [auto-ap.ssr.invoice.glimpse :as invoice-glimpse] [auto-ap.routes.ezcater-xls :as ezcater-xls] [auto-ap.ssr.company :as company])) @@ -39,6 +40,9 @@ :company-reports (wrap-client-redirect-unauthenticated (wrap-secure company-reports/page)) :company-reports-table (wrap-client-redirect-unauthenticated (wrap-secure company-reports/table)) :company-reports-delete (wrap-client-redirect-unauthenticated (wrap-admin company-reports/delete-report)) + :invoice-glimpse (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/page)) + :invoice-glimpse-upload (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/upload)) + :invoice-glimpse-job (wrap-client-redirect-unauthenticated (wrap-admin invoice-glimpse/job-progress)) :transaction-insights (wrap-client-redirect-unauthenticated (wrap-admin insights/page)) :transaction-insight-table (wrap-client-redirect-unauthenticated (wrap-admin insights/insight-table)) :transaction-insight-rows (wrap-client-redirect-unauthenticated (wrap-admin insights/transaction-rows)) diff --git a/src/clj/auto_ap/ssr/invoice/glimpse.clj b/src/clj/auto_ap/ssr/invoice/glimpse.clj new file mode 100644 index 00000000..01379a22 --- /dev/null +++ b/src/clj/auto_ap/ssr/invoice/glimpse.clj @@ -0,0 +1,295 @@ + +(ns auto-ap.ssr.invoice.glimpse + (:require + [amazonica.aws.s3 :as s3] + [amazonica.aws.textract :as textract] + [auto-ap.datomic :refer [conn pull-attr pull-id]] + [auto-ap.datomic.clients :as d-clients] + [auto-ap.logging :as alog] + [auto-ap.solr :as solr] + [auto-ap.ssr-routes :as ssr-routes] + [auto-ap.ssr.components :as com] + [auto-ap.ssr.ui :refer [base-page]] + [auto-ap.ssr.utils :refer [html-response path->name]] + [auto-ap.time :as atime] + [bidi.bidi :as bidi] + [cemerick.url :as url] + [clojure.java.io :as io] + [clojure.string :as str] + [com.brunobonacci.mulog :as mu] + [config.core :refer [env]] + [datomic.api :as dc] + [hiccup2.core :as hiccup] + [iol-ion.tx :refer [random-tempid]]) + (:import + (java.util UUID))) + +(def bucket-name (:data-bucket env)) + +(defn lookup [tx] + (->> (:expense-documents tx) + (mapcat :summary-fields) + (concat (->> tx :expense-documents )) + (map (fn [sf] + (-> sf + (update :label-detection dissoc :geometry) + (update :value-detection dissoc :geometry)))) + #_(group-by (fn [sf] + [(get-in sf ["Type" "Text"]) + (get-in sf ["LabelDetection" "Text"])] + )))) +(defn find-best [field-descriptors] + {:raw field-descriptors + :best + (->> field-descriptors + (sort-by #(* (-> % :type :confidence) + (-> % :value-detection :confidence))) + last + :value-detection + :text)}) + +(defn textract->coalesced [tx] + (let [lookup (lookup tx) + ] + {:total (find-best (filter (fn [node] (= "TOTAL" (:text (:type node)))) lookup)) + :account-number (find-best (filter (fn [node] (= "CUSTOMER_NUMBER" (:text (:type node)))) lookup)) + :customer-identifier (find-best (filter (fn [node] (= "RECEIVER_NAME" (:text (:type node)))) lookup)) + :vendor-name (find-best (filter (fn [node] (= "VENDOR_NAME" (:text (:type node)))) lookup)) + :date (find-best (filter (fn [node] (= "ORDER_DATE" (:text (:type node)))) lookup)) + :invoice-number (find-best (filter (fn [node] (= "INVOICE_RECEIPT_ID" (:text (:type node)))) lookup)) + })) + +(defn clean-customer [c] + (clojure.string/replace c #"\W+" " ")) + +(defn coalesced->invoice [i] + (mu/with-context {:inference i} + (let [vendor-id (->> (solr/query solr/impl "vendors" {"query" (format "name:(%s) ", (:best (:vendor-name i))) "fields" "score, *"}) + (filter (fn [d] (> (:score d) 4.0))) + (map (comp #(Long/parseLong %) :id)) + first) + account-number (:best (:account-number i)) + customer-identifier (:best (:customer-identifier i)) + client-id (or + (when (not-empty account-number) + (:db/id (d-clients/exact-match (:best (:account-number i))))) + (when (:best (:customer-identifier i)) + (->> (solr/query solr/impl "clients" {"query" (format "name:(%s) ", (clean-customer customer-identifier)) "fields" "score, *"}) + #_(filter (fn [d] (> (:score d) 4.0))) + (map (comp #(Long/parseLong %) :id)) + first))) + location (when client-id + (->> (dc/pull (dc/db conn) '[:client/locations] client-id) + :client/locations + first)) + invoice-number (:best (:invoice-number i)) + total (Double/parseDouble (some->> i + :total + :best + (re-find #"([0-9.\-]+)") + second) ) + date (or (atime/parse (:best (:date i)) "MM/dd/yyyy") + (atime/parse (:best (:date i)) "MM/dd/yy"))] + (when-not vendor-id + (alog/warn ::cant-find-vendor + :search-results (solr/query solr/impl "vendors" {"query" (format "name:(%s) ", (:best (:vendor-name i))) "fields" "score, *"}) + :vendor-name (:vendor-name i))) + (when-not client-id + (alog/warn ::cant-find-customer)) + (when (and client-id date invoice-number vendor-id total) + {:db/id (random-tempid) + :invoice/client client-id + :invoice/client-identifier (or account-number customer-identifier) + :invoice/vendor vendor-id + :invoice/invoice-number invoice-number + :invoice/total total + :invoice/date date + :invoice/location location + :invoice/import-status :import-status/pending + :invoice/outstanding-balance total + :invoice/status :invoice-status/unpaid})))) + +(defn upload-form* [] + [:div + [:form.bg-blue-100.border-2.border-dashed.rounded-lg.border-blue-300.p-4.max-w-md.w-md.text-center.cursor-pointer + {:action (bidi/path-for ssr-routes/only-routes + :invoice-glimpse-upload) + :method "POST" + :id "invoice"} + "Drop an invoice here"] + [:script + (hiccup/raw + " +invoice_dropzone = new Dropzone(\"#invoice\", { + success: function(file, response) { + window.location.href = file.xhr.responseURL; + }, + disablePreviews: true +}); ")]]) + +(defn refresh-job [job-id] + (let [{:keys [:db/id :textract-invoice/textract-status]} (dc/pull (dc/db conn) '[:db/id :textract-invoice/textract-status] [:textract-invoice/job-id job-id])] + (when (= "IN_PROGRESS" textract-status) + (let [result (textract/get-expense-analysis {:job-id job-id})] + @(dc/transact conn [{:db/id id :textract-invoice/textract-status (:job-status result)}]))) + (dc/pull (dc/db conn) '[*] [:textract-invoice/job-id job-id]))) + +(defn textract->invoice-form* [job-id] + (let [coalesced (-> (textract/get-expense-analysis {:job-id job-id}) + (textract->coalesced)) + candidate-invoice (-> coalesced + (coalesced->invoice))] + [:form + [:div.grid.grid-cols-6.gap-4 + [:div.col-span-6 + (com/field {:label "Client"} + (com/text-input {:name (path->name [:invoice/client]) + :value (pull-attr (dc/db conn) :client/name (:invoice/client candidate-invoice)) + :placeholder "Client" + :disabled true + :autofocus true}))] + [:div.col-span-6 + (com/field {:label "Vendor"} + (com/text-input {:name (path->name [:invoice/vendor]) + :value (pull-attr (dc/db conn) :vendor/name (:invoice/vendor candidate-invoice)) + :placeholder "Vendor" + :disabled true + :autofocus true}))] + [:div.col-span-3 + (com/field {:label "Date"} + (com/text-input {:name (path->name [:invoice/date]) + :value (atime/unparse-local (:invoice/date candidate-invoice) + atime/normal-date) + :placeholder "Date" + :disabled true + :autofocus true}))] + [:div.col-span-3.col-start-1.text-xs + "Alternates: " + (butlast + (interleave + (map (fn [x] (com/link {:href "#"} (pr-str x))) (set (map (comp :text :value-detection) (:raw (:date coalesced))))) + (repeat ", ")))] + [:div.col-span-2.col-start-1 + (com/field {:label "Total"} + (com/text-input {:name (path->name [:invoice/total]) + :value (:invoice/total candidate-invoice) + :placeholder "Total" + :disabled true + :autofocus true}))] + [:div.col-span-3.col-start-1.text-xs + "Alternates: " + (butlast + (interleave + (map (fn [x] (com/link {:href "#"} (pr-str x))) (set (map (comp :text :value-detection) (:raw (:total coalesced))))) + (repeat ", ")))] + + [:div.col-span-2.col-start-1 + (com/field {:label "Invoice Number"} + (com/text-input {:name (path->name [:invoice/invoice-number]) + :value (:invoice/invoice-number candidate-invoice) + :placeholder "Invoice Number" + :disabled true + :autofocus true}))]]])) + +(defn job-progress* [job-id] + (when (pull-id (dc/db conn) [:textract-invoice/job-id job-id]) + (let [textract-invoice (refresh-job job-id)] + (cond + (= "IN_PROGRESS" (:textract-invoice/textract-status textract-invoice)) + [:div.bg-blue-100.border-2.border-dashed.rounded-lg.border-blue-300.p-4.max-w-md.w-md.text-center.cursor-pointer + {:hx-get (str (bidi/path-for ssr-routes/only-routes + :invoice-glimpse-job) + "?" (url/map->query {:job-id job-id})) + :hx-trigger "load delay:5s" + :hx-swap "outerHTML"} + "Analyzing job " (subs (:textract-invoice/job-id textract-invoice) 0 8) "..."] + + (= "SUCCEEDED" (:textract-invoice/textract-status textract-invoice)) + [:div.px-4 + [:a.mb-2 {:href (bidi/path-for ssr-routes/only-routes + :invoice-glimpse)} + (com/button {:color :secondary} "New import")] + [:div.flex.flex-row.space-x-4 + [:div {:style {:width "805"}} + (com/card {} + [:iframe.p-4 {:src (:textract-invoice/pdf-url textract-invoice) :width 791 :height 1024}])] + [:div {:class "basis-1/4"} + (com/card {} + [:div.p-4 + (textract->invoice-form* job-id)])]]])))) + +(defn job-progress [request] + (html-response (job-progress* (get (:query-params request) "job-id")))) + +(defn page* [job-id] + [:div.mt-4 + (com/card {} + [:div.px-4.py-3.space-y-4.flex.flex-col + [:h1.text-2xl.mb-3.font-bold "Invoice Glimpse"] + [:p.text-sm.italic "Import your invoices with the power of AI."] + [:div.flex.flex-row.space-x-4 (com/pill {:color :primary} "Beta") + [:span "Note: This upload is expirimental. Please only use PDFs with a single invoice in them."]] + (when job-id + (job-progress* job-id)) + + (when-not job-id + (upload-form*))])]) + +(defn begin-textract-file [s3-location] + (let [analysis (textract/start-expense-analysis {:document-location {:s3-object {:bucket (:data-bucket env) :name s3-location}}}) + textract-invoice {:textract-invoice/job-id (:job-id analysis) + :textract-invoice/textract-status "IN_PROGRESS" + :textract-invoice/pdf-url (str "http://" bucket-name ".s3-website-us-east-1.amazonaws.com/" s3-location)}] + @(dc/transact conn [textract-invoice]) + textract-invoice)) + +(defn upload [{:keys [identity] :as request}] + (let [file (or (get (:params request) :file) + (get (:params request) "file"))] + (mu/log ::uploading-file + :file file) + (with-open [s (io/input-stream (:tempfile file))] + (try + (let [s3-location (str "textract-files/" (UUID/randomUUID) "." (last (str/split (:filename file) #"[\\.]"))) + _ (with-open [stream (io/input-stream (:tempfile file))] + (s3/put-object (:data-bucket env) + s3-location + stream + {:content-type "application/pdf" + :content-length (.length (:tempfile file))})) + textract-invoice (begin-textract-file s3-location)] + + {:headers {"Location" + (str (bidi/path-for ssr-routes/only-routes + :invoice-glimpse) + "?" (url/map->query {:job-id (:textract-invoice/job-id textract-invoice)}))} + :status 302}) + (catch Exception e + (alog/error ::cant-begin-textract + :error e) + (html-response [:div (.getMessage e)])))))) + +(defn page [{:keys [matched-route request-method] :as request}] + (mu/log ::method + :method request-method) + (base-page + request + (com/page {:nav (com/admin-aside-nav) + :active-client (:client (:session request)) + :identity (:identity request) + :app-params {:hx-get (bidi/path-for ssr-routes/only-routes + :invoice-glimpse) + :hx-trigger "clientSelected from:body" + :hx-select "#app-contents" + :hx-swap "outerHTML swap:300ms"}} + (com/breadcrumbs {} + [:a {:href (bidi/path-for ssr-routes/only-routes + :admin)} + "Invoice"] + [:a {:href (bidi/path-for ssr-routes/only-routes + :invoice-glimpse)} + "Glimpse"]) + (page* (get (:query-params request) "job-id"))) + + "Invoice Glimpse")) + diff --git a/src/cljc/auto_ap/ssr_routes.cljc b/src/cljc/auto_ap/ssr_routes.cljc index 2d1e98b6..6694c2fb 100644 --- a/src/cljc/auto_ap/ssr_routes.cljc +++ b/src/cljc/auto_ap/ssr_routes.cljc @@ -2,6 +2,9 @@ (def routes {"logout" :logout "search" :search + "invoice" {"/glimpse" {"" {:get :invoice-glimpse + :post :invoice-glimpse-upload} + "/job" {:get :invoice-glimpse-job}}} "admin" {"/history" {"" :admin-history "/" :admin-history #"/search/?" :admin-history-search