Best effort glimpse

This commit is contained in:
2024-05-31 16:37:40 -07:00
parent bd3432f540
commit 495751df48
3 changed files with 97 additions and 32 deletions

View File

@@ -1,14 +1,18 @@
(ns auto-ap.parse
(:require
[auto-ap.parse.csv :as csv]
[auto-ap.parse.excel :as excel]
[auto-ap.parse.templates :as t]
[auto-ap.parse.util :as u]
[clj-fuzzy.metrics :as m]
[clojure.java.shell :as sh]
[clojure.set :as set]
[clojure.string :as str]
[auto-ap.logging :as alog]))
(:require [amazonica.aws.lambda :as lambda]
[amazonica.aws.s3 :as s3]
[auto-ap.logging :as alog]
[auto-ap.parse.csv :as csv]
[auto-ap.parse.excel :as excel]
[auto-ap.parse.templates :as t]
[auto-ap.parse.util :as u]
[auto-ap.ssr.vendor :as vendors]
[clj-fuzzy.metrics :as m]
[clojure.data.json :as json]
[clojure.java.io :as io]
[clojure.java.shell :as sh]
[clojure.set :as set]
[clojure.string :as str]))
;; Namespace-level atom, initialised to nil.
;; NOTE(review): presumably holds the most recently extracted document text
;; (e.g. for REPL debugging); no reader or writer is visible in this view — confirm.
(def last-text (atom nil))
@@ -57,12 +61,44 @@
(defmulti parse-file
  "Parses an uploaded file. Dispatches on the lower-cased text after the
  final `.` in `filename` (for example `pdf` or `csv`); the first argument
  is ignored for dispatch and handed to the selected method unchanged."
  (fn [_ filename]
    (-> filename
        (str/split #"\.")
        last
        str/lower-case)))
(defn invoke-glimpse2
  "Invokes the `glimpse2` Lambda function for the object stored under key `f`.

  The request payload is a JSON object with a single `url` entry built from
  the hard-coded data host plus `f`. The `:payload` of the invocation result
  is slurped and decoded with `json/read-str`; the decoded value is printed
  to stdout and then returned.

  NOTE(review): no check for a Lambda-side function error is visible here —
  confirm whether error payloads need to be distinguished from results."
  [f]
  (let [request  (alog/peek ::x {"url" (str "https://" "data.prod.app.integreatconsult.com" "/" f)})
        response (lambda/invoke {:function-name "glimpse2"
                                 :payload       (json/write-str request)})
        result   (json/read-str (slurp (:payload response)))]
    ;; Kept from the original: echo the decoded response to stdout.
    (println result)
    result))
(defn glimpse2
  "Best-effort invoice extraction through the `glimpse2` Lambda.

  Copies `file` (anything `io/input-stream` accepts) to S3 under a fresh
  `glimpse2/import/<uuid>.pdf` key, calls `invoke-glimpse2` with that key,
  and maps the string-keyed response onto a one-element vector holding an
  invoice map with `:date`, `:customer-identifier`, `:account-number`,
  `:vendor-code`, `:total` and `:invoice-number`.

  Any Exception raised along the way is logged as a warning and nil is
  returned instead.

  NOTE(review): the uploaded object is not removed afterwards in this
  function — confirm whether cleanup happens elsewhere."
  [file]
  (try
    (let [tmp-key (str "glimpse2/import/" (java.util.UUID/randomUUID) ".pdf")]
      ;; Upload first so the Lambda can fetch the document by URL.
      (with-open [in (io/input-stream file)]
        (s3/put-object {:bucket-name  "data.prod.app.integreatconsult.com"
                        :key          tmp-key
                        :input-stream in}))
      (let [{:strs [date
                    customer_identifier
                    account_number
                    vendor_identifier
                    total
                    invoice_number]} (invoke-glimpse2 tmp-key)]
        ;; Entry order matches the original literal so the date is parsed
        ;; before the vendor lookup runs.
        [{:date                (u/parse-value :clj-time "yyyy-MM-dd" (str/trim date))
          :customer-identifier customer_identifier
          :account-number      (not-empty account_number)
          :vendor-code         (get (vendors/best-match vendor_identifier) "label")
          :total               total
          :invoice-number      invoice_number}]))
    (catch Exception e
      (alog/warn ::glimpse2-not-work :error e)
      nil)))
;; `parse-file` method for the "pdf" dispatch value.
;; NOTE(review): this span comes from a rendered diff whose +/- markers were
;; stripped. The first `(-> (sh/sh ...) :out parse))` form appears to be the
;; pre-change body and the `(or ...)` form the post-change body — confirm
;; against the actual file before editing.
(defmethod parse-file
"pdf"
[file _]
;; Runs `pdftotext -layout <file> -` and passes the captured stdout to `parse`.
(-> (sh/sh "pdftotext" "-layout" file "-")
:out
parse))
;; Same text-based parse first; `glimpse2` is evaluated only when that
;; result is nil/false, and its result is passed through `alog/peek`.
;; NOTE(review): an empty collection is truthy in Clojure, so if `parse`
;; returns an empty seq for unrecognised PDFs the fallback never runs —
;; verify what `parse` returns when no template matches.
(or
(-> (sh/sh "pdftotext" "-layout" file "-")
:out
parse)
(alog/peek ::glimpse2-result (glimpse2 file))))
(defmethod parse-file
"csv"