Best effort glimpse
This commit is contained in:
@@ -1,14 +1,18 @@
|
||||
(ns auto-ap.parse
|
||||
(:require
|
||||
[auto-ap.parse.csv :as csv]
|
||||
[auto-ap.parse.excel :as excel]
|
||||
[auto-ap.parse.templates :as t]
|
||||
[auto-ap.parse.util :as u]
|
||||
[clj-fuzzy.metrics :as m]
|
||||
[clojure.java.shell :as sh]
|
||||
[clojure.set :as set]
|
||||
[clojure.string :as str]
|
||||
[auto-ap.logging :as alog]))
|
||||
(:require [amazonica.aws.lambda :as lambda]
|
||||
[amazonica.aws.s3 :as s3]
|
||||
[auto-ap.logging :as alog]
|
||||
[auto-ap.parse.csv :as csv]
|
||||
[auto-ap.parse.excel :as excel]
|
||||
[auto-ap.parse.templates :as t]
|
||||
[auto-ap.parse.util :as u]
|
||||
[auto-ap.ssr.vendor :as vendors]
|
||||
[clj-fuzzy.metrics :as m]
|
||||
[clojure.data.json :as json]
|
||||
[clojure.java.io :as io]
|
||||
[clojure.java.shell :as sh]
|
||||
[clojure.set :as set]
|
||||
[clojure.string :as str]))
|
||||
|
||||
;; Atom initialised to nil.
;; NOTE(review): the name suggests it holds the most recently extracted text
;; (e.g. for REPL debugging), but no reader or writer is visible in this
;; excerpt — confirm against the rest of the namespace.
(def last-text (atom nil))
|
||||
|
||||
@@ -57,12 +61,44 @@
|
||||
|
||||
;; Dispatches on the lower-cased text after the final "." in `filename`
;; (e.g. "Invoice.PDF" -> "pdf"). The first argument is ignored for dispatch.
(defmulti parse-file
  (fn [_ filename]
    (-> filename
        (str/split #"\.")
        last
        str/lower-case)))
|
||||
|
||||
(defn invoke-glimpse2
  "Invokes the Lambda function named \"glimpse2\" for the object key `f`.

  Builds an https URL from the fixed host and `f`, sends it as the JSON
  payload {\"url\": ...} (logged through `alog/peek`), then slurps the
  response's :payload and parses it as JSON. The parsed value is printed
  to stdout and returned."
  [f]
  (let [request  (alog/peek ::x {"url" (str "https://" "data.prod.app.integreatconsult.com" "/" f)})
        response (lambda/invoke {:function-name "glimpse2"
                                 :payload       (json/write-str request)})
        result   (json/read-str (slurp (:payload response)))]
    ;; Same stdout trace the original produced via `doto ... println`.
    (println result)
    result))
|
||||
|
||||
(defn glimpse2
  "Best-effort invoice extraction for `file` via `invoke-glimpse2`.

  Uploads `file` to S3 under a random \"glimpse2/import/<uuid>.pdf\" key,
  calls `invoke-glimpse2` with that key, and maps the string-keyed response
  onto a one-element vector holding a single invoice map.

  Returns nil (after logging a warning) if any Exception is thrown along
  the way, including a missing \"date\" field, which makes `str/trim` throw."
  [file]
  (try
    (let [tmp-key (str "glimpse2/import/" (java.util.UUID/randomUUID) ".pdf")]
      ;; Upload first; the stream is closed as soon as put-object returns.
      (with-open [pdf-stream (io/input-stream file)]
        (s3/put-object {:bucket-name  "data.prod.app.integreatconsult.com"
                        :key          tmp-key
                        :input-stream pdf-stream}))
      ;; Bindings follow the original evaluation order:
      ;; date parse, then account number, then vendor lookup.
      (let [g            (invoke-glimpse2 tmp-key)
            field        (fn [k] (get g k))
            invoice-date (u/parse-value :clj-time "yyyy-MM-dd" (str/trim (field "date")))
            account      (not-empty (field "account_number"))
            vendor       (vendors/best-match (field "vendor_identifier"))]
        [{:date                invoice-date
          :customer-identifier (field "customer_identifier")
          :account-number      account
          :vendor-code         (get vendor "label")
          :total               (field "total")
          :invoice-number      (field "invoice_number")}]))
    (catch Exception e
      (alog/warn ::glimpse2-not-work :error e)
      nil)))
|
||||
|
||||
;; PDF handler for `parse-file`.
;;
;; Runs `pdftotext -layout <file> -` and hands its stdout to `parse`. When
;; that yields nil/false, falls back to `glimpse2`, logging the fallback
;; result through `alog/peek`. The second argument (the filename used for
;; dispatch) is ignored.
;;
;; NOTE(review): `or` only falls through on nil/false. If `parse` can return
;; an empty collection when no template matches, the glimpse2 fallback will
;; never run — confirm `parse`'s return value for the no-match case.
(defmethod parse-file
  "pdf"
  [file _]
  (or
   (-> (sh/sh "pdftotext" "-layout" file "-")
       :out
       parse)
   (alog/peek ::glimpse2-result (glimpse2 file))))
|
||||
|
||||
(defmethod parse-file
|
||||
"csv"
|
||||
|
||||
Reference in New Issue
Block a user