Disallows ChatGPT fallback for emailed invoices.
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -62,7 +62,7 @@
|
|||||||
:input-stream (io/input-stream filename)
|
:input-stream (io/input-stream filename)
|
||||||
:metadata {:content-type "application/pdf"
|
:metadata {:content-type "application/pdf"
|
||||||
:content-length (.length (io/file filename))})
|
:content-length (.length (io/file filename))})
|
||||||
(let [imports (->> (parse/parse-file filename filename)
|
(let [imports (->> (parse/parse-file filename filename :allow-glimpse? false)
|
||||||
(map #(assoc %
|
(map #(assoc %
|
||||||
:source-url (str "https://" (:data-bucket env)
|
:source-url (str "https://" (:data-bucket env)
|
||||||
"/"
|
"/"
|
||||||
|
|||||||
@@ -34,14 +34,14 @@
|
|||||||
([text full-text template]
|
([text full-text template]
|
||||||
(when (and template
|
(when (and template
|
||||||
(or (not (:multi-match? template))
|
(or (not (:multi-match? template))
|
||||||
(re-find (:multi-match? template) text )))
|
(re-find (:multi-match? template) text)))
|
||||||
[(->> template
|
[(->> template
|
||||||
:extract
|
:extract
|
||||||
(reduce-kv
|
(reduce-kv
|
||||||
(fn [result k v]
|
(fn [result k v]
|
||||||
(let [value (some-> (or (first (map second (re-seq v text)))
|
(let [value (some-> (or (first (map second (re-seq v text)))
|
||||||
(first (map second (re-seq v full-text))))
|
(first (map second (re-seq v full-text))))
|
||||||
str/trim )
|
str/trim)
|
||||||
[value-parser parser-params] (-> template :parser k)]
|
[value-parser parser-params] (-> template :parser k)]
|
||||||
(assoc result k (try
|
(assoc result k (try
|
||||||
(u/parse-value value-parser parser-params value)
|
(u/parse-value value-parser parser-params value)
|
||||||
@@ -61,14 +61,20 @@
|
|||||||
(extract-template text)))
|
(extract-template text)))
|
||||||
|
|
||||||
|
|
||||||
(defmulti parse-file (fn [_ filename] (.toLowerCase (last (str/split filename #"\." )))))
|
|
||||||
|
(defmulti parse-file
|
||||||
|
"Parses a file based on its extension. Accepts options as additional arguments.
|
||||||
|
Options:
|
||||||
|
- :allow-glimpse? (default false) - If true, the PDF parser may fall back to glimpse2 when the pdftotext-based parse returns nil. Ignored for csv/xls/xlsx."
|
||||||
|
(fn [_ filename & opts]
|
||||||
|
(.toLowerCase (last (str/split filename #"\.")))))
|
||||||
|
|
||||||
(defn invoke-glimpse2 [f]
|
(defn invoke-glimpse2 [f]
|
||||||
(let [result (slurp (:payload (lambda/invoke {:client-config {:request-timeout 120000
|
(let [result (slurp (:payload (lambda/invoke {:client-config {:request-timeout 120000
|
||||||
:socket-timeout 120000}} {:function-name "glimpse2" :payload
|
:socket-timeout 120000}} {:function-name "glimpse2" :payload
|
||||||
(json/write-str
|
(json/write-str
|
||||||
(alog/peek ::x {"url" (str "https://" "data.prod.app.integreatconsult.com" "/" f ) }))
|
(alog/peek ::x {"url" (str "https://" "data.prod.app.integreatconsult.com" "/" f)}))})))]
|
||||||
})))]
|
|
||||||
|
|
||||||
(alog/info ::glimpse2-payload :payload result)
|
(alog/info ::glimpse2-payload :payload result)
|
||||||
(-> result
|
(-> result
|
||||||
@@ -81,46 +87,46 @@
|
|||||||
(s3/put-object {:bucket-name "data.prod.app.integreatconsult.com"
|
(s3/put-object {:bucket-name "data.prod.app.integreatconsult.com"
|
||||||
:key tmp-key
|
:key tmp-key
|
||||||
:input-stream f}))
|
:input-stream f}))
|
||||||
is (invoke-glimpse2 tmp-key) ]
|
is (invoke-glimpse2 tmp-key)]
|
||||||
(alog/peek ::glimpse2-result is)
|
(alog/peek ::glimpse2-result is)
|
||||||
(for [i is]
|
(for [i is]
|
||||||
{:date (u/parse-value :clj-time "yyyy-MM-dd" (str/trim (get i "date")))
|
{:date (u/parse-value :clj-time "yyyy-MM-dd" (str/trim (get i "date")))
|
||||||
:customer-identifier (get i "customer_identifier")
|
:customer-identifier (get i "customer_identifier")
|
||||||
:account-number (not-empty (get i "account_number"))
|
:account-number (not-empty (get i "account_number"))
|
||||||
:vendor-search (get i "vendor_identifier")
|
:vendor-search (get i "vendor_identifier")
|
||||||
:vendor-code (-> (vendors/best-match (get i "vendor_identifier") )
|
:vendor-code (-> (vendors/best-match (get i "vendor_identifier"))
|
||||||
(get "label"))
|
(get "label"))
|
||||||
:total (get i "total")
|
:total (get i "total")
|
||||||
:invoice-number (get i "invoice_number")
|
:invoice-number (get i "invoice_number")
|
||||||
:template "None found - defaulting to ChatGPT"})
|
:template "None found - defaulting to ChatGPT"}))
|
||||||
)
|
|
||||||
(catch Exception e
|
(catch Exception e
|
||||||
(alog/warn ::glimpse2-not-work :error e)
|
(alog/warn ::glimpse2-not-work :error e)
|
||||||
nil)))
|
nil)))
|
||||||
|
|
||||||
(defmethod parse-file
|
(defmethod parse-file
|
||||||
"pdf"
|
"pdf"
|
||||||
[file _]
|
[file _ & {:keys [allow-glimpse?] :or {allow-glimpse? false}}]
|
||||||
(or
|
(or
|
||||||
(-> (sh/sh "pdftotext" "-layout" file "-")
|
(-> (sh/sh "pdftotext" "-layout" file "-")
|
||||||
:out
|
:out
|
||||||
parse)
|
parse)
|
||||||
(alog/peek ::glimpse2-result (glimpse2 file))))
|
(and allow-glimpse? (alog/peek ::glimpse2-result (glimpse2 file)))))
|
||||||
|
|
||||||
(defmethod parse-file
|
(defmethod parse-file
|
||||||
"csv"
|
"csv"
|
||||||
[file filename]
|
[file filename & _]
|
||||||
(csv/parse-file file filename))
|
(csv/parse-file file filename))
|
||||||
|
|
||||||
(defmethod parse-file
|
(defmethod parse-file
|
||||||
"xls"
|
"xls"
|
||||||
[file filename]
|
[file filename & _]
|
||||||
(excel/parse-file file filename))
|
(excel/parse-file file filename))
|
||||||
|
|
||||||
|
|
||||||
(defmethod parse-file
|
(defmethod parse-file
|
||||||
"xlsx"
|
"xlsx"
|
||||||
[file filename]
|
[file filename & _]
|
||||||
(excel/parse-file file filename))
|
(excel/parse-file file filename))
|
||||||
|
|
||||||
(defn best-match
|
(defn best-match
|
||||||
@@ -147,17 +153,17 @@
|
|||||||
(sort-by second)
|
(sort-by second)
|
||||||
first)
|
first)
|
||||||
|
|
||||||
word-set (set (filter (complement str/blank?) (str/split (.toLowerCase invoice-client-name) #"[\s:\-]" )))
|
word-set (set (filter (complement str/blank?) (str/split (.toLowerCase invoice-client-name) #"[\s:\-]")))
|
||||||
client-word-match (->> clients
|
client-word-match (->> clients
|
||||||
(map
|
(map
|
||||||
(fn [{:keys [:client/matches :client/name] :as client :or {matches []}}]
|
(fn [{:keys [:client/matches :client/name] :as client :or {matches []}}]
|
||||||
(let [client-words (-> #{}
|
(let [client-words (-> #{}
|
||||||
(into
|
(into
|
||||||
(mapcat
|
(mapcat
|
||||||
(fn [match] (str/split (.toLowerCase match) #"\s" ))
|
(fn [match] (str/split (.toLowerCase match) #"\s"))
|
||||||
matches))
|
matches))
|
||||||
(into
|
(into
|
||||||
(str/split (.toLowerCase name) #"\s" )))]
|
(str/split (.toLowerCase name) #"\s")))]
|
||||||
[client (count (set/intersection client-words word-set))])))
|
[client (count (set/intersection client-words word-set))])))
|
||||||
(filter (fn [[_ c]] (> c 0)))
|
(filter (fn [[_ c]] (> c 0)))
|
||||||
(sort-by (fn [[_ c]] c))
|
(sort-by (fn [[_ c]] c))
|
||||||
@@ -184,14 +190,14 @@
|
|||||||
(mapcat (fn [{:keys [:location-match/location :location-match/matches]}]
|
(mapcat (fn [{:keys [:location-match/location :location-match/matches]}]
|
||||||
(map (fn [match] [location match]) matches)))
|
(map (fn [match] [location match]) matches)))
|
||||||
(filter (fn [[_ match]]
|
(filter (fn [[_ match]]
|
||||||
(re-find (re-pattern (str "(?i)" match)) text)) )
|
(re-find (re-pattern (str "(?i)" match)) text)))
|
||||||
first
|
first
|
||||||
first)
|
first)
|
||||||
(->> client
|
(->> client
|
||||||
:client/location-matches
|
:client/location-matches
|
||||||
(mapcat (fn [{:keys [:location-match/location :location-match/matches]}]
|
(mapcat (fn [{:keys [:location-match/location :location-match/matches]}]
|
||||||
(map (fn [match] [location match]) matches)))
|
(map (fn [match] [location match]) matches)))
|
||||||
(filter (fn [[_ match]] (re-find (re-pattern (str "(?i)" match)) full-text)) )
|
(filter (fn [[_ match]] (re-find (re-pattern (str "(?i)" match)) full-text)))
|
||||||
first
|
first
|
||||||
first)
|
first)
|
||||||
(:client/default-location client)
|
(:client/default-location client)
|
||||||
|
|||||||
@@ -278,7 +278,7 @@
|
|||||||
"text/csv"
|
"text/csv"
|
||||||
"application/pdf")
|
"application/pdf")
|
||||||
:content-length (.length tempfile)})
|
:content-length (.length tempfile)})
|
||||||
imports (->> (parse/parse-file (.getPath tempfile) filename)
|
imports (->> (parse/parse-file (.getPath tempfile) filename :allow-glimpse? true)
|
||||||
(map #(assoc %
|
(map #(assoc %
|
||||||
:client-override client
|
:client-override client
|
||||||
:location-override location
|
:location-override location
|
||||||
|
|||||||
@@ -716,7 +716,7 @@
|
|||||||
:content-length (.length tempfile)})
|
:content-length (.length tempfile)})
|
||||||
imports (->> (if force-chatgpt
|
imports (->> (if force-chatgpt
|
||||||
(parse/glimpse2 (.getPath tempfile))
|
(parse/glimpse2 (.getPath tempfile))
|
||||||
(parse/parse-file (.getPath tempfile) filename))
|
(parse/parse-file (.getPath tempfile) filename :allow-glimpse? true))
|
||||||
(map #(assoc %
|
(map #(assoc %
|
||||||
:client-override force-client
|
:client-override force-client
|
||||||
:location-override force-location
|
:location-override force-location
|
||||||
|
|||||||
Reference in New Issue
Block a user