Disallows ChatGPT for emailed invoices.

This commit is contained in:
2025-03-25 21:26:40 -07:00
parent 581033001b
commit 01329bbdf5
5 changed files with 30 additions and 24 deletions

File diff suppressed because one or more lines are too long

View File

@@ -62,7 +62,7 @@
:input-stream (io/input-stream filename) :input-stream (io/input-stream filename)
:metadata {:content-type "application/pdf" :metadata {:content-type "application/pdf"
:content-length (.length (io/file filename))}) :content-length (.length (io/file filename))})
(let [imports (->> (parse/parse-file filename filename) (let [imports (->> (parse/parse-file filename filename :allow-glimpse? false)
(map #(assoc % (map #(assoc %
:source-url (str "https://" (:data-bucket env) :source-url (str "https://" (:data-bucket env)
"/" "/"

View File

@@ -34,14 +34,14 @@
([text full-text template] ([text full-text template]
(when (and template (when (and template
(or (not (:multi-match? template)) (or (not (:multi-match? template))
(re-find (:multi-match? template) text ))) (re-find (:multi-match? template) text)))
[(->> template [(->> template
:extract :extract
(reduce-kv (reduce-kv
(fn [result k v] (fn [result k v]
(let [value (some-> (or (first (map second (re-seq v text))) (let [value (some-> (or (first (map second (re-seq v text)))
(first (map second (re-seq v full-text)))) (first (map second (re-seq v full-text))))
str/trim ) str/trim)
[value-parser parser-params] (-> template :parser k)] [value-parser parser-params] (-> template :parser k)]
(assoc result k (try (assoc result k (try
(u/parse-value value-parser parser-params value) (u/parse-value value-parser parser-params value)
@@ -61,14 +61,20 @@
(extract-template text))) (extract-template text)))
(defmulti parse-file
  "Parses a file, dispatching on the lower-cased extension of `filename`
  (the text after its last `.`).

  Arguments:
  - first argument: handed to the selected method unchanged
  - filename: the name whose extension selects the method
  - trailing keyword options: ignored for dispatch, forwarded to the method

  Options:
  - :allow-glimpse? (default false) - read by the \"pdf\" method. Only when
    truthy may that method fall back to `glimpse2` after its pdftotext-based
    parse returns nothing."
  ;; Variadic so that callers may append keyword options; a fixed two-argument
  ;; dispatch fn would reject them with an arity error before any method runs.
  (fn [_ filename & _opts]
    (.toLowerCase (last (str/split filename #"\.")))))
(defn invoke-glimpse2 [f] (defn invoke-glimpse2 [f]
(let [result (slurp (:payload (lambda/invoke {:client-config {:request-timeout 120000 (let [result (slurp (:payload (lambda/invoke {:client-config {:request-timeout 120000
:socket-timeout 120000}} {:function-name "glimpse2" :payload :socket-timeout 120000}} {:function-name "glimpse2" :payload
(json/write-str (json/write-str
(alog/peek ::x {"url" (str "https://" "data.prod.app.integreatconsult.com" "/" f ) })) (alog/peek ::x {"url" (str "https://" "data.prod.app.integreatconsult.com" "/" f)}))})))]
})))]
(alog/info ::glimpse2-payload :payload result) (alog/info ::glimpse2-payload :payload result)
(-> result (-> result
@@ -81,46 +87,46 @@
(s3/put-object {:bucket-name "data.prod.app.integreatconsult.com" (s3/put-object {:bucket-name "data.prod.app.integreatconsult.com"
:key tmp-key :key tmp-key
:input-stream f})) :input-stream f}))
is (invoke-glimpse2 tmp-key) ] is (invoke-glimpse2 tmp-key)]
(alog/peek ::glimpse2-result is) (alog/peek ::glimpse2-result is)
(for [i is] (for [i is]
{:date (u/parse-value :clj-time "yyyy-MM-dd" (str/trim (get i "date"))) {:date (u/parse-value :clj-time "yyyy-MM-dd" (str/trim (get i "date")))
:customer-identifier (get i "customer_identifier") :customer-identifier (get i "customer_identifier")
:account-number (not-empty (get i "account_number")) :account-number (not-empty (get i "account_number"))
:vendor-search (get i "vendor_identifier") :vendor-search (get i "vendor_identifier")
:vendor-code (-> (vendors/best-match (get i "vendor_identifier") ) :vendor-code (-> (vendors/best-match (get i "vendor_identifier"))
(get "label")) (get "label"))
:total (get i "total") :total (get i "total")
:invoice-number (get i "invoice_number") :invoice-number (get i "invoice_number")
:template "None found - defaulting to ChatGPT"}) :template "None found - defaulting to ChatGPT"}))
)
(catch Exception e (catch Exception e
(alog/warn ::glimpse2-not-work :error e) (alog/warn ::glimpse2-not-work :error e)
nil))) nil)))
;; PDF handling: extract the text with `pdftotext -layout` and hand it to
;; `parse`. Only when that yields nothing AND the caller passed
;; `:allow-glimpse? true` is the `glimpse2` fallback attempted (default: off).
(defmethod parse-file
  "pdf"
  [file _ & {:keys [allow-glimpse?] :or {allow-glimpse? false}}]
  (or
   (-> (sh/sh "pdftotext" "-layout" file "-")
       :out
       parse)
   ;; `when` rather than `and`: a disallowed fallback must yield nil, not
   ;; `false`. Callers thread this result into `map`, and `map` over a
   ;; Boolean throws once the lazy seq is realized, whereas nil maps to ().
   (when allow-glimpse?
     (alog/peek ::glimpse2-result (glimpse2 file)))))
;; CSV files are delegated to `csv/parse-file`. Trailing options are accepted
;; (so callers can pass the same arguments for every extension) and ignored.
(defmethod parse-file "csv"
  [path original-name & _ignored-opts]
  (csv/parse-file path original-name))
;; Legacy Excel (.xls) files are delegated to `excel/parse-file`. Trailing
;; options are accepted and ignored.
(defmethod parse-file "xls"
  [path original-name & _ignored-opts]
  (excel/parse-file path original-name))
;; Excel (.xlsx) files are delegated to `excel/parse-file`, the same handler
;; used for "xls". Trailing options are accepted and ignored.
(defmethod parse-file "xlsx"
  [path original-name & _ignored-opts]
  (excel/parse-file path original-name))
(defn best-match (defn best-match
@@ -147,17 +153,17 @@
(sort-by second) (sort-by second)
first) first)
word-set (set (filter (complement str/blank?) (str/split (.toLowerCase invoice-client-name) #"[\s:\-]" ))) word-set (set (filter (complement str/blank?) (str/split (.toLowerCase invoice-client-name) #"[\s:\-]")))
client-word-match (->> clients client-word-match (->> clients
(map (map
(fn [{:keys [:client/matches :client/name] :as client :or {matches []}}] (fn [{:keys [:client/matches :client/name] :as client :or {matches []}}]
(let [client-words (-> #{} (let [client-words (-> #{}
(into (into
(mapcat (mapcat
(fn [match] (str/split (.toLowerCase match) #"\s" )) (fn [match] (str/split (.toLowerCase match) #"\s"))
matches)) matches))
(into (into
(str/split (.toLowerCase name) #"\s" )))] (str/split (.toLowerCase name) #"\s")))]
[client (count (set/intersection client-words word-set))]))) [client (count (set/intersection client-words word-set))])))
(filter (fn [[_ c]] (> c 0))) (filter (fn [[_ c]] (> c 0)))
(sort-by (fn [[_ c]] c)) (sort-by (fn [[_ c]] c))
@@ -184,14 +190,14 @@
(mapcat (fn [{:keys [:location-match/location :location-match/matches]}] (mapcat (fn [{:keys [:location-match/location :location-match/matches]}]
(map (fn [match] [location match]) matches))) (map (fn [match] [location match]) matches)))
(filter (fn [[_ match]] (filter (fn [[_ match]]
(re-find (re-pattern (str "(?i)" match)) text)) ) (re-find (re-pattern (str "(?i)" match)) text)))
first first
first) first)
(->> client (->> client
:client/location-matches :client/location-matches
(mapcat (fn [{:keys [:location-match/location :location-match/matches]}] (mapcat (fn [{:keys [:location-match/location :location-match/matches]}]
(map (fn [match] [location match]) matches))) (map (fn [match] [location match]) matches)))
(filter (fn [[_ match]] (re-find (re-pattern (str "(?i)" match)) full-text)) ) (filter (fn [[_ match]] (re-find (re-pattern (str "(?i)" match)) full-text)))
first first
first) first)
(:client/default-location client) (:client/default-location client)

View File

@@ -278,7 +278,7 @@
"text/csv" "text/csv"
"application/pdf") "application/pdf")
:content-length (.length tempfile)}) :content-length (.length tempfile)})
imports (->> (parse/parse-file (.getPath tempfile) filename) imports (->> (parse/parse-file (.getPath tempfile) filename :allow-glimpse? true)
(map #(assoc % (map #(assoc %
:client-override client :client-override client
:location-override location :location-override location

View File

@@ -716,7 +716,7 @@
:content-length (.length tempfile)}) :content-length (.length tempfile)})
imports (->> (if force-chatgpt imports (->> (if force-chatgpt
(parse/glimpse2 (.getPath tempfile)) (parse/glimpse2 (.getPath tempfile))
(parse/parse-file (.getPath tempfile) filename)) (parse/parse-file (.getPath tempfile) filename :allow-glimpse? true))
(map #(assoc % (map #(assoc %
:client-override force-client :client-override force-client
:location-override force-location :location-override force-location