This commit is contained in:
Bryce Covert
2019-10-26 22:48:51 -07:00
7 changed files with 271 additions and 101 deletions

View File

@@ -1,54 +1,46 @@
(ns auto-ap.parse
(:require [auto-ap.parse.excel :as excel]
[auto-ap.parse.templates :as t]
[auto-ap.parse.util :as u]
[auto-ap.parse.csv :as csv]
[clj-fuzzy.metrics :as m]
[clojure.java.shell :as sh]
[clojure.string :as str]
[clj-time.format :as f]
[clj-time.core :as time]))
[clj-time.core :as time]
[clojure.set :as set]))
(defmulti parse-value (fn [method _ _]
method))
(defmethod parse-value :trim-commas
[_ _ value]
(str/replace value #"," "")
)
(defmethod parse-value :clj-time
[_ format value]
(time/from-time-zone (f/parse (f/formatter format) value)
(time/time-zone-for-id "America/Los_Angeles")))
(defmethod parse-value nil
[_ _ value]
value)
(def last-text (atom nil))
(defn template-applies? [text {:keys [keywords]}]
(every? #(re-find % text) keywords))
(defn extract-template [text template]
(if (:multi template)
(mapcat
#(extract-template % (dissoc template :multi))
(str/split text (:multi template)))
(defn extract-template
([text template]
(if (:multi template)
(mapcat
#(extract-template % text (dissoc template :multi))
(str/split text (:multi template)))
(when template
[(->> template
:extract
(reduce-kv
(fn [result k v]
(let [value (some-> (first (map second (re-seq v text)))
str/trim )
[value-parser parser-params] (-> template :parser k)]
(assoc result k (parse-value value-parser parser-params value))))
{:vendor-code (:vendor template)
:text text}))])))
(extract-template text text template)))
([text full-text template]
(when (and template
(or (not (:multi-match? template))
(re-find (:multi-match? template) text )))
[(->> template
:extract
(reduce-kv
(fn [result k v]
(let [value (some-> (or (first (map second (re-seq v text)))
(first (map second (re-seq v full-text))))
str/trim )
[value-parser parser-params] (-> template :parser k)]
(assoc result k (u/parse-value value-parser parser-params value))))
{:vendor-code (:vendor template)
:text text
:full-text full-text}))])))
(defn parse [text]
(reset! last-text text)
@@ -67,6 +59,11 @@
:out
parse))
(defmethod parse-file
"csv"
[file filename]
(csv/parse-file file filename))
(defmethod parse-file
"xls"
[file filename]
@@ -79,22 +76,50 @@
(excel/parse-file file filename))
(defn best-match [clients invoice-client-name]
(->> clients
(mapcat (fn [{:keys [:db/id :client/matches :client/name] :as client :or {matches []}}]
(map (fn [m]
[client (m/jaccard (.toLowerCase invoice-client-name) (.toLowerCase m))])
(conj matches name))))
(filter #(< (second %) 0.25))
(sort-by second)
ffirst))
(let [fuzzy-match (->> clients
(mapcat (fn [{:keys [:db/id :client/matches :client/name] :as client :or {matches []}}]
(map (fn [m]
(println m invoice-client-name)
[client (m/jaccard (.toLowerCase invoice-client-name) (.toLowerCase m))])
(conj matches name))))
(filter #(< (second %) 0.25))
(sort-by second)
ffirst)
(defn best-location-match [client text]
word-set (set (str/split (.toLowerCase invoice-client-name) #"\s" ))
client-word-match (->> clients
(map
(fn [{:keys [:db/id :client/matches :client/name] :as client :or {matches []}}]
(let [client-words (-> #{}
(into
(mapcat
(fn [match] (str/split (.toLowerCase match) #"\s" ))
matches))
(into
(str/split (.toLowerCase name) #"\s" )))]
[client (count (set/intersection client-words word-set))])))
(filter (fn [[_ c]] (> c 0)))
(sort-by (fn [[_ c]] c))
reverse
ffirst)]
(or fuzzy-match client-word-match)))
(defn best-location-match [client text full-text]
(or (->> client
:client/location-matches
(mapcat (fn [{:keys [:location-match/location :location-match/matches]}]
(map (fn [match] [location match]) matches)))
(filter (fn [[location match]] (re-find (re-pattern (str "(?i)" match)) text)) )
(filter (fn [[location match]]
(println "loc " location match text)
(re-find (re-pattern (str "(?i)" match)) text)) )
first
first)
(->> client
:client/location-matches
(mapcat (fn [{:keys [:location-match/location :location-match/matches]}]
(map (fn [match] [location match]) matches)))
(filter (fn [[location match]] (re-find (re-pattern (str "(?i)" match)) full-text)) )
first
first)
(:client/default-location client)

View File

@@ -0,0 +1,46 @@
(ns auto-ap.parse.csv
(:require [auto-ap.parse.util :as u]
[clojure.data.csv :as csv]
[clojure.java.io :as io]
[clojure.string :as str]))
(defn determine
  "Chooses the CSV layout for `rows` by inspecting the header (first) row.

  Returns `:mama-lus` when the second header column contains
  \"Customer's PO No.\", otherwise nil. Empty input, or a header with fewer
  than two columns, also yields nil rather than throwing."
  [[header]]
  ;; `some->` short-circuits on a missing second column, so `str/includes?`
  ;; is never handed nil.
  (when (some-> (second header)
                (str/includes? "Customer's PO No."))
    :mama-lus))
(defmulti parse-csv
  "Parses the rows of a CSV file into invoice maps, dispatching on the layout
  value that `determine` computes from those rows."
  determine)

;; `defmulti`'s :default option names the default dispatch *value*; it does not
;; take a handler function. The nil-returning fallback is therefore registered
;; as a method under the standard :default dispatch value.
(defmethod parse-csv :default
  [_rows]
  nil)
(defmethod parse-csv :mama-lus
  [rows]
  ;; Every row after the header becomes one invoice map. Columns are read by
  ;; position: ignored, PO number, despatch number, invoice number, invoice
  ;; date, customer, value.
  (into []
        (comp (drop 1) ; skip the header row
              (map (fn [[_ po-number _despatch-number invoice-number invoice-date customer value :as row]]
                     {:vendor-code "Mama Lu's Foods"
                      :customer-identifier customer
                      :invoice-number (str po-number "-" invoice-number)
                      ;; "HH:mm" is hour:minute; the earlier "HH:ss" pattern
                      ;; read the minutes field as seconds.
                      :date (u/parse-value :clj-time "MM/dd/yy HH:mm" invoice-date)
                      :total value
                      :text (str/join " " row)})))
        rows))
;; Handles the nil dispatch value, which `determine` returns when it does not
;; recognise the header: nothing is parsed.
(defmethod parse-csv nil [_rows] nil)
(defn parse-file
  "Reads `file` as comma-separated values and returns whatever `parse-csv`
  produces for its rows.

  `file` is handed to `io/reader`. `filename` is accepted but not used here."
  [file filename]
  (with-open [reader (io/reader file)]
    ;; read-csv is lazy, so parse-csv has to run while the reader is still open.
    (parse-csv (csv/read-csv reader :separator \,))))

View File

@@ -10,38 +10,40 @@
(every? #(re-find % text) keywords))
(defn extract [wb {:keys [extract vendor]}]
(reduce-kv
(fn [invoice k [regex offset-row offset-column extract-regex]]
(assoc invoice k
(->> wb
(d/sheet-seq)
first
(d/cell-seq)
(filter (fn [cell]
(re-find regex (str (d/read-cell cell)))))
(map (fn [cell]
(let [address (.getAddress cell)
cell-value (str (d/read-cell (d/select-cell (.toString (CellAddress. (+ offset-row (.getRow address)) (+ offset-column (.getColumn address)) ))
(first (d/sheet-seq wb)))))]
(if extract-regex
(second (re-find extract-regex cell-value))
cell-value))))
first)))
{:vendor-code vendor}
extract))
(if (fn? extract)
(extract wb vendor)
[(reduce-kv
(fn [invoice k [regex offset-row offset-column extract-regex]]
(assoc invoice k
(->> wb
(d/sheet-seq)
first
(d/cell-seq)
(filter (fn [cell]
(re-find regex (str (d/read-cell cell)))))
(map (fn [cell]
(let [address (.getAddress cell)
cell-value (str (d/read-cell (d/select-cell (.toString (CellAddress. (+ offset-row (.getRow address)) (+ offset-column (.getColumn address)) ))
(first (d/sheet-seq wb)))))]
(if extract-regex
(second (re-find extract-regex cell-value))
cell-value))))
first)))
{:vendor-code vendor}
extract)]))
(defn parse-file
[file filename]
[(let [wb (d/load-workbook file)
text (->> wb
(d/sheet-seq)
first
(d/cell-seq)
(map d/read-cell)
(str/join " "))]
(->> t/excel-templates
(filter (partial template-applies? text))
first
(extract wb)
))])
(let [wb (d/load-workbook file)
text (->> wb
(d/sheet-seq)
first
(d/cell-seq)
(map d/read-cell)
(str/join " "))]
(->> t/excel-templates
(filter (partial template-applies? text))
first
(extract wb)
)))

View File

@@ -1,4 +1,7 @@
(ns auto-ap.parse.templates)
(ns auto-ap.parse.templates
(:require [dk.ative.docjure.spreadsheet :as d]
[clojure.string :as str])
(:import (org.apache.poi.ss.util CellAddress)))
(def pdf-templates
@@ -48,7 +51,40 @@
:customer-identifier #"Bill To:[^\n]+\n\s*([\w ]+)"
:invoice-number #"Invoice\s([\w\./]+)*"
:total #"Total Invoice\s+([0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]}}])
:parser {:date [:clj-time "MM/dd/yy"]}}
{:vendor "Southbay Fresh Produce"
:keywords [#"SOUTH BAY FRESH PRODUCE"]
:extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"FAX:[^\n]+\n\s+([A-Za-z ]+)\s{2}"
:invoice-number #"^[0-9]+/[0-9]+/[0-9]+\s+(\d+)"
:total #"\$([0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]}
:multi #"\n"
:multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+(\d+)"}
{:vendor "Performance Food Group"
:keywords [#"performancefoodservice"]
:extract {:date #"DELIVER TO[^\n]+\n.+?(?=[0-9]+/[0-9]+/[0-9]+)([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"DELIVER TO[^\n]+\n\s*[\S ]+?(?=\s{2,}([\S ]+?)\s{2,})" ;; ([\S ]+)\s{2,}
:invoice-number #"DELIVER TO[^\n]+\n.+?(?=\d+)(\d+)\s*\n"
:total #"([0-9.]+)\s+Status Code"}
:parser {:date [:clj-time "MM/dd/yy"]}}
{:vendor "US Foods"
:keywords [#"US Foods"]
:extract {:date #"INVOICE NUMBER[^\n]+\n\n\d+\s+\d+\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,}
:invoice-number #"INVOICE NUMBER[^\n]+\n\n\d+\s+(\d+)"
:total #"DELIVERED AMOUNT\s+\$([0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]}}
{:vendor "Sysco"
:keywords [#"SYSCO"]
:extract {:date #"INVOICE NUMBER[^\n]+\n([^\n]+)\n"
:customer-identifier #"INVOICE NUMBER[^\n]+\n[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,}
:invoice-number #"INVOICE NUMBER[^\n]+\n[^\n]+\n.*?(?=[\d]{9})(\d{9})"
:total #"\s{2,}INVOICE\s{2,}.*?(?=TOTAL)TOTAL\s+([0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]}}])
(defn offset
  "Returns the string form of the cell address that lies `x` columns and `y`
  rows away from cell `c` (either delta may be negative)."
  [c x y]
  (let [address (.getAddress c)
        row     (+ y (.getRow address))
        column  (+ x (.getColumn address))]
    (.toString (CellAddress. row column))))
(def excel-templates
[{:vendor "Isp Productions"
@@ -62,4 +98,27 @@
:extract {:customer-identifier [#"Customer #" 1 0]
:total [#"Total Invoice" 0 5]
:date [#"Date" 0 0 #"Date: (.*)"]
:invoice-number [#"Invoice #" 0 0 #"Invoice #: (.*)"]}}])
:invoice-number [#"Invoice #" 0 0 #"Invoice #: (.*)"]}}
{:vendor "DVW Commercial"
:keywords [#"Thank you!!!"]
:extract (fn [wb vendor]
(let [[sheet] (d/sheet-seq wb)]
(transduce (comp (filter (fn [c]
(re-find #"Invoice" (str (d/read-cell c)))))
(map (fn [c]
(let [customer-identifier (d/read-cell (->> (d/select-cell (offset c -3 0) sheet)
(iterate (fn [c]
(d/select-cell (offset c 0 -1) sheet)))
(filter (fn [c]
(not (str/blank? (d/read-cell c)))))
first))]
{:customer-identifier customer-identifier
:text customer-identifier
:full-text customer-identifier
:date (d/read-cell (d/select-cell (offset c 2 0) sheet))
:invoice-number (d/read-cell (d/select-cell (offset c 4 0) sheet))
:total (str (d/read-cell (d/select-cell (offset c 8 0) sheet)))
:vendor-code vendor}))))
conj
[]
(d/cell-seq sheet))))}])

View File

@@ -0,0 +1,25 @@
(ns auto-ap.parse.util
(:require [clj-fuzzy.metrics :as m]
[clojure.java.shell :as sh]
[clojure.string :as str]
[clj-time.format :as f]
[clj-time.core :as time]))
(defmulti parse-value
  "Converts a raw extracted value. Called with three arguments
  (method, parameters, value) and dispatches on the first one."
  (fn [method _params _value] method))
;; :trim-commas removes every comma from `value`; the parameters argument is ignored.
(defmethod parse-value :trim-commas
  [_method _params value]
  (str/replace value "," ""))
;; :clj-time parses `value` with the pattern given as `format`, then applies
;; `time/from-time-zone` with the "America/Los_Angeles" zone to the result.
(defmethod parse-value :clj-time
  [_method format value]
  (let [formatter (f/formatter format)
        parsed    (f/parse formatter value)
        zone      (time/time-zone-for-id "America/Los_Angeles")]
    (time/from-time-zone parsed zone)))
;; With a nil method the value is passed through unchanged.
(defmethod parse-value nil [_method _params value] value)

View File

@@ -8,6 +8,7 @@
[auto-ap.datomic :refer [remove-nils uri]]
[datomic.api :as d]
[auto-ap.parse :as parse]
[auto-ap.parse.util :as parse-u]
[auto-ap.graphql.utils :refer [assert-admin]]
[auto-ap.routes.utils :refer [wrap-secure]]
[clj-time.coerce :refer [to-date]]
@@ -89,7 +90,7 @@
(defn parse-date [{:keys [raw-date]}]
(try
(parse/parse-value :clj-time "MM/dd/yyyy" raw-date)
(parse-u/parse-value :clj-time "MM/dd/yyyy" raw-date)
(catch Exception e
(throw (Exception. (str "Could not parse date from '" raw-date "'") e)))))
@@ -167,32 +168,41 @@
(let [clients (d-clients/get-all)
_ (clojure.pprint/pprint imports)
transactions (reduce (fn [result {:keys [invoice-number customer-identifier total date vendor-code text] :as info}]
transactions (reduce (fn [result {:keys [invoice-number customer-identifier total date vendor-code text full-text] :as info}]
(println "searching for" vendor-code)
(let [[matching-vendor default-account] (->> (d/query
(cond-> {:query {:find ['?vendor '?default-account]
{:query {:find ['?vendor '?default-account]
:in ['$ '?vendor-name]
:where ['[?vendor :vendor/name ?vendor-name]
'[?vendor :vendor/default-account ?default-account]]}
:args [(d/db (d/connect uri)) vendor-code]}))
first)
:args [(d/db (d/connect uri)) vendor-code]})
first)
_ (println "matching" customer-identifier "-" matching-vendor)
matching-client (parse/best-match clients customer-identifier)
_ (println "New invoice matches client '" matching-client "', vendor '" matching-vendor "', account '" default-account "'")
matching-location (parse/best-location-match matching-client text )
[existing-id existing-outstanding-balance existing-status import-status] (->> (d/query
(cond-> {:query {:find ['?e '?outstanding-balance '?status '?import-status2]
:in ['$ '?invoice-number '?vendor '?client]
:where '[[?e :invoice/invoice-number ?invoice-number]
[?e :invoice/vendor ?vendor]
[?e :invoice/client ?client]
[?e :invoice/outstanding-balance ?outstanding-balance]
[?e :invoice/status ?status]
[?e :invoice/import-status ?import-status]
[?import-status :db/ident ?import-status2]]}
:args [(d/db (d/connect uri)) invoice-number matching-vendor (:db/id matching-client)]}))
first)]
_ (println "New invoice matches client" matching-client)
matching-location (parse/best-location-match matching-client text full-text)
[existing-id existing-outstanding-balance existing-status import-status] (when (and matching-client matching-location)
(->> (d/query
(cond-> {:query {:find ['?e '?outstanding-balance '?status '?import-status2]
:in ['$ '?invoice-number '?vendor '?client]
:where '[[?e :invoice/invoice-number ?invoice-number]
[?e :invoice/vendor ?vendor]
[?e :invoice/client ?client]
[?e :invoice/outstanding-balance ?outstanding-balance]
[?e :invoice/status ?status]
[?e :invoice/import-status ?import-status]
[?import-status :db/ident ?import-status2]]}
:args [(d/db (d/connect uri)) invoice-number matching-vendor (:db/id matching-client)]}))
first))]
(if (= :import-status/imported import-status)
(cond
(not (and matching-location matching-client))
result
(= :import-status/imported import-status)
result
:else
(conj result (remove-nils #:invoice {:invoice/client (:db/id matching-client)
:invoice/vendor matching-vendor
:invoice/invoice-number invoice-number

View File

@@ -7,6 +7,7 @@
[auto-ap.views.components.layouts :refer [side-bar-layout]]
[auto-ap.views.components.invoices.side-bar :refer [invoices-side-bar]]
[auto-ap.views.utils :refer [dispatch-event]]
[auto-ap.utils :refer [by]]
[auto-ap.entities.vendors :as vendor]
[auto-ap.views.components.invoice-table :refer [invoice-table] :as invoice-table]
[cljsjs.dropzone :as dropzone]
@@ -67,6 +68,8 @@
(fn [db [_ data]]
(-> db
(assoc ::invoice-page (first (:invoice-page data)))
(update-in [::invoice-page] (fn [ip]
(assoc ip :checked (by :id (:invoices ip)))))
(assoc-in [:status :loading] false))))
(re-frame/reg-event-fx