merged.
This commit is contained in:
@@ -1,54 +1,46 @@
|
||||
(ns auto-ap.parse
|
||||
(:require [auto-ap.parse.excel :as excel]
|
||||
[auto-ap.parse.templates :as t]
|
||||
[auto-ap.parse.util :as u]
|
||||
[auto-ap.parse.csv :as csv]
|
||||
[clj-fuzzy.metrics :as m]
|
||||
[clojure.java.shell :as sh]
|
||||
[clojure.string :as str]
|
||||
[clj-time.format :as f]
|
||||
[clj-time.core :as time]))
|
||||
[clj-time.core :as time]
|
||||
[clojure.set :as set]))
|
||||
|
||||
|
||||
;; Post-processes a raw extracted string. Dispatches on the first argument
;; (a parser keyword, or nil); the remaining two arguments -- parser
;; parameters and the raw value -- are handed to the selected method.
(defmulti parse-value
  (fn [parser-kind _params _raw]
    parser-kind))
|
||||
|
||||
|
||||
;; Removes every comma from `value` (e.g. thousands separators in amounts).
;; The parser parameters are ignored. A nil `value` -- which happens when
;; the extraction regex did not match -- is returned as nil instead of
;; blowing up inside str/replace.
(defmethod parse-value :trim-commas
  [_ _ value]
  (some-> value (str/replace #"," "")))
|
||||
|
||||
;; Parses `value` with a clj-time formatter built from the pattern string
;; `format`, then passes the result through time/from-time-zone together
;; with the "America/Los_Angeles" zone.
(defmethod parse-value :clj-time
  [_ format value]
  (let [parsed  (-> format f/formatter (f/parse value))
        pacific (time/time-zone-for-id "America/Los_Angeles")]
    (time/from-time-zone parsed pacific)))
|
||||
|
||||
;; No parser requested (nil dispatch value): hand the raw value back untouched.
(defmethod parse-value nil
  [_method _params raw]
  raw)
|
||||
|
||||
;; Atom that `parse` resets to the text it was last called with.
(def last-text (atom nil))
|
||||
|
||||
|
||||
(defn template-applies?
  "Returns true when every regex in the template's :keywords is found
  somewhere in `text`. A template without keywords applies to any text."
  [text {:keys [keywords]}]
  (not-any? (fn [pattern]
              (nil? (re-find pattern text)))
            keywords))
|
||||
|
||||
(defn extract-template
  "Applies `template` to invoice text and returns a sequence of extracted
  maps (nil or empty when nothing applies).

  [text template]
    When the template has a :multi regex, `text` is split on it and every
    chunk is extracted separately with the whole document as `full-text`.
    Otherwise the text serves as both chunk and full text.

  [text full-text template]
    Returns nil when `template` is nil, or when the template has a
    :multi-match? regex that is not found in `text`. Otherwise returns a
    one-element vector holding a map with :vendor-code, :text, :full-text
    and one entry per key of the template's :extract map. Each entry is the
    trimmed second element of the first regex match in `text` (falling back
    to `full-text`), run through u/parse-value with the parser configured
    under the same key in the template's :parser map."
  ([text template]
   (if-let [splitter (:multi template)]
     (let [single-template (dissoc template :multi)]
       (mapcat (fn [chunk]
                 (extract-template chunk text single-template))
               (str/split text splitter)))
     (extract-template text text template)))
  ([text full-text template]
   (let [gate (:multi-match? template)]
     (when (and template
                (or (not gate)
                    (re-find gate text)))
       (let [first-capture (fn [pattern s]
                             (second (first (re-seq pattern s))))
             seed          {:vendor-code (:vendor template)
                            :text        text
                            :full-text   full-text}
             pull          (fn [acc field pattern]
                             (let [raw (some-> (or (first-capture pattern text)
                                                   (first-capture pattern full-text))
                                               str/trim)
                                   [value-parser parser-params] (-> template :parser field)]
                               (assoc acc field
                                      (u/parse-value value-parser parser-params raw))))]
         [(reduce-kv pull seed (:extract template))])))))
|
||||
|
||||
(defn parse [text]
|
||||
(reset! last-text text)
|
||||
@@ -67,6 +59,11 @@
|
||||
:out
|
||||
parse))
|
||||
|
||||
;; Files dispatched as "csv" are delegated unchanged to the CSV parser
;; namespace.
(defmethod parse-file "csv"
  [file filename]
  (csv/parse-file file filename))
|
||||
|
||||
(defmethod parse-file
|
||||
"xls"
|
||||
[file filename]
|
||||
@@ -79,22 +76,50 @@
|
||||
(excel/parse-file file filename))
|
||||
|
||||
(defn best-match
  "Finds the client that best matches `invoice-client-name`, or nil.

  Two strategies are tried in order:
  1. Fuzzy: m/jaccard is computed between the lower-cased invoice name and
     each of a client's candidate names (its :client/matches entries plus
     its :client/name). Lower scores are treated as closer; the lowest
     score below 0.25 wins.
  2. Word overlap: only when the fuzzy pass finds nothing, the client
     sharing the most whitespace-separated words with the invoice name is
     chosen (at least one shared word; ties go to the later client).

  Returns nil when `invoice-client-name` is nil or blank."
  [clients invoice-client-name]
  (when-not (str/blank? invoice-client-name)
    (let [wanted      (str/lower-case invoice-client-name)
          ;; Split on runs of whitespace and drop empty tokens, so leading
          ;; or doubled spaces never produce a shared "" word.
          tokenize    (fn [s]
                        (->> (str/split (str/lower-case s) #"\s+")
                             (remove str/blank?)))
          ;; Every name a client may be known by; nil entries are skipped.
          candidates  (fn [{:keys [:client/matches :client/name]}]
                        (remove nil? (conj (vec matches) name)))
          fuzzy-match (->> clients
                           (mapcat (fn [client]
                                     (map (fn [candidate]
                                            [client (m/jaccard wanted (str/lower-case candidate))])
                                          (candidates client))))
                           (filter #(< (second %) 0.25))
                           (sort-by second)
                           ffirst)]
      (or fuzzy-match
          (let [word-set (set (tokenize wanted))]
            (->> clients
                 (map (fn [client]
                        [client (count (set/intersection
                                        (into #{} (mapcat tokenize) (candidates client))
                                        word-set))]))
                 (filter (fn [[_ shared]] (pos? shared)))
                 (sort-by second)
                 ;; Last of the ascending sort = highest overlap.
                 last
                 first))))))
|
||||
|
||||
(defn best-location-match [client text full-text]
|
||||
(or (->> client
|
||||
:client/location-matches
|
||||
(mapcat (fn [{:keys [:location-match/location :location-match/matches]}]
|
||||
|
||||
(map (fn [match] [location match]) matches)))
|
||||
(filter (fn [[location match]] (re-find (re-pattern (str "(?i)" match)) text)) )
|
||||
(filter (fn [[location match]]
|
||||
(println "loc " location match text)
|
||||
(re-find (re-pattern (str "(?i)" match)) text)) )
|
||||
first
|
||||
first)
|
||||
(->> client
|
||||
:client/location-matches
|
||||
(mapcat (fn [{:keys [:location-match/location :location-match/matches]}]
|
||||
(map (fn [match] [location match]) matches)))
|
||||
(filter (fn [[location match]] (re-find (re-pattern (str "(?i)" match)) full-text)) )
|
||||
first
|
||||
first)
|
||||
(:client/default-location client)
|
||||
|
||||
46
src/clj/auto_ap/parse/csv.clj
Normal file
46
src/clj/auto_ap/parse/csv.clj
Normal file
@@ -0,0 +1,46 @@
|
||||
(ns auto-ap.parse.csv
|
||||
(:require [auto-ap.parse.util :as u]
|
||||
[clojure.data.csv :as csv]
|
||||
[clojure.java.io :as io]
|
||||
[clojure.string :as str]))
|
||||
|
||||
(defn determine
  "Dispatch function for `parse-csv`. Looks at the first row (the header)
  of `rows` and returns :mama-lus when its second cell contains
  \"Customer's PO No.\"; returns nil otherwise, including for empty input
  or a header with fewer than two cells."
  [[header]]
  (let [second-cell (second header)]
    (cond
      ;; Guard against a missing cell: str/includes? throws on nil.
      (and (string? second-cell)
           (str/includes? second-cell "Customer's PO No."))
      :mama-lus

      :else
      nil)))
|
||||
|
||||
;; Turns parsed CSV rows into invoice maps. Dispatches on `determine`, which
;; inspects the header row and yields a vendor keyword or nil.
;;
;; defmulti's :default option names the default dispatch *value*; it is not
;; a fallback implementation. The fallback is therefore registered as an
;; ordinary method under the standard :default dispatch value.
(defmulti parse-csv determine)

;; Fallback for any dispatch value without a dedicated method: nothing parsed.
(defmethod parse-csv :default
  [_rows]
  nil)
|
||||
|
||||
;; Parses rows dispatched as :mama-lus. The header row is skipped and every
;; remaining row becomes one invoice map. Columns are read by position:
;; 1 = PO number, 3 = invoice number, 4 = invoice date, 5 = customer,
;; 6 = total; columns 0 and 2 are not used. Returns a vector.
(defmethod parse-csv :mama-lus
  [rows]
  (into []
        (comp (drop 1)
              (map (fn [[_ po-number _despatch-number invoice-number invoice-date customer value :as row]]
                     {:vendor-code "Mama Lu's Foods"
                      :customer-identifier customer
                      :invoice-number (str po-number "-" invoice-number)
                      ;; HH:mm (hours:minutes); the previous HH:ss pattern
                      ;; read the minutes field as seconds.
                      :date (u/parse-value :clj-time "MM/dd/yy HH:mm" invoice-date)
                      :total value
                      :text (str/join " " row)})))
        rows))
|
||||
|
||||
;; `determine` returned nil (header not recognised): nothing to parse.
(defmethod parse-csv nil
  [_rows]
  nil)
|
||||
|
||||
(defn parse-file
  "Reads `file` as comma-separated CSV and returns whatever `parse-csv`
  produces for its rows. `filename` is accepted but not used.

  The reader is closed when this function returns, so `parse-csv` methods
  must realize their result before returning rather than hand back a lazy
  sequence over the rows."
  [file filename]
  (with-open [reader (io/reader file)]
    (let [rows (csv/read-csv reader :separator \,)]
      (parse-csv rows))))
|
||||
|
||||
@@ -10,38 +10,40 @@
|
||||
(every? #(re-find % text) keywords))
|
||||
|
||||
(defn extract
  "Extracts invoice data from workbook `wb` using a template's :extract
  and :vendor entries.

  When :extract is a function it is called as (extract wb vendor) and its
  result is returned as is. Otherwise :extract is a map of
  field -> [regex offset-row offset-column extract-regex]: the first cell
  on the first sheet whose text matches `regex` is located, the cell at
  the given row/column offset from it is read as a string, and -- when
  `extract-regex` is present -- the second element of its match is kept.
  The result is a one-element vector holding the field map plus
  :vendor-code."
  [wb {:keys [extract vendor]}]
  (if (fn? extract)
    (extract wb vendor)
    (let [field-value
          (fn [[regex offset-row offset-column extract-regex]]
            (let [sheet         (first (d/sheet-seq wb))
                  label?        (fn [cell]
                                  (re-find regex (str (d/read-cell cell))))
                  read-relative (fn [cell]
                                  (let [address    (.getAddress cell)
                                        target     (CellAddress. (+ offset-row (.getRow address))
                                                                 (+ offset-column (.getColumn address)))
                                        cell-value (str (d/read-cell (d/select-cell (.toString target) sheet)))]
                                    (if extract-regex
                                      (second (re-find extract-regex cell-value))
                                      cell-value)))]
              (->> (d/cell-seq sheet)
                   (filter label?)
                   (map read-relative)
                   first)))]
      [(reduce-kv (fn [invoice k spec]
                    (assoc invoice k (field-value spec)))
                  {:vendor-code vendor}
                  extract)])))
|
||||
|
||||
(defn parse-file
  "Loads `file` as a workbook, joins the values of every cell on its first
  sheet with spaces, picks the first entry of t/excel-templates that
  applies to that text, and returns the result of `extract` for it.
  `filename` is accepted but not used."
  [file filename]
  (let [wb          (d/load-workbook file)
        first-sheet (first (d/sheet-seq wb))
        text        (str/join " " (map d/read-cell (d/cell-seq first-sheet)))
        template    (first (filter #(template-applies? text %) t/excel-templates))]
    (extract wb template)))
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
(ns auto-ap.parse.templates)
|
||||
(ns auto-ap.parse.templates
|
||||
(:require [dk.ative.docjure.spreadsheet :as d]
|
||||
[clojure.string :as str])
|
||||
(:import (org.apache.poi.ss.util CellAddress)))
|
||||
|
||||
|
||||
(def pdf-templates
|
||||
@@ -48,7 +51,40 @@
|
||||
:customer-identifier #"Bill To:[^\n]+\n\s*([\w ]+)"
|
||||
:invoice-number #"Invoice\s([\w\./]+)*"
|
||||
:total #"Total Invoice\s+([0-9.]+)"}
|
||||
:parser {:date [:clj-time "MM/dd/yy"]}}])
|
||||
:parser {:date [:clj-time "MM/dd/yy"]}}
|
||||
{:vendor "Southbay Fresh Produce"
|
||||
:keywords [#"SOUTH BAY FRESH PRODUCE"]
|
||||
:extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
|
||||
:customer-identifier #"FAX:[^\n]+\n\s+([A-Za-z ]+)\s{2}"
|
||||
:invoice-number #"^[0-9]+/[0-9]+/[0-9]+\s+(\d+)"
|
||||
:total #"\$([0-9.]+)"}
|
||||
:parser {:date [:clj-time "MM/dd/yyyy"]}
|
||||
:multi #"\n"
|
||||
:multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+(\d+)"}
|
||||
{:vendor "Performance Food Group"
|
||||
:keywords [#"performancefoodservice"]
|
||||
:extract {:date #"DELIVER TO[^\n]+\n.+?(?=[0-9]+/[0-9]+/[0-9]+)([0-9]+/[0-9]+/[0-9]+)"
|
||||
:customer-identifier #"DELIVER TO[^\n]+\n\s*[\S ]+?(?=\s{2,}([\S ]+?)\s{2,})" ;; ([\S ]+)\s{2,}
|
||||
:invoice-number #"DELIVER TO[^\n]+\n.+?(?=\d+)(\d+)\s*\n"
|
||||
:total #"([0-9.]+)\s+Status Code"}
|
||||
:parser {:date [:clj-time "MM/dd/yy"]}}
|
||||
{:vendor "US Foods"
|
||||
:keywords [#"US Foods"]
|
||||
:extract {:date #"INVOICE NUMBER[^\n]+\n\n\d+\s+\d+\s+([0-9]+/[0-9]+/[0-9]+)"
|
||||
:customer-identifier #"BILL TO[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,}
|
||||
:invoice-number #"INVOICE NUMBER[^\n]+\n\n\d+\s+(\d+)"
|
||||
:total #"DELIVERED AMOUNT\s+\$([0-9.]+)"}
|
||||
:parser {:date [:clj-time "MM/dd/yyyy"]}}
|
||||
{:vendor "Sysco"
|
||||
:keywords [#"SYSCO"]
|
||||
:extract {:date #"INVOICE NUMBER[^\n]+\n([^\n]+)\n"
|
||||
:customer-identifier #"INVOICE NUMBER[^\n]+\n[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,}
|
||||
:invoice-number #"INVOICE NUMBER[^\n]+\n[^\n]+\n.*?(?=[\d]{9})(\d{9})"
|
||||
:total #"\s{2,}INVOICE\s{2,}.*?(?=TOTAL)TOTAL\s+([0-9.]+)"}
|
||||
:parser {:date [:clj-time "MM/dd/yyyy"]}}])
|
||||
|
||||
(defn offset
  "Returns the string form of the CellAddress located `x` columns and `y`
  rows away from cell `c` (presumably an A1-style reference -- confirm
  against the POI CellAddress docs)."
  [c x y]
  (let [origin (.getAddress c)
        row    (+ y (.getRow origin))
        column (+ x (.getColumn origin))]
    (.toString (CellAddress. row column))))
|
||||
|
||||
(def excel-templates
|
||||
[{:vendor "Isp Productions"
|
||||
@@ -62,4 +98,27 @@
|
||||
:extract {:customer-identifier [#"Customer #" 1 0]
|
||||
:total [#"Total Invoice" 0 5]
|
||||
:date [#"Date" 0 0 #"Date: (.*)"]
|
||||
:invoice-number [#"Invoice #" 0 0 #"Invoice #: (.*)"]}}])
|
||||
:invoice-number [#"Invoice #" 0 0 #"Invoice #: (.*)"]}}
|
||||
{:vendor "DVW Commercial"
|
||||
:keywords [#"Thank you!!!"]
|
||||
:extract (fn [wb vendor]
|
||||
(let [[sheet] (d/sheet-seq wb)]
|
||||
(transduce (comp (filter (fn [c]
|
||||
(re-find #"Invoice" (str (d/read-cell c)))))
|
||||
(map (fn [c]
|
||||
(let [customer-identifier (d/read-cell (->> (d/select-cell (offset c -3 0) sheet)
|
||||
(iterate (fn [c]
|
||||
(d/select-cell (offset c 0 -1) sheet)))
|
||||
(filter (fn [c]
|
||||
(not (str/blank? (d/read-cell c)))))
|
||||
first))]
|
||||
{:customer-identifier customer-identifier
|
||||
:text customer-identifier
|
||||
:full-text customer-identifier
|
||||
:date (d/read-cell (d/select-cell (offset c 2 0) sheet))
|
||||
:invoice-number (d/read-cell (d/select-cell (offset c 4 0) sheet))
|
||||
:total (str (d/read-cell (d/select-cell (offset c 8 0) sheet)))
|
||||
:vendor-code vendor}))))
|
||||
conj
|
||||
[]
|
||||
(d/cell-seq sheet))))}])
|
||||
|
||||
25
src/clj/auto_ap/parse/util.clj
Normal file
25
src/clj/auto_ap/parse/util.clj
Normal file
@@ -0,0 +1,25 @@
|
||||
(ns auto-ap.parse.util
|
||||
(:require [clj-fuzzy.metrics :as m]
|
||||
[clojure.java.shell :as sh]
|
||||
[clojure.string :as str]
|
||||
[clj-time.format :as f]
|
||||
[clj-time.core :as time]))
|
||||
|
||||
;; Post-processes a raw extracted string. Dispatches on the first argument
;; (a parser keyword, or nil); the remaining two arguments -- parser
;; parameters and the raw value -- are handed to the selected method.
(defmulti parse-value
  (fn [parser-kind _params _raw]
    parser-kind))
|
||||
|
||||
|
||||
;; Removes every comma from `value` (e.g. thousands separators in amounts).
;; The parser parameters are ignored. A nil `value` -- which happens when
;; the extraction regex did not match -- is returned as nil instead of
;; blowing up inside str/replace.
(defmethod parse-value :trim-commas
  [_ _ value]
  (some-> value (str/replace #"," "")))
|
||||
|
||||
;; Parses `value` with a clj-time formatter built from the pattern string
;; `format`, then passes the result through time/from-time-zone together
;; with the "America/Los_Angeles" zone.
(defmethod parse-value :clj-time
  [_ format value]
  (let [parsed  (-> format f/formatter (f/parse value))
        pacific (time/time-zone-for-id "America/Los_Angeles")]
    (time/from-time-zone parsed pacific)))
|
||||
|
||||
;; No parser requested (nil dispatch value): hand the raw value back untouched.
(defmethod parse-value nil
  [_method _params raw]
  raw)
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
[auto-ap.datomic :refer [remove-nils uri]]
|
||||
[datomic.api :as d]
|
||||
[auto-ap.parse :as parse]
|
||||
[auto-ap.parse.util :as parse-u]
|
||||
[auto-ap.graphql.utils :refer [assert-admin]]
|
||||
[auto-ap.routes.utils :refer [wrap-secure]]
|
||||
[clj-time.coerce :refer [to-date]]
|
||||
@@ -89,7 +90,7 @@
|
||||
|
||||
(defn parse-date
  "Parses the :raw-date entry of the argument map with the :clj-time parser
  and the pattern \"MM/dd/yyyy\". Any Exception raised while parsing is
  rethrown as a new Exception that names the offending value and keeps the
  original as its cause."
  [{:keys [raw-date]}]
  (try
    (parse-u/parse-value :clj-time "MM/dd/yyyy" raw-date)
    (catch Exception cause
      (let [message (str "Could not parse date from '" raw-date "'")]
        (throw (Exception. message cause))))))
|
||||
|
||||
@@ -167,32 +168,41 @@
|
||||
(let [clients (d-clients/get-all)
|
||||
_ (clojure.pprint/pprint imports)
|
||||
|
||||
transactions (reduce (fn [result {:keys [invoice-number customer-identifier total date vendor-code text] :as info}]
|
||||
transactions (reduce (fn [result {:keys [invoice-number customer-identifier total date vendor-code text full-text] :as info}]
|
||||
(println "searching for" vendor-code)
|
||||
(let [[matching-vendor default-account] (->> (d/query
|
||||
(cond-> {:query {:find ['?vendor '?default-account]
|
||||
{:query {:find ['?vendor '?default-account]
|
||||
:in ['$ '?vendor-name]
|
||||
:where ['[?vendor :vendor/name ?vendor-name]
|
||||
'[?vendor :vendor/default-account ?default-account]]}
|
||||
:args [(d/db (d/connect uri)) vendor-code]}))
|
||||
first)
|
||||
:args [(d/db (d/connect uri)) vendor-code]})
|
||||
first)
|
||||
_ (println "matching" customer-identifier "-" matching-vendor)
|
||||
matching-client (parse/best-match clients customer-identifier)
|
||||
_ (println "New invoice matches client '" matching-client "', vendor '" matching-vendor "', account '" default-account "'")
|
||||
matching-location (parse/best-location-match matching-client text )
|
||||
[existing-id existing-outstanding-balance existing-status import-status] (->> (d/query
|
||||
(cond-> {:query {:find ['?e '?outstanding-balance '?status '?import-status2]
|
||||
:in ['$ '?invoice-number '?vendor '?client]
|
||||
:where '[[?e :invoice/invoice-number ?invoice-number]
|
||||
[?e :invoice/vendor ?vendor]
|
||||
[?e :invoice/client ?client]
|
||||
[?e :invoice/outstanding-balance ?outstanding-balance]
|
||||
[?e :invoice/status ?status]
|
||||
[?e :invoice/import-status ?import-status]
|
||||
[?import-status :db/ident ?import-status2]]}
|
||||
:args [(d/db (d/connect uri)) invoice-number matching-vendor (:db/id matching-client)]}))
|
||||
first)]
|
||||
_ (println "New invoice matches client" matching-client)
|
||||
matching-location (parse/best-location-match matching-client text full-text)
|
||||
[existing-id existing-outstanding-balance existing-status import-status] (when (and matching-client matching-location)
|
||||
(->> (d/query
|
||||
(cond-> {:query {:find ['?e '?outstanding-balance '?status '?import-status2]
|
||||
:in ['$ '?invoice-number '?vendor '?client]
|
||||
:where '[[?e :invoice/invoice-number ?invoice-number]
|
||||
[?e :invoice/vendor ?vendor]
|
||||
[?e :invoice/client ?client]
|
||||
[?e :invoice/outstanding-balance ?outstanding-balance]
|
||||
[?e :invoice/status ?status]
|
||||
[?e :invoice/import-status ?import-status]
|
||||
[?import-status :db/ident ?import-status2]]}
|
||||
:args [(d/db (d/connect uri)) invoice-number matching-vendor (:db/id matching-client)]}))
|
||||
first))]
|
||||
|
||||
(if (= :import-status/imported import-status)
|
||||
(cond
|
||||
(not (and matching-location matching-client))
|
||||
result
|
||||
|
||||
(= :import-status/imported import-status)
|
||||
result
|
||||
|
||||
:else
|
||||
(conj result (remove-nils #:invoice {:invoice/client (:db/id matching-client)
|
||||
:invoice/vendor matching-vendor
|
||||
:invoice/invoice-number invoice-number
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
[auto-ap.views.components.layouts :refer [side-bar-layout]]
|
||||
[auto-ap.views.components.invoices.side-bar :refer [invoices-side-bar]]
|
||||
[auto-ap.views.utils :refer [dispatch-event]]
|
||||
[auto-ap.utils :refer [by]]
|
||||
[auto-ap.entities.vendors :as vendor]
|
||||
[auto-ap.views.components.invoice-table :refer [invoice-table] :as invoice-table]
|
||||
[cljsjs.dropzone :as dropzone]
|
||||
@@ -67,6 +68,8 @@
|
||||
(fn [db [_ data]]
|
||||
(-> db
|
||||
(assoc ::invoice-page (first (:invoice-page data)))
|
||||
(update-in [::invoice-page] (fn [ip]
|
||||
(assoc ip :checked (by :id (:invoices ip)))))
|
||||
(assoc-in [:status :loading] false))))
|
||||
|
||||
(re-frame/reg-event-fx
|
||||
|
||||
Reference in New Issue
Block a user