From 823d4998a7230adea9f0237d0e83a2ac3f1a2009 Mon Sep 17 00:00:00 2001 From: Bryce Covert Date: Mon, 21 Oct 2019 23:25:36 -0700 Subject: [PATCH 1/6] Supports importing mama lu --- src/clj/auto_ap/parse.clj | 65 ++++++++++++++++------------- src/clj/auto_ap/parse/csv.clj | 46 ++++++++++++++++++++ src/clj/auto_ap/parse/util.clj | 25 +++++++++++ src/clj/auto_ap/routes/invoices.clj | 33 +++++++++------ 4 files changed, 127 insertions(+), 42 deletions(-) create mode 100644 src/clj/auto_ap/parse/csv.clj create mode 100644 src/clj/auto_ap/parse/util.clj diff --git a/src/clj/auto_ap/parse.clj b/src/clj/auto_ap/parse.clj index 30cf3cf6..8f3ce7cd 100644 --- a/src/clj/auto_ap/parse.clj +++ b/src/clj/auto_ap/parse.clj @@ -1,31 +1,16 @@ (ns auto-ap.parse (:require [auto-ap.parse.excel :as excel] [auto-ap.parse.templates :as t] + [auto-ap.parse.util :as u] + [auto-ap.parse.csv :as csv] [clj-fuzzy.metrics :as m] [clojure.java.shell :as sh] [clojure.string :as str] [clj-time.format :as f] - [clj-time.core :as time])) + [clj-time.core :as time] + [clojure.set :as set])) -(defmulti parse-value (fn [method _ _] - method)) - - -(defmethod parse-value :trim-commas - [_ _ value] - (str/replace value #"," "") - ) - -(defmethod parse-value :clj-time - [_ format value] - (time/from-time-zone (f/parse (f/formatter format) value) - (time/time-zone-for-id "America/Los_Angeles"))) - -(defmethod parse-value nil - [_ _ value] - value) - (def last-text (atom nil)) @@ -46,7 +31,7 @@ (let [value (some-> (first (map second (re-seq v text))) str/trim ) [value-parser parser-params] (-> template :parser k)] - (assoc result k (parse-value value-parser parser-params value)))) + (assoc result k (u/parse-value value-parser parser-params value)))) {:vendor-code (:vendor template) :text text}))]))) @@ -67,6 +52,11 @@ :out parse)) +(defmethod parse-file + "csv" + [file filename] + (csv/parse-file file filename)) + (defmethod parse-file "xls" [file filename] @@ -79,15 +69,32 @@ (excel/parse-file file 
filename)) (defn best-match [clients invoice-client-name] - (->> clients - - (mapcat (fn [{:keys [:db/id :client/matches :client/name] :as client :or {matches []}}] - (map (fn [m] - [client (m/jaccard (.toLowerCase invoice-client-name) (.toLowerCase m))]) - (conj matches name)))) - (filter #(< (second %) 0.25)) - (sort-by second) - ffirst)) + (let [fuzzy-match (->> clients + (mapcat (fn [{:keys [:db/id :client/matches :client/name] :as client :or {matches []}}] + (map (fn [m] + [client (m/jaccard (.toLowerCase invoice-client-name) (.toLowerCase m))]) + (conj matches name)))) + (filter #(< (second %) 0.25)) + (sort-by second) + ffirst) + + word-set (set (str/split (.toLowerCase invoice-client-name) #"\s" )) + client-word-match (->> clients + (map + (fn [{:keys [:db/id :client/matches :client/name] :as client :or {matches []}}] + (let [client-words (-> #{} + (into + (mapcat + (fn [match] (str/split (.toLowerCase match) #"\s" )) + matches)) + (into + (str/split (.toLowerCase name) #"\s" )))] + [client (count (set/intersection client-words word-set))]))) + (filter (fn [[_ c]] (> c 0))) + (sort-by (fn [[_ c]] c)) + reverse + ffirst)] + (or fuzzy-match client-word-match))) (defn best-location-match [client text] (or (->> client diff --git a/src/clj/auto_ap/parse/csv.clj b/src/clj/auto_ap/parse/csv.clj new file mode 100644 index 00000000..bfbf39c1 --- /dev/null +++ b/src/clj/auto_ap/parse/csv.clj @@ -0,0 +1,46 @@ +(ns auto-ap.parse.csv + (:require [auto-ap.parse.util :as u] + [clojure.data.csv :as csv] + [clojure.java.io :as io] + [clojure.string :as str])) + +(defn determine + [[header :as z]] + (prn header) + (cond (str/includes? 
(second header) "Customer's PO No.") + :mama-lus + + :else + nil)) + +(defmulti parse-csv + determine + :default (fn default [rows] + nil)) + +(defmethod parse-csv :mama-lus + [rows] + (println "MAMA LU") + (transduce + (comp (drop 1) + (map (fn [[_ po-number despatch-number invoice-number invoice-date customer value :as row]] + {:vendor-code "Mama Lu's Foods" + :customer-identifier customer + :invoice-number (str invoice-number " " po-number) + :date (u/parse-value :clj-time "MM/dd/yy HH:ss" invoice-date) + :total value + :text (str/join " " row)}))) + conj + [] + rows)) + +(defmethod parse-csv nil + [rows] + nil) + +(defn parse-file [file filename] + (println "HEREERE") + (with-open [reader (io/reader file)] + (let [rows (csv/read-csv reader :separator \,)] + (parse-csv rows)))) + diff --git a/src/clj/auto_ap/parse/util.clj b/src/clj/auto_ap/parse/util.clj new file mode 100644 index 00000000..380d9b7c --- /dev/null +++ b/src/clj/auto_ap/parse/util.clj @@ -0,0 +1,25 @@ +(ns auto-ap.parse.util + (:require [clj-fuzzy.metrics :as m] + [clojure.java.shell :as sh] + [clojure.string :as str] + [clj-time.format :as f] + [clj-time.core :as time])) + +(defmulti parse-value (fn [method _ _] + method)) + + +(defmethod parse-value :trim-commas + [_ _ value] + (str/replace value #"," "") + ) + +(defmethod parse-value :clj-time + [_ format value] + (time/from-time-zone (f/parse (f/formatter format) value) + (time/time-zone-for-id "America/Los_Angeles"))) + +(defmethod parse-value nil + [_ _ value] + value) + diff --git a/src/clj/auto_ap/routes/invoices.clj b/src/clj/auto_ap/routes/invoices.clj index d02e929c..da54221d 100644 --- a/src/clj/auto_ap/routes/invoices.clj +++ b/src/clj/auto_ap/routes/invoices.clj @@ -175,21 +175,28 @@ matching-client (parse/best-match clients customer-identifier) _ (println "New invoice matches client" matching-client) matching-location (parse/best-location-match matching-client text ) - [existing-id existing-outstanding-balance existing-status 
import-status] (->> (d/query - (cond-> {:query {:find ['?e '?outstanding-balance '?status '?import-status2] - :in ['$ '?invoice-number '?vendor '?client] - :where '[[?e :invoice/invoice-number ?invoice-number] - [?e :invoice/vendor ?vendor] - [?e :invoice/client ?client] - [?e :invoice/outstanding-balance ?outstanding-balance] - [?e :invoice/status ?status] - [?e :invoice/import-status ?import-status] - [?import-status :db/ident ?import-status2]]} - :args [(d/db (d/connect uri)) invoice-number matching-vendor (:db/id matching-client)]})) - first)] + [existing-id existing-outstanding-balance existing-status import-status] (when (and matching-client matching-location) + (->> (d/query + (cond-> {:query {:find ['?e '?outstanding-balance '?status '?import-status2] + :in ['$ '?invoice-number '?vendor '?client] + :where '[[?e :invoice/invoice-number ?invoice-number] + [?e :invoice/vendor ?vendor] + [?e :invoice/client ?client] + [?e :invoice/outstanding-balance ?outstanding-balance] + [?e :invoice/status ?status] + [?e :invoice/import-status ?import-status] + [?import-status :db/ident ?import-status2]]} + :args [(d/db (d/connect uri)) invoice-number matching-vendor (:db/id matching-client)]})) + first))] - (if (= :import-status/imported import-status) + (cond + (not (and matching-location matching-client)) result + + (= :import-status/imported import-status) + result + + :else (conj result (remove-nils #:invoice {:invoice/client (:db/id matching-client) :invoice/vendor matching-vendor :invoice/invoice-number invoice-number From 84e202bab9ecd62fe60aee01c5381a2903a3ad41 Mon Sep 17 00:00:00 2001 From: Bryce Covert Date: Tue, 22 Oct 2019 06:16:43 -0700 Subject: [PATCH 2/6] Use auto-ap.parse.util/parse-value in invoice route parse-date
--- src/clj/auto_ap/routes/invoices.clj | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/clj/auto_ap/routes/invoices.clj b/src/clj/auto_ap/routes/invoices.clj index da54221d..6607cd40 100644 --- a/src/clj/auto_ap/routes/invoices.clj +++ b/src/clj/auto_ap/routes/invoices.clj @@ -7,6 +7,7 @@ [auto-ap.datomic :refer [remove-nils uri]] [datomic.api :as d] [auto-ap.parse :as parse] + [auto-ap.parse.util :as parse-u] [auto-ap.graphql.utils :refer [assert-admin]] [auto-ap.routes.utils :refer [wrap-secure]] [clj-time.coerce :refer [to-date]] @@ -87,7 +88,7 @@ (defn parse-date [{:keys [raw-date]}] (try - (parse/parse-value :clj-time "MM/dd/yyyy" raw-date) + (parse-u/parse-value :clj-time "MM/dd/yyyy" raw-date) (catch Exception e (throw (Exception. (str "Could not parse date from '" raw-date "'") e))))) From aa90c1b4c1b61ca63ff638793359ff579f2a1cc9 Mon Sep 17 00:00:00 2001 From: Bryce Covert Date: Thu, 24 Oct 2019 20:16:28 -0700 Subject: [PATCH 3/6] Add Southbay Fresh Produce template with full-text fallback matching --- src/clj/auto_ap/parse.clj | 48 +++++++++++++++++++---------- src/clj/auto_ap/parse/csv.clj | 2 +- src/clj/auto_ap/parse/templates.clj | 11 ++++++- src/clj/auto_ap/routes/invoices.clj | 4 +-- 4 files changed, 44 insertions(+), 21 deletions(-) diff --git a/src/clj/auto_ap/parse.clj b/src/clj/auto_ap/parse.clj index 8f3ce7cd..fb8ff478 100644 --- a/src/clj/auto_ap/parse.clj +++ b/src/clj/auto_ap/parse.clj @@ -17,23 +17,30 @@ (defn template-applies? [text {:keys [keywords]}] (every?
#(re-find % text) keywords)) -(defn extract-template [text template] - (if (:multi template) - (mapcat - #(extract-template % (dissoc template :multi)) - (str/split text (:multi template))) +(defn extract-template + ([text template] + (if (:multi template) + (mapcat + #(extract-template % text (dissoc template :multi)) + (str/split text (:multi template))) - (when template - [(->> template - :extract - (reduce-kv - (fn [result k v] - (let [value (some-> (first (map second (re-seq v text))) - str/trim ) - [value-parser parser-params] (-> template :parser k)] - (assoc result k (u/parse-value value-parser parser-params value)))) - {:vendor-code (:vendor template) - :text text}))]))) + (extract-template text text template))) + ([text full-text template] + (when (and template + (or (not (:multi-match? template)) + (re-find (:multi-match? template) text ))) + [(->> template + :extract + (reduce-kv + (fn [result k v] + (let [value (some-> (or (first (map second (re-seq v text))) + (first (map second (re-seq v full-text)))) + str/trim ) + [value-parser parser-params] (-> template :parser k)] + (assoc result k (u/parse-value value-parser parser-params value)))) + {:vendor-code (:vendor template) + :text text + :full-text full-text}))]))) (defn parse [text] (reset! 
last-text text) @@ -96,7 +103,7 @@ ffirst)] (or fuzzy-match client-word-match))) -(defn best-location-match [client text] +(defn best-location-match [client text full-text] (or (->> client :client/location-matches (mapcat (fn [{:keys [:location-match/location :location-match/matches]}] @@ -104,5 +111,12 @@ (filter (fn [[location match]] (re-find (re-pattern (str "(?i)" match)) text)) ) first first) + (->> client + :client/location-matches + (mapcat (fn [{:keys [:location-match/location :location-match/matches]}] + (map (fn [match] [location match]) matches))) + (filter (fn [[location match]] (re-find (re-pattern (str "(?i)" match)) full-text)) ) + first + first) (:client/default-location client) (first (:client/locations client)))) diff --git a/src/clj/auto_ap/parse/csv.clj b/src/clj/auto_ap/parse/csv.clj index bfbf39c1..f70b7727 100644 --- a/src/clj/auto_ap/parse/csv.clj +++ b/src/clj/auto_ap/parse/csv.clj @@ -26,7 +26,7 @@ (map (fn [[_ po-number despatch-number invoice-number invoice-date customer value :as row]] {:vendor-code "Mama Lu's Foods" :customer-identifier customer - :invoice-number (str invoice-number " " po-number) + :invoice-number (str po-number "-" invoice-number ) :date (u/parse-value :clj-time "MM/dd/yy HH:ss" invoice-date) :total value :text (str/join " " row)}))) diff --git a/src/clj/auto_ap/parse/templates.clj b/src/clj/auto_ap/parse/templates.clj index 1350a0fa..9c0c966b 100644 --- a/src/clj/auto_ap/parse/templates.clj +++ b/src/clj/auto_ap/parse/templates.clj @@ -48,7 +48,16 @@ :customer-identifier #"Bill To:[^\n]+\n\s*([\w ]+)" :invoice-number #"Invoice\s([\w\./]+)*" :total #"Total Invoice\s+([0-9.]+)"} - :parser {:date [:clj-time "MM/dd/yy"]}}]) + :parser {:date [:clj-time "MM/dd/yy"]}} + {:vendor "Southbay Fresh Produce" + :keywords [#"SOUTH BAY FRESH PRODUCE"] + :extract {:date #"^([0-9]+/[0-9]+/[0-9]+)" + :customer-identifier #"FAX:[^\n]+\n\s+([A-Za-z ]+)\s{2}" + :invoice-number #"^[0-9]+/[0-9]+/[0-9]+\s+(\d+)" + :total #"\$([0-9.]+)"} + 
:parser {:date [:clj-time "MM/dd/yyyy"]} + :multi #"\n" + :multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+(\d+)"}]) (def excel-templates [{:vendor "Isp Productions" diff --git a/src/clj/auto_ap/routes/invoices.clj b/src/clj/auto_ap/routes/invoices.clj index 6607cd40..e37b3be4 100644 --- a/src/clj/auto_ap/routes/invoices.clj +++ b/src/clj/auto_ap/routes/invoices.clj @@ -163,7 +163,7 @@ (let [clients (d-clients/get-all) _ (println imports) - transactions (reduce (fn [result {:keys [invoice-number customer-identifier total date vendor-code text] :as info}] + transactions (reduce (fn [result {:keys [invoice-number customer-identifier total date vendor-code text full-text] :as info}] (println "searching for" vendor-code) (let [[matching-vendor default-expense-account] (->> (d/query {:query {:find ['?vendor '?default-expense-account] @@ -175,7 +175,7 @@ _ (println matching-vendor) matching-client (parse/best-match clients customer-identifier) _ (println "New invoice matches client" matching-client) - matching-location (parse/best-location-match matching-client text ) + matching-location (parse/best-location-match matching-client text full-text) [existing-id existing-outstanding-balance existing-status import-status] (when (and matching-client matching-location) (->> (d/query (cond-> {:query {:find ['?e '?outstanding-balance '?status '?import-status2] From 10ee974eaf5e21bca02a852cf24c3d2655505678 Mon Sep 17 00:00:00 2001 From: Bryce Covert Date: Thu, 24 Oct 2019 21:05:01 -0700 Subject: [PATCH 4/6] Add Performance Food Group and US Foods invoice templates
--- src/clj/auto_ap/parse.clj | 1 + src/clj/auto_ap/parse/templates.clj | 16 +++++++++++++++- src/clj/auto_ap/routes/invoices.clj | 3 +-- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/clj/auto_ap/parse.clj b/src/clj/auto_ap/parse.clj index fb8ff478..28743d95 100644 --- a/src/clj/auto_ap/parse.clj +++ b/src/clj/auto_ap/parse.clj @@ -79,6 +79,7 @@ (let [fuzzy-match (->> clients (mapcat (fn [{:keys [:db/id :client/matches :client/name] :as client :or {matches []}}] (map (fn [m] + (println m invoice-client-name) [client (m/jaccard (.toLowerCase invoice-client-name) (.toLowerCase m))]) (conj matches name)))) (filter #(< (second %) 0.25)) diff --git a/src/clj/auto_ap/parse/templates.clj b/src/clj/auto_ap/parse/templates.clj index 9c0c966b..5ce3fc1e 100644 --- a/src/clj/auto_ap/parse/templates.clj +++ b/src/clj/auto_ap/parse/templates.clj @@ -57,7 +57,21 @@ :total #"\$([0-9.]+)"} :parser {:date [:clj-time "MM/dd/yyyy"]} :multi #"\n" - :multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+(\d+)"}]) + :multi-match? 
#"^[0-9]+/[0-9]+/[0-9]+\s+(\d+)"} + {:vendor "Performance Food Group" + :keywords [#"performancefoodservice"] + :extract {:date #"DELIVER TO[^\n]+\n.+?(?=[0-9]+/[0-9]+/[0-9]+)([0-9]+/[0-9]+/[0-9]+)" + :customer-identifier #"DELIVER TO[^\n]+\n\s*[\S ]+?(?=\s{2,}([\S ]+?)\s{2,})" ;; ([\S ]+)\s{2,} + :invoice-number #"DELIVER TO[^\n]+\n.+?(?=\d+)(\d+)\s*\n" + :total #"([0-9.]+)\s+Status Code"} + :parser {#_#_:date [:clj-time "MM/dd/yy"]}} + {:vendor "US Foods" + :keywords [#"US Foods"] + :extract {:date #"INVOICE NUMBER[^\n]+\n\n\d+\s+\d+\s+([0-9]+/[0-9]+/[0-9]+)" + :customer-identifier #"BILL TO[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,} + :invoice-number #"INVOICE NUMBER[^\n]+\n\n\d+\s+(\d+)" + :total #"DELIVERED AMOUNT\s+\$([0-9.]+)"} + :parser {:date [:clj-time "MM/dd/yyyy"]}}]) (def excel-templates [{:vendor "Isp Productions" diff --git a/src/clj/auto_ap/routes/invoices.clj b/src/clj/auto_ap/routes/invoices.clj index e37b3be4..aea3a9ab 100644 --- a/src/clj/auto_ap/routes/invoices.clj +++ b/src/clj/auto_ap/routes/invoices.clj @@ -161,7 +161,6 @@ (defn import-uploaded-invoice [imports] (let [clients (d-clients/get-all) - _ (println imports) transactions (reduce (fn [result {:keys [invoice-number customer-identifier total date vendor-code text full-text] :as info}] (println "searching for" vendor-code) @@ -172,7 +171,7 @@ '[?vendor :vendor/default-expense-account ?default-expense-account]]} :args [(d/db (d/connect uri)) vendor-code]}) first) - _ (println matching-vendor) + _ (println "matching" customer-identifier "-" matching-vendor) matching-client (parse/best-match clients customer-identifier) _ (println "New invoice matches client" matching-client) matching-location (parse/best-location-match matching-client text full-text) From f0e93ec3cf8e891c21cd4ff21d95916e63d4107a Mon Sep 17 00:00:00 2001 From: Bryce Covert Date: Thu, 24 Oct 2019 21:37:32 -0700 Subject: [PATCH 5/6] Add Sysco invoice template and enable Performance Food Group date parsing
--- src/clj/auto_ap/parse/templates.clj | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/clj/auto_ap/parse/templates.clj b/src/clj/auto_ap/parse/templates.clj index 5ce3fc1e..bf0d273a 100644 --- a/src/clj/auto_ap/parse/templates.clj +++ b/src/clj/auto_ap/parse/templates.clj @@ -64,13 +64,20 @@ :customer-identifier #"DELIVER TO[^\n]+\n\s*[\S ]+?(?=\s{2,}([\S ]+?)\s{2,})" ;; ([\S ]+)\s{2,} :invoice-number #"DELIVER TO[^\n]+\n.+?(?=\d+)(\d+)\s*\n" :total #"([0-9.]+)\s+Status Code"} - :parser {#_#_:date [:clj-time "MM/dd/yy"]}} + :parser {:date [:clj-time "MM/dd/yy"]}} {:vendor "US Foods" :keywords [#"US Foods"] :extract {:date #"INVOICE NUMBER[^\n]+\n\n\d+\s+\d+\s+([0-9]+/[0-9]+/[0-9]+)" :customer-identifier #"BILL TO[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,} :invoice-number #"INVOICE NUMBER[^\n]+\n\n\d+\s+(\d+)" :total #"DELIVERED AMOUNT\s+\$([0-9.]+)"} + :parser {:date [:clj-time "MM/dd/yyyy"]}} + {:vendor "Sysco" + :keywords [#"SYSCO"] + :extract {:date #"INVOICE NUMBER[^\n]+\n([^\n]+)\n" + :customer-identifier #"INVOICE NUMBER[^\n]+\n[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,} + :invoice-number #"INVOICE NUMBER[^\n]+\n[^\n]+\n.*?(?=[\d]{9})(\d{9})" + :total #"\s{2,}INVOICE\s{2,}.*?(?=TOTAL)TOTAL\s+([0-9.]+)"} :parser {:date [:clj-time "MM/dd/yyyy"]}}]) (def excel-templates From 332eae2f58268a3bed40eaabae37e519dab13666 Mon Sep 17 00:00:00 2001 From: Bryce Covert Date: Sat, 26 Oct 2019 18:00:40 -0700 Subject: [PATCH 6/6] DVW statements --- src/clj/auto_ap/parse.clj | 5 +- src/clj/auto_ap/parse/excel.clj | 66 ++++++++++--------- src/clj/auto_ap/parse/templates.clj | 33 +++++++++- src/clj/auto_ap/routes/invoices.clj | 1 + .../auto_ap/views/pages/import_invoices.cljs | 3 + 5 files changed, 73 insertions(+), 35 deletions(-) diff --git a/src/clj/auto_ap/parse.clj b/src/clj/auto_ap/parse.clj index 28743d95..e666340d 100644 --- a/src/clj/auto_ap/parse.clj +++ b/src/clj/auto_ap/parse.clj @@ -108,8 +108,11 @@ (or (->> client 
:client/location-matches (mapcat (fn [{:keys [:location-match/location :location-match/matches]}] + (map (fn [match] [location match]) matches))) - (filter (fn [[location match]] (re-find (re-pattern (str "(?i)" match)) text)) ) + (filter (fn [[location match]] + (println "loc " location match text) + (re-find (re-pattern (str "(?i)" match)) text)) ) first first) (->> client diff --git a/src/clj/auto_ap/parse/excel.clj b/src/clj/auto_ap/parse/excel.clj index 5afcdd6b..6161e87d 100644 --- a/src/clj/auto_ap/parse/excel.clj +++ b/src/clj/auto_ap/parse/excel.clj @@ -10,38 +10,40 @@ (every? #(re-find % text) keywords)) (defn extract [wb {:keys [extract vendor]}] - (reduce-kv - (fn [invoice k [regex offset-row offset-column extract-regex]] - (assoc invoice k - (->> wb - (d/sheet-seq) - first - (d/cell-seq) - (filter (fn [cell] - (re-find regex (str (d/read-cell cell))))) - (map (fn [cell] - (let [address (.getAddress cell) - cell-value (str (d/read-cell (d/select-cell (.toString (CellAddress. (+ offset-row (.getRow address)) (+ offset-column (.getColumn address)) )) - (first (d/sheet-seq wb)))))] - (if extract-regex - (second (re-find extract-regex cell-value)) - - cell-value)))) - first))) - {:vendor-code vendor} - extract)) + (if (fn? extract) + (extract wb vendor) + [(reduce-kv + (fn [invoice k [regex offset-row offset-column extract-regex]] + (assoc invoice k + (->> wb + (d/sheet-seq) + first + (d/cell-seq) + (filter (fn [cell] + (re-find regex (str (d/read-cell cell))))) + (map (fn [cell] + (let [address (.getAddress cell) + cell-value (str (d/read-cell (d/select-cell (.toString (CellAddress. 
(+ offset-row (.getRow address)) (+ offset-column (.getColumn address)) )) + (first (d/sheet-seq wb)))))] + (if extract-regex + (second (re-find extract-regex cell-value)) + + cell-value)))) + first))) + {:vendor-code vendor} + extract)])) (defn parse-file [file filename] - [(let [wb (d/load-workbook file) - text (->> wb - (d/sheet-seq) - first - (d/cell-seq) - (map d/read-cell) - (str/join " "))] - (->> t/excel-templates - (filter (partial template-applies? text)) - first - (extract wb) - ))]) + (let [wb (d/load-workbook file) + text (->> wb + (d/sheet-seq) + first + (d/cell-seq) + (map d/read-cell) + (str/join " "))] + (->> t/excel-templates + (filter (partial template-applies? text)) + first + (extract wb) + ))) diff --git a/src/clj/auto_ap/parse/templates.clj b/src/clj/auto_ap/parse/templates.clj index bf0d273a..f9e8fd7c 100644 --- a/src/clj/auto_ap/parse/templates.clj +++ b/src/clj/auto_ap/parse/templates.clj @@ -1,4 +1,7 @@ -(ns auto-ap.parse.templates) +(ns auto-ap.parse.templates + (:require [dk.ative.docjure.spreadsheet :as d] + [clojure.string :as str]) + (:import (org.apache.poi.ss.util CellAddress))) (def pdf-templates @@ -80,6 +83,9 @@ :total #"\s{2,}INVOICE\s{2,}.*?(?=TOTAL)TOTAL\s+([0-9.]+)"} :parser {:date [:clj-time "MM/dd/yyyy"]}}]) +(defn offset [c x y] + (.toString (CellAddress. 
(+ y (.getRow (.getAddress c))) (+ x (.getColumn (.getAddress c))) ))) + (def excel-templates [{:vendor "Isp Productions" :keywords [#"ISP PRODUCTIONS"] @@ -92,4 +98,27 @@ :extract {:customer-identifier [#"Customer #" 1 0] :total [#"Total Invoice" 0 5] :date [#"Date" 0 0 #"Date: (.*)"] - :invoice-number [#"Invoice #" 0 0 #"Invoice #: (.*)"]}}]) + :invoice-number [#"Invoice #" 0 0 #"Invoice #: (.*)"]}} + {:vendor "DVW Commercial" + :keywords [#"Thank you!!!"] + :extract (fn [wb vendor] + (let [[sheet] (d/sheet-seq wb)] + (transduce (comp (filter (fn [c] + (re-find #"Invoice" (str (d/read-cell c))))) + (map (fn [c] + (let [customer-identifier (d/read-cell (->> (d/select-cell (offset c -3 0) sheet) + (iterate (fn [c] + (d/select-cell (offset c 0 -1) sheet))) + (filter (fn [c] + (not (str/blank? (d/read-cell c))))) + first))] + {:customer-identifier customer-identifier + :text customer-identifier + :full-text customer-identifier + :date (d/read-cell (d/select-cell (offset c 2 0) sheet)) + :invoice-number (d/read-cell (d/select-cell (offset c 4 0) sheet)) + :total (str (d/read-cell (d/select-cell (offset c 8 0) sheet))) + :vendor-code vendor})))) + conj + [] + (d/cell-seq sheet))))}]) diff --git a/src/clj/auto_ap/routes/invoices.clj b/src/clj/auto_ap/routes/invoices.clj index aea3a9ab..7ec2e141 100644 --- a/src/clj/auto_ap/routes/invoices.clj +++ b/src/clj/auto_ap/routes/invoices.clj @@ -161,6 +161,7 @@ (defn import-uploaded-invoice [imports] (let [clients (d-clients/get-all) + _ (clojure.pprint/pprint imports) transactions (reduce (fn [result {:keys [invoice-number customer-identifier total date vendor-code text full-text] :as info}] (println "searching for" vendor-code) diff --git a/src/cljs/auto_ap/views/pages/import_invoices.cljs b/src/cljs/auto_ap/views/pages/import_invoices.cljs index 662bcb12..0c889f3c 100644 --- a/src/cljs/auto_ap/views/pages/import_invoices.cljs +++ b/src/cljs/auto_ap/views/pages/import_invoices.cljs @@ -7,6 +7,7 @@ 
[auto-ap.views.components.layouts :refer [side-bar-layout]] [auto-ap.views.components.invoices.side-bar :refer [invoices-side-bar]] [auto-ap.views.utils :refer [dispatch-event]] + [auto-ap.utils :refer [by]] [auto-ap.entities.vendors :as vendor] [auto-ap.views.components.invoice-table :refer [invoice-table] :as invoice-table] [cljsjs.dropzone :as dropzone] @@ -67,6 +68,8 @@ (fn [db [_ data]] (-> db (assoc ::invoice-page (first (:invoice-page data))) + (update-in [::invoice-page] (fn [ip] + (assoc ip :checked (by :id (:invoices ip))))) (assoc-in [:status :loading] false)))) (re-frame/reg-event-fx