From 823d4998a7230adea9f0237d0e83a2ac3f1a2009 Mon Sep 17 00:00:00 2001 From: Bryce Covert Date: Mon, 21 Oct 2019 23:25:36 -0700 Subject: [PATCH] Supports importing mama lu --- src/clj/auto_ap/parse.clj | 65 ++++++++++++++++------------- src/clj/auto_ap/parse/csv.clj | 46 ++++++++++++++++++++ src/clj/auto_ap/parse/util.clj | 25 +++++++++++ src/clj/auto_ap/routes/invoices.clj | 33 +++++++++------ 4 files changed, 127 insertions(+), 42 deletions(-) create mode 100644 src/clj/auto_ap/parse/csv.clj create mode 100644 src/clj/auto_ap/parse/util.clj diff --git a/src/clj/auto_ap/parse.clj b/src/clj/auto_ap/parse.clj index 30cf3cf6..8f3ce7cd 100644 --- a/src/clj/auto_ap/parse.clj +++ b/src/clj/auto_ap/parse.clj @@ -1,31 +1,16 @@ (ns auto-ap.parse (:require [auto-ap.parse.excel :as excel] [auto-ap.parse.templates :as t] + [auto-ap.parse.util :as u] + [auto-ap.parse.csv :as csv] [clj-fuzzy.metrics :as m] [clojure.java.shell :as sh] [clojure.string :as str] [clj-time.format :as f] - [clj-time.core :as time])) + [clj-time.core :as time] + [clojure.set :as set])) -(defmulti parse-value (fn [method _ _] - method)) - - -(defmethod parse-value :trim-commas - [_ _ value] - (str/replace value #"," "") - ) - -(defmethod parse-value :clj-time - [_ format value] - (time/from-time-zone (f/parse (f/formatter format) value) - (time/time-zone-for-id "America/Los_Angeles"))) - -(defmethod parse-value nil - [_ _ value] - value) - (def last-text (atom nil)) @@ -46,7 +31,7 @@ (let [value (some-> (first (map second (re-seq v text))) str/trim ) [value-parser parser-params] (-> template :parser k)] - (assoc result k (parse-value value-parser parser-params value)))) + (assoc result k (u/parse-value value-parser parser-params value)))) {:vendor-code (:vendor template) :text text}))]))) @@ -67,6 +52,11 @@ :out parse)) +(defmethod parse-file + "csv" + [file filename] + (csv/parse-file file filename)) + (defmethod parse-file "xls" [file filename] @@ -79,15 +69,32 @@ (excel/parse-file file filename)) 
(defn best-match [clients invoice-client-name] - (->> clients - - (mapcat (fn [{:keys [:db/id :client/matches :client/name] :as client :or {matches []}}] - (map (fn [m] - [client (m/jaccard (.toLowerCase invoice-client-name) (.toLowerCase m))]) - (conj matches name)))) - (filter #(< (second %) 0.25)) - (sort-by second) - ffirst)) + (let [fuzzy-match (->> clients + (mapcat (fn [{:keys [:db/id :client/matches :client/name] :as client :or {matches []}}] + (map (fn [m] + [client (m/jaccard (.toLowerCase invoice-client-name) (.toLowerCase m))]) + (conj matches name)))) + (filter #(< (second %) 0.25)) + (sort-by second) + ffirst) + + word-set (disj (set (str/split (.toLowerCase invoice-client-name) #"\s" )) "") + client-word-match (->> clients + (map + (fn [{:keys [:db/id :client/matches :client/name] :as client :or {matches []}}] + (let [client-words (-> #{} + (into + (mapcat + (fn [match] (str/split (.toLowerCase match) #"\s" )) + matches)) + (into + (str/split (.toLowerCase name) #"\s" )))] + [client (count (set/intersection client-words word-set))]))) + (filter (fn [[_ c]] (> c 0))) + (sort-by (fn [[_ c]] c)) + reverse + ffirst)] + (or fuzzy-match client-word-match))) (defn best-location-match [client text] (or (->> client diff --git a/src/clj/auto_ap/parse/csv.clj b/src/clj/auto_ap/parse/csv.clj new file mode 100644 index 00000000..bfbf39c1 --- /dev/null +++ b/src/clj/auto_ap/parse/csv.clj @@ -0,0 +1,46 @@ +(ns auto-ap.parse.csv + (:require [auto-ap.parse.util :as u] + [clojure.data.csv :as csv] + [clojure.java.io :as io] + [clojure.string :as str])) + +(defn determine "Returns :mama-lus when the second cell of the first row contains the text Customer's PO No., otherwise nil (also nil when that cell is missing)." + [[header :as z]] + (prn header) + (cond (some-> (second header) 
(str/includes? "Customer's PO No.")) + :mama-lus + + :else + nil)) + +(defmulti parse-csv "Parses CSV rows into a vector of invoice maps, dispatching on the keyword that determine derives from the header row; returns nil when no format is recognized." + determine + :default (fn default [rows] + nil)) + +(defmethod parse-csv :mama-lus + [rows] + (println "MAMA LU") + (transduce + (comp (drop 1) + (map (fn [[_ po-number despatch-number invoice-number invoice-date customer value :as row]] + {:vendor-code "Mama Lu's Foods" + :customer-identifier customer + :invoice-number (str invoice-number " " po-number) + :date (u/parse-value :clj-time "MM/dd/yy HH:mm" invoice-date) + :total value + :text (str/join " " row)}))) + conj + [] + rows)) + +(defmethod parse-csv nil + [rows] + nil) + +(defn parse-file "Reads file as comma-separated rows via io/reader and returns the result of parse-csv on them; filename is not used." [file filename] + (println "HEREERE") + (with-open [reader (io/reader file)] + (let [rows (csv/read-csv reader :separator \,)] + (parse-csv rows)))) + diff --git a/src/clj/auto_ap/parse/util.clj b/src/clj/auto_ap/parse/util.clj new file mode 100644 index 00000000..380d9b7c --- /dev/null +++ b/src/clj/auto_ap/parse/util.clj @@ -0,0 +1,25 @@ +(ns auto-ap.parse.util + (:require [clj-fuzzy.metrics :as m] + [clojure.java.shell :as sh] + [clojure.string :as str] + [clj-time.format :as f] + [clj-time.core :as time])) + +(defmulti parse-value (fn [method _ _] + method)) + + +(defmethod parse-value :trim-commas + [_ _ value] + (str/replace value #"," "") + ) + +(defmethod parse-value :clj-time + [_ format value] + (time/from-time-zone (f/parse (f/formatter format) value) + (time/time-zone-for-id "America/Los_Angeles"))) + +(defmethod parse-value nil + [_ _ value] + value) + diff --git a/src/clj/auto_ap/routes/invoices.clj b/src/clj/auto_ap/routes/invoices.clj index d02e929c..da54221d 100644 --- a/src/clj/auto_ap/routes/invoices.clj +++ b/src/clj/auto_ap/routes/invoices.clj @@ -175,21 +175,28 @@ matching-client (parse/best-match clients customer-identifier) _ (println "New invoice matches client" matching-client) matching-location (parse/best-location-match matching-client text ) - [existing-id existing-outstanding-balance existing-status 
import-status] (->> (d/query - (cond-> {:query {:find ['?e '?outstanding-balance '?status '?import-status2] - :in ['$ '?invoice-number '?vendor '?client] - :where '[[?e :invoice/invoice-number ?invoice-number] - [?e :invoice/vendor ?vendor] - [?e :invoice/client ?client] - [?e :invoice/outstanding-balance ?outstanding-balance] - [?e :invoice/status ?status] - [?e :invoice/import-status ?import-status] - [?import-status :db/ident ?import-status2]]} - :args [(d/db (d/connect uri)) invoice-number matching-vendor (:db/id matching-client)]})) - first)] + [existing-id existing-outstanding-balance existing-status import-status] (when (and matching-client matching-location) + (->> (d/query + (cond-> {:query {:find ['?e '?outstanding-balance '?status '?import-status2] + :in ['$ '?invoice-number '?vendor '?client] + :where '[[?e :invoice/invoice-number ?invoice-number] + [?e :invoice/vendor ?vendor] + [?e :invoice/client ?client] + [?e :invoice/outstanding-balance ?outstanding-balance] + [?e :invoice/status ?status] + [?e :invoice/import-status ?import-status] + [?import-status :db/ident ?import-status2]]} + :args [(d/db (d/connect uri)) invoice-number matching-vendor (:db/id matching-client)]})) + first))] - (if (= :import-status/imported import-status) + (cond + (not (and matching-location matching-client)) result + + (= :import-status/imported import-status) + result + + :else (conj result (remove-nils #:invoice {:invoice/client (:db/id matching-client) :invoice/vendor matching-vendor :invoice/invoice-number invoice-number