diff --git a/project.clj b/project.clj
index 56586ef9..2e9125f6 100644
--- a/project.clj
+++ b/project.clj
@@ -16,6 +16,7 @@
                  [dk.ative/docjure "1.12.0"]
                  [org.clojure/java.jdbc "0.7.3"]
                  [cljsjs/dropzone "4.3.0-0"]
+                 [clj-fuzzy "0.4.1"]
                  ;; https://mvnrepository.com/artifact/postgresql/postgresql
                  [postgresql/postgresql "9.3-1102.jdbc41"]
                  [cljs-http "0.1.44"]
diff --git a/src/clj/auto_ap/db/companies.clj b/src/clj/auto_ap/db/companies.clj
new file mode 100644
index 00000000..faf83a1f
--- /dev/null
+++ b/src/clj/auto_ap/db/companies.clj
@@ -0,0 +1,12 @@
+(ns auto-ap.db.companies)
+
+(defn get-all
+  "Returns the known companies; each has a display :name and the lower-case
+  :matches aliases that invoice customer identifiers are compared against."
+  []
+  [{:name "Campbell Brewing Company"
+    :matches ["campbell brewing company" "campbell brewery company" "campbell brewing"]}
+   {:name "Brown Chicken Brown Cow"
+    :matches ["brown chicken brown cow"]}
+   {:name "Naschmarkt Restaurant"
+    :matches ["naschmarkt" "naschmarkt restaurant"]}])
diff --git a/src/clj/auto_ap/handler.clj b/src/clj/auto_ap/handler.clj
index 43e7d9c8..c2bc909a 100644
--- a/src/clj/auto_ap/handler.clj
+++ b/src/clj/auto_ap/handler.clj
@@ -11,7 +11,25 @@
             [ring.middleware.reload :refer [wrap-reload]]
             [ring.middleware.json :refer [wrap-json-response]]
             [ring.middleware.edn :refer [wrap-edn-params]]
-            [clojure.java.jdbc :as j]))
+            [clojure.java.jdbc :as j]
+            [clj-fuzzy.metrics :as m]
+            [auto-ap.db.companies :as companies]))
+
+(defn best-match
+  "Returns the company whose closest :matches alias is within a Jaccard
+  distance of 0.25 of company-identifier (compared lower-cased), or nil when
+  the identifier is nil or no company is close enough."
+  [companies company-identifier]
+  (when company-identifier
+    (let [identifier (.toLowerCase company-identifier)]
+      (->> companies
+           (keep (fn [{:keys [matches] :as company}]
+                   ;; skip alias-less companies: (apply min) on an empty seq throws
+                   (when (seq matches)
+                     [company (apply min (map #(m/jaccard identifier %) matches))])))
+           (filter #(< (second %) 0.25))
+           (sort-by second)
+           ffirst))))
 
 (defroutes app-routes
   (GET "/" [] (response/resource-response "index.html" {:root "public"}))
@@ -37,25 +55,25 @@
         :headers {"Content-Type" "application/edn"}})
   (POST "/api/invoices/approve" []
         (invoices/approve)
-        (println (invoices/get-pending))
         {:status 200
          :body (pr-str (invoices/get-pending))
          :headers {"Content-Type" "application/edn"}})
 
   (POST "/api/invoices/reject" []
         (invoices/reject)
-        (println (invoices/get-pending))
         {:status 200
          :body (pr-str (invoices/get-pending))
          :headers {"Content-Type" "application/edn"}})
 
   (POST "/pdf-upload" {{ files "file"} :params :as params}
         (let [{:keys [filename tempfile]} files
-              existing-invoices (invoices/get-all)]
-          (println existing-invoices)
+              existing-invoices (invoices/get-all)
+              companies (companies/get-all)]
           (invoices/insert-multi!
            (for [{:keys [total date invoice-number customer-identifier vendor] :as row} (parse/parse-file (.getPath tempfile) filename)]
              (assoc row
+                    :company (:name (best-match companies customer-identifier))
+                    :imported false
                     :potential-duplicate (boolean
                                           (seq (filter #(and (= vendor (:vendor %))
                                                              (= invoice-number (:invoice-number %)))
diff --git a/src/clj/auto_ap/parse.clj b/src/clj/auto_ap/parse.clj
index 6fe8655f..bd6310c2 100644
--- a/src/clj/auto_ap/parse.clj
+++ b/src/clj/auto_ap/parse.clj
@@ -2,30 +2,10 @@
   (:require [clojure.java.io :as io]
             [clojure.string :as str]
             [clojure.java.shell :as sh]
-            [auto-ap.parse.excel :as excel]))
+            [auto-ap.parse.excel :as excel]
+            [auto-ap.parse.templates :as t]))
 
 
-(def templates
-  [{:vendor "CHFW"
-    :keywords [#"CHEF'S WAREHOUSE"]
-    :extract {:total #"2 WKS C\.C\.\s+([\d.,]+)"
-              :customer-identifier #"\n([A-Z][A-Z ]+)\s{2,}"
-              :date #"\s+([0-9]+/[0-9]+/[0-9]+)"
-              :invoice-number #"\s+[0-9]+/[0-9]+/[0-9]+\s+([0-9]+)"}}
-   {:vendor "GGM"
-    :keywords [#"Golden Gate Meat"]
-    :extract {:total #"Invoice Total\:\s+\$([\d.,]+)"
-              :customer-identifier #"Bill To\s*:\s*([\w ]+)\s{2,}"
-              :date #"Printed:\s+([0-9]+/[0-9]+/[0-9]+)"
-              :invoice-number #"Invoice\s+[^\n]+\n[^\n]+\n\s+([0-9]+)"}}
-
-   {:vendor "CINTAS"
-    :keywords [#"CINTAS CORPORATION"]
-    :extract {:invoice-number #"INVOICE\s#\s+([\d.,]+)"
-              :customer-identifier #"BILL TO\s*:\s{2,}([\w ]+)\s{2,}"
-              :date #"INVOICE DATE\s*\n.*\s+([0-9]+/[0-9]+/[0-9]+)"
-              :total #"INVOICE TOTAL\s+([0-9.]+)"}
-    :multi #"\f\f"}])
 
 (defn template-applies? [text {:keys [keywords]}]
   (every? #(re-find % text) keywords))
@@ -45,13 +25,13 @@
              {:vendor (:vendor template)}))]))
 
 (defn parse [text]
-  (->> templates
+  (->> t/pdf-templates
        (filter (partial template-applies? text))
        first
        (extract-template text)))
 
 
-(defmulti parse-file (fn [file filename] (last (str/split filename #"\." ))))
+(defmulti parse-file (fn [file filename] (.toLowerCase (last (str/split filename #"\." )))))
 
 (defmethod parse-file
   "pdf"
diff --git a/src/clj/auto_ap/parse/excel.clj b/src/clj/auto_ap/parse/excel.clj
index 064a380a..3440d566 100644
--- a/src/clj/auto_ap/parse/excel.clj
+++ b/src/clj/auto_ap/parse/excel.clj
@@ -1,29 +1,15 @@
 (ns auto-ap.parse.excel
   (:import [org.apache.poi.ss.util CellAddress])
   (:require [dk.ative.docjure.spreadsheet :as d]
-
-            [clojure.string :as str]))
+            [clojure.string :as str]
+            [auto-ap.parse.templates :as t]))
+
 
-(def templates
-  [{:vendor "Isp Productions"
-    :keywords [#"ISP PRODUCTIONS"]
-    :extract {:customer-identifier [#"SERVICES PROVIDED TO" 1 0]
-              :total [#"PAY THIS" -1 0]
-              :date [#"INVOICE DATE" 0 1]
-              :invoice-number [#"INVOICE NUMBER" 0 1]}}
-   {:vendor "Southern Wine Online"
-    :keywords [#"Please note that the total invoice amount may"]
-    :extract {:customer-identifier [#"Customer #" 1 0]
-              :total [#"Total Invoice" 0 5]
-              :date [#"Date" 0 0 #"Date: (.*)"]
-              :invoice-number [#"Invoice #" 0 0 #"Invoice #: (.*)"]}}])
 
 (defn template-applies? [text {:keys [keywords]}]
   (every? #(re-find % text) keywords))
 
 (defn extract [wb {:keys [extract vendor]}]
-  (println extract)
-
   (reduce-kv (fn [invoice k [regex offset-row offset-column extract-regex]]
                (assoc invoice
                       k
@@ -54,7 +40,7 @@
                   (d/cell-seq)
                   (map d/read-cell)
                   (str/join " "))]
-    (->> templates
+    (->> t/excel-templates
          (filter (partial template-applies? text))
          first
          (extract wb)
diff --git a/src/clj/auto_ap/parse/templates.clj b/src/clj/auto_ap/parse/templates.clj
new file mode 100644
index 00000000..7e1aff6e
--- /dev/null
+++ b/src/clj/auto_ap/parse/templates.clj
@@ -0,0 +1,42 @@
+(ns auto-ap.parse.templates)
+
+(def pdf-templates
+  "Per-vendor templates for PDF invoice text. A template applies when every
+  :keywords regex is found in the text; :extract maps each field to a regex."
+  [{:vendor "CHFW"
+    :keywords [#"CHEF'S WAREHOUSE"]
+    :extract {:total #"2 WKS C\.C\.\s+([\d.,]+)"
+              :customer-identifier #"\n([A-Z][A-Z ]+)\s{2,}"
+              :date #"\s+([0-9]+/[0-9]+/[0-9]+)"
+              :invoice-number #"\s+[0-9]+/[0-9]+/[0-9]+\s+([0-9]+)"}}
+
+   {:vendor "GGM"
+    :keywords [#"Golden Gate Meat"]
+    :extract {:total #"Invoice Total\:\s+\$([\d.,]+)"
+              :customer-identifier #"Bill To\s*:\s*([\w ]+)\s{2,}"
+              :date #"Printed:\s+([0-9]+/[0-9]+/[0-9]+)"
+              :invoice-number #"Invoice\s+[^\n]+\n[^\n]+\n\s+([0-9]+)"}}
+
+   {:vendor "CINTAS"
+    :keywords [#"CINTAS CORPORATION"]
+    :extract {:invoice-number #"INVOICE\s#\s+([\d.,]+)"
+              :customer-identifier #"BILL TO\s*:\s{2,}([\w ]+)\s{2,}"
+              :date #"INVOICE DATE\s*\n.*\s+([0-9]+/[0-9]+/[0-9]+)"
+              :total #"INVOICE TOTAL\s+([0-9.]+)"}
+    :multi #"\f\f"}])
+
+(def excel-templates
+  "Per-vendor templates for spreadsheet invoices. Each :extract entry is
+  [regex offset-row offset-column] with an optional fourth extract-regex."
+  [{:vendor "Isp Productions"
+    :keywords [#"ISP PRODUCTIONS"]
+    :extract {:customer-identifier [#"SERVICES PROVIDED TO" 1 0]
+              :total [#"PAY THIS" -1 0]
+              :date [#"INVOICE DATE" 0 1]
+              :invoice-number [#"INVOICE NUMBER" 0 1]}}
+   {:vendor "Southern Wine Online"
+    :keywords [#"Please note that the total invoice amount may"]
+    :extract {:customer-identifier [#"Customer #" 1 0]
+              :total [#"Total Invoice" 0 5]
+              :date [#"Date" 0 0 #"Date: (.*)"]
+              :invoice-number [#"Invoice #" 0 0 #"Invoice #: (.*)"]}}])
diff --git a/src/cljs/auto_ap/views.cljs b/src/cljs/auto_ap/views.cljs
index 867e5ef8..0f2550ff 100644
--- a/src/cljs/auto_ap/views.cljs
+++ b/src/cljs/auto_ap/views.cljs
@@ -64,11 +64,11 @@
                  [:th "Invoice #"]
                  [:th "Date"]
                  [:th "Amount"]]]
-       [:tbody (for [{:keys [customer-identifier invoice-number date total id vendor] :as i} @invoices]
-                 ^{:key (str customer-identifier "-" invoice-number "-" date "-" total "-" id)}
+       [:tbody (for [{:keys [company invoice-number date total id vendor] :as i} @invoices]
+                 ^{:key (str company "-" invoice-number "-" date "-" total "-" id)}
                  [:tr
                   [:td vendor]
-                  [:td customer-identifier]
+                  [:td company]
                   [:td invoice-number]
                   [:td date]
                   [:td total]])]])]))
@@ -114,11 +114,14 @@
                  [:th "Date"]
                  [:th "Amount"]
                  [:th]]]
-       [:tbody (for [{:keys [vendor potential-duplicate customer-identifier invoice-number date total id] :as i} @invoices]
-                 ^{:key (str customer-identifier "-" invoice-number "-" date "-" total "-" id)}
+       [:tbody (for [{:keys [vendor potential-duplicate company customer-identifier invoice-number date total id] :as i} @invoices]
+                 ^{:key (str company "-" invoice-number "-" date "-" total "-" id)}
                  [:tr
                   [:td vendor]
-                  [:td customer-identifier]
+                  (if company
+                    [:td company]
+                    [:td [:i.icon.fa.fa-warning {:title "unknown company"}]
+                     (str "'" customer-identifier "' doesn't match any known company")])
                   [:td invoice-number]
                   [:td date]
                   [:td total]