company identification.
This commit is contained in:
@@ -16,6 +16,7 @@
|
|||||||
[dk.ative/docjure "1.12.0"]
|
[dk.ative/docjure "1.12.0"]
|
||||||
[org.clojure/java.jdbc "0.7.3"]
|
[org.clojure/java.jdbc "0.7.3"]
|
||||||
[cljsjs/dropzone "4.3.0-0"]
|
[cljsjs/dropzone "4.3.0-0"]
|
||||||
|
[clj-fuzzy "0.4.1"]
|
||||||
;; https://mvnrepository.com/artifact/postgresql/postgresql
|
;; https://mvnrepository.com/artifact/postgresql/postgresql
|
||||||
[postgresql/postgresql "9.3-1102.jdbc41"]
|
[postgresql/postgresql "9.3-1102.jdbc41"]
|
||||||
[cljs-http "0.1.44"]
|
[cljs-http "0.1.44"]
|
||||||
|
|||||||
9
src/clj/auto_ap/db/companies.clj
Normal file
9
src/clj/auto_ap/db/companies.clj
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
(ns auto-ap.db.companies)
|
||||||
|
|
||||||
|
(defn get-all
  "Returns the vector of known companies.

  Each entry is a map with:
    :name    - the display name of the company
    :matches - lower-case strings that identify the company"
  []
  (mapv (fn [[company-name aliases]]
          {:name company-name
           :matches aliases})
        [["Campbell Brewing Company"
          ["campbell brewing company" "campbell brewery company" "campbell brewing"]]
         ["Brown Chicken Brown Cow"
          ["brown chicken brown cow"]]
         ["Naschmarkt Restaurant"
          ["naschmarkt" "naschmarkt restaurant"]]]))
|
||||||
@@ -11,7 +11,17 @@
|
|||||||
[ring.middleware.reload :refer [wrap-reload]]
|
[ring.middleware.reload :refer [wrap-reload]]
|
||||||
[ring.middleware.json :refer [wrap-json-response]]
|
[ring.middleware.json :refer [wrap-json-response]]
|
||||||
[ring.middleware.edn :refer [wrap-edn-params]]
|
[ring.middleware.edn :refer [wrap-edn-params]]
|
||||||
[clojure.java.jdbc :as j]))
|
[clojure.java.jdbc :as j]
|
||||||
|
[clj-fuzzy.metrics :as m]
|
||||||
|
[auto-ap.db.companies :as companies]))
|
||||||
|
(defn best-match
  "Returns the entry of `companies` whose aliases are closest to
  `company-identifier`, or nil when no company is close enough.

  companies          - seq of maps with a :matches collection of lower-case
                       alias strings (the shape returned by companies/get-all)
  company-identifier - the customer identifier parsed from an invoice; may be
                       nil when the parser could not extract one

  A company's score is the smallest `m/jaccard` value between the lower-cased
  identifier and any of its aliases. Only scores below 0.25 qualify, and the
  lowest score wins."
  [companies company-identifier]
  ;; A missing identifier cannot match anything; return nil instead of
  ;; throwing a NullPointerException from .toLowerCase.
  (when company-identifier
    ;; Lower-case once rather than once per alias.
    (let [needle (.toLowerCase company-identifier)]
      (->> companies
           (keep (fn [{:keys [matches] :as company}]
                   ;; `(apply min [])` throws, so companies without aliases
                   ;; are skipped instead of aborting the whole lookup.
                   (when (seq matches)
                     [company (apply min (map #(m/jaccard needle %) matches))])))
           (filter #(< (second %) 0.25))
           (sort-by second)
           ffirst))))
|
||||||
|
|
||||||
(defroutes app-routes
|
(defroutes app-routes
|
||||||
(GET "/" [] (response/resource-response "index.html" {:root "public"}))
|
(GET "/" [] (response/resource-response "index.html" {:root "public"}))
|
||||||
@@ -37,25 +47,25 @@
|
|||||||
:headers {"Content-Type" "application/edn"}})
|
:headers {"Content-Type" "application/edn"}})
|
||||||
(POST "/api/invoices/approve" []
|
(POST "/api/invoices/approve" []
|
||||||
(invoices/approve)
|
(invoices/approve)
|
||||||
(println (invoices/get-pending))
|
|
||||||
{:status 200
|
{:status 200
|
||||||
:body (pr-str (invoices/get-pending))
|
:body (pr-str (invoices/get-pending))
|
||||||
:headers {"Content-Type" "application/edn"}})
|
:headers {"Content-Type" "application/edn"}})
|
||||||
(POST "/api/invoices/reject" []
|
(POST "/api/invoices/reject" []
|
||||||
(invoices/reject)
|
(invoices/reject)
|
||||||
(println (invoices/get-pending))
|
|
||||||
{:status 200
|
{:status 200
|
||||||
:body (pr-str (invoices/get-pending))
|
:body (pr-str (invoices/get-pending))
|
||||||
:headers {"Content-Type" "application/edn"}})
|
:headers {"Content-Type" "application/edn"}})
|
||||||
(POST "/pdf-upload"
|
(POST "/pdf-upload"
|
||||||
{{ files "file"} :params :as params}
|
{{ files "file"} :params :as params}
|
||||||
(let [{:keys [filename tempfile]} files
|
(let [{:keys [filename tempfile]} files
|
||||||
existing-invoices (invoices/get-all)]
|
existing-invoices (invoices/get-all)
|
||||||
(println existing-invoices)
|
companies (companies/get-all)]
|
||||||
(invoices/insert-multi!
|
(invoices/insert-multi!
|
||||||
(for [{:keys [total date invoice-number customer-identifier vendor] :as row}
|
(for [{:keys [total date invoice-number customer-identifier vendor] :as row}
|
||||||
(parse/parse-file (.getPath tempfile) filename)]
|
(parse/parse-file (.getPath tempfile) filename)]
|
||||||
(assoc row
|
(assoc row
|
||||||
|
:company (:name (best-match companies customer-identifier))
|
||||||
|
|
||||||
:imported false
|
:imported false
|
||||||
:potential-duplicate (boolean (seq (filter #(and (= vendor (:vendor %))
|
:potential-duplicate (boolean (seq (filter #(and (= vendor (:vendor %))
|
||||||
(= invoice-number (:invoice-number %)))
|
(= invoice-number (:invoice-number %)))
|
||||||
|
|||||||
@@ -2,30 +2,10 @@
|
|||||||
(:require [clojure.java.io :as io]
|
(:require [clojure.java.io :as io]
|
||||||
[clojure.string :as str]
|
[clojure.string :as str]
|
||||||
[clojure.java.shell :as sh]
|
[clojure.java.shell :as sh]
|
||||||
[auto-ap.parse.excel :as excel]))
|
[auto-ap.parse.excel :as excel]
|
||||||
|
[auto-ap.parse.templates :as t]))
|
||||||
|
|
||||||
(def templates
|
|
||||||
[{:vendor "CHFW"
|
|
||||||
:keywords [#"CHEF'S WAREHOUSE"]
|
|
||||||
:extract {:total #"2 WKS C\.C\.\s+([\d.,]+)"
|
|
||||||
:customer-identifier #"\n([A-Z][A-Z ]+)\s{2,}"
|
|
||||||
:date #"\s+([0-9]+/[0-9]+/[0-9]+)"
|
|
||||||
:invoice-number #"\s+[0-9]+/[0-9]+/[0-9]+\s+([0-9]+)"}}
|
|
||||||
|
|
||||||
{:vendor "GGM"
|
|
||||||
:keywords [#"Golden Gate Meat"]
|
|
||||||
:extract {:total #"Invoice Total\:\s+\$([\d.,]+)"
|
|
||||||
:customer-identifier #"Bill To\s*:\s*([\w ]+)\s{2,}"
|
|
||||||
:date #"Printed:\s+([0-9]+/[0-9]+/[0-9]+)"
|
|
||||||
:invoice-number #"Invoice\s+[^\n]+\n[^\n]+\n\s+([0-9]+)"}}
|
|
||||||
|
|
||||||
{:vendor "CINTAS"
|
|
||||||
:keywords [#"CINTAS CORPORATION"]
|
|
||||||
:extract {:invoice-number #"INVOICE\s#\s+([\d.,]+)"
|
|
||||||
:customer-identifier #"BILL TO\s*:\s{2,}([\w ]+)\s{2,}"
|
|
||||||
:date #"INVOICE DATE\s*\n.*\s+([0-9]+/[0-9]+/[0-9]+)"
|
|
||||||
:total #"INVOICE TOTAL\s+([0-9.]+)"}
|
|
||||||
:multi #"\f\f"}])
|
|
||||||
|
|
||||||
(defn template-applies? [text {:keys [keywords]}]
|
(defn template-applies? [text {:keys [keywords]}]
|
||||||
(every? #(re-find % text) keywords))
|
(every? #(re-find % text) keywords))
|
||||||
@@ -45,13 +25,13 @@
|
|||||||
{:vendor (:vendor template)}))]))
|
{:vendor (:vendor template)}))]))
|
||||||
|
|
||||||
(defn parse [text]
|
(defn parse [text]
|
||||||
(->> templates
|
(->> t/pdf-templates
|
||||||
(filter (partial template-applies? text))
|
(filter (partial template-applies? text))
|
||||||
first
|
first
|
||||||
(extract-template text)))
|
(extract-template text)))
|
||||||
|
|
||||||
|
|
||||||
(defmulti parse-file (fn [file filename] (last (str/split filename #"\." ))))
|
(defmulti parse-file (fn [file filename] (.toLowerCase (last (str/split filename #"\." )))))
|
||||||
|
|
||||||
(defmethod parse-file
|
(defmethod parse-file
|
||||||
"pdf"
|
"pdf"
|
||||||
|
|||||||
@@ -1,29 +1,15 @@
|
|||||||
(ns auto-ap.parse.excel
|
(ns auto-ap.parse.excel
|
||||||
(:import [org.apache.poi.ss.util CellAddress])
|
(:import [org.apache.poi.ss.util CellAddress])
|
||||||
(:require [dk.ative.docjure.spreadsheet :as d]
|
(:require [dk.ative.docjure.spreadsheet :as d]
|
||||||
|
[clojure.string :as str]
|
||||||
[clojure.string :as str]))
|
[auto-ap.parse.templates :as t]))
|
||||||
|
|
||||||
|
|
||||||
(def templates
|
|
||||||
[{:vendor "Isp Productions"
|
|
||||||
:keywords [#"ISP PRODUCTIONS"]
|
|
||||||
:extract {:customer-identifier [#"SERVICES PROVIDED TO" 1 0]
|
|
||||||
:total [#"PAY THIS" -1 0]
|
|
||||||
:date [#"INVOICE DATE" 0 1]
|
|
||||||
:invoice-number [#"INVOICE NUMBER" 0 1]}}
|
|
||||||
{:vendor "Southern Wine Online"
|
|
||||||
:keywords [#"Please note that the total invoice amount may"]
|
|
||||||
:extract {:customer-identifier [#"Customer #" 1 0]
|
|
||||||
:total [#"Total Invoice" 0 5]
|
|
||||||
:date [#"Date" 0 0 #"Date: (.*)"]
|
|
||||||
:invoice-number [#"Invoice #" 0 0 #"Invoice #: (.*)"]}}])
|
|
||||||
|
|
||||||
(defn template-applies? [text {:keys [keywords]}]
|
(defn template-applies? [text {:keys [keywords]}]
|
||||||
(every? #(re-find % text) keywords))
|
(every? #(re-find % text) keywords))
|
||||||
|
|
||||||
(defn extract [wb {:keys [extract vendor]}]
|
(defn extract [wb {:keys [extract vendor]}]
|
||||||
(println extract)
|
|
||||||
|
|
||||||
(reduce-kv
|
(reduce-kv
|
||||||
(fn [invoice k [regex offset-row offset-column extract-regex]]
|
(fn [invoice k [regex offset-row offset-column extract-regex]]
|
||||||
(assoc invoice k
|
(assoc invoice k
|
||||||
@@ -54,7 +40,7 @@
|
|||||||
(d/cell-seq)
|
(d/cell-seq)
|
||||||
(map d/read-cell)
|
(map d/read-cell)
|
||||||
(str/join " "))]
|
(str/join " "))]
|
||||||
(->> templates
|
(->> t/excel-templates
|
||||||
(filter (partial template-applies? text))
|
(filter (partial template-applies? text))
|
||||||
first
|
first
|
||||||
(extract wb)
|
(extract wb)
|
||||||
|
|||||||
38
src/clj/auto_ap/parse/templates.clj
Normal file
38
src/clj/auto_ap/parse/templates.clj
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
(ns auto-ap.parse.templates)
|
||||||
|
|
||||||
|
(def pdf-templates
  "Per-vendor templates for invoices parsed from text.

  Each template is a map with:
    :vendor   - vendor label
    :keywords - regexes associated with the vendor
                (presumably all must be found for the template to apply;
                confirm against template-applies? in the parser)
    :extract  - map of invoice field -> regex with one capture group
    :multi    - optional regex, present only on the CINTAS template
                (NOTE(review): looks like a separator between several
                invoices in one document - confirm against the parser)"
  [;; Chef's Warehouse
   {:vendor "CHFW"
    :keywords [#"CHEF'S WAREHOUSE"]
    :extract {:total #"2 WKS C\.C\.\s+([\d.,]+)"
              :customer-identifier #"\n([A-Z][A-Z ]+)\s{2,}"
              :date #"\s+([0-9]+/[0-9]+/[0-9]+)"
              :invoice-number #"\s+[0-9]+/[0-9]+/[0-9]+\s+([0-9]+)"}}

   ;; Golden Gate Meat
   {:vendor "GGM"
    :keywords [#"Golden Gate Meat"]
    :extract {:total #"Invoice Total\:\s+\$([\d.,]+)"
              :customer-identifier #"Bill To\s*:\s*([\w ]+)\s{2,}"
              :date #"Printed:\s+([0-9]+/[0-9]+/[0-9]+)"
              :invoice-number #"Invoice\s+[^\n]+\n[^\n]+\n\s+([0-9]+)"}}

   ;; Cintas
   {:vendor "CINTAS"
    :keywords [#"CINTAS CORPORATION"]
    :extract {:invoice-number #"INVOICE\s#\s+([\d.,]+)"
              :customer-identifier #"BILL TO\s*:\s{2,}([\w ]+)\s{2,}"
              :date #"INVOICE DATE\s*\n.*\s+([0-9]+/[0-9]+/[0-9]+)"
              :total #"INVOICE TOTAL\s+([0-9.]+)"}
    :multi #"\f\f"}])
|
||||||
|
|
||||||
|
(def excel-templates
  "Per-vendor templates for invoices parsed from spreadsheets.

  Each template is a map with:
    :vendor   - vendor label
    :keywords - regexes associated with the vendor
                (presumably all must be found for the template to apply;
                confirm against template-applies? in the parser)
    :extract  - map of invoice field ->
                [regex offset-row offset-column extract-regex], where
                extract-regex is optional
                (NOTE(review): presumably the value is read from the cell at
                the given offsets from the cell matching regex - confirm
                against the excel parser)"
  [;; ISP Productions
   {:vendor "Isp Productions"
    :keywords [#"ISP PRODUCTIONS"]
    :extract {:customer-identifier [#"SERVICES PROVIDED TO" 1 0]
              :total [#"PAY THIS" -1 0]
              :date [#"INVOICE DATE" 0 1]
              :invoice-number [#"INVOICE NUMBER" 0 1]}}

   ;; Southern Wine Online: date and invoice number carry an extract-regex.
   {:vendor "Southern Wine Online"
    :keywords [#"Please note that the total invoice amount may"]
    :extract {:customer-identifier [#"Customer #" 1 0]
              :total [#"Total Invoice" 0 5]
              :date [#"Date" 0 0 #"Date: (.*)"]
              :invoice-number [#"Invoice #" 0 0 #"Invoice #: (.*)"]}}])
|
||||||
@@ -64,11 +64,11 @@
|
|||||||
[:th "Invoice #"]
|
[:th "Invoice #"]
|
||||||
[:th "Date"]
|
[:th "Date"]
|
||||||
[:th "Amount"]]]
|
[:th "Amount"]]]
|
||||||
[:tbody (for [{:keys [customer-identifier invoice-number date total id vendor] :as i} @invoices]
|
[:tbody (for [{:keys [company invoice-number date total id vendor] :as i} @invoices]
|
||||||
^{:key (str customer-identifier "-" invoice-number "-" date "-" total "-" id)}
|
^{:key (str company "-" invoice-number "-" date "-" total "-" id)}
|
||||||
[:tr
|
[:tr
|
||||||
[:td vendor]
|
[:td vendor]
|
||||||
[:td customer-identifier]
|
[:td company]
|
||||||
[:td invoice-number]
|
[:td invoice-number]
|
||||||
[:td date]
|
[:td date]
|
||||||
[:td total]])]])]))
|
[:td total]])]])]))
|
||||||
@@ -114,11 +114,14 @@
|
|||||||
[:th "Date"]
|
[:th "Date"]
|
||||||
[:th "Amount"]
|
[:th "Amount"]
|
||||||
[:th]]]
|
[:th]]]
|
||||||
[:tbody (for [{:keys [vendor potential-duplicate customer-identifier invoice-number date total id] :as i} @invoices]
|
[:tbody (for [{:keys [vendor potential-duplicate company customer-identifier invoice-number date total id] :as i} @invoices]
|
||||||
^{:key (str customer-identifier "-" invoice-number "-" date "-" total "-" id)}
|
^{:key (str company "-" invoice-number "-" date "-" total "-" id)}
|
||||||
[:tr
|
[:tr
|
||||||
[:td vendor]
|
[:td vendor]
|
||||||
[:td customer-identifier]
|
(if company
  [:td company]
  ;; No known company matched: show the raw identifier from the invoice.
  ;; The tooltip describes this warning; it previously read
  ;; "potential duplicate", which is a different condition.
  [:td [:i.icon.fa.fa-warning {:title "unknown company"}]
   (str "'" customer-identifier "' doesn't match any known company")])
|
||||||
[:td invoice-number]
|
[:td invoice-number]
|
||||||
[:td date]
|
[:td date]
|
||||||
[:td total]
|
[:td total]
|
||||||
|
|||||||
Reference in New Issue
Block a user