company identification.

This commit is contained in:
Bryce Covert
2017-12-12 11:55:09 -08:00
parent c8bcf2aa02
commit 198c1a39a3
7 changed files with 80 additions and 53 deletions

View File

@@ -2,30 +2,10 @@
(:require [clojure.java.io :as io]
[clojure.string :as str]
[clojure.java.shell :as sh]
[auto-ap.parse.excel :as excel]))
[auto-ap.parse.excel :as excel]
[auto-ap.parse.templates :as t]))
;; Vendor invoice templates, one map per vendor.
;;   :vendor   - short vendor code string.
;;   :keywords - regexes; template-applies? requires every one of them to be
;;               found in the document text before the template is used.
;;   :extract  - map of field keyword -> regex. Each pattern has a single
;;               capture group; presumably that group is the extracted value
;;               (the consumer is not visible here — confirm).
;;   :multi    - optional regex, only set for CINTAS. `\f\f` matches two
;;               consecutive form feeds; presumably a separator between
;;               several invoices in one document — TODO confirm.
(def templates
[{:vendor "CHFW"
:keywords [#"CHEF'S WAREHOUSE"]
:extract {:total #"2 WKS C\.C\.\s+([\d.,]+)"
:customer-identifier #"\n([A-Z][A-Z ]+)\s{2,}"
:date #"\s+([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"\s+[0-9]+/[0-9]+/[0-9]+\s+([0-9]+)"}}
{:vendor "GGM"
:keywords [#"Golden Gate Meat"]
:extract {:total #"Invoice Total\:\s+\$([\d.,]+)"
:customer-identifier #"Bill To\s*:\s*([\w ]+)\s{2,}"
:date #"Printed:\s+([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"Invoice\s+[^\n]+\n[^\n]+\n\s+([0-9]+)"}}
{:vendor "CINTAS"
:keywords [#"CINTAS CORPORATION"]
:extract {:invoice-number #"INVOICE\s#\s+([\d.,]+)"
:customer-identifier #"BILL TO\s*:\s{2,}([\w ]+)\s{2,}"
:date #"INVOICE DATE\s*\n.*\s+([0-9]+/[0-9]+/[0-9]+)"
;; NOTE(review): unlike the other :total patterns this class has no `,`,
;; so a total printed with thousands separators would stop at the first
;; comma — confirm against real CINTAS invoices.
:total #"INVOICE TOTAL\s+([0-9.]+)"}
:multi #"\f\f"}])
(defn template-applies?
  "Returns true when every regex under the template's :keywords key is found
  somewhere in `text`, false otherwise. A template with no keywords applies
  to any text."
  [text {:keys [keywords]}]
  (letfn [(found-in-text? [pattern]
            (re-find pattern text))]
    (every? found-in-text? keywords)))
@@ -45,13 +25,13 @@
{:vendor (:vendor template)}))]))
(defn parse
  "Picks the first entry of t/pdf-templates for which template-applies? holds
  on `text`, then hands `text` and that template to extract-template and
  returns its result."
  [text]
  ;; A single threading pipeline: the previous text opened two `(->>` forms
  ;; (one over `templates`, one over `t/pdf-templates`) and never closed the
  ;; first, leaving the definition unbalanced.
  (->> t/pdf-templates
       (filter (partial template-applies? text))
       first
       ;; NOTE(review): when nothing matches, `first` yields nil and
       ;; extract-template receives a nil template — confirm it tolerates that.
       (extract-template text)))
;; One definition only: a second `defmulti` for an already-defined multimethod
;; is ignored by Clojure, so keeping a case-sensitive version ahead of the
;; lower-casing one would leave the case-sensitive dispatch in effect.
(defmulti parse-file
  "Parses `file`, choosing the implementation from `filename`'s extension.
  The dispatch value is the text after the last `.` in `filename`,
  lower-cased, so \"invoice.PDF\" and \"invoice.pdf\" both dispatch on \"pdf\".
  A filename without a dot dispatches on the whole lower-cased name."
  (fn [file filename]
    (-> filename
        (str/split #"\.")
        last
        ;; Locale/ROOT keeps the result independent of the JVM default locale
        ;; (e.g. Turkish dotless-i lower-casing of \"I\").
        (.toLowerCase java.util.Locale/ROOT))))
(defmethod parse-file
"pdf"