This commit is contained in:
Bryce Covert
2020-01-25 08:58:52 -08:00
4 changed files with 98 additions and 21 deletions

View File

@@ -38,7 +38,9 @@
(first (map second (re-seq v full-text))))
str/trim )
[value-parser parser-params] (-> template :parser k)]
(assoc result k (try (u/parse-value value-parser parser-params value)
(assoc result k (try
(println "applying parser" value-parser "to value" value)
(u/parse-value value-parser parser-params value)
(catch Exception e
(println e))))))
{:vendor-code (:vendor template)

View File

@@ -1,5 +1,6 @@
(ns auto-ap.parse.templates
(:require [dk.ative.docjure.spreadsheet :as d]
[auto-ap.parse.util :as u]
[clojure.string :as str])
(:import (org.apache.poi.ss.util CellAddress)))
@@ -50,7 +51,7 @@
:keywords [#"DVW Commercial"]
:extract {:date #"\s*([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To:[^\n]+\n[^\n]*\n\s*([\w ]+) \("
:invoice-number #"Invoice\s*\n\s*([\w\./]+)*"
:invoice-number #"Invoice\s*\n\s*[S]?([\w\./]+)*"
:total #"Total:\s+\$ ([0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]}}
@@ -97,13 +98,36 @@
;; GOLDEN BRANDS
{:vendor "Golden Brands San Jose"
:keywords [#"GOLDEN BRANDS"]
:extract {:date #"0430\n(.*)"
:extract {:date #"(?:.*\n){4}(.*)" ;; skips four newline-terminated lines, then captures the rest of the next one (how to go to a specific line)
:customer-identifier #"Account:(?:.*\n)(.*(?=\s{2,}))"
:invoice-number #"Invoice#: (\d+)"
:total #"Invoice Total\s+([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "EEE MMM dd, yyyy HH:mm aa"]
:total [:trim-commas nil]}}
;; Young's Market Co
;; Template selected by the "Young's Market Co" keyword. Every :extract regex
;; has exactly one capture group; each :parser entry is a
;; [value-parser parser-params] pair for the key of the same name.
{:vendor "Youngs Market"
:keywords [#"Young's Market Co"]
;; :date                - slash-separated numeric date at the very start of the searched text
;; :customer-identifier - remainder of the line after "Customer Name" and whitespace
;; :invoice-number      - first digit run that directly follows two or more whitespace characters
;; :total               - the numeric token immediately before the final numeric token at the end of the text
:extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Customer Name\s+(.*)"
:invoice-number #"^(?:.*?)\s{2,}([0-9]+)"
:total #"([0-9\.,]+)\s+[0-9\.,]+$"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}
;; NOTE(review): :multi / :multi-match? are consumed outside this view. Presumably
;; the text is split on :multi and only pieces matching :multi-match? (a date, a
;; number, then "INV ") are parsed as separate invoices -- TODO confirm in the parser.
:multi #"\n"
:multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+[0-9]+\s+INV "}
;; Young's Market Co - INVOICE
;; Template selected by the :keywords pattern below. Every :extract regex has
;; exactly one capture group; each :parser entry is a
;; [value-parser parser-params] pair for the key of the same name.
{:vendor "Youngs Market"
:keywords [#"P.O.Box 743564"]
;; :date, :customer-identifier and :invoice-number are all read from the line
;; that follows the "INVOICE DATE" header line:
;;   :date                - the token that ends that line
;;   :customer-identifier - text after a leading space, a number and whitespace,
;;                          up to the next run of two or more whitespace characters
;;   :invoice-number      - digit run preceded by 2+ whitespace and followed by
;;                          exactly one more token before the line break
;; :total skips four newline-terminated chunks starting with the remainder of
;; the "Net Amount" line, then captures the numeric token that ends the next line.
:extract {:date #"INVOICE DATE\n(?:.*?)(\S+)\n"
:customer-identifier #"INVOICE DATE\n [0-9]+\s+(.*?)\s{2,}"
:invoice-number #"INVOICE DATE\n(?:.*?)\s{2,}(\d+?)\s+\S+\n"
;; The character class includes "," so amounts with thousands separators are
;; captured whole ("1,234.56", not just "234.56"); the :trim-commas parser
;; below is then expected to strip the separators, as in the sibling templates.
:total #"Net Amount(?:.*\n){4}(?:.*?)([0-9\.,]+)\n"}
:parser {:date [:clj-time "dd-MMM-yy"]
:total [:trim-commas nil]}}
;; WINE WAREHOUSE
{:vendor "Wine Warehouse"
:keywords [#"WINE WAREHOUSE"]
@@ -227,17 +251,6 @@
:total #"Balance Due\s+\$([0-9\.]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]}}
;; PFG - ROMA
;; Template selected by "Performance Food Group, Inc" followed by a newline and
;; a form feed. Every :extract regex has exactly one capture group; each :parser
;; entry is a [value-parser parser-params] pair for the key of the same name.
{:vendor "Performance Food Group - ROMA"
:keywords [#"Performance Food Group, Inc\n\f"]
;; :date                - slash-separated numeric date after "Date: "
;; :customer-identifier - text after "BILL TO:" and whitespace, taken lazily up to
;;                        (not including) the next run of 2+ whitespace characters
;; :invoice-number      - digits after "INVOICE NO", one arbitrary character (the
;;                        "." is unescaped) and whitespace ending in a literal space
;; :total               - numeric token directly before "INVOICE TOTAL"
:extract {:date #"Date: ([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})"
:invoice-number #"INVOICE NO.\s+ ([\d]+)"
:total #"([\d\.,]+)\s+INVOICE TOTAL"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
;; PFG - ROMA LOOK 1
{:vendor "Performance Food Group - ROMA"
:keywords [#"inquiries call 1-800-233-6211"]
@@ -248,7 +261,19 @@
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"^\s+[\d]{6,8}\s+\d+"}
:multi-match? #"^\s+[\d]{6,8}\s+\d+"}
;; PFG - ROMA
;; Template selected by "Performance Food Group, Inc" followed by a newline and
;; a form feed. Every :extract regex has exactly one capture group; each :parser
;; entry is a [value-parser parser-params] pair for the key of the same name.
{:vendor "Performance Food Group - ROMA"
:keywords [#"Performance Food Group, Inc\n\f"]
;; :date                - slash-separated numeric date after "Date: "
;; :customer-identifier - text after "BILL TO:" and whitespace, taken lazily up to
;;                        (not including) the next run of 2+ whitespace characters
;; :invoice-number      - digits after "INVOICE NO", one arbitrary character (the
;;                        "." is unescaped) and whitespace ending in a literal space
;; :total               - numeric token directly before "INVOICE TOTAL"
:extract {:date #"Date: ([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})"
:invoice-number #"INVOICE NO.\s+ ([\d]+)"
:total #"([\d\.,]+)\s+INVOICE TOTAL"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
;; JFC
{:vendor "JFC International"
@@ -298,4 +323,32 @@
:vendor-code vendor}))))
conj
[]
(d/cell-seq sheet))))}
;; Chef's Choice Produce Co
;; Spreadsheet template: :extract is a function of the workbook and the vendor
;; that returns a vector of invoice maps, instead of a map of regexes.
;; NOTE(review): `offset` is defined outside this view. It is always called as
;; (offset c n 0) and its result handed to d/select-cell -- presumably "the cell
;; n columns to the right of c on the same row"; TODO confirm.
{:vendor "Chef's Choice Produce Co"
:keywords [#"Alt_invoice_number"]
:extract (fn [wb vendor]
;; Only the first sheet of the workbook is read.
(let [[sheet] (d/sheet-seq wb)]
(transduce (comp
;; Discard every cell before the first one whose text contains "Customer_id" ...
(drop-while (fn [c]
(not (re-find #"Customer_id" (str (d/read-cell c))))))
;; ... then discard that cell and the eight cells after it.
(drop 9)
;; Keep only cells in column index 0; each one anchors the lookups below.
(filter (fn [c]
(= 0 (.getColumnIndex c))))
;; Skip anchors whose cell at offset 1 reads as nil or blank.
(filter (fn [c]
(not (str/blank? (str/trim (or (d/read-cell (d/select-cell (offset c 1 0) sheet)) ""))))))
;; Build one invoice map per remaining anchor cell:
;; offset 1 -> customer / text, 3 -> invoice number, 5 -> date, 7 -> total.
;; NOTE(review): unlike the blank check above, the str/trim calls for
;; offsets 3 and 5 have no nil guard, so an empty cell there would throw.
(map (fn [c]
{:customer-identifier (str/trim (d/read-cell (d/select-cell (offset c 1 0) sheet)))
:text (d/read-cell (d/select-cell (offset c 1 0) sheet))
:full-text (d/read-cell (d/select-cell (offset c 1 0) sheet))
:date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim (d/read-cell (d/select-cell (offset c 5 0) sheet))))
;; re-find yields [whole-match group-1 group-2]; dropping the whole match and
;; taking the first non-nil group gives either the value with its leading
;; zeros stripped (group 1) or a letter-led code (group 2). The "^" binds
;; only to the first alternative. No match at all yields nil.
:invoice-number (->>
(re-find #"^(?:0+([A-Z0-9]+))|([A-Z]+[A-Z0-9]+)" (str/trim (d/read-cell (d/select-cell (offset c 3 0) sheet))))
(drop 1 )
(filter identity)
first)
:total (str (d/read-cell (d/select-cell (offset c 7 0) sheet)))
:vendor-code vendor}))
;; Drop any map whose :customer-identifier is nil/false.
(filter :customer-identifier))
;; Accumulate the surviving maps into a vector, walking every cell of the sheet.
conj
[]
(d/cell-seq sheet))))}])