Files
integreat/src/clj/auto_ap/parse/templates.clj
2020-07-27 21:28:02 -07:00

577 lines
25 KiB
Clojure

(ns auto-ap.parse.templates
(:require [dk.ative.docjure.spreadsheet :as d]
[auto-ap.parse.util :as u]
[clojure.string :as str])
(:import (org.apache.poi.ss.util CellAddress)))
;; Templates for pulling invoice / statement fields out of PDF text, one map
;; per vendor document layout. Keys used by the entries below:
;;   :vendor       - vendor name string
;;   :keywords     - vector of regexes; presumably used to decide whether a
;;                   document belongs to this template (the matching code is
;;                   not in this file -- confirm whether all or any must match)
;;   :extract      - field keyword -> regex; presumably the first capture group
;;                   is taken as the field's raw value -- confirm in the parser
;;   :parser       - field keyword -> [parser-kind argument]; the kinds used
;;                   here are :clj-time (argument is a date format string, or a
;;                   vector of format strings), :trim-commas and
;;                   :trim-commas-and-negate (argument nil)
;;   :multi        - optional regex. NOTE(review): looks like the separator used
;;                   to split one document into several records -- confirm
;;   :multi-match? - optional regex. NOTE(review): looks like a filter selecting
;;                   which of those records are parsed -- confirm
;; Some vendors appear more than once with different :keywords (invoice vs.
;; statement layouts). NOTE(review): entry order may matter if the first
;; matching template wins -- confirm before reordering.
(def pdf-templates
[
;; CHEF's WAREHOUSE
{:vendor "CHFW"
:keywords [#"CHEF'S WAREHOUSE"]
:extract {:total #"2 WKS C\.C\.\s+([\d.,]+)"
:customer-identifier #"\n([A-Z][A-Z ]+)\s{2,}"
:date #"\s+([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"\s+[0-9]+/[0-9]+/[0-9]+\s+([0-9]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]}}
;; GGM
{:vendor "Golden Gate Meat Company, Inc"
:keywords [#"Golden Gate Meat"]
:extract {:total #"Invoice Total\:\s+\$([\d.,]+)"
:customer-identifier #"Bill To\s*:\s*([\w ]+)\s{2,}"
:date #"Printed:\s+([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"Invoice\s+[^\n]+\n[^\n]+\n\s+([0-9]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; CINTAS
{:vendor "CINTAS"
:keywords [#"CINTAS CORPORATION"]
:extract {:invoice-number #"INVOICE\s#\s+([\d.,]+)"
:customer-identifier #"BILL TO\s*:\s{2,}([\w ]+)\s{2,}"
:date #"INVOICE DATE\s*\n.*\s+([0-9]+/[0-9]+/[0-9]+)"
:total #"INVOICE TOTAL\s+([0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]}
:multi #"\f\f"}
;; CARBONIC
{:vendor "Carbonic Service Inc"
:keywords [#"CARBONIC SERVICE INC"]
:extract {:invoice-number #"Invoice #\s*\n\s*[\w\.]+\s+[\w\./]+(.*)\s*\n"
:customer-identifier #"Bill To[^\n]+\n[^\n]*\n([\w ]+)\s{2,}"
:date #"Invoice #\s*\n\s*[\w\.]+\s+([\w\./]+)"
:total #"Total\s+\$([0-9.,]+)"
:account-number #"Account #\s+(\d+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
;; DVW
{:vendor "DVW Commercial"
:keywords [#"DVW Commercial"]
:extract {:date #"\s*([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To:[^\n]+\n[^\n]*\n\s*([\w ]+) \("
:invoice-number #"Invoice\s*\n\s*[S]?([\w\./]+)*"
:total #"Total:\s+\$ ([0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]}}
;; DAYLIGHT FOOD STATEMENT
{:vendor "Daylight Foods"
:keywords [#"DAYLIGHT FOODS" #"Customer Statement"]
:extract {:date #"^.*?([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Phone:.*\n+\s+(.*)"
:invoice-number #"\s+(\w+)"
:total #"([\-]?[0-9]+\.[0-9]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"^\s*[A-Z]\d+\s+([0-9]+/[0-9]+/[0-9]+)"}
;; DAYLIGHT FOOD
{:vendor "Daylight Foods"
:keywords [#"DAYLIGHT FOODS"]
:extract {:date #"\n\s*Date[^\n]+\n\s*([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To:.*\n\s*(.*?)\s{2,}"
:invoice-number #"Invoice\s([\w\./]+)*"
:total #"Total Invoice\s+([\-]?[0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; SOUTHBAY FRESH
{:vendor "Southbay Fresh Produce"
:keywords [#"SOUTH BAY FRESH PRODUCE"]
:extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"To:[^\n]*\n\s+([A-Za-z' ]+)\s{2}"
:invoice-number #"INV #\/(\d+)"
:total #"\$([0-9.]+)\."}
:parser {:date [:clj-time "MM/dd/yyyy"]}
:multi #"\n"
:multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+INV "}
;; PFG - LEDYARD
{:vendor "Performance Food Group - LEDYARD"
:keywords [#"performancefoodservice"]
:extract {:date #"DELIVER TO[^\n]+\n.+?(?=[0-9]+/[0-9]+/[0-9]+)([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"DELIVER TO[^\n]+\n\s*[\S ]+?(?=\s{2,}([\S ]+?)\s{2,})" ;; ([\S ]+)\s{2,}
:invoice-number #"DELIVER TO[^\n]+\n.+?(?=\d+)(\d+)\s*\n"
:total #"([0-9.\-]+)\s+Status Code"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; SOUTHERN GLAZER'S
{:vendor "Southern Glazers"
:keywords [#"Southern Glazer's"]
:extract {:date #"INVOICE DATE(?s:.*)(?= (?:[0-9]+/[0-9]+/[0-9]+)\s+([0-9]+/[0-9]+/[0-9]+)) "
:customer-identifier #"SOLD TO:(?:.*)(?=\n)\n(.*)(?=\s{2,})" ;; ([\S ]+)\s{2,}
:invoice-number #"INVOICE\n(?:.*?)(?=\d{4,})(\d+)"
:total #"PAY THIS AMOUNT(?s:.*)(?= ([0-9,]+\.[0-9]{2}))"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
;; GOLDEN BRANDS
{:vendor "Golden Brands San Jose"
:keywords [#"GOLDEN BRANDS"]
:extract {:date #"(?:.*\n){4}(.*)" ;; HOW TO GO TO SPECIFIC LINE: skip four lines, capture the fifth
:customer-identifier #"Account:(?:.*\n)(.*(?=\s{2,}))"
:invoice-number #"Invoice#: (\d+)"
:total #"Invoice Total\s+([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "EEE MMM dd, yyyy HH:mm aa"]
:total [:trim-commas nil]}}
;; BIGOLI FRESH PASTA
{:vendor "Bigoli Fresh Pasta"
:keywords [#"bigolifreshpasta.com"]
:extract {:date #"INVOICE #.*?\n.*?([0-9]+/[0-9]+/[0-9]+)" ;; date is on the line right after the "INVOICE #" line
:customer-identifier #"BILL TO.*\n\s+(.*?)\s{2,}"
:invoice-number #"INVOICE #.*?\n(\d+)"
:total #" TOTAL\s+([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; Del Monte Meats
{:vendor "Del Monte Meat Co"
:keywords [#"Del Monte"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To:(?:.*?(?=\s{2,}))(.*)-"
:invoice-number #"Invoice\s+([0-9]+)"
:total #"([0-9\.,]+)$"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}
:multi #"\n"
:multi-match? #"^\s{1,2}[0-9]+"}
;; Royal Hawaiian
{:vendor "Royal Hawaiian"
:keywords [#"ROYAL HAWAIIAN"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO.*\n\s+(.*?(?=(\s{2,}|\()))"
:invoice-number #"Invoice#:\s+([0-9]+)"
:total #"INV TOTAL:\s+([0-9\.,]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; AUTO-CHLOR
{:vendor "Auto-Chlor"
:keywords [#"AUTO-CHLOR"]
:extract {:date #"DATE : ([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"(.*?)\s{2,}.*CUSTOMER#"
:invoice-number #"INVOICE# :\s+([0-9]+)"
:total #"TOTAL SALE :\s+([0-9,.]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; Classic Wines
{:vendor "Classic Wines"
:keywords [#"585-9463"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Customer Id:.*\n.*\n.*\n(.*)"
:invoice-number #"^.{23}\s+(\w+)"
:total #"\$([0-9\.,]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}
:multi #"\n"
:multi-match? #"^\s+.*?\d{6,}.*?\$"}
;; C & L
{:vendor "C&L Produce"
:keywords [#"440 Franklin Street"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*\n.*\n\s+(.*?)\s{2,}"
:invoice-number #"Invoice #.*\n.*\n.*?(\d{5,})\n"
:total #"Total\s+\$([0-9\.,]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
;; GENERAL PRODUCE
{:vendor "General Produce Company"
:keywords [#"generalproduce.com"]
:extract {:date #"INVOICE DATE.*\n.*?([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"SHIP TO.*\n.*\n\s+(.*?)\s{2,}"
:invoice-number #"INVOICE NO.*\n.*?(\d{7,}?)\s+"
:total #"TOTAL\s+([\-0-9,]+\.[0-9]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; Young's Market Co new statement
{:vendor "Youngs Market"
:keywords [#"(YOUNG'S MARKET COMPANY|Young.*Statement)"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Customer Name +([\w ]+)"
:invoice-number #"\s{2,}([0-9]+)"
:total #"\$?([0-9,]+\.[0-9]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}
:multi #"\n"
:multi-match? #"^[0-9]+.*\$?([0-9,]+\.[0-9]+).*\$?([0-9,]+\.[0-9]+)"}
;; Young's Market Co - INVOICE
{:vendor "Youngs Market"
:keywords [#"P.O.Box 743564"]
:extract {:date #"(?:INVOICE|CREDIT) DATE\n(?:.*?)(\S+)\n"
:customer-identifier #"(?:INVOICE|CREDIT) DATE\n [0-9]+\s+(.*?)\s{2,}"
:invoice-number #"(?:INVOICE|CREDIT) DATE\n(?:.*?)\s{2,}(\d+?)\s+\S+\n"
:total #"Net Amount(?:.*\n){4}(?:.*?)([\-]?[0-9\.]+)\n"}
:parser {:date [:clj-time "dd-MMM-yy"]
:total [:trim-commas-and-negate nil]}}
;; WINE WAREHOUSE
{:vendor "Wine Warehouse"
:keywords [#"WINE WAREHOUSE" #"Bottle prices include"]
:extract {:date #"INVOICE DATE\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"SHIP-TO-PARTY.*\n(.*?)(?=\s{2,})"
:invoice-number #"INV #\s+(\d+)"
:total #"PLEASE PAY THIS AMOUNT\s+([0-9]+\.[0-9]{2})"
:account-number #"CUSTOMER NUMBER\s+(\d+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; WINE WAREHOUSE 3
{:vendor "Wine Warehouse"
:keywords [#"Wine Warehouse" #"PLEASE APPLY CREDIT"]
:extract {:date #"Credit Memo Number/Date\s+\d+\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Ship-To-Party.*\n(.*?)\s{2,}"
:invoice-number #"Credit Memo Number/Date\s+(\d+)"
:total #"Total\s+([0-9]+\.[0-9]{2}-)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; WINE WAREHOUSE 2
{:vendor "Wine Warehouse"
:keywords [#"WINE WAREHOUSE" #"Bill-to-Party"]
:extract {:date #"Invoice date\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill-to-Party.*\n(.*?)\s{2,}"
:invoice-number #"Invoice number\s+(\d+)"
:total #"Gross\s+([0-9]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; REGAL
{:vendor "Regal Wine Co"
:keywords [#"REGAL WINE"]
:extract {:date #"INVOICE DATE.*\n\n(?:.*?)([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"INVOICE\n(.*?)\s{2,}"
:invoice-number #"INVOICE NUMBER.*\n\n(?:.*?)(\d+)"
:total #"Total Amount Due(?:.*?)([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
;; ALSCO
{:vendor "Alsco"
:keywords [#"Alsco"]
:extract {:date #"Invoice Date:\s+(.*)"
:customer-identifier #"Invoice F o r(?:.*?)\n\s+(.*?)\s{2,}"
:invoice-number #" (\S+)\n\s+Invoice Date"
:total #"Invoice Total\s+\$([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MMM dd yyyy"]
:total [:trim-commas nil]}}
;; SUNCREST
{:vendor "Suncrest USA Inc"
:keywords [#"Suncrest.*Invoice"]
:extract {:date #"Date.*\n\s*\n(?:.*?)([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To(?:.*?)\n\n(.*?)\s{2,}"
:invoice-number #"Invoice #.*\n\s*\n.*? (\d{3,})"
:total #"Balance Due\s+\$([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; PACIFIC SEAFOOD
{:vendor "Pacific Seafood"
:keywords [#"(pacseafood|PACIFIC FRESH)"]
:extract {:date #"DATE(?:.*\n.*(?=([0-9]+/[0-9]+/[0-9]+)))([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"DELIVER TO:(?:.*\n)(.*?)(?=\s{2})"
:invoice-number #"INVOICE NO\.\n(?:.*?(?= [0-9]+\n)) ([0-9]+)"
:total #"TOTAL\n\s+([0-9,\.]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; P&R
{:vendor "P & R PAPER SUPPLY CO"
:keywords [#"PAPER SUPPLY COMPANY"]
:extract {:date #"Invoiced.*\n\s+\S+\s+(\S+)"
:customer-identifier #"Bill To.*\n\s*(.*?)\s{2,}"
:invoice-number #"Invoice#.*\n.*\n.*?(\S+)\s+\d+\n"
:total #"INVOICE TOTAL\s+([\-]?[\d,]+\.\d+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; SUNCREST STATEMENT
{:vendor "Suncrest USA Inc"
:keywords [#"Suncrest.*\n.*Statement"]
:extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"To:(?:.*?)\n\s*(.*?)\s{2,}"
:invoice-number #"INV #(\d+)"
:total #"Orig\. Amount \$([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}
:multi #"\n"
:multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+INV "}
;; US FOODS
{:vendor "US Foods"
:keywords [#"US Foods"]
:extract {:date #"INVOICE DATE.*\n+.*?(?=([0-9]+/[0-9]+/[0-9]+))([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,}
:invoice-number #"INVOICE NUMBER[^\n]+\n\n\d+\s+(\d+)"
:total #"(?:DELIVERED AMOUNT|PLEASE REMIT|AMOUNT).*?\$([0-9.,]+( CR)?)\n"}
:parser {:date [:clj-time ["MM/dd/yyyy" "yyyy/MM/dd"]]
:total [:trim-commas-and-negate nil]}}
;; SYSCO
{:vendor "Sysco"
:keywords [#"SYSCO"]
:extract {:date #"INVOICE NUMBER[^\n]+\n([^\n]+)\n"
:customer-identifier #"INVOICE NUMBER[^\n]+\n[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,}
:invoice-number #"INVOICE NUMBER[^\n]+\n[^\n]+\n.*?(?=[\d]{9})(\d{9})"
:total #"\s{2,}INVOICE\s{2,}.*?(?=TOTAL)TOTAL\s+([0-9.]+[\-]?)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; LE BOULANGER
{:vendor "Le Boulanger"
:keywords [#"Le Boulanger"]
:extract {:date #"Invoice Date: ([^\n]+)\n"
:customer-identifier #"Ship to\n+\s+([\S ]+?)(?=\s{2,})"
:invoice-number #"Invoice No: ([^\n]+)\n"
:total #" Total:\s+([\d\.]+)"}
:parser {:date [:clj-time "MMM dd, yyyy"]}}
;; A&B
{:vendor "A&B Produce"
:keywords [#"ABProduce"]
:extract {:date #"^\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO:[^\n]+\n[^\n]+\n[^\n]+\n(.*)\s{2,}"
:invoice-number #"(\d+)\s+(?:INV|C/M)"
:total #" (?:INV|C/M)\s+([\d\.\-]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"^\s+[0-9]+/[0-9]+/[0-9]+\s+\d+\s+(INV|C/M)\s+"}
;; A&B Invoice
{:vendor "A&B Produce"
:keywords [#"415-656-0254"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO.*\n.*?\s{2,}(.*?)\s{2,}"
:invoice-number #"INVOICE#\n.*?(\d+)\n"
:total #"INV TOTAL \s+([\d\.\-]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; Ocean Queen
{:vendor "Ocean Queen"
:keywords [#"Ocean Queen USA"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*\n\s*(.*?)\s{2,}"
:invoice-number #"Invoice #\n.*\n.*?(\d+)\n"
:total #"Total\s+\$([\d\.,\-]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; CHEF's CHOICE
{:vendor "Chef's Choice Produce Co"
:keywords [#"(2170 MARTIN AVENUE|213-3886)"]
:extract {:date #"([0-9/]{10,10})"
:customer-identifier #"\n B\s+([\S ]+?)(?=\s{2,}I) "
:invoice-number #"^0*([0-9]+)"
:total #"INVOICE\s+([\d\.]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]} ;; may want to try two approaches [:clj-time ["MM/dd/yyyy" "MM1dd1yyyy"]]
:multi #"\n"
:multi-match? #"\s+INVOICE\s+"}
;; FRESH AND BEST
{:vendor "Fresh and Best Produce"
:keywords [#"freshbestproduce"]
:extract {:date #"\n\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To[^\n]+\n([A-Za-z ']+)"
:invoice-number #"\n\s+[0-9/]+\s+(\d+)"
:total #"Balance Due\s+\$([0-9\.]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]}}
;; PFG - ROMA LOOK 1
{:vendor "Performance Food Group - ROMA"
:keywords [#"inquiries call 1-800-233-6211"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})"
:invoice-number #"^\s+([\dA-Z]+)"
:total #"([\d\.,\-]+\.[\d\-]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"^\s+[\d]{6,8}\s+\d+"}
;; ACME BREAD
{:vendor "Acme Bread"
:keywords [#"acmebread\.com"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Print Date.*\n.*\n(.*)"
:invoice-number #"^\s*(\d+)"
:total #"\s{2,}(\d+\.\d{2})\s{2,}"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"^\s*\d+\s+([0-9]+/[0-9]+/[0-9]+)"}
;; PFG - ROMA
{:vendor "Performance Food Group - ROMA"
:keywords [#"Performance Food Group, Inc\n\f"]
:extract {:date #"Date: ([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})"
:invoice-number #"INVOICE NO.\s+ ([\d]+)"
:total #"([\d\.,]+)\s+INVOICE TOTAL"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
;; JFC
{:vendor "JFC International"
:keywords [#"48490 MILMONT DRIVE"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"SOLD\s+([\S ]+?)(?=(\s{2,}|\n))"
:invoice-number #"(\S+)\s+(?=[0-9]+/[0-9]+/[0-9]+)"
:total #"(?:INVOICE|TOTAL|CREDIT)\s+([\d\.,\-]+\.[\d\-]+( CR)?)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; ROMA BAKERY
{:vendor "Roma Bakery Inc."
:keywords [#"Roma Bakery Inc"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*\n\s+(.*?)\s{2,}"
:invoice-number #"Invoice (\d+)"
:total #"Total\s+([\d\-\.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; KAEL FOODS
{:vendor "Kael Foods"
:keywords [#"kaelfoods.com"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*\n\s+(.*?)\s{2,}"
:invoice-number #"INVOICE 0*(\d+)"
:total #"TOTAL:\s+\$([\d\-\.,]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; Starter Bakery
{:vendor "Starter Bakery"
:keywords [#"starterbakery.com"]
:extract {:date #"INVOICE DATE:\s+(.*?)\s{2,}"
:customer-identifier #"BILL TO:.*\n\s+(.*?)\s{2,}"
:invoice-number #"Invoice.*?(\d+)"
:total #"Total:.*?([\d\-,]+\.\d{2,2}+)"}
:parser {:date [:clj-time "MMMM dd, yyyy"]
:total [:trim-commas-and-negate nil]}}
;; Trimark
{:vendor "TriMark R.W. Smith"
:keywords [#"TriMark"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To\s+(.*?)\s{2,}"
:invoice-number #"Invoice #\n.*?([\d\-]+)\n"
:total #"Invoice Total\s+([\d\-,]+\.\d{2,2}+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}])
(defn offset
  "Returns the string form of the CellAddress that lies `x` columns and `y`
  rows away from cell `c`. Negative values move left / up. Nothing here
  guards against stepping past the first row or column."
  [c x y]
  (let [origin (.getAddress c)
        target-row (+ y (.getRow origin))
        target-col (+ x (.getColumn origin))]
    (.toString (CellAddress. target-row target-col))))
;; Templates for pulling invoice fields out of Excel workbooks, one map per
;; vendor layout. :extract comes in two shapes:
;;   * a map of field -> [regex n m] or [regex n m regex]. NOTE(review): the
;;     numbers look like cell offsets from the cell matching the first regex,
;;     and the optional trailing regex like a capture applied to the cell
;;     text -- confirm against the code that consumes these vectors.
;;   * a function of [workbook vendor] that returns a vector of invoice maps
;;     with :customer-identifier, :text, :full-text, :date, :invoice-number,
;;     :total and :vendor-code.
(def excel-templates
  [{:vendor "Isp Productions"
    :keywords [#"ISP PRODUCTIONS"]
    :extract {:customer-identifier [#"SERVICES PROVIDED TO" 1 0]
              :total [#"PAY THIS" -1 0]
              :date [#"INVOICE DATE" 0 1]
              :invoice-number [#"INVOICE NUMBER" 0 1]}}

   {:vendor "Southern Glazers"
    :keywords [#"Please note that the total invoice amount may"]
    :extract {:customer-identifier [#"Customer #" 1 0]
              :total [#"Subtotal" 0 16]
              :date [#"Date" 0 0 #"Date: (.*)"]
              :invoice-number [#"Invoice #" 0 0 #"Invoice #: (.*)"]
              :account-number [#"Customer #" 0 0 #"Customer #: (.*)"]}
    :parser {:total [:trim-commas-and-remove-dollars nil]
             :date [:clj-time "MM/dd/yyyy"]}}

   ;; One invoice per row of the first sheet, after skipping the first five rows.
   {:vendor "Mama Lu's Foods"
    :keywords [#"Mama Lu's Foods"]
    :extract (fn [wb vendor]
               (let [first-sheet (first (d/sheet-seq wb))
                     ;; keep only rows whose second cell has a value
                     data-row? (fn [row]
                                 (and row
                                      (d/read-cell (second (d/cell-seq row)))))
                     row->invoice (fn [row]
                                    (let [[_ order-number seq-number date-text description amount]
                                          (map d/read-cell (d/cell-seq row))]
                                      {:customer-identifier (second (re-find #"([^:]*):" description))
                                       :text description
                                       :full-text description
                                       :date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim date-text))
                                       :invoice-number (str order-number "-" (int seq-number))
                                       :total (str amount)
                                       :vendor-code vendor}))]
                 (into []
                       (comp (drop 5)
                             (filter data-row?)
                             (map row->invoice))
                       (d/row-seq first-sheet))))}

   ;; One invoice per cell of the first sheet whose text contains "Invoice";
   ;; the other fields are read from cells at fixed offsets in the same row.
   {:vendor "DVW Commercial"
    :keywords [#"Total for" #"Num"]
    :extract (fn [wb vendor]
               (let [first-sheet (first (d/sheet-seq wb))
                     cell-at (fn [cell dx dy]
                               (d/select-cell (offset cell dx dy) first-sheet))
                     invoice-cell? (fn [cell]
                                     (re-find #"Invoice" (str (d/read-cell cell))))
                     ;; Start three columns to the left of the invoice cell and
                     ;; walk upwards one row at a time until a non-blank cell
                     ;; is found; its value is the customer.
                     customer-above (fn [cell]
                                      (->> (cell-at cell -3 0)
                                           (iterate (fn [above] (cell-at above 0 -1)))
                                           (remove (fn [candidate]
                                                     (str/blank? (d/read-cell candidate))))
                                           first
                                           d/read-cell))
                     cell->invoice (fn [cell]
                                     (let [customer (customer-above cell)]
                                       {:customer-identifier customer
                                        :text customer
                                        :full-text customer
                                        :date (d/read-cell (cell-at cell 2 0))
                                        :invoice-number (d/read-cell (cell-at cell 4 0))
                                        :total (str (d/read-cell (cell-at cell 8 0)))
                                        :vendor-code vendor}))]
                 (into []
                       (comp (filter invoice-cell?)
                             (map cell->invoice))
                       (d/cell-seq first-sheet))))}

   ;; One invoice per first-column cell that follows the "Customer_id" cell,
   ;; provided the cell to its right (the customer) is not blank.
   {:vendor "Chef's Choice Produce Co"
    :keywords [#"Alt_invoice_number"]
    :extract (fn [wb vendor]
               (let [first-sheet (first (d/sheet-seq wb))
                     ;; value of the cell `dx` columns to the right, same row
                     read-right (fn [cell dx]
                                  (d/read-cell (d/select-cell (offset cell dx 0) first-sheet)))
                     header-cell? (fn [cell]
                                    (re-find #"Customer_id" (str (d/read-cell cell))))
                     first-column? (fn [cell]
                                     (zero? (.getColumnIndex cell)))
                     missing-customer? (fn [cell]
                                         (str/blank? (str/trim (or (read-right cell 1) ""))))
                     cell->invoice (fn [cell]
                                     (let [customer (read-right cell 1)]
                                       {:customer-identifier (str/trim customer)
                                        :text customer
                                        :full-text customer
                                        :date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim (read-right cell 5)))
                                        ;; first capture group that matched, if any
                                        :invoice-number (some identity
                                                              (rest (re-find #"^(?:0+([A-Z0-9]+))|([A-Z]+[A-Z0-9]+)"
                                                                             (str/trim (read-right cell 2)))))
                                        :total (str (read-right cell 7))
                                        :vendor-code vendor}))]
                 (into []
                       (comp (drop-while (complement header-cell?))
                             ;; skip the matching cell and the eight after it
                             ;; (presumably the rest of the header row -- confirm)
                             (drop 9)
                             (filter first-column?)
                             (remove missing-customer?)
                             (map cell->invoice)
                             (filter :customer-identifier))
                       (d/cell-seq first-sheet))))}])