;; Files
;; integreat/src/clj/auto_ap/parse/templates.clj
;;
;; 639 lines
;; 26 KiB
;; Clojure

(ns auto-ap.parse.templates
(:require [auto-ap.parse.util :as u]
[clojure.string :as str]))
;; Templates describing how to pull invoice fields out of the text of vendor PDFs.
;; Each entry is a map with these keys:
;;   :vendor        vendor name string
;;   :keywords      vector of regexes identifying the document
;;                  (presumably all must be found in the text -- confirm against the consumer)
;;   :extract       map of field keyword -> regex; each regex contains a capturing group
;;                  around the value of interest
;;   :parser        map of field keyword -> [parser-kind argument]; the kinds used in this
;;                  table are :clj-time (argument is a date format string, or a vector of
;;                  them), :trim-commas and :trim-commas-and-negate (argument nil)
;;   :multi         (optional) regex; presumably used to split the text into separate
;;                  records -- confirm against the consumer
;;   :multi-match?  (optional) regex; presumably selects which of the split pieces are
;;                  records -- confirm against the consumer
;; NOTE(review): the code that consumes this table is not in this file. If it picks the
;; first template whose keywords match, entry order is significant: e.g. the Daylight
;; statement entry comes before the plain Daylight entry, whose keywords are a subset.
(def pdf-templates
[
;; CHEF's WAREHOUSE
{:vendor "CHFW"
:keywords [#"CHEF'S WAREHOUSE"]
:extract {:total #"2 WKS C\.C\.\s+([\d.,]+)"
:customer-identifier #"\n([A-Z][A-Z ]+)\s{2,}"
:date #"\s+([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"\s+[0-9]+/[0-9]+/[0-9]+\s+([0-9]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]}}
;; GGM (Golden Gate Meat)
{:vendor "Golden Gate Meat Company, Inc"
:keywords [#"Golden Gate Meat"]
:extract {:total #"Invoice Total\:\s+\$([\d.,]+)"
:customer-identifier #"Bill To\s*:\s*([\w ]+)\s{2,}"
:date #"Printed:\s+([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"Invoice\s+[^\n]+\n[^\n]+\n\s+([0-9]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; CINTAS
{:vendor "CINTAS"
:keywords [#"CINTAS CORPORATION"]
:extract {:invoice-number #"INVOICE\s#\s+([\d.,]+)"
:customer-identifier #"BILL TO\s*:\s{2,}([\w ]+)\s{2,}"
:date #"INVOICE DATE\s*\n.*\s+([0-9]+/[0-9]+/[0-9]+)"
:total #"INVOICE TOTAL\s+([0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]}
:multi #"\f\f"}
;; CARBONIC
{:vendor "Carbonic Service Inc"
:keywords [#"CARBONIC SERVICE INC"]
:extract {:invoice-number #"Invoice #\s*\n\s*[\w\.]+\s+[\w\./]+(.*)\s*\n"
:date #"Invoice #\s*\n\s*[\w\.]+\s+([\w\./]+)"
:total #"Total\s+\$([0-9.,]+)"
:account-number #"Account #\s+(\d+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
;; DVW
{:vendor "DVW Commercial"
:keywords [#"DVW Commercial"]
:extract {:date #"\s*([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Ship To:.*?\n.*?\s{2,}(.*)"
:invoice-number #"Invoice\s*\n\s*[S]?([\w\./]+)*"
:total #"Total:\s+\$\s*([0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]}}
;; DAYLIGHT FOODS STATEMENT
{:vendor "Daylight Foods"
:keywords [#"DAYLIGHT FOODS" #"Customer Statement"]
:extract {:date #"^.*?([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Phone:.*\n+\s+(.*)"
:invoice-number #"\s+(\w+)"
:total #"([\-]?[0-9]+\.[0-9]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"^\s*[A-Z]\d+\s+([0-9]+/[0-9]+/[0-9]+)"}
;; DAYLIGHT FOODS
{:vendor "Daylight Foods"
:keywords [#"DAYLIGHT FOODS"]
:extract {:date #"\n\s*Date[^\n]+\n\s*([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To:.*\n\s*(.*?)\s{2,}"
:invoice-number #"Invoice\s([\w\./]+)*"
:total #"Total Invoice\s+([\-]?[0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; SOUTHBAY FRESH
{:vendor "Southbay Fresh Produce"
:keywords [#"SOUTH BAY FRESH PRODUCE"]
:extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"To:[^\n]*\n\s+([A-Za-z' ]+)\s{2}"
:invoice-number #"INV #\/(\d+)"
:total #"\$([0-9.]+)\."}
:parser {:date [:clj-time "MM/dd/yyyy"]}
:multi #"\n"
:multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+INV "}
;; PFG - LEDYARD
{:vendor "Performance Food Group - LEDYARD"
:keywords [#"performancefoodservice"]
:extract {:date #"DELIVER TO[^\n]+\n.+?(?=[0-9]+/[0-9]+/[0-9]+)([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"DELIVER TO[^\n]+\n\s*[\S ]+?(?=\s{2,}([\S ]+?)\s{2,})" ;; ([\S ]+)\s{2,}
:invoice-number #"DELIVER TO[^\n]+\n.+?(?=\d+)(\d+)\s*\n"
:total #"([0-9.\-]+)\s+Status Code"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; SOUTHERN GLAZER'S
{:vendor "Southern Glazers"
:keywords [#"Southern Glazer's"]
:extract {:date #"INVOICE DATE(?s:.*)(?= (?:[0-9]+/[0-9]+/[0-9]+)\s+([0-9]+/[0-9]+/[0-9]+)) "
:invoice-number #"INVOICE\n(?:.*?)(?=\d{4,})(\d+)"
:total #"PAY THIS AMOUNT(?s:.*)(?= ([0-9,]+\.[0-9]{2}))"
:account-number #"ACCOUNT #.*\n.*\n\s+(\d+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}
:multi #"\f"
:multi-match? #"PAY THIS AMOUNT"}
;; GOLDEN BRANDS
{:vendor "Golden Brands San Jose"
:keywords [#"GOLDEN BRANDS"]
:extract {:date #"(?:.*\n){4}(.*)" ;; matches four whole lines, then captures the rest of the following line
:account-number #"Account:\s*(.*?)\n"
:invoice-number #"Invoice#: (\d+)"
:total #"Invoice Total\s+([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "EEE MMM dd, yyyy HH:mm aa"]
:total [:trim-commas nil]}}
;; BIGOLI FRESH PASTA
{:vendor "Bigoli Fresh Pasta"
:keywords [#"bigolifreshpasta.com"]
:extract {:date #"INVOICE #.*?\n.*?([0-9]+/[0-9]+/[0-9]+)" ;; first date on the line after the "INVOICE #" line
:customer-identifier #"BILL TO.*\n\s+(.*?)\s{2,}"
:invoice-number #"INVOICE #.*?\n(\d+)"
:total #" TOTAL\s+([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; Del Monte Meats
{:vendor "Del Monte Meat Co"
:keywords [#"Del Monte"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To:(?:.*?(?=\s{2,}))(.*)-"
:invoice-number #"Invoice\s+([0-9]+)"
:total #"([0-9\.,]+)$"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}
:multi #"\n"
:multi-match? #"^\s{1,2}[0-9]+"}
;; Allen Brothers West (same patterns as the Del Monte Meats entry; only :vendor and :keywords differ)
{:vendor "Allen Brothers West"
:keywords [#"ALLEN BROTHERS"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To:(?:.*?(?=\s{2,}))(.*)-"
:invoice-number #"Invoice\s+([0-9]+)"
:total #"([0-9\.,]+)$"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}
:multi #"\n"
:multi-match? #"^\s{1,2}[0-9]+"}
;; Royal Hawaiian
{:vendor "Royal Hawaiian"
:keywords [#"ROYAL HAWAIIAN"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO.*\n\s+(.*?(?=(\s{2,}|\()))"
:invoice-number #"Invoice#:\s+([0-9]+)"
:total #"INV TOTAL:\s+([0-9\.,]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; WORLDWIDE PRODUCE
{:vendor "Worldwide Produce"
:keywords [#"WORLDWIDE PRODUCE"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"(?:Invoice|Adjustment) No\.\s+(\d+)"
:total #"Total \S*\s+([0-9\.,\-]+)"
:account-number #"Customer No.\s+(.*?)\s{2}"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}
;; Pattern built from a single form feed character via Java interop; the
;; CASE_INSENSITIVE flag has no effect on a form feed.
:multi (. java.util.regex.Pattern (compile (-> \formfeed str) java.util.regex.Pattern/CASE_INSENSITIVE))
:multi-match? #"(Total\s+[0-9\.]+|Total Order)"
}
;; AUTO-CHLOR
{:vendor "Auto-Chlor"
:keywords [#"AUTO-CHLOR"]
:extract {:date #"DATE : ([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"INVOICE# :\s+([0-9]+)"
:total #"TOTAL DUE :\s+\$([0-9,.]+)"
:account-number #"CUSTOMER#\s+:\s+(\d+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; Cheetah
{:vendor "Cheetah"
:keywords [#"Truck name" #"Stop number"]
:extract {:date #"Delivery date: ([0-9\-]+)"
:customer-identifier #"Shipping.*\n(.*)"
:invoice-number #"Invoice #: (\d+)"
:total #"TOTAL:.*?\$(.*)"}
:parser {:date [:clj-time "yyyy-MM-dd"]
:total [:trim-commas nil]}}
;; Classic Wines (keyed on a phone-number fragment rather than the vendor name)
{:vendor "Classic Wines"
:keywords [#"585-9463"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Customer Id:.*\n.*\n.*\n(.*)"
:invoice-number #"^.{23}\s+(\w+)"
:total #"\$([0-9\.,]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}
:multi #"\n"
:multi-match? #"^\s+.*?\d{6,}.*?\$"}
;; C & L
{:vendor "C&L Produce"
:keywords [#"440 Franklin Street"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*\n.*\n\s+(.*?)\s{2,}"
:invoice-number #"Invoice #.*\n.*\n.*?(\d{5,})\n"
:total #"Total\s+\$([0-9\.,]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
;; GENERAL PRODUCE - layout keyed on "INVOICE DATE"
{:vendor "General Produce Company"
:keywords [#"generalproduce.com" #"INVOICE DATE"]
:extract {:date #"INVOICE DATE.*\n.*?([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"SHIP TO.*\n.*\n\s+(.*?)\s{2,}"
:invoice-number #"INVOICE NO.*\n.*?(\d{7,}?)\s+"
:total #"TOTAL\s+([\-0-9,]+\.[0-9]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; GENERAL PRODUCE - layout keyed on "FC ="; :multi-match? targets lines that begin
;; with a dd-MMM-yy style date followed by IV or CM
{:vendor "General Produce Company"
:keywords [#"generalproduce.com" #"FC ="]
:extract {:date #"^ (\d{2}-\w{3}-\d{2})"
:account-number #"STATEMENT DATE.*?ACCOUNT NO\.\n.*?\n?.*?(\d+)\n"
:invoice-number #".*?\s{2,}.*?\s{2,}(.*?)\s{2,}"
:total #"([\-0-9,]+\.[0-9]+)\s*$"}
:multi #"\n"
:multi-match? #"^ \d{2}-\w{3}-\d{2}\s+(IV|CM)"
:parser {:date [:clj-time "dd-MMM-yy"]
:total [:trim-commas-and-negate nil]}}
;;; credits don't have the same format
{:vendor "General Produce Company"
:keywords [#"1330 NORTH B"]
:extract {:date #"DATE.*\n.*\n.*?([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"CREDIT NO.*\n.*\n.*?(\d{5,}?)\s+"
:account-number #"CUST NO.*\n.*\n\s+(\d+)"
:total #"TOTAL:\s+\|\s*(.*)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; Same patterns as the previous credit entry, keyed on a different keyword
{:vendor "General Produce Company"
:keywords [#"916-552-6495"]
:extract {:date #"DATE.*\n.*\n.*?([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"CREDIT NO.*\n.*\n.*?(\d{5,}?)\s+"
:account-number #"CUST NO.*\n.*\n\s+(\d+)"
:total #"TOTAL:\s+\|\s*(.*)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; Young's Market Co new statement
{:vendor "Youngs Market"
:keywords [#"(YOUNG'S MARKET COMPANY|Young.*Statement)"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Customer Name +([\w ]+)"
:invoice-number #"\s{2,}([0-9]+)"
:total #"\$?([0-9,]+\.[0-9]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}
:multi #"\n"
:multi-match? #"^[0-9]+.*\$?([0-9,]+\.[0-9]+).*\$?([0-9,]+\.[0-9]+)"}
;; Young's Market Co - INVOICE
;; The #_#_ below makes the reader discard the next two forms, so the
;; :customer-identifier pattern is disabled for this entry.
{:vendor "Youngs Market"
:keywords [#"P.O.Box 743564"]
:extract {:date #"(?:INVOICE|CREDIT) DATE\n(?:.*?)(\S+)\n"
#_#_:customer-identifier #"(?:INVOICE|CREDIT) DATE\n [0-9]+\s+(.*?)\s{2,}"
:account-number #"Store Number:\s+(\d+)"
:invoice-number #"(?:INVOICE|CREDIT) DATE\n(?:.*?)\s{2,}(\d+?)\s+\S+\n"
:total #"Net Amount(?:.*\n){4}(?:.*?)([\-]?[0-9\.]+)\n"}
:parser {:date [:clj-time "dd-MMM-yy"]
:total [:trim-commas-and-negate nil]}}
;; WINE WAREHOUSE
{:vendor "Wine Warehouse"
:keywords [#"WINE WAREHOUSE" #"Bottle prices include"]
:extract {:date #"INVOICE DATE\s+([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"INV #\s+(\d+)"
:total #"PLEASE PAY THIS AMOUNT\s+([0-9,]+\.[0-9]{2})"
:account-number #"CUSTOMER NUMBER\s+(\d+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; WINE WAREHOUSE 3 (credit memo; the captured total includes its trailing "-")
{:vendor "Wine Warehouse"
:keywords [#"Wine Warehouse" #"PLEASE APPLY CREDIT"]
:extract {:date #"Credit Memo Number/Date\s+\d+\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Ship-To-Party.*\n(.*?)\s{2,}"
:invoice-number #"Credit Memo Number/Date\s+(\d+)"
:total #"Total\s+([0-9]+\.[0-9]{2}-)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; WINE WAREHOUSE CREDIT 4 (same patterns as WINE WAREHOUSE 3, different keywords)
{:vendor "Wine Warehouse"
:keywords [#"WINE WAREHOUSE" #"Credit Memo"]
:extract {:date #"Credit Memo Number/Date\s+\d+\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Ship-To-Party.*\n(.*?)\s{2,}"
:invoice-number #"Credit Memo Number/Date\s+(\d+)"
:total #"Total\s+([0-9]+\.[0-9]{2}-)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; WINE WAREHOUSE 2
{:vendor "Wine Warehouse"
:keywords [#"WINE WAREHOUSE" #"Bill-to-Party"]
:extract {:date #"Invoice date\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill-to-Party.*\n(.*?)\s{2,}"
:invoice-number #"Invoice number\s+(\d+)"
:total #"Gross\s+([0-9]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; THE WATER PROS
{:vendor "The Water Pros"
:keywords [#"The Water Pros, Inc"]
:extract {:date #"DATE\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO.*\n(.*?)(?=\s{2,})"
:invoice-number #"INVOICE #\s+(\w+)"
:total #" TOTAL\s+([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; REGAL
{:vendor "Regal Wine Co"
:keywords [#"REGAL WINE"]
:extract {:date #"INVOICE DATE.*\n\n(?:.*?)([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"INVOICE\n(.*?)\s{2,}"
:invoice-number #"INVOICE NUMBER.*\n\n(?:.*?)(\d+)"
:total #"Total Amount Due(?:.*?)([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
;; ALSCO
{:vendor "Alsco"
:keywords [#"Alsco"]
:extract {:date #"Invoice Date:\s+(.*)"
:customer-identifier #"Invoice F o r(?:.*?)\n\s+(.*?)\s{2,}"
:invoice-number #" (\S+)\n\s+Invoice Date"
:total #"Invoice Total\s+\$([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MMM dd yyyy"]
:total [:trim-commas nil]}}
;; SUNCREST
{:vendor "Suncrest USA Inc"
:keywords [#"Suncrest.*Invoice"]
:extract {:date #"Date.*\n\s*\n(?:.*?)([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To(?:.*?)\n\n(.*?)\s{2,}"
:invoice-number #"Invoice #.*\n\s*\n.*? (\d{3,})"
:total #"Balance Due\s+\$([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; PACIFIC SEAFOOD
{:vendor "Pacific Seafood"
:keywords [#"(pacseafood|PACIFIC FRESH)"]
:extract {:date #"DATE(?:.*\n.*(?=([0-9]+/[0-9]+/[0-9]+)))([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"DELIVER TO:(?:.*\n)(.*?)(?=\s{2})"
:invoice-number #"INVOICE NO\.\n(?:.*?(?= [0-9]+\n)) ([0-9]+)"
:total #"TOTAL\n\s+([0-9,\.]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; P&R
{:vendor "P & R PAPER SUPPLY CO"
:keywords [#"PAPER SUPPLY COMPANY"]
:extract {:date #"Invoiced.*\n\s+\S+\s+(\S+)"
:customer-identifier #"Bill To.*\n\s*(.*?)\s{2,}"
:invoice-number #"Invoice#.*\n.*\n.*?(\S+)\s+\d+\n"
:total #"INVOICE TOTAL\s+([\-]?[\d,]+\.\d+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; SUNCREST STATEMENT
{:vendor "Suncrest USA Inc"
:keywords [#"Suncrest.*\n.*Statement"]
:extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"To:(?:.*?)\n\s*(.*?)\s{2,}"
:invoice-number #"INV #(\d+)"
:total #"Orig\. Amount \$([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}
:multi #"\n"
:multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+INV "}
;; US FOODS (the date parser is given a vector of two candidate formats)
{:vendor "US Foods"
:keywords [#"US Foods"]
:extract {:date #"INVOICE DATE.*\n+.*?(?=([0-9]+/[0-9]+/[0-9]+))([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,}
:invoice-number #"INVOICE NUMBER[^\n]+\n\n\d+\s+(\d+)"
:total #"(?:DELIVERED AMOUNT|PLEASE REMIT|AMOUNT).*?\$([0-9.,]+( CR)?)\n"}
:parser {:date [:clj-time ["MM/dd/yyyy" "yyyy/MM/dd"]]
:total [:trim-commas-and-negate nil]}}
;; SYSCO
;; The #_#_ below makes the reader discard the next two forms, so the
;; :customer-identifier pattern is disabled for this entry.
{:vendor "Sysco"
:keywords [#"SYSCO"]
:extract {:date #"INVOICE NUMBER[^\n]+\n([^\n]+)\n"
#_#_:customer-identifier #"INVOICE NUMBER[^\n]+\n[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,}
:account-number #"INVOICE NUMBER[^\n]+\n[^\n]+\n.*?(?=[\d]{6,7})(\d{6,7})"
:invoice-number #"INVOICE NUMBER[^\n]+\n[^\n]+\n.*?(?=[\d]{9})(\d{9})"
:total #"\s{2,}INVOICE\s{2,}.*?(?=TOTAL)TOTAL\s+([0-9.]+[\-]?)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; LE BOULANGER
{:vendor "Le Boulanger"
:keywords [#"Le Boulanger"]
:extract {:date #"Invoice Date: ([^\n]+)\n"
:customer-identifier #"Ship to\n+\s+([\S ]+?)(?=\s{2,})"
:invoice-number #"Invoice No: ([^\n]+)\n"
:total #" Total:\s+([\d\.]+)"}
:parser {:date [:clj-time "MMM dd, yyyy"]}}
;; A&B (layout whose lines carry a date, a number, and INV or C/M)
{:vendor "A&B Produce"
:keywords [#"ABProduce"]
:extract {:date #"^\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO:[^\n]+\n[^\n]+\n[^\n]+\n(.*)\s{2,}"
:invoice-number #"(\d+)\s+(?:INV|C/M)"
:total #" (?:INV|C/M)\s+([\d\.\-]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"^\s+[0-9]+/[0-9]+/[0-9]+\s+\d+\s+(INV|C/M)\s+"}
;; A&B Invoice
{:vendor "A&B Produce"
:keywords [#"415-656-0254"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO.*\n.*?\s{2,}(.*?)\s{2,}"
:invoice-number #"INVOICE#\n.*?(\d+)\n"
:total #"INV TOTAL \s+([\d\.\-]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; American Provisions
{:vendor "American Paper & Provisions"
:keywords [#"imperialdade"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*\n\s*(.*?)\s{2,}"
:invoice-number #"INVOICE\n(?:.*?)(\s{2,}\d+)"
:total #"AMOUNT DUE:\s+([\d\.,\-]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; Ocean Queen statement
{:vendor "Ocean Queen"
:keywords [#"Ocean Queen USA" #"Statement"]
:extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"To:.*\n\s*(.*?)\s{2,}"
:invoice-number #"INV #(\d+)"
:total #" ([\d\.,\-]+)"}
:multi #"\n"
:multi-match? #"^([0-9]+/[0-9]+/[0-9]+).*INV"
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; Ocean Queen
{:vendor "Ocean Queen"
:keywords [#"Ocean Queen USA"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*\n\s*(.*?)\s{2,}"
:invoice-number #"Invoice #\n.*\n.*?(\d+)\n"
:total #"Total\s+\$([\d\.,\-]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; CHEF's CHOICE
{:vendor "Chef's Choice Produce Co"
:keywords [#"(2170 MARTIN AVENUE|213-3886)"]
:extract {:date #"([0-9/]{10,10})"
:customer-identifier #"\n B\s+([\S ]+?)(?=\s{2,}I) "
:invoice-number #"^0*([0-9]+)"
:total #"INVOICE\s+([\d\.]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]} ;; may want to try two approaches [:clj-time ["MM/dd/yyyy" "MM1dd1yyyy"]]
:multi #"\n"
:multi-match? #"\s+INVOICE\s+"}
;; Blue Marine
{:vendor "Blue Marine"
:keywords [#"Blue Marine, Inc"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Total (.*?)\s{2,}"
:invoice-number #"(\d{6,})"
:total #"([\-0-9,\.]+)$"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"(Invoice|Credit Memo)\s{2,}"}
;; FRESH AND BEST
;; statement
{:vendor "Fresh and Best Produce"
:keywords [#"freshbestproduce" #"Statement"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To[^\n]+\n([A-Za-z ']+)"
:invoice-number #"INV #(\d+)\."
:total #"Amount\s+\$([0-9\.]+)\."}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"INV #"}
;; FRESH AND BEST (layout without the "Statement" keyword)
{:vendor "Fresh and Best Produce"
:keywords [#"freshbestproduce"]
:extract {:date #"\n\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To[^\n]+\n([A-Za-z ']+)"
:invoice-number #"\n\s+[0-9/]+\s+(\d+)"
:total #"Balance Due\s+\$([0-9\.]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]}}
;; PFG - ROMA LOOK 1
{:vendor "Performance Food Group - ROMA"
:keywords [#"inquiries call 1-800-233-6211"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})"
:invoice-number #"^\s+([\dA-Z]+)"
:total #"([\d\.,\-]+\.[\d\-]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"^\s+[\d]{6,8}\s+\d+"}
;; ACME BREAD
{:vendor "Acme Bread"
:keywords [#"acmebread\.com"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Print Date.*\n.*\n(.*)"
:invoice-number #"^\s*(\d+)"
:total #"\s{2,}(\d+\.\d{2})\s{2,}"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"^\s*\d+\s+([0-9]+/[0-9]+/[0-9]+)"}
;; PFG - ROMA
{:vendor "Performance Food Group - ROMA"
:keywords [#"Performance Food Group, Inc\n\f"]
:extract {:date #"Date: ([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})"
:invoice-number #"INVOICE NO.\s+ ([\d]+)"
:total #"([\d\.,]+)\s+INVOICE TOTAL"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
;; JFC
{:vendor "JFC International"
:keywords [#"48490 MILMONT DRIVE"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"SOLD\s+([\S ]+?)(?=(\s{2,}|\n))"
:invoice-number #"(\S+)\s+(?=[0-9]+/[0-9]+/[0-9]+)"
:total #"(?:INVOICE|TOTAL|CREDIT)\s+([\d\.,\-]+\.[\d\-]+( CR)?)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; ROMA BAKERY
{:vendor "Roma Bakery Inc."
:keywords [#"Roma Bakery Inc"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*\n\s+(.*?)\s{2,}"
:invoice-number #"Invoice (\d+)"
:total #"Total\s+([\d\-\.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; KAEL FOODS
{:vendor "Kael Foods"
:keywords [#"kaelfoods.com"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*\n\s+(.*?)\s{2,}"
:invoice-number #"INVOICE 0*(\d+)"
:total #"TOTAL:\s+\$([\d\-\.,]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; Starter Bakery
{:vendor "Starter Bakery"
:keywords [#"starterbakery.com"]
:extract {:date #"INVOICE DATE:\s+(.*?)\s{2,}"
:customer-identifier #"BILL TO:.*\n\s+(.*?)\s{2,}"
:invoice-number #"Invoice.*?(\d+)"
:total #"Total:.*?([\d\-,]+\.\d{2,2}+)"}
:parser {:date [:clj-time "MMMM dd, yyyy"]
:total [:trim-commas-and-negate nil]}}
;; Trimark
{:vendor "TriMark R.W. Smith"
:keywords [#"TriMark"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To\s+(.*?)\s{2,}"
:invoice-number #"Invoice #\n.*?([\d\-]+)\n"
:total #"Invoice Total\s+([\d\-,]+\.\d{2,2}+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}])
;; Templates for vendors whose documents are spreadsheets. Unlike the regex maps
;; used for PDFs, :extract here is a function of [sheet vendor] that returns a
;; vector with one invoice map per usable row of `sheet`.
(def excel-templates
  [{:vendor "Mama Lu's Foods"
    :keywords [#"Mama Lu's Foods"]
    :extract (fn [sheet vendor]
               (let [;; A row is usable when it is non-empty and its second cell is non-empty.
                     usable-row? (fn [row]
                                   (and (seq row)
                                        (not-empty (second row))))
                     ;; The first cell is ignored; the next five are, in order: customer
                     ;; order number, number, date, name/description, amount.
                     row->invoice (fn [[_ customer-order-number seq-no invoice-date description amount]]
                                    {;; text before the first ":" in the description
                                     :customer-identifier (second (re-find #"([^:]*):" description))
                                     :text description
                                     :full-text description
                                     :date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim invoice-date))
                                     :invoice-number (str customer-order-number "-" (Integer/parseInt seq-no))
                                     :total (str amount)
                                     :vendor-code vendor})]
                 ;; Skip the first five rows, then convert every usable row.
                 (into []
                       (comp (drop 5)
                             (filter usable-row?)
                             (map row->invoice))
                       sheet)))}])