Files
integreat/src/clj/auto_ap/parse/templates.clj
2025-12-04 11:18:36 -08:00

827 lines
35 KiB
Clojure

(ns auto-ap.parse.templates
(:require [auto-ap.parse.util :as u]
[auto-ap.logging :as alog]
[clj-time.core :as time]
[clojure.string :as str]
[auto-ap.time :as atime]))
(def pdf-templates
[;; CHEF's WAREHOUSE
{:vendor "CHFW"
:keywords [#"CHEF'S WAREHOUSE"]
:extract {:total #"2 WKS C\.C\.\s+([\d.,]+)"
:customer-identifier #"\n([A-Z][A-Z ]+)\s{2,}"
:date #"\s+([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"\s+[0-9]+/[0-9]+/[0-9]+\s+([0-9]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]}}
{:vendor "Gstar Seafood"
:keywords [#"G Star Seafood"]
:extract {:total #"Total\s{2,}([\d\-,]+\.\d{2,2}+)"
:customer-identifier #"(.*?)(?:\s+)Invoice #"
:date #"Invoice Date\s{2,}([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"Invoice #\s+(\d+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; GGM
{:vendor "Golden Gate Meat Company, Inc"
:keywords [#"Golden Gate Meat"]
:extract {:total #"Invoice Total\:\s+\$([\d.,]+)"
:customer-identifier #"Bill To\s*:\s*([\w ]+)\s{2,}"
:date #"Printed:\s+([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"Invoice\s+[^\n]+\n[^\n]+\n\s+([0-9]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; CINTAS
{:vendor "CINTAS"
:keywords [#"CINTAS CORPORATION"]
:extract {:invoice-number #"INVOICE\s#\s+([\d.,]+)"
:customer-identifier #"BILL TO\s*:\s{2,}([\w ]+)\s{2,}"
:date #"INVOICE DATE\s*\n.*\s+([0-9]+/[0-9]+/[0-9]+)"
:total #"INVOICE TOTAL\s+([0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]}
:multi #"\f\f"}
;; IMPACT PAPER
{:vendor "Impact Paper & Ink LTD"
:keywords [#"650-692-5598"]
:extract {:total #"Total Amount\s+\$([\d\.\,\-]+)"
:account-number #"CUST. #\n.*?/\d{4,}\s+(.*?)\n"
:date #"([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"[0-9]+/[0-9]+/[0-9]+\s+(\d+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; CARBONIC
{:vendor "Carbonic Service Inc"
:keywords [#"CARBONIC SERVICE INC"]
:extract {:invoice-number #"Invoice #\s*\n\s*[\w\.]+\s+[\w\./]+(.*)\s*\n"
:date #"Invoice #\s*\n\s*[\w\.]+\s+([\w\./]+)"
:total #"Total\s+\$([0-9.,]+)"
:account-number #"Account #\s+(\d+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
;; DVW
{:vendor "DVW Commercial"
:keywords [#"DVW Commercial"]
:extract {:date #"\s*([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Ship To:.*?\n.*?\s{2,}(.*)"
:invoice-number #"Invoice\s*\n\s*[S]?([\w\./]+)*"
:total #"Total:\s+\$\s*([0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]}}
;; DAYLIGHT FOOD STATEMENT
{:vendor "Daylight Foods"
:keywords [#"DAYLIGHT FOODS" #"Customer Statement"]
:extract {:date #"^.*?([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Phone:.*\n+\s+(.*)"
:invoice-number #"\s+(\w+)"
:total #"([\-]?[0-9]+\.[0-9]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"^\s*[A-Z]\d+\s+([0-9]+/[0-9]+/[0-9]+)"}
;; DAYLIGHT FOOD
{:vendor "Daylight Foods"
:keywords [#"DAYLIGHT FOODS"]
:extract {:date #"\n\s*Date[^\n]+\n\s*([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To:.*\n\s*(.*?)\s{2,}"
:account-number #"Customer\s+(.*?)\s+/"
:invoice-number #"Invoice\s([\w\./]+)*"
:total #"Total Invoice\s+([\-]?[0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
{:vendor "Ben E. Keith"
:keywords [#"BEN E. KEITH"]
:extract {:date #"Customer No Mo Day Yr.*?\n.*?\d{5,}\s{2,}(\d+\s+\d+\s+\d+)"
:customer-identifier #"Customer No Mo Day Yr.*?\n.*?(\d{5,})"
:invoice-number #"Invoice No.*?\n.*?(\d{8,})"
:total #"Total Invoice.*?\n.*?([\-]?[0-9]+\.[0-9]{2,})"}
:parser {:date [:month-day-year nil]
:total [:trim-commas-and-negate nil]}}
;; SOUTHBAY FRESH
{:vendor "Southbay Fresh Produce"
:keywords [#"(SOUTH BAY FRESH PRODUCE|SOUTH BAY PRODUCE)"]
:extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"To:[^\n]*\n\s+([A-Za-z' ]+)\s{2}"
:invoice-number #"INV #\/(\d+)"
:total #"\$([0-9.]+)\."}
:parser {:date [:clj-time "MM/dd/yyyy"]}
:multi #"\n"
:multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+INV "}
;; DON VITO
{:vendor "Don Vito Ozuna Food Corp"
:keywords [#"408-465-2010"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*?\n(.*?)\s{2,}"
:invoice-number #"(?:[0-9]+/[0-9]+/[0-9]+)\s{2,}(\d+)"
:total #"Please remit payment to\s{2,}\$([\-0-9.]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]}}
;; DON VITO STATEMENT
{:vendor "Don Vito Ozuna Food Corp"
:keywords [#"Don Vito Ozuna Food Corp.*?\n.*?Statement"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"To:.*?\n\s*(.*)?\s{2,}"
:invoice-number #"INV #(\d+)"
:total #"Amount \$([\d\-\.]+?)\.\s{2,}"}
:parser {:date [:clj-time "MM/dd/yyyy"]}
:multi #"\n"
:multi-match? #"\d+/\d+/\d+.*?INV"}
;; PFG - LEDYARD
{:vendor "Performance Food Group - LEDYARD"
:keywords [#"performancefoodservice"]
:extract {:date #"DELIVER TO[^\n]+\n.+?(?=[0-9]+/[0-9]+/[0-9]+)([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"DELIVER TO[^\n]+\n\s*[\S ]+?(?=\s{2,}([\S ]+?)\s{2,})" ;; ([\S ]+)\s{2,}
:invoice-number #"DELIVER TO[^\n]+\n.+?(?=\d+)(\d+)\s*\n"
:total #"([0-9.\-]+)\s+Status Code"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; SOUTHERN GLAZER'S
{:vendor "Southern Glazers"
:keywords [#"Southern Glazer's"]
:extract {:date #"INVOICE DATE(?s:.*)(?= (?:[0-9]+/[0-9]+/[0-9]+)\s+([0-9]+/[0-9]+/[0-9]+)) "
:invoice-number #"(?s)INVOICE\n(?:.*?)(?=\d{4,})(\d+)"
:total #"PAY THIS AMOUNT(?s:.*)(?= ([0-9,]+\.[0-9]{2}))"
:account-number #"ACCOUNT #.*\n.*?[\n]?\s+(\d+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}
:multi #"\f"
:multi-match? #"PAY THIS AMOUNT"}
;; GOLDEN BRANDS
;; Template for "Golden Brands San Jose", selected by the GOLDEN BRANDS keyword.
{:vendor "Golden Brands San Jose"
:keywords [#"GOLDEN BRANDS"]
;; :date captures from a weekday abbreviation followed by a space through the
;; end of that line (`.` does not cross newlines here).
:extract {:date #"(?:.*)((?:Mon|Tue|Wed|Thu|Thurs|Fri|Sat|Sun) .*)" ;; TODO: how to go to a specific line?
:account-number #"Account:\s*(.*?)(\s{2,}|\n)"
:invoice-number #"Invoice#: (\d+)"
:total #"Invoice Total\s+([\-0-9,]+\.[0-9]{2})"}
;; NOTE(review): the format pairs `HH` with the AM/PM marker `aa`. In
;; Joda-Time style patterns `HH` is the 24-hour hour-of-day and `hh` is the
;; 12-hour clock hour -- if the printed time is 12-hour this probably wants
;; `hh`; confirm against a sample invoice.
:parser {:date [:clj-time "EEE MMM dd, yyyy HH:mm aa"]
:total [:trim-commas nil]}}
;; BIGOLI FRESH PASTA
{:vendor "Bigoli Fresh Pasta"
:keywords [#"bigolifreshpasta.com"]
:extract {:date #"Invoice Date.*?([0-9]+/[0-9]+/[0-9]+)" ;; TODO: how to go to a specific line?
:customer-identifier #"BILL TO.*\n\s*(.*?)\s{2,}"
:invoice-number #"Invoice #.*?(\d+)"
;; The `$` before the amount is optional; commas are allowed in the capture.
:total #" BALANCE DUE\s+(?:\$)?([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; Del Monte Meats
{:vendor "Del Monte Meat Co"
:keywords [#"Del Monte"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To:(?:.*?(?=\s{2,}))(.*)-"
:invoice-number #"Invoice\s+([0-9]+)"
:total #"([0-9\.,]+)$"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}
:multi #"\n"
:multi-match? #"^\s{1,2}[0-9]+"}
{:vendor "Allen Brothers West"
:keywords [#"ALLEN BROTHERS"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To:(?:.*?(?=\s{2,}))(.*)-"
:invoice-number #"Invoice\s+([0-9]+)"
:total #"([0-9\.,]+)$"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}
:multi #"\n"
:multi-match? #"^\s{1,2}[0-9]+"}
;; Royal Hawaiian
{:vendor "Royal Hawaiian"
:keywords [#"ROYAL HAWAIIAN"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO.*\n\s+(.*?(?=(\s{2,}|\()))"
:invoice-number #"Invoice#:\s+([0-9]+)"
:total #"INV TOTAL:\s+([0-9\.,]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
;; WORLDWIDE PRODUCE
;; Template for "Worldwide Produce"; covers both "Invoice No." and
;; "Adjustment No." documents (see :invoice-number).
{:vendor "Worldwide Produce"
:keywords [#"WORLDWIDE\s+PRODUCE"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"(?:Invoice|Adjustment) No\.\s+(\d+)"
:total #"Total \S*\s+([0-9\.,\-]+)"
:account-number #"Customer No.\s+(.*?)\s{2}"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}
;; Builds a java.util.regex.Pattern from the one-character string containing
;; a form feed. CASE_INSENSITIVE has no effect on that character, so this
;; matches the same text as the literal #"\f" used by other templates.
:multi (. java.util.regex.Pattern (compile (-> \formfeed str) java.util.regex.Pattern/CASE_INSENSITIVE))
:multi-match? #"(Total\s+[0-9\.]+|Total Order)"}
;; AUTO-CHLOR
{:vendor "Auto-Chlor"
:keywords [#"AUTO-CHLOR"]
:extract {:date #"DATE : ([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"INVOICE# :\s+([0-9]+)"
:total #"TOTAL DUE :\s+\$([0-9,.]+)"
:account-number #"CUSTOMER#\s+:\s+(\d+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; Cheetah
{:vendor "Cheetah"
:keywords [#"Delivery date: [\d\-]+\s{2,}"]
:extract {:date #"Delivery date: ([0-9\-]+)"
:customer-identifier #"Shipping.*\n(.*)"
:invoice-number #"Invoice #: (\d+)"
:total #"TOTAL:.*?\$(.*)"}
:parser {:date [:clj-time "yyyy-MM-dd"]
:total [:trim-commas nil]}}
;; Classic Wines
{:vendor "Classic Wines"
:keywords [#"585-9463"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Customer Id:.*\n.*\n.*\n(.*)"
:invoice-number #"^.{23}\s+(\w+)"
:total #"\$([0-9\.,]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}
:multi #"\n"
:multi-match? #"^\s+.*?\d{6,}.*?\$"}
;; C & L
{:vendor "C&L Produce"
:keywords [#"440 Franklin Street"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*\n.*\n\s+(.*?)\s{2,}"
:invoice-number #"Invoice #.*\n.*\n.*?(\d{5,})\n"
:total #"Total\s+\$([0-9\.,]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
{:vendor "General Produce Company"
:keywords [#"generalproduce.com" #"INVOICE DATE"]
:extract {:date #"INVOICE DATE.*\n.*?([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"SHIP TO.*\n.*\n\s+(.*?)\s{2,}"
:invoice-number #"INVOICE NO.*\n.*?(\d{7,}?)\s+"
:total #"TOTAL\s+([\-0-9,]+\.[0-9]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
{:vendor "General Produce Company"
:keywords [#"generalproduce.com" #"FC ="]
:extract {:date #"^ (\d{2}-\w{3}-\d{2})"
:account-number #"STATEMENT DATE.*?ACCOUNT NO\.\n.*?\n?.*?(\d+)\n"
:invoice-number #".*?\s{2,}.*?\s{2,}(.*?)\s{2,}"
:total #"([\-0-9,]+\.[0-9]+)\s*$"}
:multi #"\n"
:multi-match? #"^ \d{2}-\w{3}-\d{2}\s+(IV|CM)"
:parser {:date [:clj-time "dd-MMM-yy"]
:total [:trim-commas-and-negate nil]}}
;;; credits don't have the same format
{:vendor "General Produce Company"
:keywords [#"1330 NORTH B"]
:extract {:date #"DATE.*\n.*\n.*?([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"CREDIT NO.*\n.*\n.*?(\d{5,}?)\s+"
:account-number #"CUST NO.*\n.*\n\s+(\d+)"
:total #"TOTAL:\s+\|\s*(.*)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
{:vendor "General Produce Company"
:keywords [#"916-552-6495"]
:extract {:date #"DATE.*\n.*\n.*?([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"CREDIT NO.*\n.*\n.*?(\d{5,}?)\s+"
:account-number #"CUST NO.*\n.*\n\s+(\d+)"
:total #"TOTAL:\s+\|\s*(.*)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; Young's Market Co new statement
{:vendor "RNDC"
:keywords [#"(YOUNG'S MARKET COMPANY|Young.*Statement)"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Customer Name +([\w ]+)"
:invoice-number #"\s{2,}([0-9]+)"
:total #"\$?([0-9,]+\.[0-9]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}
:multi #"\n"
:multi-match? #"^[0-9]+.*\$?([0-9,]+\.[0-9]+).*\$?([0-9,]+\.[0-9]+)"}
;; Young's Market Co - INVOICE
{:vendor "RNDC"
:keywords [#"P.O.Box 743564"]
:extract {:date #"(?:INVOICE|CREDIT) DATE\n(?:.*?)(\S+)\n"
#_#_:customer-identifier #"(?:INVOICE|CREDIT) DATE\n [0-9]+\s+(.*?)\s{2,}"
:account-number #"Store Number:\s+(\d+)"
:invoice-number #"(?:INVOICE|CREDIT) DATE\n(?:.*?)\s{2,}(\d+?)\s+\S+\n"
:total #"Net Amount(?:.*\n){4}(?:.*?)([\-]?[0-9\.]+)\n"}
:parser {:date [:clj-time ["MM/dd/yy"
"dd-MMM-yy"]]
:total [:trim-commas-and-negate nil]}}
;; WINE WAREHOUSE
{:vendor "Wine Warehouse"
:keywords [#"WINE WAREHOUSE" #"Bottle prices include"]
:extract {:date #"Invoice Date:\s+([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"Invoice #: \s+(\d+)"
:total #"Total\s+([\-0-9,]+\.[0-9]{2})"
:account-number #"Customer #:\s+(\d+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; WINE WAREHOUSE 3
{:vendor "Wine Warehouse"
:keywords [#"Wine Warehouse" #"PLEASE APPLY CREDIT"]
:extract {:date #"Credit Memo Number/Date\s+\d+\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Ship-To-Party.*\n(.*?)\s{2,}"
:invoice-number #"Credit Memo Number/Date\s+(\d+)"
:total #"Total\s+([0-9]+\.[0-9]{2}-)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; WINE WAREHOUSE CREDIT 4
{:vendor "Wine Warehouse"
:keywords [#"WINE WAREHOUSE" #"Credit Memo"]
:extract {:date #"Credit Memo Number/Date\s+\d+\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Ship-To-Party.*\n(.*?)\s{2,}"
:invoice-number #"Credit Memo Number/Date\s+(\d+)"
:total #"Total\s+([0-9]+\.[0-9]{2}-)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; WINE WAREHOUSE 2
{:vendor "Wine Warehouse"
:keywords [#"WINE WAREHOUSE" #"Bill-to-Party"]
:extract {:date #"Invoice date\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill-to-Party.*\n(.*?)\s{2,}"
:invoice-number #"Invoice number\s+(\d+)"
:total #"Gross\s+([0-9]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; Breakthru Bev
{:vendor "Wine Warehouse"
:keywords [#"BREAKTHRU BEVERAGE"]
:extract {:date #"Invoice Date:\s+([0-9]+/[0-9]+/[0-9]+)"
:invoice-number #"Invoice #: \s+(\d+)"
:total #"Total\s+([\-0-9,]+\.[0-9]{2})"
:account-number #"Customer #:\s+(\d+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; THE WATER PROS
{:vendor "The Water Pros"
:keywords [#"The Water Pros, Inc"]
:extract {:date #"DATE\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO.*\n(.*?)(?=\s{2,})"
:invoice-number #"INVOICE #\s+(\w+)"
:total #" TOTAL\s+([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; REGAL
{:vendor "Regal Wine Co"
:keywords [#"REGAL WINE"]
:extract {:date #"INVOICE DATE.*\n\n(?:.*?)([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"INVOICE\n(.*?)\s{2,}"
:invoice-number #"INVOICE NUMBER.*\n\n(?:.*?)(\d+)"
:total #"Total Amount Due(?:.*?)([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
;; ALSCO
{:vendor "Alsco"
:keywords [#"Alsco"]
:extract {:date #"Invoice Date:\s+(.*)"
:customer-identifier #"Invoice F o r(?:.*?)\n\s+(.*?)\s{2,}"
:invoice-number #" (\S+)\n\s+Invoice Date"
:total #"Invoice Total\s+\$([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MMM dd yyyy"]
:total [:trim-commas nil]}}
;; SUNCREST
{:vendor "Suncrest USA Inc"
:keywords [#"Suncrest.*Invoice"]
:extract {:date #"Date.*\n\s*\n(?:.*?)([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To(?:.*?)\n\n(.*?)\s{2,}"
:invoice-number #"Invoice #.*\n\s*\n.*? (\d{3,})"
:total #"Balance Due\s+\$([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; PACIFIC SEAFOOD
{:vendor "Pacific Seafood"
:keywords [#"(pacseafood|PACIFIC FRESH)"]
:extract {:date #"DATE(?:.*\n.*(?=([0-9]+/[0-9]+/[0-9]+)))([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"DELIVER TO:(?:.*\n)(.*?)(?=\s{2})"
:invoice-number #"INVOICE NO\.\n(?:.*?(?= [0-9]+\n)) ([0-9]+)"
:total #"TOTAL\n\s+([0-9,\.]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}}
;; P&R
{:vendor "P & R PAPER SUPPLY CO"
:keywords [#"PAPER SUPPLY COMPANY"]
:extract {:date #"Invoiced.*\n\s+\S+\s+(\S+)"
:customer-identifier #"Bill To.*\n\s*(.*?)\s{2,}"
:invoice-number #"Invoice#.*\n.*\n.*?(\S+)\s+\d+\n"
:total #"INVOICE TOTAL\s+([\-]?[\d,]+\.\d+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; SUNCREST STATEMENT
{:vendor "Suncrest USA Inc"
:keywords [#"Suncrest.*\n.*Statement"]
:extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"To:(?:.*?)\n\s*(.*?)\s{2,}"
:invoice-number #"INV #(\d+)"
:total #"Orig\. Amount \$([0-9,]+\.[0-9]{2})"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas nil]}
:multi #"\n"
:multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+INV "}
;; US FOODS
{:vendor "US Foods"
:keywords [#"US Foods"]
:extract {:date #"INVOICE DATE.*\n+.*?(?=([0-9]+/[0-9]+/[0-9]+))([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,}
:invoice-number #"INVOICE NUMBER[^\n]+\n\n\d+\s+(\d+)"
:total #"(?:DELIVERED AMOUNT|PLEASE REMIT|AMOUNT).*?\$([0-9.,]+( CR)?)\n"}
:parser {:date [:clj-time ["MM/dd/yyyy" "yyyy/MM/dd"]]
:total [:trim-commas-and-negate nil]}}
;; SYSCO
{:vendor "Sysco"
:keywords [#"SYSCO"]
:extract {:date #"INVOICE NUMBER[^\n]+\n([^\n]+)\n"
#_#_:customer-identifier #"INVOICE NUMBER[^\n]+\n[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,}
:account-number #"INVOICE NUMBER[^\n]+\n[^\n]+\n.*?(?=[\d]{6,7})(\d{6,7})"
:invoice-number #"INVOICE NUMBER[^\n]+\n[^\n]+\n.*?(?=[\d]{9})(\d{9})"
:total #"\s{2,}INVOICE\s{2,}.*?(?=TOTAL)TOTAL\s+([0-9.]+[\-]?)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; LE BOULANGER
{:vendor "Le Boulanger"
:keywords [#"Le Boulanger"]
:extract {:date #"Invoice Date: ([^\n]+)\n"
:customer-identifier #"Ship to\n+\s+([\S ]+?)(?=\s{2,})"
:invoice-number #"Invoice No: ([^\n]+)\n"
:total #" Total:\s+([\d\.]+)"}
:parser {:date [:clj-time "MMM dd, yyyy"]}}
;; BiRite -- parent company for a&b produce
{:vendor "BiRite"
:keywords [#"BIRITE"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO.*\n.*?\s{2,}(.*?)\s{2,}"
:invoice-number #"INVOICE#\n.*?(\d+)\n"
:total #"INV TOTAL \s+([\d\.\-]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; A&B
{:vendor "A&B Produce"
:keywords [#"ABProduce"]
:extract {:date #"^\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO:[^\n]+\n[^\n]+\n[^\n]+\n(.*)\s{2,}"
:invoice-number #"(\d+)\s+(?:INV|C/M)"
:total #" (?:INV|C/M)\s+([\d\.\-]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"^\s+[0-9]+/[0-9]+/[0-9]+\s+\d+\s+(INV|C/M)\s+"}
;; A&B Invoice
{:vendor "A&B Produce"
:keywords [#"415-656-0254"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO.*\n.*?\s{2,}(.*?)\s{2,}"
:invoice-number #"INVOICE#\n.*?(\d+)\n"
:total #"INV TOTAL \s+([\d\.\-]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; American Provisions
{:vendor "American Paper & Provisions"
:keywords [#"imperialdade"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*\n\s*(.*?)\s{2,}"
:invoice-number #"INVOICE\n(?:.*?)(\s{2,}\d+)"
:total #"AMOUNT DUE:\s+([\d\.,\-]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; Ocean Queen statement
{:vendor "Ocean Queen"
:keywords [#"Ocean Queen USA" #"Statement"]
:extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"To:.*\n\s*(.*?)\s{2,}"
:invoice-number #"INV #(\d+)"
:total #" ([\d\.,\-]+)"}
:multi #"\n"
:multi-match? #"^([0-9]+/[0-9]+/[0-9]+).*INV"
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; Ocean Queen
{:vendor "Ocean Queen"
:keywords [#"Ocean Queen USA"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*\n\s*(.*?)\s{2,}"
:invoice-number #"Invoice #\n.*\n.*?(\d+)\n"
:total #"Total\s+\$([\d\.,\-]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; CHEF's CHOICE
{:vendor "Chef's Choice Produce Co"
:keywords [#"(2170 MARTIN AVENUE|213-3886)"]
:extract {:date #"([0-9/]{10,10})"
:customer-identifier #"\n B\s+([\S ]+?)(?=\s{2,}I) "
:invoice-number #"^0*([0-9]+)"
:total #"INVOICE\s+([\d\.]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]} ;; may want to try two approaches [:clj-time ["MM/dd/yyyy" "MM1dd1yyyy"]]
:multi #"\n"
:multi-match? #"\s+INVOICE\s+"}
;; blue marine
{:vendor "Blue Marine"
:keywords [#"Blue Marine, Inc"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Total (.*?)\s{2,}"
:invoice-number #"(\d{6,})"
:total #"([\-0-9,\.]+)$"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"(Invoice|Credit Memo)\s{2,}"}
;; FRESH AND BEST
;; statement
{:vendor "Fresh and Best Produce"
:keywords [#"freshbestproduce" #"Statement"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To[^\n]+\n([A-Za-z ']+)"
:invoice-number #"INV #(\d+)\."
:total #"Amount\s+\$([0-9\.]+)\."}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"INV #"}
{:vendor "Fresh and Best Produce"
:keywords [#"freshbestproduce"]
:extract {:date #"\n\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To[^\n]+\n([A-Za-z ']+)"
:invoice-number #"\n\s+[0-9/]+\s+(\d+)"
:total #"Balance Due\s+\$([0-9\.]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]}}
;; PFG - ROMA LOOK 1
{:vendor "Performance Food Group - ROMA"
:keywords [#"inquiries call 1-800-233-6211"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})"
:invoice-number #"^\s+([\dA-Z]+)"
:total #"([\d\.,\-]+\.[\d\-]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"^\s+[\d]{6,8}\s+\d+"}
;; ACME BREAD
{:vendor "Acme Bread"
:keywords [#"acmebread\.com"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Print Date.*\n.*\n(.*)"
:invoice-number #"^\s*(\d+)"
:total #"\s{2,}(\d+\.\d{2})\s{2,}"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"^\s*\d+\s+([0-9]+/[0-9]+/[0-9]+)"}
;; PFG - ROMA
{:vendor "Performance Food Group - ROMA"
:keywords [#"Performance Food Group, Inc\n\f"]
:extract {:date #"Date: ([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})"
:invoice-number #"INVOICE NO.\s+ ([\d]+)"
:total #"([\d\.,]+)\s+INVOICE TOTAL"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}
;; JFC
{:vendor "JFC International"
:keywords [#"48490 MILMONT DRIVE"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:account-number #"CUSTOMER #.*?\n\n\s+(.*?)\s{2,}"
:customer-identifier #"SOLD\s+(?:TO\s+)?([\S ]+?)(?=(\s{2,}|\n))"
:invoice-number #"(\S+)\s+(?:[0-9]+/[0-9]+/[0-9]+)"
:total #"(?:INVOICE|TOTAL|CREDIT)\s+([\d\.,\-]+\.[\d\-]+( CR)?)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; Template for "Primizie Foods", selected by the primiziefoods.com keyword.
{:vendor "Primizie Foods"
 :keywords [#"primiziefoods.com"]
 :extract {:date #"Invoice Date\s+([0-9]+/[0-9]+/[0-9]+)"
           :customer-identifier #"BILL TO.*?\n(.*?)\s{2,}"
           :invoice-number #"Invoice #\s+(.*?)\n"
           ;; The trailing class needs `+`: without a quantifier only ONE
           ;; character after the decimal point was captured, so "$1,234.56"
           ;; came back as "1,234.5". This matches the sibling templates that
           ;; use `\.[\d\-]+`.
           :total #"BALANCE DUE\s+\$([\d\.,\-]+\.[\d\-]+)"}
 :parser {:date [:clj-time "MM/dd/yyyy"]
          :total [:trim-commas-and-negate nil]}}
;; ROMA BAKERY
{:vendor "Roma Bakery Inc."
:keywords [#"Roma Bakery Inc"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*\n\s+(.*?)\s{2,}"
:invoice-number #"Invoice (\d+)"
:total #"Total\s+([\d\-\.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; KAEL FOODS
{:vendor "Kael Foods"
:keywords [#"kaelfoods.com"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To.*\n\s+(.*?)\s{2,}"
:invoice-number #"INVOICE 0*(\d+)"
:total #"TOTAL:\s+\$([\d\-\.,]+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
;; Starter Bakery
{:vendor "Starter Bakery"
:keywords [#"starterbakery.com"]
:extract {:date #"INVOICE DATE:\s+(.*?)\s{2,}"
:customer-identifier #"BILL TO:.*\n\s+(.*?)\s{2,}"
:invoice-number #"Invoice.*?(\d+)"
:total #"Total:.*?([\d\-,]+\.\d{2,2}+)"}
:parser {:date [:clj-time "MMMM dd, yyyy"]
:total [:trim-commas-and-negate nil]}}
;; Trimark
{:vendor "TriMark R.W. Smith"
:keywords [#"TriMark"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To\s+(.*?)\s{2,}"
:invoice-number #"Invoice #\n.*?([\d\-]+)\n"
:total #"Invoice Total\s+([\d\-,]+\.\d{2,2}+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
;; TODO DISABLING TO FOCUS ON STATEMENT
#_{:vendor "Reel Produce"
:keywords [#"reelproduce.com"]
:extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Bill To(?:.*?)\n\n\s+(.*?)\s{2,}"
:invoice-number #"Invoice #\n.*?\n.*?([\d\-]+)\n"
:total #"Total\s*\n\s+\$([\d\-,]+\.\d{2,2}+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas-and-negate nil]}}
{:vendor "Eddie's Produce"
:keywords [#"Eddie's Produce"]
:extract {:date #"Invoice\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Amount\s*\n\s+(.*?)\s{2,}"
:invoice-number #"Invoice\s+\S+\s+(\d+)"
:total #"([\d\-,]+\.\d{2,2}+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"^\s*Invoice\s{2,}"}
;; DAY LEE
;; Template for "Day Lee Foods"; the text is split on newlines (:multi) and
;; only chunks matching :multi-match? are considered.
{:vendor "Day Lee Foods"
 :keywords [#"DAY-LEE FOODS"]
 :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
           :customer-identifier #"9720\n\n(.*?)\n"
           :invoice-number #"^\s*(\d+)\s"
           ;; `.` added to the class: the previous `[\d\-,]+` could not span a
           ;; decimal point, so with the `$` anchor an amount such as
           ;; "$1,234.56" at end of line did not match at all. Anything the
           ;; old pattern matched is still matched with the same capture.
           :total #"\$([\d\-,\.]+)$"}
 :parser {:date [:clj-time "MM/dd/yyyy"]
          :total [:trim-commas-and-negate nil]}
 :multi #"\n"
 :multi-match? #"RI"}
;; NA sales
{:vendor "N.A. Sales Company, Inc"
:keywords [#"2695 McCone Avenue"]
:extract {:date #"Invoice Date:\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Sold To:.*?\n\s+(.*?)\s{2,}"
:invoice-number #"Invoice Number:\s+(.*?)\n"
:total #"Invoice Total:\s+([\d\-,]+\.\d{2,2}+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
{:vendor "Mani Imports"
:keywords [#"Mani Imports"]
:extract {:date #"Order Date\s+([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"Ship To:\s*(.*?)\n"
:invoice-number #"Invoice Number:\s+(.*?)\n"
:total #"Invoice Total:\s+([\d\-,]+\.\d{2,2}+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}}
{:vendor "Reel Produce"
:keywords [#"reelproduce.com" #"Statem"]
:extract {:date #"\s*([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"ELECTRONICALLY.*\n\s*(.*?)\s{2,}"
:invoice-number #"#(\d+)"
:total #"([\d\-,]+\.\d{2,2})"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"INV #"}
{:vendor "Paulino's Bakery"
:keywords [#"paulinosbakery"]
:extract {:date #"\s*([0-9]+/[0-9]+/[0-9]+)"
:customer-identifier #"831-333-1010\s*\n\s+(.*?)\s{2,}"
:invoice-number #"INV #(\d+)"
:total #"INV #(?:.*?)\s{2,}([\d\-,]+\.\d{2,2}+)"}
:parser {:date [:clj-time "MM/dd/yyyy"]
:total [:trim-commas-and-negate nil]}
:multi #"\n"
:multi-match? #"INV #"}])
(defn- excel-serial->local-date
  "Reads `serial` as an Excel-style day number (e.g. \"45663\" or 45663.0) and
  returns 1900-01-01 plus (serial - 2) days, passed through
  `atime/as-local-time`. Returns nil when `serial` cannot be read as a number
  or the conversion throws."
  [serial]
  (try
    (let [days (-> serial str str/trim Double/parseDouble long)]
      (atime/as-local-time
       (time/plus (time/date-time 1900 1 1)
                  (time/days (- days 2)))))
    (catch Exception _
      nil)))

(def excel-templates
  "Spreadsheet templates. Each entry has a :vendor name, :keywords regexes and
  an :extract function of [sheet vendor] that returns a vector of invoice maps
  (:customer-identifier, :text, :full-text, :date, :invoice-number, :total,
  :vendor-code), one per data row of `sheet`."
  [{:vendor "Mama Lu's Foods"
    :keywords [#"Mama Lu's Foods"]
    :extract (fn [sheet vendor]
               (into []
                     (comp
                      ;; The first five rows are skipped; only rows with a
                      ;; non-empty second cell are kept.
                      (drop 5)
                      (filter (fn [r]
                                (and (seq r)
                                     (->> r second not-empty))))
                      (map (fn [[_ customer-order-number num date name amount]]
                             ;; The customer identifier is whatever precedes
                             ;; the first ':' in the name cell.
                             {:customer-identifier (second (re-find #"([^:]*):" name))
                              :text name
                              :full-text name
                              :date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim date))
                              :invoice-number (str customer-order-number "-" (Integer/parseInt num))
                              :total (str amount)
                              :vendor-code vendor})))
                     sheet))}
   {:vendor "Daylight Foods"
    :keywords [#"CUSTNO"]
    :extract (fn [sheet vendor]
               (alog/peek ::daylight-invoices
                          (into []
                                (comp
                                 ;; Skip the first row; keep rows whose first
                                 ;; cell is non-empty.
                                 (drop 1)
                                 (filter (fn [r]
                                           (and (seq r)
                                                (->> r first not-empty))))
                                 (map (fn [[customer-number _ _ _ invoice-number date amount :as row]]
                                        (let [row-text (str/join " " row)]
                                          {:customer-identifier customer-number
                                           :text row-text
                                           :full-text row-text
                                           ;; Try MM/dd/yyyy first. If that
                                           ;; yields nil or throws, treat the
                                           ;; cell as an Excel serial day
                                           ;; number. The fallback previously
                                           ;; parsed the literal "45663"
                                           ;; instead of this row's `date`.
                                           :date (or (try
                                                       (u/parse-value :clj-time "MM/dd/yyyy" (str/trim date))
                                                       (catch Exception _
                                                         nil))
                                                     (excel-serial->local-date date))
                                           :invoice-number invoice-number
                                           :total (str amount)
                                           :vendor-code vendor}))))
                                sheet)))}])