(ns auto-ap.parse.templates
  (:require [auto-ap.parse.util :as u]
            [auto-ap.logging :as alog]
            [clj-time.core :as time]
            [clojure.string :as str]
            [auto-ap.time :as atime]))

(def pdf-templates
  "Vector of per-vendor templates for text extracted from PDF documents.

  Each template is a map with:
    :vendor        vendor name string
    :keywords      vector of regexes identifying the document
    :extract       map of field keyword -> regex; the field value is a capture group
    :parser        map of field keyword -> [parser-keyword argument]
    :multi         (optional) regex separating one document into several records
    :multi-match?  (optional) regex a separated piece is tested against

  NOTE(review): the code that consumes these maps is not in this namespace.
  Presumably all :keywords must match for a template to be chosen and
  templates are tried in vector order (statement variants are listed before
  the more general invoice variants) - confirm against the caller."
  [;; CHEF's WAREHOUSE
   {:vendor "CHFW"
    :keywords [#"CHEF'S WAREHOUSE"]
    :extract {:total #"2 WKS C\.C\.\s+([\d.,]+)"
              :customer-identifier #"\n([A-Z][A-Z ]+)\s{2,}"
              :date #"\s+([0-9]+/[0-9]+/[0-9]+)"
              :invoice-number #"\s+[0-9]+/[0-9]+/[0-9]+\s+([0-9]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]}}

   ;; GSTAR SEAFOOD
   {:vendor "Gstar Seafood"
    :keywords [#"G Star Seafood"]
    :extract {:total #"Total\s{2,}([\d\-,]+\.\d{2,2}+)"
              :customer-identifier #"(.*?)(?:\s+)Invoice #"
              :date #"Invoice Date\s{2,}([0-9]+/[0-9]+/[0-9]+)"
              :invoice-number #"Invoice #\s+(\d+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; GGM
   {:vendor "Golden Gate Meat Company, Inc"
    :keywords [#"Golden Gate Meat"]
    :extract {:total #"Invoice Total\:\s+\$([\d.,]+)"
              :customer-identifier #"Bill To\s*:\s*([\w ]+)\s{2,}"
              :date #"Printed:\s+([0-9]+/[0-9]+/[0-9]+)"
              :invoice-number #"Invoice\s+[^\n]+\n[^\n]+\n\s+([0-9]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; CINTAS
   {:vendor "CINTAS"
    :keywords [#"CINTAS CORPORATION"]
    :extract {:invoice-number #"INVOICE\s#\s+([\d.,]+)"
              :customer-identifier #"BILL TO\s*:\s{2,}([\w ]+)\s{2,}"
              :date #"INVOICE DATE\s*\n.*\s+([0-9]+/[0-9]+/[0-9]+)"
              :total #"INVOICE TOTAL\s+([0-9.]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]}
    :multi #"\f\f"}

   ;; IMPACT PAPER
   {:vendor "Impact Paper & Ink LTD"
    :keywords [#"650-692-5598"]
    :extract {:total #"Total Amount\s+\$([\d\.\,\-]+)"
              :account-number #"CUST. #\n.*?/\d{4,}\s+(.*?)\n"
              :date #"([0-9]+/[0-9]+/[0-9]+)"
              :invoice-number #"[0-9]+/[0-9]+/[0-9]+\s+(\d+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; CARBONIC
   {:vendor "Carbonic Service Inc"
    :keywords [#"CARBONIC SERVICE INC"]
    :extract {:invoice-number #"Invoice #\s*\n\s*[\w\.]+\s+[\w\./]+(.*)\s*\n"
              :date #"Invoice #\s*\n\s*[\w\.]+\s+([\w\./]+)"
              :total #"Total\s+\$([0-9.,]+)"
              :account-number #"Account #\s+(\d+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}}

   ;; DVW
   {:vendor "DVW Commercial"
    :keywords [#"DVW Commercial"]
    :extract {:date #"\s*([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Ship To:.*?\n.*?\s{2,}(.*)"
              :invoice-number #"Invoice\s*\n\s*[S]?([\w\./]+)*"
              :total #"Total:\s+\$\s*([0-9.]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]}}

   ;; DAYLIGHT FOOD STATEMENT
   {:vendor "Daylight Foods"
    :keywords [#"DAYLIGHT FOODS" #"Customer Statement"]
    :extract {:date #"^.*?([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Phone:.*\n+\s+(.*)"
              :invoice-number #"\s+(\w+)"
              :total #"([\-]?[0-9]+\.[0-9]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"^\s*[A-Z]\d+\s+([0-9]+/[0-9]+/[0-9]+)"}

   ;; DAYLIGHT FOOD
   {:vendor "Daylight Foods"
    :keywords [#"DAYLIGHT FOODS"]
    :extract {:date #"\n\s*Date[^\n]+\n\s*([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To:.*\n\s*(.*?)\s{2,}"
              :account-number #"Customer\s+(.*?)\s+/"
              :invoice-number #"Invoice\s([\w\./]+)*"
              :total #"Total Invoice\s+([\-]?[0-9.]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; BEN E. KEITH
   {:vendor "Ben E. Keith"
    :keywords [#"BEN E. KEITH"]
    :extract {:date #"Customer No Mo Day Yr.*?\n.*?\d{5,}\s{2,}(\d+\s+\d+\s+\d+)"
              :customer-identifier #"Customer No Mo Day Yr.*?\n.*?(\d{5,})"
              :invoice-number #"Invoice No.*?\n.*?(\d{8,})"
              :total #"Total Invoice.*?\n.*?([\-]?[0-9]+\.[0-9]{2,})"}
    :parser {:date [:month-day-year nil]
             :total [:trim-commas-and-negate nil]}}

   ;; SOUTHBAY FRESH
   {:vendor "Southbay Fresh Produce"
    :keywords [#"(SOUTH BAY FRESH PRODUCE|SOUTH BAY PRODUCE)"]
    :extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"To:[^\n]*\n\s+([A-Za-z' ]+)\s{2}"
              :invoice-number #"INV #\/(\d+)"
              :total #"\$([0-9.]+)\."}
    :parser {:date [:clj-time "MM/dd/yyyy"]}
    :multi #"\n"
    :multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+INV "}

   ;; DON VITO
   {:vendor "Don Vito Ozuna Food Corp"
    :keywords [#"408-465-2010"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To.*?\n(.*?)\s{2,}"
              :invoice-number #"(?:[0-9]+/[0-9]+/[0-9]+)\s{2,}(\d+)"
              :total #"Please remit payment to\s{2,}\$([\-0-9.]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]}}

   ;; DON VITO STATEMENT
   {:vendor "Don Vito Ozuna Food Corp"
    :keywords [#"Don Vito Ozuna Food Corp.*?\n.*?Statement"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"To:.*?\n\s*(.*)?\s{2,}"
              :invoice-number #"INV #(\d+)"
              :total #"Amount \$([\d\-\.]+?)\.\s{2,}"}
    :parser {:date [:clj-time "MM/dd/yyyy"]}
    :multi #"\n"
    :multi-match? #"\d+/\d+/\d+.*?INV"}

   ;; PFG - LEDYARD
   {:vendor "Performance Food Group - LEDYARD"
    :keywords [#"performancefoodservice"]
    :extract {:date #"DELIVER TO[^\n]+\n.+?(?=[0-9]+/[0-9]+/[0-9]+)([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"DELIVER TO[^\n]+\n\s*[\S ]+?(?=\s{2,}([\S ]+?)\s{2,})" ;; ([\S ]+)\s{2,}
              :invoice-number #"DELIVER TO[^\n]+\n.+?(?=\d+)(\d+)\s*\n"
              :total #"([0-9.\-]+)\s+Status Code"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; SOUTHERN GLAZER'S
   {:vendor "Southern Glazers"
    :keywords [#"Southern Glazer's"]
    :extract {:date #"INVOICE DATE(?s:.*)(?= (?:[0-9]+/[0-9]+/[0-9]+)\s+([0-9]+/[0-9]+/[0-9]+)) "
              :invoice-number #"(?s)INVOICE\n(?:.*?)(?=\d{4,})(\d+)"
              :total #"PAY THIS AMOUNT(?s:.*)(?= ([0-9,]+\.[0-9]{2}))"
              :account-number #"ACCOUNT #.*\n.*?[\n]?\s+(\d+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}
    :multi #"\f"
    :multi-match? #"PAY THIS AMOUNT"}

   ;; GOLDEN BRANDS
   {:vendor "Golden Brands San Jose"
    :keywords [#"GOLDEN BRANDS"]
    :extract {:date #"(?:.*)((?:Mon|Tue|Wed|Thu|Thurs|Fri|Sat|Sun) .*)" ;; TODO: how to anchor this to a specific line?
              :account-number #"Account:\s*(.*?)(\s{2,}|\n)"
              :invoice-number #"Invoice#: (\d+)"
              :total #"Invoice Total\s+([\-0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "EEE MMM dd, yyyy HH:mm aa"]
             :total [:trim-commas nil]}}

   ;; BIGOLI FRESH PASTA
   {:vendor "Bigoli Fresh Pasta"
    :keywords [#"bigolifreshpasta.com"]
    :extract {:date #"Invoice Date.*?([0-9]+/[0-9]+/[0-9]+)" ;; TODO: how to anchor this to a specific line?
              :customer-identifier #"BILL TO.*\n\s*(.*?)\s{2,}"
              :invoice-number #"Invoice #.*?(\d+)"
              :total #" BALANCE DUE\s+(?:\$)?([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; Del Monte Meats
   {:vendor "Del Monte Meat Co"
    :keywords [#"Del Monte"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To:(?:.*?(?=\s{2,}))(.*)-"
              :invoice-number #"Invoice\s+([0-9]+)"
              :total #"([0-9\.,]+)$"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}
    :multi #"\n"
    :multi-match? #"^\s{1,2}[0-9]+"}

   ;; Allen Brothers West (same layout as Del Monte Meats)
   {:vendor "Allen Brothers West"
    :keywords [#"ALLEN BROTHERS"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To:(?:.*?(?=\s{2,}))(.*)-"
              :invoice-number #"Invoice\s+([0-9]+)"
              :total #"([0-9\.,]+)$"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}
    :multi #"\n"
    :multi-match? #"^\s{1,2}[0-9]+"}

   ;; Royal Hawaiian
   {:vendor "Royal Hawaiian"
    :keywords [#"ROYAL HAWAIIAN"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO.*\n\s+(.*?(?=(\s{2,}|\()))"
              :invoice-number #"Invoice#:\s+([0-9]+)"
              :total #"INV TOTAL:\s+([0-9\.,]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}}

   ;; WORLDWIDE PRODUCE
   {:vendor "Worldwide Produce"
    :keywords [#"WORLDWIDE\s+PRODUCE"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :invoice-number #"(?:Invoice|Adjustment) No\.\s+(\d+)"
              :total #"Total \S*\s+([0-9\.,\-]+)"
              :account-number #"Customer No.\s+(.*?)\s{2}"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}
    ;; Pattern built from the form-feed character, compiled case-insensitively.
    :multi (. java.util.regex.Pattern (compile (-> \formfeed str) java.util.regex.Pattern/CASE_INSENSITIVE))
    :multi-match? #"(Total\s+[0-9\.]+|Total Order)"}

   ;; AUTO-CHLOR
   {:vendor "Auto-Chlor"
    :keywords [#"AUTO-CHLOR"]
    :extract {:date #"DATE : ([0-9]+/[0-9]+/[0-9]+)"
              :invoice-number #"INVOICE# :\s+([0-9]+)"
              :total #"TOTAL DUE :\s+\$([0-9,.]+)"
              :account-number #"CUSTOMER#\s+:\s+(\d+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; Cheetah
   {:vendor "Cheetah"
    :keywords [#"Delivery date: [\d\-]+\s{2,}"]
    :extract {:date #"Delivery date: ([0-9\-]+)"
              :customer-identifier #"Shipping.*\n(.*)"
              :invoice-number #"Invoice #: (\d+)"
              :total #"TOTAL:.*?\$(.*)"}
    :parser {:date [:clj-time "yyyy-MM-dd"]
             :total [:trim-commas nil]}}

   ;; Classic Wines
   {:vendor "Classic Wines"
    :keywords [#"585-9463"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Customer Id:.*\n.*\n.*\n(.*)"
              :invoice-number #"^.{23}\s+(\w+)"
              :total #"\$([0-9\.,]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}
    :multi #"\n"
    :multi-match? #"^\s+.*?\d{6,}.*?\$"}

   ;; C & L
   {:vendor "C&L Produce"
    :keywords [#"440 Franklin Street"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To.*\n.*\n\s+(.*?)\s{2,}"
              :invoice-number #"Invoice #.*\n.*\n.*?(\d{5,})\n"
              :total #"Total\s+\$([0-9\.,]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}}

   ;; GENERAL PRODUCE - invoice
   {:vendor "General Produce Company"
    :keywords [#"generalproduce.com" #"INVOICE DATE"]
    :extract {:date #"INVOICE DATE.*\n.*?([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"SHIP TO.*\n.*\n\s+(.*?)\s{2,}"
              :invoice-number #"INVOICE NO.*\n.*?(\d{7,}?)\s+"
              :total #"TOTAL\s+([\-0-9,]+\.[0-9]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; GENERAL PRODUCE - statement
   {:vendor "General Produce Company"
    :keywords [#"generalproduce.com" #"FC ="]
    :extract {:date #"^ (\d{2}-\w{3}-\d{2})"
              :account-number #"STATEMENT DATE.*?ACCOUNT NO\.\n.*?\n?.*?(\d+)\n"
              :invoice-number #".*?\s{2,}.*?\s{2,}(.*?)\s{2,}"
              :total #"([\-0-9,]+\.[0-9]+)\s*$"}
    :multi #"\n"
    :multi-match? #"^ \d{2}-\w{3}-\d{2}\s+(IV|CM)"
    :parser {:date [:clj-time "dd-MMM-yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; GENERAL PRODUCE - credits don't have the same format
   {:vendor "General Produce Company"
    :keywords [#"1330 NORTH B"]
    :extract {:date #"DATE.*\n.*\n.*?([0-9]+/[0-9]+/[0-9]+)"
              :invoice-number #"CREDIT NO.*\n.*\n.*?(\d{5,}?)\s+"
              :account-number #"CUST NO.*\n.*\n\s+(\d+)"
              :total #"TOTAL:\s+\|\s*(.*)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   {:vendor "General Produce Company"
    :keywords [#"916-552-6495"]
    :extract {:date #"DATE.*\n.*\n.*?([0-9]+/[0-9]+/[0-9]+)"
              :invoice-number #"CREDIT NO.*\n.*\n.*?(\d{5,}?)\s+"
              :account-number #"CUST NO.*\n.*\n\s+(\d+)"
              :total #"TOTAL:\s+\|\s*(.*)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; Young's Market Co new statement
   {:vendor "RNDC"
    :keywords [#"(YOUNG'S MARKET COMPANY|Young.*Statement)"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Customer Name +([\w ]+)"
              :invoice-number #"\s{2,}([0-9]+)"
              :total #"\$?([0-9,]+\.[0-9]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}
    :multi #"\n"
    :multi-match? #"^[0-9]+.*\$?([0-9,]+\.[0-9]+).*\$?([0-9,]+\.[0-9]+)"}

   ;; Young's Market Co - INVOICE
   {:vendor "RNDC"
    :keywords [#"P.O.Box 743564"]
    :extract {:date #"(?:INVOICE|CREDIT) DATE\n(?:.*?)(\S+)\n"
              ;; customer-identifier extraction is disabled via #_#_ (key and value both discarded)
              #_#_:customer-identifier #"(?:INVOICE|CREDIT) DATE\n [0-9]+\s+(.*?)\s{2,}"
              :account-number #"Store Number:\s+(\d+)"
              :invoice-number #"(?:INVOICE|CREDIT) DATE\n(?:.*?)\s{2,}(\d+?)\s+\S+\n"
              :total #"Net Amount(?:.*\n){4}(?:.*?)([\-]?[0-9\.]+)\n"}
    :parser {:date [:clj-time ["MM/dd/yy" "dd-MMM-yy"]]
             :total [:trim-commas-and-negate nil]}}

   ;; WINE WAREHOUSE
   {:vendor "Wine Warehouse"
    :keywords [#"WINE WAREHOUSE" #"Bottle prices include"]
    :extract {:date #"Invoice Date:\s+([0-9]+/[0-9]+/[0-9]+)"
              :invoice-number #"Invoice #: \s+(\d+)"
              :total #"Total\s+([\-0-9,]+\.[0-9]{2})"
              :account-number #"Customer #:\s+(\d+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; WINE WAREHOUSE 3
   {:vendor "Wine Warehouse"
    :keywords [#"Wine Warehouse" #"PLEASE APPLY CREDIT"]
    :extract {:date #"Credit Memo Number/Date\s+\d+\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Ship-To-Party.*\n(.*?)\s{2,}"
              :invoice-number #"Credit Memo Number/Date\s+(\d+)"
              :total #"Total\s+([0-9]+\.[0-9]{2}-)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; WINE WAREHOUSE CREDIT 4
   {:vendor "Wine Warehouse"
    :keywords [#"WINE WAREHOUSE" #"Credit Memo"]
    :extract {:date #"Credit Memo Number/Date\s+\d+\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Ship-To-Party.*\n(.*?)\s{2,}"
              :invoice-number #"Credit Memo Number/Date\s+(\d+)"
              :total #"Total\s+([0-9]+\.[0-9]{2}-)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; WINE WAREHOUSE 2
   {:vendor "Wine Warehouse"
    :keywords [#"WINE WAREHOUSE" #"Bill-to-Party"]
    :extract {:date #"Invoice date\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill-to-Party.*\n(.*?)\s{2,}"
              :invoice-number #"Invoice number\s+(\d+)"
              :total #"Gross\s+([0-9]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; Breakthru Bev (recorded under the Wine Warehouse vendor)
   {:vendor "Wine Warehouse"
    :keywords [#"BREAKTHRU BEVERAGE"]
    :extract {:date #"Invoice Date:\s+([0-9]+/[0-9]+/[0-9]+)"
              :invoice-number #"Invoice #: \s+(\d+)"
              :total #"Total\s+([\-0-9,]+\.[0-9]{2})"
              :account-number #"Customer #:\s+(\d+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; THE WATER PROS
   {:vendor "The Water Pros"
    :keywords [#"The Water Pros, Inc"]
    :extract {:date #"DATE\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO.*\n(.*?)(?=\s{2,})"
              :invoice-number #"INVOICE #\s+(\w+)"
              :total #" TOTAL\s+([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; REGAL
   {:vendor "Regal Wine Co"
    :keywords [#"REGAL WINE"]
    :extract {:date #"INVOICE DATE.*\n\n(?:.*?)([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"INVOICE\n(.*?)\s{2,}"
              :invoice-number #"INVOICE NUMBER.*\n\n(?:.*?)(\d+)"
              :total #"Total Amount Due(?:.*?)([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}}

   ;; ALSCO
   {:vendor "Alsco"
    :keywords [#"Alsco"]
    :extract {:date #"Invoice Date:\s+(.*)"
              :customer-identifier #"Invoice F o r(?:.*?)\n\s+(.*?)\s{2,}"
              :invoice-number #" (\S+)\n\s+Invoice Date"
              :total #"Invoice Total\s+\$([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MMM dd yyyy"]
             :total [:trim-commas nil]}}

   ;; SUNCREST
   {:vendor "Suncrest USA Inc"
    :keywords [#"Suncrest.*Invoice"]
    :extract {:date #"Date.*\n\s*\n(?:.*?)([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To(?:.*?)\n\n(.*?)\s{2,}"
              :invoice-number #"Invoice #.*\n\s*\n.*? (\d{3,})"
              :total #"Balance Due\s+\$([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; PACIFIC SEAFOOD
   {:vendor "Pacific Seafood"
    :keywords [#"(pacseafood|PACIFIC FRESH)"]
    :extract {:date #"DATE(?:.*\n.*(?=([0-9]+/[0-9]+/[0-9]+)))([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"DELIVER TO:(?:.*\n)(.*?)(?=\s{2})"
              :invoice-number #"INVOICE NO\.\n(?:.*?(?= [0-9]+\n)) ([0-9]+)"
              :total #"TOTAL\n\s+([0-9,\.]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; P&R
   {:vendor "P & R PAPER SUPPLY CO"
    :keywords [#"PAPER SUPPLY COMPANY"]
    :extract {:date #"Invoiced.*\n\s+\S+\s+(\S+)"
              :customer-identifier #"Bill To.*\n\s*(.*?)\s{2,}"
              :invoice-number #"Invoice#.*\n.*\n.*?(\S+)\s+\d+\n"
              :total #"INVOICE TOTAL\s+([\-]?[\d,]+\.\d+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; SUNCREST STATEMENT
   {:vendor "Suncrest USA Inc"
    :keywords [#"Suncrest.*\n.*Statement"]
    :extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"To:(?:.*?)\n\s*(.*?)\s{2,}"
              :invoice-number #"INV #(\d+)"
              :total #"Orig\. Amount \$([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}
    :multi #"\n"
    :multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+INV "}

   ;; US FOODS
   {:vendor "US Foods"
    :keywords [#"US Foods"]
    :extract {:date #"INVOICE DATE.*\n+.*?(?=([0-9]+/[0-9]+/[0-9]+))([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,}
              :invoice-number #"INVOICE NUMBER[^\n]+\n\n\d+\s+(\d+)"
              :total #"(?:DELIVERED AMOUNT|PLEASE REMIT|AMOUNT).*?\$([0-9.,]+( CR)?)\n"}
    :parser {:date [:clj-time ["MM/dd/yyyy" "yyyy/MM/dd"]]
             :total [:trim-commas-and-negate nil]}}

   ;; SYSCO
   {:vendor "Sysco"
    :keywords [#"SYSCO"]
    :extract {:date #"INVOICE NUMBER[^\n]+\n([^\n]+)\n"
              ;; customer-identifier extraction is disabled via #_#_ (key and value both discarded)
              #_#_:customer-identifier #"INVOICE NUMBER[^\n]+\n[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,}
              :account-number #"INVOICE NUMBER[^\n]+\n[^\n]+\n.*?(?=[\d]{6,7})(\d{6,7})"
              :invoice-number #"INVOICE NUMBER[^\n]+\n[^\n]+\n.*?(?=[\d]{9})(\d{9})"
              :total #"\s{2,}INVOICE\s{2,}.*?(?=TOTAL)TOTAL\s+([0-9.]+[\-]?)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; LE BOULANGER
   {:vendor "Le Boulanger"
    :keywords [#"Le Boulanger"]
    :extract {:date #"Invoice Date: ([^\n]+)\n"
              :customer-identifier #"Ship to\n+\s+([\S ]+?)(?=\s{2,})"
              :invoice-number #"Invoice No: ([^\n]+)\n"
              :total #" Total:\s+([\d\.]+)"}
    :parser {:date [:clj-time "MMM dd, yyyy"]}}

   ;; BiRite -- parent company for a&b produce
   {:vendor "BiRite"
    :keywords [#"BIRITE"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO.*\n.*?\s{2,}(.*?)\s{2,}"
              :invoice-number #"INVOICE#\n.*?(\d+)\n"
              :total #"INV TOTAL \s+([\d\.\-]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; A&B
   {:vendor "A&B Produce"
    :keywords [#"ABProduce"]
    :extract {:date #"^\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO:[^\n]+\n[^\n]+\n[^\n]+\n(.*)\s{2,}"
              :invoice-number #"(\d+)\s+(?:INV|C/M)"
              :total #" (?:INV|C/M)\s+([\d\.\-]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"^\s+[0-9]+/[0-9]+/[0-9]+\s+\d+\s+(INV|C/M)\s+"}

   ;; A&B Invoice
   {:vendor "A&B Produce"
    :keywords [#"415-656-0254"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO.*\n.*?\s{2,}(.*?)\s{2,}"
              :invoice-number #"INVOICE#\n.*?(\d+)\n"
              :total #"INV TOTAL \s+([\d\.\-]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; American Provisions
   {:vendor "American Paper & Provisions"
    :keywords [#"imperialdade"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To.*\n\s*(.*?)\s{2,}"
              :invoice-number #"INVOICE\n(?:.*?)(\s{2,}\d+)"
              :total #"AMOUNT DUE:\s+([\d\.,\-]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; Ocean Queen statement
   {:vendor "Ocean Queen"
    :keywords [#"Ocean Queen USA" #"Statement"]
    :extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"To:.*\n\s*(.*?)\s{2,}"
              :invoice-number #"INV #(\d+)"
              :total #" ([\d\.,\-]+)"}
    :multi #"\n"
    :multi-match? #"^([0-9]+/[0-9]+/[0-9]+).*INV"
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; Ocean Queen
   {:vendor "Ocean Queen"
    :keywords [#"Ocean Queen USA"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To.*\n\s*(.*?)\s{2,}"
              :invoice-number #"Invoice #\n.*\n.*?(\d+)\n"
              :total #"Total\s+\$([\d\.,\-]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; CHEF's CHOICE
   {:vendor "Chef's Choice Produce Co"
    :keywords [#"(2170 MARTIN AVENUE|213-3886)"]
    :extract {:date #"([0-9/]{10,10})"
              :customer-identifier #"\n B\s+([\S ]+?)(?=\s{2,}I) "
              :invoice-number #"^0*([0-9]+)"
              :total #"INVOICE\s+([\d\.]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]} ;; may want to try two approaches [:clj-time ["MM/dd/yyyy" "MM1dd1yyyy"]]
    :multi #"\n"
    :multi-match? #"\s+INVOICE\s+"}

   ;; Blue Marine
   {:vendor "Blue Marine"
    :keywords [#"Blue Marine, Inc"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Total (.*?)\s{2,}"
              :invoice-number #"(\d{6,})"
              :total #"([\-0-9,\.]+)$"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"(Invoice|Credit Memo)\s{2,}"}

   ;; FRESH AND BEST - statement
   {:vendor "Fresh and Best Produce"
    :keywords [#"freshbestproduce" #"Statement"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To[^\n]+\n([A-Za-z ']+)"
              :invoice-number #"INV #(\d+)\."
              :total #"Amount\s+\$([0-9\.]+)\."}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"INV #"}

   ;; FRESH AND BEST - invoice
   {:vendor "Fresh and Best Produce"
    :keywords [#"freshbestproduce"]
    :extract {:date #"\n\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To[^\n]+\n([A-Za-z ']+)"
              :invoice-number #"\n\s+[0-9/]+\s+(\d+)"
              :total #"Balance Due\s+\$([0-9\.]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]}}

   ;; PFG - ROMA LOOK 1
   {:vendor "Performance Food Group - ROMA"
    :keywords [#"inquiries call 1-800-233-6211"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})"
              :invoice-number #"^\s+([\dA-Z]+)"
              :total #"([\d\.,\-]+\.[\d\-]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"^\s+[\d]{6,8}\s+\d+"}

   ;; ACME BREAD
   {:vendor "Acme Bread"
    :keywords [#"acmebread\.com"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Print Date.*\n.*\n(.*)"
              :invoice-number #"^\s*(\d+)"
              :total #"\s{2,}(\d+\.\d{2})\s{2,}"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"^\s*\d+\s+([0-9]+/[0-9]+/[0-9]+)"}

   ;; PFG - ROMA
   {:vendor "Performance Food Group - ROMA"
    :keywords [#"Performance Food Group, Inc\n\f"]
    :extract {:date #"Date: ([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})"
              :invoice-number #"INVOICE NO.\s+ ([\d]+)"
              :total #"([\d\.,]+)\s+INVOICE TOTAL"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}}

   ;; JFC
   {:vendor "JFC International"
    :keywords [#"48490 MILMONT DRIVE"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :account-number #"CUSTOMER #.*?\n\n\s+(.*?)\s{2,}"
              :customer-identifier #"SOLD\s+(?:TO\s+)?([\S ]+?)(?=(\s{2,}|\n))"
              :invoice-number #"(\S+)\s+(?:[0-9]+/[0-9]+/[0-9]+)"
              :total #"(?:INVOICE|TOTAL|CREDIT)\s+([\d\.,\-]+\.[\d\-]+( CR)?)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; PRIMIZIE FOODS
   {:vendor "Primizie Foods"
    :keywords [#"primiziefoods.com"]
    :extract {:date #"Invoice Date\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO.*?\n(.*?)\s{2,}"
              :invoice-number #"Invoice #\s+(.*?)\n"
              ;; The fractional part needs `+`: without it only one character
              ;; after the decimal point is captured ("123.45" -> "123.4").
              :total #"BALANCE DUE\s+\$([\d\.,\-]+\.[\d\-]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; ROMA BAKERY
   {:vendor "Roma Bakery Inc."
    :keywords [#"Roma Bakery Inc"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To.*\n\s+(.*?)\s{2,}"
              :invoice-number #"Invoice (\d+)"
              :total #"Total\s+([\d\-\.]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; KAEL FOODS
   {:vendor "Kael Foods"
    :keywords [#"kaelfoods.com"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To.*\n\s+(.*?)\s{2,}"
              :invoice-number #"INVOICE 0*(\d+)"
              :total #"TOTAL:\s+\$([\d\-\.,]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; Starter Bakery
   {:vendor "Starter Bakery"
    :keywords [#"starterbakery.com"]
    :extract {:date #"INVOICE DATE:\s+(.*?)\s{2,}"
              :customer-identifier #"BILL TO:.*\n\s+(.*?)\s{2,}"
              :invoice-number #"Invoice.*?(\d+)"
              :total #"Total:.*?([\d\-,]+\.\d{2,2}+)"}
    :parser {:date [:clj-time "MMMM dd, yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; Trimark
   {:vendor "TriMark R.W. Smith"
    :keywords [#"TriMark"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To\s+(.*?)\s{2,}"
              :invoice-number #"Invoice #\n.*?([\d\-]+)\n"
              :total #"Invoice Total\s+([\d\-,]+\.\d{2,2}+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; TODO DISABLING TO FOCUS ON STATEMENT
   #_{:vendor "Reel Produce"
      :keywords [#"reelproduce.com"]
      :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
                :customer-identifier #"Bill To(?:.*?)\n\n\s+(.*?)\s{2,}"
                :invoice-number #"Invoice #\n.*?\n.*?([\d\-]+)\n"
                :total #"Total\s*\n\s+\$([\d\-,]+\.\d{2,2}+)"}
      :parser {:date [:clj-time "MM/dd/yy"]
               :total [:trim-commas-and-negate nil]}}

   ;; EDDIE'S PRODUCE
   {:vendor "Eddie's Produce"
    :keywords [#"Eddie's Produce"]
    :extract {:date #"Invoice\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Amount\s*\n\s+(.*?)\s{2,}"
              :invoice-number #"Invoice\s+\S+\s+(\d+)"
              :total #"([\d\-,]+\.\d{2,2}+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"^\s*Invoice\s{2,}"}

   ;; DAY LEE
   {:vendor "Day Lee Foods"
    :keywords [#"DAY-LEE FOODS"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"9720\n\n(.*?)\n"
              :invoice-number #"^\s*(\d+)\s"
              :total #"\$([\d\-,]+)$"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"RI"}

   ;; NA sales
   {:vendor "N.A. Sales Company, Inc"
    :keywords [#"2695 McCone Avenue"]
    :extract {:date #"Invoice Date:\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Sold To:.*?\n\s+(.*?)\s{2,}"
              :invoice-number #"Invoice Number:\s+(.*?)\n"
              :total #"Invoice Total:\s+([\d\-,]+\.\d{2,2}+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; MANI IMPORTS
   {:vendor "Mani Imports"
    :keywords [#"Mani Imports"]
    :extract {:date #"Order Date\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Ship To:\s*(.*?)\n"
              :invoice-number #"Invoice Number:\s+(.*?)\n"
              :total #"Invoice Total:\s+([\d\-,]+\.\d{2,2}+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; REEL PRODUCE - statement
   {:vendor "Reel Produce"
    :keywords [#"reelproduce.com" #"Statem"]
    :extract {:date #"\s*([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"ELECTRONICALLY.*\n\s*(.*?)\s{2,}"
              :invoice-number #"#(\d+)"
              :total #"([\d\-,]+\.\d{2,2})"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"INV #"}

   ;; PAULINO'S BAKERY
   {:vendor "Paulino's Bakery"
    :keywords [#"paulinosbakery"]
    :extract {:date #"\s*([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"831-333-1010\s*\n\s+(.*?)\s{2,}"
              :invoice-number #"INV #(\d+)"
              :total #"INV #(?:.*?)\s{2,}([\d\-,]+\.\d{2,2}+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"INV #"}])

(defn- excel-serial->local-time
  "Converts a spreadsheet serial day number (for example 45663, \"45663\" or
  \"45663.0\") to a local time: 1900-01-01 plus (serial - 2) days, passed
  through `atime/as-local-time`. Any fractional part is truncated.
  Returns nil when `serial` cannot be read as a number or the conversion fails."
  [serial]
  (try
    (let [day-count (-> serial str str/trim Double/parseDouble long)]
      (atime/as-local-time
       (time/plus (time/date-time 1900 1 1)
                  ;; Same offset as the original (dec (dec n)) arithmetic.
                  (time/days (- day-count 2)))))
    (catch Exception _
      nil)))

(defn- parse-daylight-date
  "Parses the date cell of a Daylight Foods row. Tries `MM/dd/yyyy` text
  first; if that yields nil or throws (for example because the cell is not a
  string), falls back to reading the cell as a spreadsheet serial day number.
  Returns nil when neither interpretation works."
  [date]
  (or (try
        (u/parse-value :clj-time "MM/dd/yyyy" (str/trim date))
        (catch Exception _
          nil))
      (excel-serial->local-time date)))

(def excel-templates
  "Vector of per-vendor templates for spreadsheet documents.

  Each template is a map with :vendor, :keywords (vector of regexes) and
  :extract, a function of [sheet vendor] that returns a vector of invoice
  maps with keys :customer-identifier, :text, :full-text, :date,
  :invoice-number, :total and :vendor-code.

  NOTE(review): `sheet` is treated as a sequence of rows, each row a
  sequence of cell values - confirm the exact cell types against the caller."
  [{:vendor "Mama Lu's Foods"
    :keywords [#"Mama Lu's Foods"]
    :extract (fn [sheet vendor]
               (transduce
                (comp
                 ;; Skip the first five rows, then keep rows whose second cell is non-empty.
                 (drop 5)
                 (filter (fn [r] (and (seq r) (->> r second not-empty))))
                 (map (fn [r]
                        (let [[_ customer-order-number num date name amount] r]
                          ;; The customer identifier is the text before the first colon in the name cell.
                          {:customer-identifier (second (re-find #"([^:]*):" name))
                           :text name
                           :full-text name
                           :date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim date))
                           :invoice-number (str customer-order-number "-" (Integer/parseInt num))
                           :total (str amount)
                           :vendor-code vendor}))))
                conj
                []
                sheet))}

   {:vendor "Daylight Foods"
    :keywords [#"CUSTNO"]
    :extract (fn [sheet vendor]
               (alog/peek
                ::daylight-invoices
                (transduce
                 (comp
                  ;; Skip the first row, then keep rows whose first cell is non-empty.
                  (drop 1)
                  (filter (fn [r] (and (seq r) (->> r first not-empty))))
                  (map (fn [[customer-number _ _ _ invoice-number date amount :as row]]
                         (let [row-text (str/join " " row)]
                           {:customer-identifier customer-number
                            :text row-text
                            :full-text row-text
                            :date (parse-daylight-date date)
                            :invoice-number invoice-number
                            :total (str amount)
                            :vendor-code vendor}))))
                 conj
                 []
                 sheet)))}])