(ns auto-ap.parse.templates
  (:require [dk.ative.docjure.spreadsheet :as d]
            [auto-ap.parse.util :as u]
            [clojure.string :as str])
  (:import (org.apache.poi.ss.util CellAddress)))

;; Each PDF template is a map with:
;;   :vendor        vendor name attached to the template
;;   :keywords      regexes identifying the document
;;                  (presumably all must match -- confirm against the caller)
;;   :extract       field keyword -> regex whose first capture group holds the value
;;   :parser        field keyword -> [parser-id arg] post-processing spec (optional)
;;   :multi         regex used to split a document into several records (optional)
;;   :multi-match?  regex a split chunk must match to be treated as a record (optional)
;; The code that consumes these maps is not part of this file; the notes above
;; describe the shape only.
(def pdf-templates
  [;; CHEF's WAREHOUSE
   {:vendor "CHFW"
    :keywords [#"CHEF'S WAREHOUSE"]
    :extract {:total #"2 WKS C\.C\.\s+([\d.,]+)"
              :customer-identifier #"\n([A-Z][A-Z ]+)\s{2,}"
              :date #"\s+([0-9]+/[0-9]+/[0-9]+)"
              :invoice-number #"\s+[0-9]+/[0-9]+/[0-9]+\s+([0-9]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]}}

   ;; GGM
   {:vendor "Golden Gate Meat Company, Inc"
    :keywords [#"Golden Gate Meat"]
    :extract {:total #"Invoice Total\:\s+\$([\d.,]+)"
              :customer-identifier #"Bill To\s*:\s*([\w ]+)\s{2,}"
              :date #"Printed:\s+([0-9]+/[0-9]+/[0-9]+)"
              :invoice-number #"Invoice\s+[^\n]+\n[^\n]+\n\s+([0-9]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; CINTAS
   {:vendor "CINTAS"
    :keywords [#"CINTAS CORPORATION"]
    :extract {:invoice-number #"INVOICE\s#\s+([\d.,]+)"
              :customer-identifier #"BILL TO\s*:\s{2,}([\w ]+)\s{2,}"
              :date #"INVOICE DATE\s*\n.*\s+([0-9]+/[0-9]+/[0-9]+)"
              :total #"INVOICE TOTAL\s+([0-9.]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]}
    :multi #"\f\f"}

   ;; CARBONIC
   {:vendor "Carbonic Service Inc"
    :keywords [#"CARBONIC SERVICE INC"]
    :extract {:invoice-number #"Invoice #\s*\n\s*[\w\.]+\s+[\w\./]+(.*)\s*\n"
              :customer-identifier #"Bill To[^\n]+\n[^\n]*\n([\w ]+)\s{2,}"
              :date #"Invoice #\s*\n\s*[\w\.]+\s+([\w\./]+)"
              :total #"Total\s+\$([0-9.,]+)"
              :account-number #"Account #\s+(\d+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}}

   ;; DVW
   {:vendor "DVW Commercial"
    :keywords [#"DVW Commercial"]
    :extract {:date #"\s*([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To:[^\n]+\n[^\n]*\n\s*([\w ]+) \("
              :invoice-number #"Invoice\s*\n\s*[S]?([\w\./]+)*"
              :total #"Total:\s+\$ ([0-9.]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]}}

   ;; DAYLIGHT FOOD STATEMENT
   {:vendor "Daylight Foods"
    :keywords [#"DAYLIGHT FOODS" #"Customer Statement"]
    :extract {:date #"^.*?([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Phone:.*\n+\s+(.*)"
              :invoice-number #"\s+(\w+)"
              :total #"([\-]?[0-9]+\.[0-9]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"^\s*[A-Z]\d+\s+([0-9]+/[0-9]+/[0-9]+)"}

   ;; DAYLIGHT FOOD
   {:vendor "Daylight Foods"
    :keywords [#"DAYLIGHT FOODS"]
    :extract {:date #"\n\s*Date[^\n]+\n\s*([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To:.*\n\s*(.*?)\s{2,}"
              :invoice-number #"Invoice\s([\w\./]+)*"
              :total #"Total Invoice\s+([\-]?[0-9.]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; SOUTHBAY FRESH
   {:vendor "Southbay Fresh Produce"
    :keywords [#"SOUTH BAY FRESH PRODUCE"]
    :extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"To:[^\n]*\n\s+([A-Za-z' ]+)\s{2}"
              :invoice-number #"INV #\/(\d+)"
              :total #"\$([0-9.]+)\."}
    :parser {:date [:clj-time "MM/dd/yyyy"]}
    :multi #"\n"
    :multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+INV "}

   ;; PFG - LEDYARD
   {:vendor "Performance Food Group - LEDYARD"
    :keywords [#"performancefoodservice"]
    :extract {:date #"DELIVER TO[^\n]+\n.+?(?=[0-9]+/[0-9]+/[0-9]+)([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"DELIVER TO[^\n]+\n\s*[\S ]+?(?=\s{2,}([\S ]+?)\s{2,})" ;; alt: ([\S ]+)\s{2,}
              :invoice-number #"DELIVER TO[^\n]+\n.+?(?=\d+)(\d+)\s*\n"
              :total #"([0-9.\-]+)\s+Status Code"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; SOUTHERN GLAZER'S
   {:vendor "Southern Glazers"
    :keywords [#"Southern Glazer's"]
    ;; The trailing space in the :date pattern is part of the regex: it consumes
    ;; the space that the preceding lookahead starts with.
    :extract {:date #"INVOICE DATE(?s:.*)(?= (?:[0-9]+/[0-9]+/[0-9]+)\s+([0-9]+/[0-9]+/[0-9]+)) "
              :customer-identifier #"SOLD TO:(?:.*)(?=\n)\n(.*)(?=\s{2,})" ;; alt: ([\S ]+)\s{2,}
              :invoice-number #"INVOICE\n(?:.*?)(?=\d{4,})(\d+)"
              :total #"PAY THIS AMOUNT(?s:.*)(?= ([0-9,]+\.[0-9]{2}))"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}}

   ;; GOLDEN BRANDS
   {:vendor "Golden Brands San Jose"
    :keywords [#"GOLDEN BRANDS"]
    :extract {:date #"(?:.*\n){4}(.*)" ;; skips four lines and captures the fifth
              :customer-identifier #"Account:(?:.*\n)(.*(?=\s{2,}))"
              :invoice-number #"Invoice#: (\d+)"
              :total #"Invoice Total\s+([0-9,]+\.[0-9]{2})"}
    ;; NOTE(review): "HH" (0-23 hour) is combined with the AM/PM marker "aa";
    ;; a 12-hour clock normally wants "hh". Only the time of day would be
    ;; affected -- confirm against the :clj-time parser before changing.
    :parser {:date [:clj-time "EEE MMM dd, yyyy HH:mm aa"]
             :total [:trim-commas nil]}}

   ;; BIGOLI FRESH PASTA
   {:vendor "Bigoli Fresh Pasta"
    :keywords [#"bigolifreshpasta.com"]
    :extract {:date #"INVOICE #.*?\n.*?([0-9]+/[0-9]+/[0-9]+)" ;; first date on the line after "INVOICE #"
              :customer-identifier #"BILL TO.*\n\s+(.*?)\s{2,}"
              :invoice-number #"INVOICE #.*?\n(\d+)"
              :total #" TOTAL\s+([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; Del Monte Meats
   {:vendor "Del Monte Meat Co"
    :keywords [#"Del Monte"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To:(?:.*?(?=\s{2,}))(.*)-"
              :invoice-number #"Invoice\s+([0-9]+)"
              :total #"([0-9\.,]+)$"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}
    :multi #"\n"
    :multi-match? #"^\s{1,2}[0-9]+"}

   ;; Royal Hawaiian
   {:vendor "Royal Hawaiian"
    :keywords [#"ROYAL HAWAIIAN"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO.*\n\s+(.*?(?=(\s{2,}|\()))"
              :invoice-number #"Invoice#:\s+([0-9]+)"
              :total #"INV TOTAL:\s+([0-9\.,]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; AUTO-CHLOR
   {:vendor "Auto-Chlor"
    :keywords [#"AUTO-CHLOR"]
    :extract {:date #"DATE : ([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"(.*?)\s{2,}.*CUSTOMER#"
              :invoice-number #"INVOICE# :\s+([0-9]+)"
              :total #"TOTAL SALE :\s+([0-9,.]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; Classic Wines
   {:vendor "Classic Wines"
    :keywords [#"585-9463"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Customer Id:.*\n.*\n.*\n(.*)"
              :invoice-number #"^.{23}\s+(\w+)"
              :total #"\$([0-9\.,]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}
    :multi #"\n"
    :multi-match? #"^\s+.*?\d{6,}.*?\$"}

   ;; C & L
   {:vendor "C&L Produce"
    :keywords [#"440 Franklin Street"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To.*\n.*\n\s+(.*?)\s{2,}"
              :invoice-number #"Invoice #.*\n.*\n.*?(\d{5,})\n"
              :total #"Total\s+\$([0-9\.,]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}}

   ;; GENERAL PRODUCE
   {:vendor "General Produce Company"
    :keywords [#"generalproduce.com"]
    :extract {:date #"INVOICE DATE.*\n.*?([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"SHIP TO.*\n.*\n\s+(.*?)\s{2,}"
              :invoice-number #"INVOICE NO.*\n.*?(\d{7,}?)\s+"
              :total #"TOTAL\s+([\-0-9,]+\.[0-9]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; Young's Market Co new statement
   {:vendor "Youngs Market"
    :keywords [#"(YOUNG'S MARKET COMPANY|Young.*Statement)"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Customer Name +([\w ]+)"
              :invoice-number #"\s{2,}([0-9]+)"
              :total #"\$?([0-9,]+\.[0-9]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}
    :multi #"\n"
    :multi-match? #"^[0-9]+.*\$?([0-9,]+\.[0-9]+).*\$?([0-9,]+\.[0-9]+)"}

   ;; Young's Market Co - INVOICE
   {:vendor "Youngs Market"
    :keywords [#"P.O.Box 743564"]
    :extract {:date #"(?:INVOICE|CREDIT) DATE\n(?:.*?)(\S+)\n"
              :customer-identifier #"(?:INVOICE|CREDIT) DATE\n [0-9]+\s+(.*?)\s{2,}"
              :invoice-number #"(?:INVOICE|CREDIT) DATE\n(?:.*?)\s{2,}(\d+?)\s+\S+\n"
              :total #"Net Amount(?:.*\n){4}(?:.*?)([\-]?[0-9\.]+)\n"}
    :parser {:date [:clj-time "dd-MMM-yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; WINE WAREHOUSE
   {:vendor "Wine Warehouse"
    :keywords [#"WINE WAREHOUSE" #"Bottle prices include"]
    :extract {:date #"INVOICE DATE\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"SHIP-TO-PARTY.*\n(.*?)(?=\s{2,})"
              :invoice-number #"INV #\s+(\d+)"
              :total #"PLEASE PAY THIS AMOUNT\s+([0-9]+\.[0-9]{2})"
              :account-number #"CUSTOMER NUMBER\s+(\d+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; WINE WAREHOUSE 3
   {:vendor "Wine Warehouse"
    :keywords [#"Wine Warehouse" #"PLEASE APPLY CREDIT"]
    :extract {:date #"Credit Memo Number/Date\s+\d+\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Ship-To-Party.*\n(.*?)\s{2,}"
              :invoice-number #"Credit Memo Number/Date\s+(\d+)"
              :total #"Total\s+([0-9]+\.[0-9]{2}-)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; WINE WAREHOUSE 2
   {:vendor "Wine Warehouse"
    :keywords [#"WINE WAREHOUSE" #"Bill-to-Party"]
    :extract {:date #"Invoice date\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill-to-Party.*\n(.*?)\s{2,}"
              :invoice-number #"Invoice number\s+(\d+)"
              :total #"Gross\s+([0-9]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; REGAL
   {:vendor "Regal Wine Co"
    :keywords [#"REGAL WINE"]
    :extract {:date #"INVOICE DATE.*\n\n(?:.*?)([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"INVOICE\n(.*?)\s{2,}"
              :invoice-number #"INVOICE NUMBER.*\n\n(?:.*?)(\d+)"
              :total #"Total Amount Due(?:.*?)([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}}

   ;; ALSCO
   {:vendor "Alsco"
    :keywords [#"Alsco"]
    :extract {:date #"Invoice Date:\s+(.*)"
              :customer-identifier #"Invoice F o r(?:.*?)\n\s+(.*?)\s{2,}"
              :invoice-number #" (\S+)\n\s+Invoice Date"
              :total #"Invoice Total\s+\$([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MMM dd yyyy"]
             :total [:trim-commas nil]}}

   ;; SUNCREST
   {:vendor "Suncrest USA Inc"
    :keywords [#"Suncrest.*Invoice"]
    :extract {:date #"Date.*\n\s*\n(?:.*?)([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To(?:.*?)\n\n(.*?)\s{2,}"
              :invoice-number #"Invoice #.*\n\s*\n.*? (\d{3,})"
              :total #"Balance Due\s+\$([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; PACIFIC SEAFOOD
   {:vendor "Pacific Seafood"
    :keywords [#"(pacseafood|PACIFIC FRESH)"]
    :extract {:date #"DATE(?:.*\n.*(?=([0-9]+/[0-9]+/[0-9]+)))([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"DELIVER TO:(?:.*\n)(.*?)(?=\s{2})"
              :invoice-number #"INVOICE NO\.\n(?:.*?(?= [0-9]+\n)) ([0-9]+)"
              :total #"TOTAL\n\s+([0-9,\.]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; P&R
   {:vendor "P & R PAPER SUPPLY CO"
    :keywords [#"PAPER SUPPLY COMPANY"]
    :extract {:date #"Invoiced.*\n\s+\S+\s+(\S+)"
              :customer-identifier #"Bill To.*\n\s*(.*?)\s{2,}"
              :invoice-number #"Invoice#.*\n.*\n.*?(\S+)\s+\d+\n"
              :total #"INVOICE TOTAL\s+([\-]?[\d,]+\.\d+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; SUNCREST STATEMENT
   {:vendor "Suncrest USA Inc"
    :keywords [#"Suncrest.*\n.*Statement"]
    :extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"To:(?:.*?)\n\s*(.*?)\s{2,}"
              :invoice-number #"INV #(\d+)"
              :total #"Orig\. Amount \$([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}
    :multi #"\n"
    :multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+INV "}

   ;; US FOODS
   {:vendor "US Foods"
    :keywords [#"US Foods"]
    :extract {:date #"INVOICE DATE.*\n+.*?(?=([0-9]+/[0-9]+/[0-9]+))([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO[^\n]+\n([\S ]+?)(?=\s{2,})" ;; alt: ([\S ]+)\s{2,}
              :invoice-number #"INVOICE NUMBER[^\n]+\n\n\d+\s+(\d+)"
              :total #"(?:DELIVERED AMOUNT|PLEASE REMIT|AMOUNT).*?\$([0-9.,]+( CR)?)\n"}
    :parser {:date [:clj-time ["MM/dd/yyyy" "yyyy/MM/dd"]]
             :total [:trim-commas-and-negate nil]}}

   ;; SYSCO
   {:vendor "Sysco"
    :keywords [#"SYSCO"]
    :extract {:date #"INVOICE NUMBER[^\n]+\n([^\n]+)\n"
              :customer-identifier #"INVOICE NUMBER[^\n]+\n[^\n]+\n([\S ]+?)(?=\s{2,})" ;; alt: ([\S ]+)\s{2,}
              :invoice-number #"INVOICE NUMBER[^\n]+\n[^\n]+\n.*?(?=[\d]{9})(\d{9})"
              :total #"\s{2,}INVOICE\s{2,}.*?(?=TOTAL)TOTAL\s+([0-9.]+[\-]?)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; LE BOULANGER
   {:vendor "Le Boulanger"
    :keywords [#"Le Boulanger"]
    :extract {:date #"Invoice Date: ([^\n]+)\n"
              :customer-identifier #"Ship to\n+\s+([\S ]+?)(?=\s{2,})"
              :invoice-number #"Invoice No: ([^\n]+)\n"
              :total #" Total:\s+([\d\.]+)"}
    :parser {:date [:clj-time "MMM dd, yyyy"]}}

   ;; A&B
   {:vendor "A&B Produce"
    :keywords [#"ABProduce"]
    :extract {:date #"^\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO:[^\n]+\n[^\n]+\n[^\n]+\n(.*)\s{2,}"
              :invoice-number #"(\d+)\s+(?:INV|C/M)"
              :total #" (?:INV|C/M)\s+([\d\.\-]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"^\s+[0-9]+/[0-9]+/[0-9]+\s+\d+\s+(INV|C/M)\s+"}

   ;; A&B Invoice
   {:vendor "A&B Produce"
    :keywords [#"415-656-0254"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO.*\n.*?\s{2,}(.*?)\s{2,}"
              :invoice-number #"INVOICE#\n.*?(\d+)\n"
              :total #"INV TOTAL \s+([\d\.\-]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; Ocean Queen
   {:vendor "Ocean Queen"
    :keywords [#"Ocean Queen USA"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To.*\n\s*(.*?)\s{2,}"
              :invoice-number #"Invoice #\n.*\n.*?(\d+)\n"
              :total #"Total\s+\$([\d\.,\-]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; CHEF's CHOICE
   {:vendor "Chef's Choice Produce Co"
    :keywords [#"(2170 MARTIN AVENUE|213-3886)"]
    :extract {:date #"([0-9/]{10,10})"
              :customer-identifier #"\n B\s+([\S ]+?)(?=\s{2,}I) "
              :invoice-number #"^0*([0-9]+)"
              :total #"INVOICE\s+([\d\.]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]} ;; may want to try two approaches [:clj-time ["MM/dd/yyyy" "MM1dd1yyyy"]]
    :multi #"\n"
    :multi-match? #"\s+INVOICE\s+"}

   ;; FRESH AND BEST
   {:vendor "Fresh and Best Produce"
    :keywords [#"freshbestproduce"]
    :extract {:date #"\n\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To[^\n]+\n([A-Za-z ']+)"
              :invoice-number #"\n\s+[0-9/]+\s+(\d+)"
              :total #"Balance Due\s+\$([0-9\.]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]}}

   ;; PFG - ROMA LOOK 1
   {:vendor "Performance Food Group - ROMA"
    :keywords [#"inquiries call 1-800-233-6211"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})"
              :invoice-number #"^\s+([\dA-Z]+)"
              :total #"([\d\.,\-]+\.[\d\-]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"^\s+[\d]{6,8}\s+\d+"}

   ;; ACME BREAD
   {:vendor "Acme Bread"
    :keywords [#"acmebread\.com"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Print Date.*\n.*\n(.*)"
              :invoice-number #"^\s*(\d+)"
              :total #"\s{2,}(\d+\.\d{2})\s{2,}"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"^\s*\d+\s+([0-9]+/[0-9]+/[0-9]+)"}

   ;; PFG - ROMA
   {:vendor "Performance Food Group - ROMA"
    :keywords [#"Performance Food Group, Inc\n\f"]
    :extract {:date #"Date: ([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})"
              :invoice-number #"INVOICE NO.\s+ ([\d]+)"
              :total #"([\d\.,]+)\s+INVOICE TOTAL"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}}

   ;; JFC
   {:vendor "JFC International"
    :keywords [#"48490 MILMONT DRIVE"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"SOLD\s+([\S ]+?)(?=(\s{2,}|\n))"
              :invoice-number #"(\S+)\s+(?=[0-9]+/[0-9]+/[0-9]+)"
              :total #"(?:INVOICE|TOTAL|CREDIT)\s+([\d\.,\-]+\.[\d\-]+( CR)?)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; ROMA BAKERY
   {:vendor "Roma Bakery Inc."
    :keywords [#"Roma Bakery Inc"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To.*\n\s+(.*?)\s{2,}"
              :invoice-number #"Invoice (\d+)"
              :total #"Total\s+([\d\-\.]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; KAEL FOODS
   {:vendor "Kael Foods"
    :keywords [#"kaelfoods.com"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To.*\n\s+(.*?)\s{2,}"
              :invoice-number #"INVOICE 0*(\d+)"
              :total #"TOTAL:\s+\$([\d\-\.,]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; Starter Bakery
   {:vendor "Starter Bakery"
    :keywords [#"starterbakery.com"]
    :extract {:date #"INVOICE DATE:\s+(.*?)\s{2,}"
              :customer-identifier #"BILL TO:.*\n\s+(.*?)\s{2,}"
              :invoice-number #"Invoice.*?(\d+)"
              :total #"Total:.*?([\d\-,]+\.\d{2,2}+)"}
    :parser {:date [:clj-time "MMMM dd, yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; Trimark
   {:vendor "TriMark R.W. Smith"
    :keywords [#"TriMark"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To\s+(.*?)\s{2,}"
              :invoice-number #"Invoice #\n.*?([\d\-]+)\n"
              :total #"Invoice Total\s+([\d\-,]+\.\d{2,2}+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}])

(defn offset
  "Returns the address string (as rendered by CellAddress) of the cell located
  x columns to the right of and y rows below cell c. Negative x / y move left /
  up. c must be a non-nil POI cell."
  [^org.apache.poi.ss.usermodel.Cell c x y]
  ;; The type hint removes the reflective .getAddress/.getRow/.getColumn lookups
  ;; that otherwise happen on every call; the result is unchanged.
  (let [address (.getAddress c)]
    (str (CellAddress. (int (+ y (.getRow address)))
                       (int (+ x (.getColumn address)))))))

(defn- cell-at
  "Selects the cell x columns right of and y rows below anchor on sheet.
  Returns whatever d/select-cell returns for that address (nil when the cell
  does not exist)."
  [sheet anchor x y]
  (d/select-cell (offset anchor x y) sheet))

(defn- nearest-value-above
  "Starting x columns away from anchor on the anchor's own row and walking up to
  row 0, returns the first cell value that is not blank, or nil when there is
  none. Absent cells are skipped and non-string values are stringified for the
  blank check only, so neither aborts the walk."
  [sheet ^org.apache.poi.ss.usermodel.Cell anchor x]
  (->> (range 0 (inc (.getRowIndex anchor)))
       (keep (fn [rows-up] (cell-at sheet anchor x (- rows-up))))
       (map d/read-cell)
       (remove (fn [value] (str/blank? (str value))))
       first))

;; Excel templates come in two shapes:
;;   * declarative -- :extract maps a field to [label-regex a b] or
;;     [label-regex a b value-regex]; the meaning of the two numbers is defined
;;     by the consumer, which is not part of this file.
;;   * functional  -- :extract is (fn [workbook vendor]) returning a vector of
;;     invoice maps.
(def excel-templates
  [{:vendor "Isp Productions"
    :keywords [#"ISP PRODUCTIONS"]
    :extract {:customer-identifier [#"SERVICES PROVIDED TO" 1 0]
              :total [#"PAY THIS" -1 0]
              :date [#"INVOICE DATE" 0 1]
              :invoice-number [#"INVOICE NUMBER" 0 1]}}

   {:vendor "Southern Glazers"
    :keywords [#"Please note that the total invoice amount may"]
    :extract {:customer-identifier [#"Customer #" 1 0]
              :total [#"Subtotal" 0 16]
              :date [#"Date" 0 0 #"Date: (.*)"]
              :invoice-number [#"Invoice #" 0 0 #"Invoice #: (.*)"]
              :account-number [#"Customer #" 0 0 #"Customer #: (.*)"]}
    :parser {:total [:trim-commas-and-remove-dollars nil]
             :date [:clj-time "MM/dd/yyyy"]}}

   {:vendor "Mama Lu's Foods"
    :keywords [#"Mama Lu's Foods"]
    ;; One invoice per row of the first sheet: the first five rows are skipped,
    ;; as is any row whose second cell reads as nil/false.
    :extract (fn [wb vendor]
               (let [[sheet] (d/sheet-seq wb)]
                 (into []
                       (comp
                        (drop 5)
                        (filter (fn [row]
                                  (and row (->> row d/cell-seq second d/read-cell))))
                        (map (fn [row]
                               (let [[_ customer-order-number num date name amount]
                                     (map d/read-cell (d/cell-seq row))]
                                 {:customer-identifier (second (re-find #"([^:]*):" name))
                                  :text name
                                  :full-text name
                                  :date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim date))
                                  :invoice-number (str customer-order-number "-" (int num))
                                  :total (str amount)
                                  :vendor-code vendor}))))
                       (d/row-seq sheet))))}

   {:vendor "DVW Commercial"
    :keywords [#"Total for" #"Num"]
    ;; One invoice per cell of the first sheet whose text contains "Invoice".
    ;; The customer is the nearest non-blank value three columns to the left,
    ;; searching from the same row upwards; the other fields sit at fixed
    ;; column offsets on the same row.
    :extract (fn [wb vendor]
               (let [[sheet] (d/sheet-seq wb)]
                 (into []
                       (comp
                        (filter (fn [c] (re-find #"Invoice" (str (d/read-cell c)))))
                        (map (fn [c]
                               (let [customer-identifier (nearest-value-above sheet c -3)]
                                 {:customer-identifier customer-identifier
                                  :text customer-identifier
                                  :full-text customer-identifier
                                  :date (d/read-cell (cell-at sheet c 2 0))
                                  :invoice-number (d/read-cell (cell-at sheet c 4 0))
                                  :total (str (d/read-cell (cell-at sheet c 8 0)))
                                  :vendor-code vendor}))))
                       (d/cell-seq sheet))))}

   {:vendor "Chef's Choice Produce Co"
    :keywords [#"Alt_invoice_number"]
    ;; Skips cells up to the one containing "Customer_id", then nine more, and
    ;; emits one invoice per remaining first-column cell whose right-hand
    ;; neighbour (the customer name) is not blank.
    :extract (fn [wb vendor]
               (let [[sheet] (d/sheet-seq wb)
                     ;; read once per use site instead of re-selecting the cell
                     customer-name (fn [c] (d/read-cell (cell-at sheet c 1 0)))]
                 (into []
                       (comp
                        (drop-while (fn [c]
                                      (not (re-find #"Customer_id" (str (d/read-cell c))))))
                        (drop 9)
                        (filter (fn [^org.apache.poi.ss.usermodel.Cell c]
                                  (= 0 (.getColumnIndex c))))
                        (remove (fn [c] (str/blank? (customer-name c))))
                        (map (fn [c]
                               (let [raw-name (customer-name c)
                                     raw-number (str/trim (d/read-cell (cell-at sheet c 2 0)))
                                     ;; group 1: digits/letters after leading zeros;
                                     ;; group 2: identifier starting with letters
                                     number-match (re-find #"^(?:0+([A-Z0-9]+))|([A-Z]+[A-Z0-9]+)"
                                                           raw-number)]
                                 {:customer-identifier (str/trim raw-name)
                                  :text raw-name
                                  :full-text raw-name
                                  :date (u/parse-value :clj-time
                                                       "MM/dd/yyyy"
                                                       (str/trim (d/read-cell (cell-at sheet c 5 0))))
                                  :invoice-number (some identity (rest number-match))
                                  :total (str (d/read-cell (cell-at sheet c 7 0)))
                                  :vendor-code vendor})))
                        (filter :customer-identifier))
                       (d/cell-seq sheet))))}])