;; Clojure source -- 577 lines, 25 KiB
(ns auto-ap.parse.templates
|
|
(:require [dk.ative.docjure.spreadsheet :as d]
|
|
[auto-ap.parse.util :as u]
|
|
[clojure.string :as str])
|
|
(:import (org.apache.poi.ss.util CellAddress)))
|
|
|
|
|
|
(def pdf-templates
  "Vendor templates for invoices and statements extracted from PDF text.

  Each entry is a map with:
    :vendor        vendor name string
    :keywords      vector of regexes identifying the vendor's documents
    :extract       map of field keyword to a regex whose capture group holds
                   the raw field text
    :parser        map of field keyword to a [parser-kind argument] pair
    :multi         (optional) regex
    :multi-match?  (optional) regex

  NOTE(review): how :keywords, :multi and :multi-match? are applied is decided
  by the parsing code outside this definition. Presumably every keyword must
  match, :multi splits one document into sections and :multi-match? selects
  the sections to parse -- confirm against the caller. Templates that share a
  broad keyword (for example the two Daylight Foods entries) presumably rely
  on the order of this vector -- confirm before reordering."
  [;; CHEF's WAREHOUSE
   {:vendor "CHFW"
    :keywords [#"CHEF'S WAREHOUSE"]
    :extract {:total #"2 WKS C\.C\.\s+([\d.,]+)"
              :customer-identifier #"\n([A-Z][A-Z ]+)\s{2,}"
              :date #"\s+([0-9]+/[0-9]+/[0-9]+)"
              :invoice-number #"\s+[0-9]+/[0-9]+/[0-9]+\s+([0-9]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]}}

   ;; GGM
   {:vendor "Golden Gate Meat Company, Inc"
    :keywords [#"Golden Gate Meat"]
    :extract {:total #"Invoice Total\:\s+\$([\d.,]+)"
              :customer-identifier #"Bill To\s*:\s*([\w ]+)\s{2,}"
              :date #"Printed:\s+([0-9]+/[0-9]+/[0-9]+)"
              :invoice-number #"Invoice\s+[^\n]+\n[^\n]+\n\s+([0-9]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; CINTAS
   {:vendor "CINTAS"
    :keywords [#"CINTAS CORPORATION"]
    :extract {:invoice-number #"INVOICE\s#\s+([\d.,]+)"
              :customer-identifier #"BILL TO\s*:\s{2,}([\w ]+)\s{2,}"
              :date #"INVOICE DATE\s*\n.*\s+([0-9]+/[0-9]+/[0-9]+)"
              :total #"INVOICE TOTAL\s+([0-9.]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]}
    :multi #"\f\f"}

   ;; CARBONIC
   {:vendor "Carbonic Service Inc"
    :keywords [#"CARBONIC SERVICE INC"]
    :extract {:invoice-number #"Invoice #\s*\n\s*[\w\.]+\s+[\w\./]+(.*)\s*\n"
              :customer-identifier #"Bill To[^\n]+\n[^\n]*\n([\w ]+)\s{2,}"
              :date #"Invoice #\s*\n\s*[\w\.]+\s+([\w\./]+)"
              :total #"Total\s+\$([0-9.,]+)"
              :account-number #"Account #\s+(\d+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}}

   ;; DVW
   {:vendor "DVW Commercial"
    :keywords [#"DVW Commercial"]
    :extract {:date #"\s*([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To:[^\n]+\n[^\n]*\n\s*([\w ]+) \("
              :invoice-number #"Invoice\s*\n\s*[S]?([\w\./]+)*"
              :total #"Total:\s+\$ ([0-9.]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]}}

   ;; DAYLIGHT FOOD STATEMENT
   {:vendor "Daylight Foods"
    :keywords [#"DAYLIGHT FOODS" #"Customer Statement"]
    :extract {:date #"^.*?([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Phone:.*\n+\s+(.*)"
              :invoice-number #"\s+(\w+)"
              :total #"([\-]?[0-9]+\.[0-9]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"^\s*[A-Z]\d+\s+([0-9]+/[0-9]+/[0-9]+)"}

   ;; DAYLIGHT FOOD
   {:vendor "Daylight Foods"
    :keywords [#"DAYLIGHT FOODS"]
    :extract {:date #"\n\s*Date[^\n]+\n\s*([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To:.*\n\s*(.*?)\s{2,}"
              :invoice-number #"Invoice\s([\w\./]+)*"
              :total #"Total Invoice\s+([\-]?[0-9.]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; SOUTHBAY FRESH
   {:vendor "Southbay Fresh Produce"
    :keywords [#"SOUTH BAY FRESH PRODUCE"]
    :extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"To:[^\n]*\n\s+([A-Za-z' ]+)\s{2}"
              :invoice-number #"INV #\/(\d+)"
              :total #"\$([0-9.]+)\."}
    :parser {:date [:clj-time "MM/dd/yyyy"]}
    :multi #"\n"
    :multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+INV "}

   ;; PFG - LEDYARD
   {:vendor "Performance Food Group - LEDYARD"
    :keywords [#"performancefoodservice"]
    :extract {:date #"DELIVER TO[^\n]+\n.+?(?=[0-9]+/[0-9]+/[0-9]+)([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"DELIVER TO[^\n]+\n\s*[\S ]+?(?=\s{2,}([\S ]+?)\s{2,})" ;; ([\S ]+)\s{2,}
              :invoice-number #"DELIVER TO[^\n]+\n.+?(?=\d+)(\d+)\s*\n"
              :total #"([0-9.\-]+)\s+Status Code"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; SOUTHERN GLAZER'S
   {:vendor "Southern Glazers"
    :keywords [#"Southern Glazer's"]
    :extract {:date #"INVOICE DATE(?s:.*)(?= (?:[0-9]+/[0-9]+/[0-9]+)\s+([0-9]+/[0-9]+/[0-9]+)) "
              :customer-identifier #"SOLD TO:(?:.*)(?=\n)\n(.*)(?=\s{2,})" ;; ([\S ]+)\s{2,}
              :invoice-number #"INVOICE\n(?:.*?)(?=\d{4,})(\d+)"
              :total #"PAY THIS AMOUNT(?s:.*)(?= ([0-9,]+\.[0-9]{2}))"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}}

   ;; GOLDEN BRANDS
   {:vendor "Golden Brands San Jose"
    :keywords [#"GOLDEN BRANDS"]
    :extract {:date #"(?:.*\n){4}(.*)" ;; skips four lines and captures the whole fifth line
              :customer-identifier #"Account:(?:.*\n)(.*(?=\s{2,}))"
              :invoice-number #"Invoice#: (\d+)"
              :total #"Invoice Total\s+([0-9,]+\.[0-9]{2})"}
    ;; NOTE(review): "HH" is a 24-hour field yet the pattern also carries an
    ;; am/pm marker ("aa"); "hh" may be what was intended -- confirm against a
    ;; sample invoice before changing.
    :parser {:date [:clj-time "EEE MMM dd, yyyy HH:mm aa"]
             :total [:trim-commas nil]}}

   ;; BIGOLI FRESH PASTA
   {:vendor "Bigoli Fresh Pasta"
    :keywords [#"bigolifreshpasta.com"]
    :extract {:date #"INVOICE #.*?\n.*?([0-9]+/[0-9]+/[0-9]+)" ;; first date on the line after "INVOICE #"
              :customer-identifier #"BILL TO.*\n\s+(.*?)\s{2,}"
              :invoice-number #"INVOICE #.*?\n(\d+)"
              :total #" TOTAL\s+([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; Del Monte Meats
   {:vendor "Del Monte Meat Co"
    :keywords [#"Del Monte"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To:(?:.*?(?=\s{2,}))(.*)-"
              :invoice-number #"Invoice\s+([0-9]+)"
              :total #"([0-9\.,]+)$"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}
    :multi #"\n"
    :multi-match? #"^\s{1,2}[0-9]+"}

   ;; Royal Hawaiian
   {:vendor "Royal Hawaiian"
    :keywords [#"ROYAL HAWAIIAN"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO.*\n\s+(.*?(?=(\s{2,}|\()))"
              :invoice-number #"Invoice#:\s+([0-9]+)"
              :total #"INV TOTAL:\s+([0-9\.,]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; AUTO-CHLOR
   {:vendor "Auto-Chlor"
    :keywords [#"AUTO-CHLOR"]
    :extract {:date #"DATE : ([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"(.*?)\s{2,}.*CUSTOMER#"
              :invoice-number #"INVOICE# :\s+([0-9]+)"
              :total #"TOTAL SALE :\s+([0-9,.]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; Classic Wines
   {:vendor "Classic Wines"
    :keywords [#"585-9463"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Customer Id:.*\n.*\n.*\n(.*)"
              :invoice-number #"^.{23}\s+(\w+)"
              :total #"\$([0-9\.,]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}
    :multi #"\n"
    :multi-match? #"^\s+.*?\d{6,}.*?\$"}

   ;; C & L
   {:vendor "C&L Produce"
    :keywords [#"440 Franklin Street"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To.*\n.*\n\s+(.*?)\s{2,}"
              :invoice-number #"Invoice #.*\n.*\n.*?(\d{5,})\n"
              :total #"Total\s+\$([0-9\.,]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}}

   ;; GENERAL PRODUCE
   {:vendor "General Produce Company"
    :keywords [#"generalproduce.com"]
    :extract {:date #"INVOICE DATE.*\n.*?([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"SHIP TO.*\n.*\n\s+(.*?)\s{2,}"
              :invoice-number #"INVOICE NO.*\n.*?(\d{7,}?)\s+"
              :total #"TOTAL\s+([\-0-9,]+\.[0-9]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; Young's Market Co new statement
   {:vendor "Youngs Market"
    :keywords [#"(YOUNG'S MARKET COMPANY|Young.*Statement)"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Customer Name +([\w ]+)"
              :invoice-number #"\s{2,}([0-9]+)"
              :total #"\$?([0-9,]+\.[0-9]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}
    :multi #"\n"
    :multi-match? #"^[0-9]+.*\$?([0-9,]+\.[0-9]+).*\$?([0-9,]+\.[0-9]+)"}

   ;; Young's Market Co - INVOICE
   {:vendor "Youngs Market"
    :keywords [#"P.O.Box 743564"]
    :extract {:date #"(?:INVOICE|CREDIT) DATE\n(?:.*?)(\S+)\n"
              :customer-identifier #"(?:INVOICE|CREDIT) DATE\n [0-9]+\s+(.*?)\s{2,}"
              :invoice-number #"(?:INVOICE|CREDIT) DATE\n(?:.*?)\s{2,}(\d+?)\s+\S+\n"
              :total #"Net Amount(?:.*\n){4}(?:.*?)([\-]?[0-9\.]+)\n"}
    :parser {:date [:clj-time "dd-MMM-yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; WINE WAREHOUSE
   {:vendor "Wine Warehouse"
    :keywords [#"WINE WAREHOUSE" #"Bottle prices include"]
    :extract {:date #"INVOICE DATE\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"SHIP-TO-PARTY.*\n(.*?)(?=\s{2,})"
              :invoice-number #"INV #\s+(\d+)"
              ;; The integer part accepts thousands separators so that amounts
              ;; such as 1,234.56 match; the :trim-commas parser removes them.
              :total #"PLEASE PAY THIS AMOUNT\s+([0-9,]+\.[0-9]{2})"
              :account-number #"CUSTOMER NUMBER\s+(\d+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; WINE WAREHOUSE 3
   {:vendor "Wine Warehouse"
    :keywords [#"Wine Warehouse" #"PLEASE APPLY CREDIT"]
    :extract {:date #"Credit Memo Number/Date\s+\d+\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Ship-To-Party.*\n(.*?)\s{2,}"
              :invoice-number #"Credit Memo Number/Date\s+(\d+)"
              ;; Credit amounts carry a trailing "-"; commas are accepted in the
              ;; integer part, as in the other Wine Warehouse templates.
              :total #"Total\s+([0-9,]+\.[0-9]{2}-)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; WINE WAREHOUSE 2
   {:vendor "Wine Warehouse"
    :keywords [#"WINE WAREHOUSE" #"Bill-to-Party"]
    :extract {:date #"Invoice date\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill-to-Party.*\n(.*?)\s{2,}"
              :invoice-number #"Invoice number\s+(\d+)"
              ;; Commas accepted so amounts of 1,000.00 and above still match.
              :total #"Gross\s+([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; REGAL
   {:vendor "Regal Wine Co"
    :keywords [#"REGAL WINE"]
    :extract {:date #"INVOICE DATE.*\n\n(?:.*?)([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"INVOICE\n(.*?)\s{2,}"
              :invoice-number #"INVOICE NUMBER.*\n\n(?:.*?)(\d+)"
              :total #"Total Amount Due(?:.*?)([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}}

   ;; ALSCO
   {:vendor "Alsco"
    :keywords [#"Alsco"]
    :extract {:date #"Invoice Date:\s+(.*)"
              :customer-identifier #"Invoice F o r(?:.*?)\n\s+(.*?)\s{2,}"
              :invoice-number #" (\S+)\n\s+Invoice Date"
              :total #"Invoice Total\s+\$([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MMM dd yyyy"]
             :total [:trim-commas nil]}}

   ;; SUNCREST
   {:vendor "Suncrest USA Inc"
    :keywords [#"Suncrest.*Invoice"]
    :extract {:date #"Date.*\n\s*\n(?:.*?)([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To(?:.*?)\n\n(.*?)\s{2,}"
              :invoice-number #"Invoice #.*\n\s*\n.*? (\d{3,})"
              :total #"Balance Due\s+\$([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; PACIFIC SEAFOOD
   {:vendor "Pacific Seafood"
    :keywords [#"(pacseafood|PACIFIC FRESH)"]
    :extract {:date #"DATE(?:.*\n.*(?=([0-9]+/[0-9]+/[0-9]+)))([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"DELIVER TO:(?:.*\n)(.*?)(?=\s{2})"
              :invoice-number #"INVOICE NO\.\n(?:.*?(?= [0-9]+\n)) ([0-9]+)"
              :total #"TOTAL\n\s+([0-9,\.]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}}

   ;; P&R
   {:vendor "P & R PAPER SUPPLY CO"
    :keywords [#"PAPER SUPPLY COMPANY"]
    :extract {:date #"Invoiced.*\n\s+\S+\s+(\S+)"
              :customer-identifier #"Bill To.*\n\s*(.*?)\s{2,}"
              :invoice-number #"Invoice#.*\n.*\n.*?(\S+)\s+\d+\n"
              :total #"INVOICE TOTAL\s+([\-]?[\d,]+\.\d+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; SUNCREST STATEMENT
   {:vendor "Suncrest USA Inc"
    :keywords [#"Suncrest.*\n.*Statement"]
    :extract {:date #"^([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"To:(?:.*?)\n\s*(.*?)\s{2,}"
              :invoice-number #"INV #(\d+)"
              :total #"Orig\. Amount \$([0-9,]+\.[0-9]{2})"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas nil]}
    :multi #"\n"
    :multi-match? #"^[0-9]+/[0-9]+/[0-9]+\s+INV "}

   ;; US FOODS
   {:vendor "US Foods"
    :keywords [#"US Foods"]
    :extract {:date #"INVOICE DATE.*\n+.*?(?=([0-9]+/[0-9]+/[0-9]+))([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,}
              :invoice-number #"INVOICE NUMBER[^\n]+\n\n\d+\s+(\d+)"
              :total #"(?:DELIVERED AMOUNT|PLEASE REMIT|AMOUNT).*?\$([0-9.,]+( CR)?)\n"}
    :parser {:date [:clj-time ["MM/dd/yyyy" "yyyy/MM/dd"]]
             :total [:trim-commas-and-negate nil]}}

   ;; SYSCO
   {:vendor "Sysco"
    :keywords [#"SYSCO"]
    :extract {:date #"INVOICE NUMBER[^\n]+\n([^\n]+)\n"
              :customer-identifier #"INVOICE NUMBER[^\n]+\n[^\n]+\n([\S ]+?)(?=\s{2,})" ;; ([\S ]+)\s{2,}
              :invoice-number #"INVOICE NUMBER[^\n]+\n[^\n]+\n.*?(?=[\d]{9})(\d{9})"
              :total #"\s{2,}INVOICE\s{2,}.*?(?=TOTAL)TOTAL\s+([0-9.]+[\-]?)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; LE BOULANGER
   {:vendor "Le Boulanger"
    :keywords [#"Le Boulanger"]
    :extract {:date #"Invoice Date: ([^\n]+)\n"
              :customer-identifier #"Ship to\n+\s+([\S ]+?)(?=\s{2,})"
              :invoice-number #"Invoice No: ([^\n]+)\n"
              :total #" Total:\s+([\d\.]+)"}
    :parser {:date [:clj-time "MMM dd, yyyy"]}}

   ;; A&B
   {:vendor "A&B Produce"
    :keywords [#"ABProduce"]
    :extract {:date #"^\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO:[^\n]+\n[^\n]+\n[^\n]+\n(.*)\s{2,}"
              :invoice-number #"(\d+)\s+(?:INV|C/M)"
              :total #" (?:INV|C/M)\s+([\d\.\-]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"^\s+[0-9]+/[0-9]+/[0-9]+\s+\d+\s+(INV|C/M)\s+"}

   ;; A&B Invoice
   {:vendor "A&B Produce"
    :keywords [#"415-656-0254"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO.*\n.*?\s{2,}(.*?)\s{2,}"
              :invoice-number #"INVOICE#\n.*?(\d+)\n"
              :total #"INV TOTAL \s+([\d\.\-]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; Ocean Queen
   {:vendor "Ocean Queen"
    :keywords [#"Ocean Queen USA"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To.*\n\s*(.*?)\s{2,}"
              :invoice-number #"Invoice #\n.*\n.*?(\d+)\n"
              :total #"Total\s+\$([\d\.,\-]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; CHEF's CHOICE
   {:vendor "Chef's Choice Produce Co"
    :keywords [#"(2170 MARTIN AVENUE|213-3886)"]
    :extract {:date #"([0-9/]{10,10})"
              :customer-identifier #"\n B\s+([\S ]+?)(?=\s{2,}I) "
              :invoice-number #"^0*([0-9]+)"
              :total #"INVOICE\s+([\d\.]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]} ;; may want to try two approaches [:clj-time ["MM/dd/yyyy" "MM1dd1yyyy"]]
    :multi #"\n"
    :multi-match? #"\s+INVOICE\s+"}

   ;; FRESH AND BEST
   {:vendor "Fresh and Best Produce"
    :keywords [#"freshbestproduce"]
    :extract {:date #"\n\s+([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To[^\n]+\n([A-Za-z ']+)"
              :invoice-number #"\n\s+[0-9/]+\s+(\d+)"
              :total #"Balance Due\s+\$([0-9\.]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]}}

   ;; PFG - ROMA LOOK 1
   {:vendor "Performance Food Group - ROMA"
    :keywords [#"inquiries call 1-800-233-6211"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})"
              :invoice-number #"^\s+([\dA-Z]+)"
              :total #"([\d\.,\-]+\.[\d\-]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"^\s+[\d]{6,8}\s+\d+"}

   ;; ACME BREAD
   {:vendor "Acme Bread"
    :keywords [#"acmebread\.com"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Print Date.*\n.*\n(.*)"
              :invoice-number #"^\s*(\d+)"
              :total #"\s{2,}(\d+\.\d{2})\s{2,}"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}
    :multi #"\n"
    :multi-match? #"^\s*\d+\s+([0-9]+/[0-9]+/[0-9]+)"}

   ;; PFG - ROMA
   {:vendor "Performance Food Group - ROMA"
    :keywords [#"Performance Food Group, Inc\n\f"]
    :extract {:date #"Date: ([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"BILL TO:\s+([\S ]+?)(?=\s{2,})"
              :invoice-number #"INVOICE NO.\s+ ([\d]+)"
              :total #"([\d\.,]+)\s+INVOICE TOTAL"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas nil]}}

   ;; JFC
   {:vendor "JFC International"
    :keywords [#"48490 MILMONT DRIVE"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"SOLD\s+([\S ]+?)(?=(\s{2,}|\n))"
              :invoice-number #"(\S+)\s+(?=[0-9]+/[0-9]+/[0-9]+)"
              :total #"(?:INVOICE|TOTAL|CREDIT)\s+([\d\.,\-]+\.[\d\-]+( CR)?)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; ROMA BAKERY
   {:vendor "Roma Bakery Inc."
    :keywords [#"Roma Bakery Inc"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To.*\n\s+(.*?)\s{2,}"
              :invoice-number #"Invoice (\d+)"
              :total #"Total\s+([\d\-\.]+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}

   ;; KAEL FOODS
   {:vendor "Kael Foods"
    :keywords [#"kaelfoods.com"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To.*\n\s+(.*?)\s{2,}"
              :invoice-number #"INVOICE 0*(\d+)"
              :total #"TOTAL:\s+\$([\d\-\.,]+)"}
    :parser {:date [:clj-time "MM/dd/yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; Starter Bakery
   {:vendor "Starter Bakery"
    :keywords [#"starterbakery.com"]
    :extract {:date #"INVOICE DATE:\s+(.*?)\s{2,}"
              :customer-identifier #"BILL TO:.*\n\s+(.*?)\s{2,}"
              :invoice-number #"Invoice.*?(\d+)"
              :total #"Total:.*?([\d\-,]+\.\d{2,2}+)"}
    :parser {:date [:clj-time "MMMM dd, yyyy"]
             :total [:trim-commas-and-negate nil]}}

   ;; Trimark
   {:vendor "TriMark R.W. Smith"
    :keywords [#"TriMark"]
    :extract {:date #"([0-9]+/[0-9]+/[0-9]+)"
              :customer-identifier #"Bill To\s+(.*?)\s{2,}"
              :invoice-number #"Invoice #\n.*?([\d\-]+)\n"
              :total #"Invoice Total\s+([\d\-,]+\.\d{2,2}+)"}
    :parser {:date [:clj-time "MM/dd/yy"]
             :total [:trim-commas-and-negate nil]}}])
|
|
|
|
(defn offset
  "Returns the string form of the cell address found by shifting cell `c`
  by `x` columns and `y` rows (either shift may be negative)."
  [c x y]
  (let [origin (.getAddress c)
        row    (+ y (.getRow origin))
        column (+ x (.getColumn origin))]
    (str (CellAddress. row column))))
|
|
|
|
(def excel-templates
  "Vendor templates for invoices delivered as spreadsheets.

  Each entry has :vendor and :keywords (a vector of regexes). :extract is
  either
    - a map of field keyword to a vector [regex int int] with an optional
      trailing regex, or
    - a function of [workbook vendor] returning a vector of invoice maps with
      the keys :customer-identifier, :text, :full-text, :date,
      :invoice-number, :total and :vendor-code.

  NOTE(review): the two integers in the vector form look like a cell offset
  from the cell matching the regex, and the optional trailing regex like a
  capture applied to that cell's text -- confirm against the code that
  consumes these templates."
  [;; ISP PRODUCTIONS
   {:vendor "Isp Productions"
    :keywords [#"ISP PRODUCTIONS"]
    :extract {:customer-identifier [#"SERVICES PROVIDED TO" 1 0]
              :total [#"PAY THIS" -1 0]
              :date [#"INVOICE DATE" 0 1]
              :invoice-number [#"INVOICE NUMBER" 0 1]}}

   ;; SOUTHERN GLAZER'S
   {:vendor "Southern Glazers"
    :keywords [#"Please note that the total invoice amount may"]
    :extract {:customer-identifier [#"Customer #" 1 0]
              :total [#"Subtotal" 0 16]
              :date [#"Date" 0 0 #"Date: (.*)"]
              :invoice-number [#"Invoice #" 0 0 #"Invoice #: (.*)"]
              :account-number [#"Customer #" 0 0 #"Customer #: (.*)"]}
    :parser {:total [:trim-commas-and-remove-dollars nil]
             :date [:clj-time "MM/dd/yyyy"]}}

   ;; MAMA LU'S FOODS -- one invoice per row of the first sheet
   {:vendor "Mama Lu's Foods"
    :keywords [#"Mama Lu's Foods"]
    :extract (fn [wb vendor]
               (let [sheet (first (d/sheet-seq wb))
                     ;; keep only rows whose second cell holds a value
                     data-row? (fn [row]
                                 (when row
                                   (-> row d/cell-seq second d/read-cell)))
                     row->invoice (fn [row]
                                    (let [[_ order-no seq-no date-text label amount]
                                          (map d/read-cell (d/cell-seq row))]
                                      {:customer-identifier (second (re-find #"([^:]*):" label))
                                       :text label
                                       :full-text label
                                       :date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim date-text))
                                       :invoice-number (str order-no "-" (int seq-no))
                                       :total (str amount)
                                       :vendor-code vendor}))]
                 (into []
                       (comp (drop 5) ;; the first five rows are skipped
                             (filter data-row?)
                             (map row->invoice))
                       (d/row-seq sheet))))}

   ;; DVW COMMERCIAL -- one invoice per cell whose text contains "Invoice"
   {:vendor "DVW Commercial"
    :keywords [#"Total for" #"Num"]
    :extract (fn [wb vendor]
               (let [sheet (first (d/sheet-seq wb))
                     cell-at (fn [cell dx dy]
                               (d/select-cell (offset cell dx dy) sheet))
                     invoice-cell? (fn [cell]
                                     (re-find #"Invoice" (str (d/read-cell cell))))
                     ;; Start three columns to the left and walk upwards one
                     ;; row at a time until a non-blank cell is found.
                     customer-above (fn [cell]
                                      (->> (cell-at cell -3 0)
                                           (iterate (fn [above] (cell-at above 0 -1)))
                                           (remove (fn [above] (str/blank? (d/read-cell above))))
                                           first
                                           d/read-cell))
                     cell->invoice (fn [cell]
                                     (let [customer (customer-above cell)]
                                       {:customer-identifier customer
                                        :text customer
                                        :full-text customer
                                        :date (d/read-cell (cell-at cell 2 0))
                                        :invoice-number (d/read-cell (cell-at cell 4 0))
                                        :total (str (d/read-cell (cell-at cell 8 0)))
                                        :vendor-code vendor}))]
                 (into []
                       (comp (filter invoice-cell?)
                             (map cell->invoice))
                       (d/cell-seq sheet))))}

   ;; CHEF'S CHOICE -- one invoice per first-column cell after the header
   {:vendor "Chef's Choice Produce Co"
    :keywords [#"Alt_invoice_number"]
    :extract (fn [wb vendor]
               (let [sheet (first (d/sheet-seq wb))
                     value-at (fn [cell dx]
                                (d/read-cell (d/select-cell (offset cell dx 0) sheet)))
                     before-header? (fn [cell]
                                      (not (re-find #"Customer_id" (str (d/read-cell cell)))))
                     first-column? (fn [cell]
                                     (zero? (.getColumnIndex cell)))
                     unnamed? (fn [cell]
                                (str/blank? (str/trim (or (value-at cell 1) ""))))
                     cell->invoice (fn [cell]
                                     (let [customer (value-at cell 1)
                                           id-match (re-find #"^(?:0+([A-Z0-9]+))|([A-Z]+[A-Z0-9]+)"
                                                             (str/trim (value-at cell 2)))]
                                       {:customer-identifier (str/trim customer)
                                        :text customer
                                        :full-text customer
                                        :date (u/parse-value :clj-time "MM/dd/yyyy" (str/trim (value-at cell 5)))
                                        ;; first capture group that matched, if any
                                        :invoice-number (some identity (rest id-match))
                                        :total (str (value-at cell 7))
                                        :vendor-code vendor}))]
                 (into []
                       (comp (drop-while before-header?)
                             (drop 9) ;; skips the matched cell and the eight cells after it
                             (filter first-column?)
                             (remove unnamed?)
                             (map cell->invoice)
                             (filter :customer-identifier))
                       (d/cell-seq sheet))))}])
|