From d95e24a1d751881ebc8907d76e61e668b13b8b22 Mon Sep 17 00:00:00 2001 From: Bryce Date: Sat, 7 Feb 2026 10:10:35 -0800 Subject: [PATCH] Improve Bonanza Produce customer identifier extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Extract customer name in customer-identifier field - Extract street address in account-number field - Use non-greedy regex with lookahead to capture clean values - Update test to verify both name and address extraction 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/clj/auto_ap/parse/templates.clj | 3 ++- test/clj/auto_ap/parse/templates_test.clj | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/clj/auto_ap/parse/templates.clj b/src/clj/auto_ap/parse/templates.clj index c342c57c..a13404cf 100644 --- a/src/clj/auto_ap/parse/templates.clj +++ b/src/clj/auto_ap/parse/templates.clj @@ -758,7 +758,8 @@ :keywords [#"530-544-4136"] :extract {:invoice-number #"NO\s+(\d{8,})\s+\d{2}/\d{2}/\d{2}" :date #"NO\s+\d{8,}\s+(\d{2}/\d{2}/\d{2})" - :customer-identifier #"I\s+(NICK\s+THE\s+GREEK)" + :customer-identifier #"(?s)I\s+([A-Z][A-Z\s]+?)\s{2,}.*?L\s+([0-9][A-Z0-9\s]+)" + :account-number #"(?s)L\s+([0-9][0-9A-Z\s]+?)(?=\n|\s{2,})" :total #"SHIPPED\s+[\d\.]+\s+TOTAL\s+([\d\.]+)"} :parser {:date [:clj-time "MM/dd/yy"] :total [:trim-commas nil]}}]) diff --git a/test/clj/auto_ap/parse/templates_test.clj b/test/clj/auto_ap/parse/templates_test.clj index 77715aa0..7f656383 100644 --- a/test/clj/auto_ap/parse/templates_test.clj +++ b/test/clj/auto_ap/parse/templates_test.clj @@ -2,6 +2,7 @@ (:require [auto-ap.parse :as sut] [clojure.test :refer [deftest is testing]] [clojure.java.io :as io] + [clojure.string :as str] [clj-time.core :as time])) (deftest parse-bonanza-produce-invoice-03881260 @@ -14,6 +15,8 @@ (is (some? results) "parse should return a result") (is (some? 
result) "Template should match and return a result") (when result + (println "DEBUG: customer-identifier =" (pr-str (:customer-identifier result))) + (println "DEBUG: account-number =" (pr-str (:account-number result))) (is (= "Bonanza Produce" (:vendor-code result))) (is (= "03881260" (:invoice-number result))) ;; Date is parsed as org.joda.time.DateTime - compare year/month/day @@ -21,7 +24,8 @@ (is (= 2026 (time/year d))) (is (= 1 (time/month d))) (is (= 20 (time/day d)))) - ;; Customer identifier includes name for now (address extraction can be enhanced) + ;; Customer identifier holds the customer name; account number holds the street address (is (= "NICK THE GREEK" (:customer-identifier result))) + (is (= "600 VISTA WAY" (str/trim (:account-number result)))) ;; Total is parsed as string, not number (per current behavior) (is (= "23.22" (:total result)))))))