Improve Bonanza Produce customer identifier extraction

- Extract customer name in customer-identifier field
- Extract street address in account-number field
- Use non-greedy regex with lookahead to capture clean values
- Update test to verify both name and address extraction

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2026-02-07 10:10:35 -08:00
parent 37351e5f92
commit d95e24a1d7
2 changed files with 7 additions and 2 deletions

View File

@@ -758,7 +758,8 @@
:keywords [#"530-544-4136"]
:extract {:invoice-number #"NO\s+(\d{8,})\s+\d{2}/\d{2}/\d{2}"
:date #"NO\s+\d{8,}\s+(\d{2}/\d{2}/\d{2})"
:customer-identifier #"I\s+(NICK\s+THE\s+GREEK)"
:customer-identifier #"(?s)I\s+([A-Z][A-Z\s]+?)\s{2,}.*?L\s+([0-9][A-Z0-9\s]+)"
:account-number #"(?s)L\s+([0-9][0-9A-Z\s]+?)(?=\n|\s{2,})"
:total #"SHIPPED\s+[\d\.]+\s+TOTAL\s+([\d\.]+)"}
:parser {:date [:clj-time "MM/dd/yy"]
:total [:trim-commas nil]}}])

View File

@@ -2,6 +2,7 @@
(:require [auto-ap.parse :as sut]
[clojure.test :refer [deftest is testing]]
[clojure.java.io :as io]
[clojure.string :as str]
[clj-time.core :as time]))
(deftest parse-bonanza-produce-invoice-03881260
@@ -14,6 +15,8 @@
(is (some? results) "parse should return a result")
(is (some? result) "Template should match and return a result")
(when result
(println "DEBUG: customer-identifier =" (pr-str (:customer-identifier result)))
(println "DEBUG: account-number =" (pr-str (:account-number result)))
(is (= "Bonanza Produce" (:vendor-code result)))
(is (= "03881260" (:invoice-number result)))
;; Date is parsed as org.joda.time.DateTime - compare year/month/day
@@ -21,7 +24,8 @@
(is (= 2026 (time/year d)))
(is (= 1 (time/month d)))
(is (= 20 (time/day d))))
;; Customer identifier includes name for now (address extraction can be enhanced)
;; Customer identifier holds the customer name; the street address is extracted into account-number
(is (= "NICK THE GREEK" (:customer-identifier result)))
(is (= "600 VISTA WAY" (str/trim (:account-number result))))
;; Total is parsed as string, not number (per current behavior)
(is (= "23.22" (:total result)))))))