Validates existing template correctly parses multi-page invoice with: - Invoice number 03882095 - Customer identifier NICK THE GREEK - Account number 600 VISTA WAY - Total of $946.24
73 lines
3.8 KiB
Clojure
73 lines
3.8 KiB
Clojure
(ns auto-ap.parse.templates-test
  (:require [auto-ap.parse :as sut]
            [clj-time.core :as time]
            [clojure.java.io :as io]
            [clojure.java.shell :as shell]
            [clojure.string :as str]
            [clojure.test :refer [deftest is testing]]))
|
|
|
|
;; Converts the fixture PDF for invoice 03881260 to text with
;; `pdftotext -layout`, feeds that text to `sut/parse`, and checks the fields
;; of the first parsed result.
(deftest parse-bonanza-produce-invoice-03881260
  (testing "Should parse Bonanza Produce invoice 03881260 with customer identifier including address"
    (let [pdf-file (io/file "dev-resources/INVOICE - 03881260.pdf")
          ;; Extract text same way parse-file does
          {:keys [exit out err]} (shell/sh "pdftotext" "-layout" (str pdf-file) "-")
          results (sut/parse out)
          result (first results)]
      ;; Surface extraction failures (e.g. missing fixture) explicitly, so they
      ;; are not misread as a template mismatch below.
      (is (zero? exit) (str "pdftotext failed: " err))
      (is (some? results) "parse should return a result")
      (is (some? result) "Template should match and return a result")
      (when result
        (is (= "Bonanza Produce" (:vendor-code result)))
        (is (= "03881260" (:invoice-number result)))
        ;; Date is parsed as org.joda.time.DateTime - compare year/month/day
        (is (= [2026 1 20]
               ((juxt time/year time/month time/day) (:date result))))
        ;; Customer identifier includes name, account-number includes street address
        ;; Together they form the full customer identification
        (is (= "NICK THE GREEK" (:customer-identifier result)))
        (is (= "600 VISTA WAY" (str/trim (:account-number result))))
        (is (= "NICK THE GREEK 600 VISTA WAY"
               (str (:customer-identifier result) " " (str/trim (:account-number result)))))
        ;; Total is parsed as string, not number (per current behavior)
        (is (= "23.22" (:total result)))))))
|
|
|
|
;; Converts the fixture PDF for statement 13595522 to text with
;; `pdftotext -layout`, feeds that text to `sut/parse`, and checks that four
;; invoices come back with the expected numbers and totals, in order.
(deftest parse-bonanza-produce-statement-13595522
  (testing "Should parse Bonanza Produce statement 13595522 with multiple invoices"
    (let [pdf-file (io/file "dev-resources/13595522.pdf")
          {:keys [exit out err]} (shell/sh "pdftotext" "-layout" (str pdf-file) "-")
          results (sut/parse out)]
      ;; Surface extraction failures (e.g. missing fixture) explicitly, so they
      ;; are not misread as a parsing problem below.
      (is (zero? exit) (str "pdftotext failed: " err))
      (is (some? results) "parse should return results")
      (is (= 4 (count results)) "Should parse 4 invoices from statement")
      (doseq [result results]
        (is (= "Bonanza Produce" (:vendor-code result)))
        ;; NOTE(review): the single-invoice tests expect the street address in
        ;; :account-number and the name in :customer-identifier; confirm the
        ;; statement template really puts the address here.
        (is (= "600 VISTA WAY" (:customer-identifier result))))
      ;; Compare whole vectors rather than (nth results i): a short result list
      ;; then yields a readable diff instead of IndexOutOfBoundsException errors.
      (is (= ["03876838" "03877314" "03878619" "03879035"]
             (mapv :invoice-number results)))
      (is (= ["891.65" "720.33" "853.16" "1066.60"]
             (mapv :total results))))))
|
|
|
|
;; Converts the fixture PDF for invoice 03882095 to text with
;; `pdftotext -layout`, feeds that text to `sut/parse`, and checks the fields
;; of the first parsed result.
(deftest parse-bonanza-produce-invoice-03882095
  (testing "Should parse Bonanza Produce invoice 03882095 with customer identifier including address"
    (let [pdf-file (io/file "dev-resources/INVOICE - 03882095.pdf")
          {:keys [exit out err]} (shell/sh "pdftotext" "-layout" (str pdf-file) "-")
          results (sut/parse out)
          result (first results)]
      ;; Surface extraction failures (e.g. missing fixture) explicitly, so they
      ;; are not misread as a template mismatch below.
      (is (zero? exit) (str "pdftotext failed: " err))
      (is (some? results) "parse should return a result")
      (is (some? result) "Template should match and return a result")
      (when result
        (is (= "Bonanza Produce" (:vendor-code result)))
        (is (= "03882095" (:invoice-number result)))
        ;; Compare the date as [year month day] using the clj-time accessors.
        (is (= [2026 1 23]
               ((juxt time/year time/month time/day) (:date result))))
        (is (= "NICK THE GREEK" (:customer-identifier result)))
        (is (= "600 VISTA WAY" (str/trim (:account-number result))))
        ;; The total is compared as a string, matching what the parser returns.
        (is (= "946.24" (:total result)))))))
|