diff --git a/.claude/skills/invoice-template-creator/SKILL.md b/.claude/skills/invoice-template-creator/SKILL.md index 2afd24a0..c156cbab 100644 --- a/.claude/skills/invoice-template-creator/SKILL.md +++ b/.claude/skills/invoice-template-creator/SKILL.md @@ -146,7 +146,7 @@ The `(?s)` flag makes `.` match newlines. Use non-greedy `+?` and lookaheads `(? ## Testing Best Practices -1. **Start with a failing test** - Define expected values before implementing +1. IMPORTANT, CRITICAL!! **Start with a failing test** - Define expected values before implementing 2. **Test actual PDF parsing** - Use `parse-file` or `parse` with real PDF text 3. **Verify each field individually** - Separate assertions for clarity 4. **Handle date comparisons carefully** - Compare year/month/day separately if needed diff --git a/.opencode/skills/clojure-eval/SKILL.md b/.opencode/skills/clojure-eval/SKILL.md new file mode 100644 index 00000000..36d58650 --- /dev/null +++ b/.opencode/skills/clojure-eval/SKILL.md @@ -0,0 +1,174 @@ +--- +name: clojure-eval +description: Evaluate Clojure code via nREPL using clj-nrepl-eval. Use this when you need to test code, check if edited files compile, verify function behavior, or interact with a running REPL session. +--- + +# Clojure REPL Evaluation + +## When to Use This Skill + +Use this skill when you need to: +- **Verify that edited Clojure files compile and load correctly** +- Test function behavior interactively +- Check the current state of the REPL +- Debug code by evaluating expressions +- Require or load namespaces for testing +- Validate that code changes work before committing + +## How It Works + +The `clj-nrepl-eval` command evaluates Clojure code against an nREPL server. **Session state persists between evaluations**, so you can require a namespace in one evaluation and use it in subsequent calls. Each host:port combination maintains its own session file. + +## Instructions + +### 0. 
Discover and select nREPL server + +First, discover what nREPL servers are running in the current directory: + +```bash +clj-nrepl-eval --discover-ports +``` + +This will show all nREPL servers (Clojure, Babashka, shadow-cljs, etc.) running in the current project directory. + +**Then use the AskUserQuestion tool:** + +- **If ports are discovered:** Prompt user to select which nREPL port to use: + - **question:** "Which nREPL port would you like to use?" + - **header:** "nREPL Port" + - **options:** Present each discovered port as an option with: + - **label:** The port number + - **description:** The server type and status (e.g., "Clojure nREPL server in current directory") + - Include up to 4 discovered ports as options + - The user can select "Other" to enter a custom port number + +- **If no ports are discovered:** Prompt user how to start an nREPL server: + - **question:** "No nREPL servers found. How would you like to start one?" + - **header:** "Start nREPL" + - **options:** + - **label:** "deps.edn alias", **description:** "Find and use an nREPL alias in deps.edn" + - **label:** "Leiningen", **description:** "Start nREPL using 'lein repl'" + - The user can select "Other" for alternative methods or if they already have a server running on a specific port + +IMPORTANT: If you start a REPL yourself, do not supply a port; let the nREPL server start and report the port it was started on. + +### 1. Evaluate Clojure Code + +> Evaluation automatically connects to the given port + +Use the `-p` flag to specify the port and pass your Clojure code. 
+ +**Recommended: Pass code as a command-line argument:** +```bash +clj-nrepl-eval -p <PORT> "(+ 1 2 3)" +``` + +**For multiple expressions (single line):** +```bash +clj-nrepl-eval -p <PORT> "(def x 10) (+ x 20)" +``` + +**Alternative: Using heredoc (may require permission approval for multiline commands):** +```bash +clj-nrepl-eval -p <PORT> <<'EOF' +(def x 10) +(+ x 20) +EOF +``` + +**Alternative: Via stdin pipe:** +```bash +echo "(+ 1 2 3)" | clj-nrepl-eval -p <PORT> +``` + +### 2. Display nREPL Sessions + +**Discover all nREPL servers in current directory:** +```bash +clj-nrepl-eval --discover-ports +``` +Shows all running nREPL servers in the current project directory, including their type (clj/bb/basilisp) and whether they match the current working directory. + +**Check previously connected sessions:** +```bash +clj-nrepl-eval --connected-ports +``` +Shows only connections you have made before (appears after first evaluation on a port). + +### 3. Common Patterns + +**Require a namespace (always use :reload to pick up changes):** +```bash +clj-nrepl-eval -p <PORT> "(require '[my.namespace :as ns] :reload)" +``` + +**Test a function after requiring:** +```bash +clj-nrepl-eval -p <PORT> "(ns/my-function arg1 arg2)" +``` + +**Check if a file compiles:** +```bash +clj-nrepl-eval -p <PORT> "(require 'my.namespace :reload)" +``` + +**Multiple expressions:** +```bash +clj-nrepl-eval -p <PORT> "(def x 10) (* x 2) (+ x 5)" +``` + +**Complex multiline code (using heredoc):** +```bash +clj-nrepl-eval -p <PORT> <<'EOF' +(def x 10) +(* x 2) +(+ x 5) +EOF +``` +*Note: Heredoc syntax may require permission approval.* + +**With custom timeout (in milliseconds):** +```bash +clj-nrepl-eval -p <PORT> --timeout 5000 "(long-running-fn)" +``` + +**Reset the session (clears all state):** +```bash +clj-nrepl-eval -p <PORT> --reset-session +clj-nrepl-eval -p <PORT> --reset-session "(def x 1)" +``` + +## Available Options + +- `-p, --port PORT` - nREPL port (required) +- `-H, --host HOST` - nREPL host (default: 127.0.0.1) +- `-t, --timeout MILLISECONDS` - Timeout 
(default: 120000 = 2 minutes) +- `-r, --reset-session` - Reset the persistent nREPL session +- `-c, --connected-ports` - List previously connected nREPL sessions +- `-d, --discover-ports` - Discover nREPL servers in current directory +- `-h, --help` - Show help message + +## Important Notes + +- **Prefer command-line arguments:** Pass code as quoted strings: `clj-nrepl-eval -p <PORT> "(+ 1 2 3)"` - works with existing permissions +- **Heredoc for complex code:** Use heredoc (`<<'EOF' ... EOF`) for truly multiline code, but note it may require permission approval +- **Sessions persist:** State (vars, namespaces, loaded libraries) persists across invocations until the nREPL server restarts or `--reset-session` is used +- **Automatic delimiter repair:** The tool automatically repairs missing or mismatched parentheses +- **Always use :reload:** When requiring namespaces, use `:reload` to pick up recent changes +- **Default timeout:** 2 minutes (120000ms) - increase for long-running operations +- **Input precedence:** Command-line arguments take precedence over stdin + +## Typical Workflow + +1. Discover nREPL servers: `clj-nrepl-eval --discover-ports` +2. Use **AskUserQuestion** tool to prompt user to select a port +3. Require namespace: + ```bash + clj-nrepl-eval -p <PORT> "(require '[my.ns :as ns] :reload)" + ``` +4. Test function: + ```bash + clj-nrepl-eval -p <PORT> "(ns/my-fn ...)" + ``` +5. 
Iterate: Make changes, re-require with `:reload`, test again + diff --git a/.opencode/skills/clojure-eval/examples.md b/.opencode/skills/clojure-eval/examples.md new file mode 100644 index 00000000..003b5aeb --- /dev/null +++ b/.opencode/skills/clojure-eval/examples.md @@ -0,0 +1,82 @@ +# clj-nrepl-eval Examples + +## Discovery + +```bash +clj-nrepl-eval --connected-ports +``` + +## Heredoc for Multiline Code + +```bash +clj-nrepl-eval -p 7888 <<'EOF' +(defn greet [name] + (str "Hello, " name "!")) + +(greet "Claude") +EOF +``` + +### Heredoc Simplifies String Escaping + +Heredoc avoids shell escaping issues with quotes, backslashes, and special characters: + +```bash +# With heredoc - no escaping needed +clj-nrepl-eval -p 7888 <<'EOF' +(def regex #"\\d{3}-\\d{4}") +(def message "She said \"Hello!\" and waved") +(def path "C:\\Users\\name\\file.txt") +(println message) +EOF + +# Without heredoc - requires complex escaping +clj-nrepl-eval -p 7888 "(def message \"She said \\\"Hello!\\\" and waved\")" +``` + +## Working with Project Namespaces + +```bash +# Test a function after requiring +clj-nrepl-eval -p 7888 <<'EOF' +(require '[clojure-mcp-light.delimiter-repair :as dr] :reload) +(dr/delimiter-error? 
"(defn foo [x]") +EOF +``` + +## Verify Compilation After Edit + +```bash +# If this returns nil, the file compiled successfully +clj-nrepl-eval -p 7888 "(require 'clojure-mcp-light.hook :reload)" +``` + +## Session Management + +```bash +# Reset session if state becomes corrupted +clj-nrepl-eval -p 7888 --reset-session +``` + +## Common Workflow Patterns + +### Load, Test, Iterate + +```bash +# After editing a file, reload and test in one command +clj-nrepl-eval -p 7888 <<'EOF' +(require '[my.namespace :as ns] :reload) +(ns/my-function test-data) +EOF +``` + +### Run Tests After Changes + +```bash +clj-nrepl-eval -p 7888 <<'EOF' +(require '[my.project.core :as core] :reload) +(require '[my.project.core-test :as test] :reload) +(clojure.test/run-tests 'my.project.core-test) +EOF +``` + diff --git a/.opencode/skills/invoice-template-creator/SKILL.md b/.opencode/skills/invoice-template-creator/SKILL.md new file mode 100644 index 00000000..c156cbab --- /dev/null +++ b/.opencode/skills/invoice-template-creator/SKILL.md @@ -0,0 +1,201 @@ +--- +name: invoice-template-creator +description: This skill creates PDF invoice parsing templates for the Integreat system. It should be used when adding support for a new vendor invoice format that needs to be automatically parsed. +license: Complete terms in LICENSE.txt +--- + +# Invoice Template Creator + +This skill automates the creation of invoice parsing templates for the Integreat system. It generates both the template definition and a corresponding test file based on a sample PDF invoice. + +## When to Use This Skill + +Use this skill when you need to add support for a new vendor invoice format that cannot be parsed by existing templates. This typically happens when: + +- A new vendor sends invoices in a unique format +- An existing vendor changes their invoice layout +- You encounter an invoice that fails to parse with current templates + +## Prerequisites + +Before using this skill, ensure you have: + +1. 
A sample PDF invoice file placed in `dev-resources/` directory +2. Identified the vendor name +3. Identified unique text patterns in the invoice (phone numbers, addresses, etc.) that can distinguish this vendor +4. Know the expected values for key fields (invoice number, date, customer name, total) + +## Usage Workflow + +### Step 1: Analyze the PDF + +First, extract and analyze the PDF text to understand its structure: + +```bash +pdftotext -layout "dev-resources/FILENAME.pdf" - +``` + +Look for: +- **Vendor identifiers**: Phone numbers, addresses, or unique text that identifies this vendor +- **Field patterns**: How invoice number, date, customer name, and total appear in the text +- **Layout quirks**: Multi-line fields, special formatting, or unusual spacing + +### Step 2: Define Expected Values + +Document the expected values for each field: + +| Field | Expected Value | Notes | +|-------|---------------|-------| +| Vendor Name | "Vendor Name" | Company name as it should appear | +| Invoice Number | "12345" | The invoice identifier | +| Date | "01/15/26" | Format found in PDF | +| Customer Name | "Customer Name" | As it appears on invoice | +| Customer Address | "123 Main St" | Street address if available | +| Total | "100.00" | Amount | + +### Step 3: Create the Template and Test + +The skill will: + +1. **Create a test file** at `test/clj/auto_ap/parse/templates_test.clj` (or add to existing) + - Test parses the PDF file + - Verifies all expected values are extracted correctly + - Follows existing test patterns + +2. 
**Add template to** `src/clj/auto_ap/parse/templates.clj` + - Adds entry to `pdf-templates` vector + - Includes: + - `:vendor` - Vendor name + - `:keywords` - Regex patterns to identify this vendor (must match all) + - `:extract` - Regex patterns for each field + - `:parser` - Optional date/number parsers + +### Step 4: Iterative Refinement + +Run the test to see if it passes: + +```bash +lein test auto-ap.parse.templates-test +``` + +If it fails, examine the debug output and refine the regex patterns. Common issues: + +- **Template doesn't match**: Keywords don't actually appear in the PDF text +- **Field is nil**: Regex capture group doesn't match the actual text format +- **Wrong value captured**: Regex is too greedy or matches wrong text + +## Template Structure Reference + +### Basic Template Format + +```clojure +{:vendor "Vendor Name" + :keywords [#"unique-pattern-1" #"unique-pattern-2"] + :extract {:invoice-number #"Invoice\s+#\s+(\d+)" + :date #"Date:\s+(\d{2}/\d{2}/\d{2})" + :customer-identifier #"Bill To:\s+([A-Za-z\s]+)" + :total #"Total:\s+\$([\d,]+\.\d{2})"} + :parser {:date [:clj-time "MM/dd/yy"] + :total [:trim-commas nil]}} +``` + +### Field Extraction Patterns + +**Invoice Number:** +- Look for: `"Invoice #12345"` or `"INV: 12345"` +- Pattern: `#"Invoice\s*#?\s*(\d+)"` or `#"INV:\s*(\d+)"` + +**Date:** +- Common formats: `"01/15/26"`, `"Jan 15, 2026"`, `"2026-01-15"` +- Pattern: `#"(\d{2}/\d{2}/\d{2})"` for MM/dd/yy +- Parser: `:date [:clj-time "MM/dd/yy"]` + +**Customer Identifier:** +- Look for: `"Bill To: Customer Name"` or `"Sold To: Customer Name"` +- Pattern: `#"Bill To:\s+([A-Za-z\s]+?)(?=\s{2,}|\n)"` +- Use non-greedy `+?` and lookahead `(?=...)` to stop at boundaries + +**Total:** +- Look for: `"Total: $100.00"` or `"Amount Due: 100.00"` +- Pattern: `#"Total:\s+\$?([\d,]+\.\d{2})"` +- Parser: `:total [:trim-commas nil]` removes commas + +### Advanced Patterns + +**Multi-line customer address:** +When customer info spans multiple lines 
(name + address): + +```clojure +:customer-identifier #"(?s)I\s+([A-Z][A-Z\s]+?)\s{2,}.*?L\s+([0-9][A-Z0-9\s]+?)(?=\s{2,}|\n)" +:account-number #"(?s)L\s+([0-9][A-Z0-9\s]+?)(?=\s{2,}|\n)" +``` + +The `(?s)` flag makes `.` match newlines. Use non-greedy `+?` and lookaheads `(?=...)` to capture clean values. + +**Multiple date formats:** + +```clojure +:parser {:date [:clj-time ["MM/dd/yy" "yyyy-MM-dd"]]} +``` + +**Credit memos (negative amounts):** + +```clojure +:parser {:total [:trim-commas-and-negate nil]} +``` + +## Testing Best Practices + +1. IMPORTANT, CRITICAL!! **Start with a failing test** - Define expected values before implementing +2. **Test actual PDF parsing** - Use `parse-file` or `parse` with real PDF text +3. **Verify each field individually** - Separate assertions for clarity +4. **Handle date comparisons carefully** - Compare year/month/day separately if needed +5. **Use `str/trim`** - Account for extra whitespace in extracted values + +## Example Test Structure + +```clojure +(deftest parse-vendor-invoice-12345 + (testing "Should parse Vendor invoice with expected values" + (let [results (sut/parse-file (io/file "dev-resources/INVOICE.pdf") + "INVOICE.pdf") + result (first results)] + (is (some? results) "Should return results") + (is (some? result) "Template should match") + (when result + (is (= "Vendor Name" (:vendor-code result))) + (is (= "12345" (:invoice-number result))) + (is (= "Customer Name" (:customer-identifier result))) + (is (= "100.00" (:total result))))))) +``` + +## Common Pitfalls + +1. **Keywords must all match** - Every pattern in `:keywords` must be found in the PDF +2. **Capture groups required** - Regexes need `()` to extract values +3. **PDF text != visual text** - Layout may differ from what you see visually +4. **Greedy quantifiers** - Use `+?` instead of `+` to avoid over-matching +5. 
**Case sensitivity** - Regex is case-sensitive unless you use `(?i)` flag + +## Post-Creation Checklist + +After creating the template: + +- [ ] Test passes: `lein test auto-ap.parse.templates-test` +- [ ] Format is correct: `lein cljfmt check` +- [ ] Code compiles: `lein check` +- [ ] Template is in correct position in `pdf-templates` vector +- [ ] Keywords uniquely identify this vendor (won't match other templates) +- [ ] Test file follows naming conventions + +## Integration with Workflow + +This skill is typically used as part of a larger workflow: + +1. User provides PDF and requirements +2. This skill creates template and test +3. User reviews and refines if needed +4. Test is run to verify extraction +5. Code is committed + +The skill ensures consistency with existing patterns and reduces manual boilerplate when adding new vendor support. diff --git a/.opencode/skills/invoice-template-creator/references/examples.md b/.opencode/skills/invoice-template-creator/references/examples.md new file mode 100644 index 00000000..954b60ed --- /dev/null +++ b/.opencode/skills/invoice-template-creator/references/examples.md @@ -0,0 +1,188 @@ +# Invoice Template Examples + +## Simple Single Invoice + +```clojure +{:vendor "Gstar Seafood" + :keywords [#"G Star Seafood"] + :extract {:total #"Total\s{2,}([\d\-,]+\.\d{2,2}+)" + :customer-identifier #"(.*?)(?:\s+)Invoice #" + :date #"Invoice Date\s{2,}([0-9]+/[0-9]+/[0-9]+)" + :invoice-number #"Invoice #\s+(\d+)"} + :parser {:date [:clj-time "MM/dd/yyyy"] + :total [:trim-commas nil]}} +``` + +## Multi-Invoice Statement + +```clojure +{:vendor "Southbay Fresh Produce" + :keywords [#"(SOUTH BAY FRESH PRODUCE|SOUTH BAY PRODUCE)"] + :extract {:date #"^([0-9]+/[0-9]+/[0-9]+)" + :customer-identifier #"To:[^\n]*\n\s+([A-Za-z' ]+)\s{2}" + :invoice-number #"INV #\/(\d+)" + :total #"\$([0-9.]+)\."} + :parser {:date [:clj-time "MM/dd/yyyy"]} + :multi #"\n" + :multi-match? 
#"^[0-9]+/[0-9]+/[0-9]+\s+INV "} +``` + +## Customer with Address (Multi-line) + +```clojure +{:vendor "Bonanza Produce" + :keywords [#"530-544-4136"] + :extract {:invoice-number #"NO\s+(\d{8,})\s+\d{2}/\d{2}/\d{2}" + :date #"NO\s+\d{8,}\s+(\d{2}/\d{2}/\d{2})" + :customer-identifier #"(?s)I\s+([A-Z][A-Z\s]+?)\s{2,}.*?L\s+([0-9][A-Z0-9\s]+?)(?=\s{2,}|\n)" + :account-number #"(?s)L\s+([0-9][A-Z0-9\s]+?)(?=\s{2,}|\n)" + :total #"SHIPPED\s+[\d\.]+\s+TOTAL\s+([\d\.]+)"} + :parser {:date [:clj-time "MM/dd/yy"] + :total [:trim-commas nil]}} +``` + +## Credit Memo (Negative Amounts) + +```clojure +{:vendor "General Produce Company" + :keywords [#"916-552-6495"] + :extract {:date #"DATE.*\n.*\n.*?([0-9]+/[0-9]+/[0-9]+)" + :invoice-number #"CREDIT NO.*\n.*\n.*?(\d{5,}?)\s+" + :account-number #"CUST NO.*\n.*\n\s+(\d+)" + :total #"TOTAL:\s+\|\s*(.*)"} + :parser {:date [:clj-time "MM/dd/yy"] + :total [:trim-commas-and-negate nil]}} +``` + +## Complex Date Parsing + +```clojure +{:vendor "Ben E. Keith" + :keywords [#"BEN E. 
KEITH"] + :extract {:date #"Customer No Mo Day Yr.*?\n.*?\d{5,}\s{2,}(\d+\s+\d+\s+\d+)" + :customer-identifier #"Customer No Mo Day Yr.*?\n.*?(\d{5,})" + :invoice-number #"Invoice No.*?\n.*?(\d{8,})" + :total #"Total Invoice.*?\n.*?([\-]?[0-9]+\.[0-9]{2,})"} + :parser {:date [:month-day-year nil] + :total [:trim-commas-and-negate nil]}} +``` + +## Multiple Date Formats + +```clojure +{:vendor "RNDC" + :keywords [#"P.O.Box 743564"] + :extract {:date #"(?:INVOICE|CREDIT) DATE\n(?:.*?)(\S+)\n" + :account-number #"Store Number:\s+(\d+)" + :invoice-number #"(?:INVOICE|CREDIT) DATE\n(?:.*?)\s{2,}(\d+?)\s+\S+\n" + :total #"Net Amount(?:.*\n){4}(?:.*?)([\-]?[0-9\.]+)\n"} + :parser {:date [:clj-time ["MM/dd/yy" "dd-MMM-yy"]] + :total [:trim-commas-and-negate nil]}} +``` + +## Common Regex Patterns + +### Phone Numbers +```clojure +#"\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}" +``` + +### Dollar Amounts +```clojure +#"\$?([0-9,]+\.[0-9]{2})" +``` + +### Dates (MM/dd/yy) +```clojure +#"([0-9]{2}/[0-9]{2}/[0-9]{2})" +``` + +### Dates (MM/dd/yyyy) +```clojure +#"([0-9]{2}/[0-9]{2}/[0-9]{4})" +``` + +### Multi-line Text (dotall mode) +```clojure +#"(?s)start.*?end" +``` + +### Non-greedy Match +```clojure +#"(pattern.+?)" +``` + +### Lookahead Boundary +```clojure +#"value(?=\s{2,}|\n)" +``` + +## Field Extraction Strategies + +### 1. Simple Line-based +Use `[^\n]*` to match until end of line: +```clojure +#"Invoice:\s+([^\n]+)" +``` + +### 2. Whitespace Boundary +Use `(?=\s{2,}|\n)` to stop at multiple spaces or newline: +```clojure +#"Customer:\s+(.+?)(?=\s{2,}|\n)" +``` + +### 3. Specific Marker +Match until a specific pattern is found: +```clojure +#"(?s)Start(.*?)End" +``` + +### 4. 
Multi-part Extraction +Use multiple capture groups for related fields: +```clojure +#"Date:\s+(\d{2})/(\d{2})/(\d{2})" +``` + +## Parser Options + +### Date Parsers +- `[:clj-time "MM/dd/yyyy"]` - Standard US date +- `[:clj-time "MM/dd/yy"]` - 2-digit year +- `[:clj-time "MMM dd, yyyy"]` - Named month +- `[:clj-time ["MM/dd/yy" "yyyy-MM-dd"]]` - Multiple formats +- `[:month-day-year nil]` - Space-separated (1 15 26) + +### Number Parsers +- `[:trim-commas nil]` - Remove commas from numbers +- `[:trim-commas-and-negate nil]` - Handle negative/credit amounts +- `[:trim-commas-and-remove-dollars nil]` - Remove $ and commas +- `nil` - No parsing, return raw string + +## Testing Patterns + +### Basic Test Structure +```clojure +(deftest parse-vendor-invoice + (testing "Should parse vendor invoice" + (let [results (sut/parse-file (io/file "dev-resources/INVOICE.pdf") + "INVOICE.pdf") + result (first results)] + (is (some? result)) + (is (= "Vendor" (:vendor-code result))) + (is (= "12345" (:invoice-number result)))))) +``` + +### Date Testing +```clojure +(let [d (:date result)] + (is (= 2026 (time/year d))) + (is (= 1 (time/month d))) + (is (= 15 (time/day d)))) +``` + +### Multi-field Verification +```clojure +(is (= "Expected Name" (:customer-identifier result))) +(is (= "Expected Street" (str/trim (:account-number result)))) +(is (= "Expected City, ST 12345" (str/trim (:location result)))) +``` diff --git a/.opencode/skills/testing-conventions/SKILL.md b/.opencode/skills/testing-conventions/SKILL.md new file mode 100644 index 00000000..fa7cf235 --- /dev/null +++ b/.opencode/skills/testing-conventions/SKILL.md @@ -0,0 +1,248 @@ +--- +name: testing-conventions +description: Describes how tests should be authored (conventions, tools, helpers), superseding any conventions found in existing tests. +--- + +# Testing Conventions Skill + +This skill documents the testing conventions for `test/clj/auto_ap/`. 
+ +## Test Focus: User-Observable Behavior + +**Primary rule**: Test user-observable behavior. If an endpoint or function makes a database change, verify the change by querying the database directly rather than asserting on markup. + +**other rules**: +1. Don't test the means of doing work. For example, if there is a middleware that makes something available on a request, don't bother testing that wrapper. +2. prefer :refer testing imports, rather than :as reference +3. Prefer structured edits from clojure-mcp + +### When to Assert on Database State + +When testing an endpoint that modifies data: +1. Verify the database change by querying the entity directly +2. Use `dc/pull` or `dc/q` to verify the data was stored correctly + +```clojure +;; CORRECT: Verify the database change directly +(deftest test-create-transaction + (let [result @(post-create-transaction {:amount 100.0})] + (let [entity (dc/pull (dc/db conn) [:db/id :transaction/amount] (:transaction/id result))] + (is (= 100.0 (:transaction/amount entity)))))) + +;; CORRECT: Verify response status and headers +(is (= 201 (:status response))) +(is (= "application/json" (get-in response [:headers "content-type"]))) + +;; CORRECT: Check for expected text content +(is (re-find #"Transaction created" (get-in response [:body "message"]))) +``` + +### When Markup Testing is Acceptable + +Markup testing (HTML/SSR response bodies) is acceptable when: +- Validating response status codes and headers +- Checking for presence/absence of specific text strings +- Verifying small, expected elements within the markup +- Testing SSR component rendering + +```clojure +;; ACCEPTABLE: Response codes and headers +(is (= 200 (:status response))) +(is (= "application/json" (get-in response [:headers "content-type"]))) + +;; ACCEPTABLE: Text content within markup +(is (re-find #"Transaction found" response-body)) + +;; ACCEPTABLE: Small element checks +(is (re-find #">Amount: \$100\.00<" response-body)) +``` + +### When to Avoid 
Markup Testing + +Do not use markup assertions for: +- Verifying complex data structures (use database queries instead) +- Complex nested content that's easier to query +- Business logic verification (test behavior, not presentation) + +## Database Setup + +All tests in `test/clj/auto_ap/` use a shared database fixture (`wrap-setup`) that: +1. Creates a temporary in-memory Datomic database (`datomic:mem://test`) +2. Loads the full schema from `io/resources/schema.edn` +3. Installs custom Datomic functions from `io/resources/functions.edn` +4. Cleans up the database after each test + +## Using the Fixture + +```clojure +(ns my-test + (:require + [auto-ap.integration.util :refer [wrap-setup]] + [clojure.test :as t])) + +(use-fixtures :each wrap-setup) + +(deftest my-test + ;; tests here can access the test database + ) +``` + +## Helper Functions + +`test/clj/auto_ap/integration/util.clj` provides helper functions for creating test data: + +### Identity Helpers + +```clojure +;; Add a unique string to avoid collisions +(str "CLIENT" (rand-int 100000)) +(str "INVOICE " (rand-int 1000000)) +``` + +### Test Entity Builders + +```clojure +;; Client +(test-client + [:db/id "client-id" + :client/code "CLIENT123" + :client/locations ["DT" "MH"] + :client/bank-accounts [:bank-account-id]]) + +;; Vendor +(test-vendor + [:db/id "vendor-id" + :vendor/name "Vendorson" + :vendor/default-account "test-account-id"]) + +;; Bank Account +(test-bank-account + [:db/id "bank-account-id" + :bank-account/code "TEST-BANK-123" + :bank-account/type :bank-account-type/check]) + +;; Transaction +(test-transaction + [:db/id "transaction-id" + :transaction/date #inst "2022-01-01" + :transaction/client "test-client-id" + :transaction/bank-account "test-bank-account-id" + :transaction/id (str (java.util.UUID/randomUUID)) + :transaction/amount 100.0 + :transaction/description-original "original description"]) + +;; Payment +(test-payment + [:db/id "test-payment-id" + :payment/date #inst 
"2022-01-01" + :payment/client "test-client-id" + :payment/bank-account "test-bank-account-id" + :payment/type :payment-type/check + :payment/vendor "test-vendor-id" + :payment/amount 100.0]) + +;; Invoice +(test-invoice + [:db/id "test-invoice-id" + :invoice/date #inst "2022-01-01" + :invoice/client "test-client-id" + :invoice/status :invoice-status/unpaid + :invoice/import-status :import-status/imported + :invoice/total 100.0 + :invoice/outstanding-balance 100.00 + :invoice/vendor "test-vendor-id" + :invoice/invoice-number "INVOICE 123456" + :invoice/expense-accounts + [{:invoice-expense-account/account "test-account-id" + :invoice-expense-account/amount 100.0 + :invoice-expense-account/location "DT"}]]) + +;; Account +(test-account + [:db/id "account-id" + :account/name "Account" + :account/type :account-type/asset]) +``` + +### Common Data Setup (`setup-test-data`) + +Creates a minimal but complete dataset for testing: + +```clojure +(defn setup-test-data [data] + (:tempids @(dc/transact conn (into data + [(test-account :db/id "test-account-id") + (test-client :db/id "test-client-id" + :client/bank-accounts [(test-bank-account :db/id "test-bank-account-id")]) + (test-vendor :db/id "test-vendor-id") + {:db/id "accounts-payable-id" + :account/name "Accounts Payable" + :db/ident :account/accounts-payable + :account/numeric-code 21000 + :account/account-set "default"}])))) +``` + +Use like: +```clojure +(let [{:strs [test-client-id test-bank-account-id test-vendor-id]} (setup-test-data [])] + ...) 
+``` + +### Token Helpers + +```clojure +;; Admin token +(admin-token) + +;; User token (optionally scoped to specific client) +(user-token) ; Default: client-id 1 +(user-token client-id) ; Scoped to specific client +``` + +## Example Usage + +```clojure +(ns my-test + (:require + [clojure.test :refer [deftest is testing use-fixtures]] + [auto-ap.datomic :refer [conn]] + [auto-ap.integration.util :refer [wrap-setup admin-token setup-test-data test-transaction]])) + +(use-fixtures :each wrap-setup) + +(deftest test-transaction-import + (testing "Should import a transaction" + (let [{:strs [test-client-id test-bank-account-id]} (setup-test-data []) + tx-result @(dc/transact conn + [(test-transaction + {:db/id "test-tx-id" + :transaction/client test-client-id + :transaction/bank-account test-bank-account-id + :transaction/amount 50.0})])] + (is (= 1 (count (:tx-data tx-result)))) + ;; Verify by querying the database, not markup + (let [entity (dc/pull (dc/db conn) [:transaction/amount] (get-in tx-result [:tempids "test-tx-id"]))] + (is (= 50.0 (:transaction/amount entity))))))) +``` + +## Note on Temp IDs + +Test data often uses string-based temp IDs like `"client-id"`, `"bank-account-id"`, etc. When transacting, the returned `:tempids` map maps these symbolic IDs to Datomic's internal entity IDs: + +```clojure +(let [{:strs [client-id bank-account-id]} (:tempids @(dc/transact conn txes))] + ...) +``` + +## Memory Database + +All tests use `datomic:mem://test` - an in-memory database. This ensures: +- Tests are fast +- Tests don't interfere with each other +- No setup required to run tests locally + +The database is automatically deleted after each test completes. 
+ +# Running tests +Prefer running tests through the clojure-eval (nREPL evaluation) skill rather than Leiningen; +use Leiningen to run tests only as a last resort. diff --git a/docs/solutions/integration-issues/multi-invoice-template-bonanza-produce-20260207.md b/docs/solutions/integration-issues/multi-invoice-template-bonanza-produce-20260207.md new file mode 100644 index 00000000..64dde609 --- /dev/null +++ b/docs/solutions/integration-issues/multi-invoice-template-bonanza-produce-20260207.md @@ -0,0 +1,132 @@ +--- +module: Invoice Parsing +date: 2026-02-07 +problem_type: integration_failure +component: pdf_template_parser +symptoms: + - "Bonanza Produce multi-invoice statement (13595522.pdf) fails to parse correctly" + - "Single invoice template extracts only one invoice instead of four" + - "Multi-invoice statement lacks I/L markers present in single invoices" + - "Customer identifier extraction pattern requires different regex for statements" +root_cause: template_inadequate +resolution_type: template_fix +severity: high +tags: [pdf, parsing, invoice, bonanza-produce, multi-invoice, integration] +--- + +# Bonanza Produce Multi-Invoice Statement Template Fix + +## Problem + +Bonanza Produce sends two different invoice formats: +1. **Single invoices** (e.g., 03881260.pdf) with I/L markers and specific layout +2. 
**Multi-invoice statements** (e.g., 13595522.pdf) containing 4 invoices per page + +The single invoice template failed to parse multi-invoice statements because: +- Multi-invoice statements lack the I/L (Invoice/Location) markers used in single invoice templates +- The layout structure is completely different, with invoices listed as table rows instead of distinct sections +- Customer identifier extraction requires a different regex pattern + +## Environment + +- Component: PDF Template Parser (Clojure) +- Date: 2026-02-07 +- Test File: `test/clj/auto_ap/parse/templates_test.clj` +- Template File: `src/clj/auto_ap/parse/templates.clj` +- Test Document: `dev-resources/13595522.pdf` (4 invoices on single page) + +## Symptoms + +- Single invoice template only parses first invoice from multi-invoice statement +- Parse returns single result instead of 4 separate invoice records +- `:customer-identifier` extraction returns empty or incorrect values for statements +- Test `parse-bonanza-produce-statement-13595522` expects 4 results but receives 1 + +## What Didn't Work + +**Attempted Solution 1: Reuse single invoice template with `:multi` flag** +- Added `:multi #"\n"` and `:multi-match?` pattern to existing single invoice template +- **Why it failed:** The single invoice template's regex patterns (e.g., `I\s+([A-Z][A-Z\s]+?)\s{2,}.*?L\s+`) expect I/L markers that don't exist in multi-invoice statements. The layout structure is fundamentally different. + +**Attempted Solution 2: Using simpler customer identifier pattern** +- Tried pattern `#"(.*?)\s+RETURN"` extracted from multi-invoice statement text +- **Why it failed:** This pattern alone doesn't account for the statement's column-based layout. Need to combine with `:multi` and `:multi-match?` flags to parse multiple invoices. + +## Solution + +Added a dedicated multi-invoice template that: +1. Uses different keywords to identify multi-invoice statements +2. 
Employs `:multi` and `:multi-match?` flags for multiple invoice extraction +3. Uses simpler regex patterns suitable for the statement layout + +**Implementation:** + +```clojure +;; Bonanza Produce Statement (multi-invoice) +{:vendor "Bonanza Produce" + :keywords [#"The perishable agricultural commodities" #"SPARKS, NEVADA"] + :extract {:invoice-number #"^\s+[0-9]{2}/[0-9]{2}/[0-9]{2}\s+([0-9]+)\s+INVOICE" + :customer-identifier #"(.*?)\s+RETURN" + :date #"^\s+([0-9]{2}/[0-9]{2}/[0-9]{2})" + :total #"^\s+[0-9]{2}/[0-9]{2}/[0-9]{2}\s+[0-9]+\s+INVOICE\s+([\d.]+)"} + :parser {:date [:clj-time "MM/dd/yy"] + :total [:trim-commas nil]} + :multi #"\n" + :multi-match? #"\s+[0-9]{2}/[0-9]{2}/[0-9]{2}\s+[0-9]+\s+INVOICE"} +``` + +**Key differences from single invoice template:** +- `:keywords`: Look for statement header text instead of phone number +- `:customer-identifier`: Pattern `#"(.*?)\s+RETURN"` works for statement format +- `:multi #"\n"`: Split the document text on newline boundaries +- `:multi-match?`: Match the date/invoice-number row pattern to identify individual invoices +- No I/L markers: Patterns scan from left margin without location markers + +## Why This Works + +1. **Statement-specific keywords:** "The perishable agricultural commodities" and "SPARKS, NEVADA" uniquely identify multi-invoice statements vs. single invoices (which have phone number 530-544-4136) + +2. **Multi-flag parsing:** The `:multi` and `:multi-match?` flags tell the parser to split the document on newlines and identify individual invoices using the date/invoice-number pattern, rather than treating the whole page as one invoice + +3. **Simplified patterns:** Without I/L markers, patterns scan from line start (`^\s+`) and extract columns based on whitespace positions. The `:customer-identifier` pattern `(.*?)\s+RETURN` captures everything before "RETURN" on each line + +4. **Separate templates:** Having distinct templates for single invoices vs. 
statements prevents conflict and allows optimization for each format + +## Prevention + +**When adding templates for vendors with multiple document formats:** + +1. **Create separate templates:** Don't try to make one template handle both formats. Use distinct keywords to identify each format + +2. **Test both single and multi-invoice documents:** Ensure templates parse expected number of invoices: + ```clojure + (is (= 4 (count results)) "Should parse 4 invoices from statement") + ``` + +3. **Verify `:multi` usage:** Multi-invoice templates should have both `:multi` and `:multi-match?` flags: + ```clojure + :multi #"\n" + :multi-match? #"\s+[0-9]{2}/[0-9]{2}/[0-9]{2}\s+[0-9]+\s+INVOICE" + ``` + +4. **Check pattern scope:** Multi-invoice statements often lack structural markers (I/L), so patterns should: + - Use `^\s+` to anchor at line start + - Extract from whitespace-separated columns + - Avoid patterns requiring specific markers + +5. **Run all template tests:** Before committing, run: + ```bash + lein test auto-ap.parse.templates-test + ``` + +## Related Issues + +- Single invoice template: `src/clj/auto_ap/parse/templates.clj` lines 756-765 +- Similar multi-invoice patterns: Search for `:multi` and `:multi-match?` in `src/clj/auto_ap/parse/templates.clj` + +## Key Files + +- **Tests:** `test/clj/auto_ap/parse/templates_test.clj` (lines 36-53) +- **Template:** `src/clj/auto_ap/parse/templates.clj` (lines 767-777) +- **Test document:** `dev-resources/13595522.pdf` +- **Template parser:** `src/clj/auto_ap/parse.clj` \ No newline at end of file diff --git a/src/clj/auto_ap/parse/templates.clj b/src/clj/auto_ap/parse/templates.clj index 515d630a..1e9e315d 100644 --- a/src/clj/auto_ap/parse/templates.clj +++ b/src/clj/auto_ap/parse/templates.clj @@ -753,7 +753,7 @@ :multi #"\n" :multi-match? 
#"INV #"} - ;; Bonanza Produce +;; Bonanza Produce {:vendor "Bonanza Produce" :keywords [#"530-544-4136"] :extract {:invoice-number #"NO\s+(\d{8,})\s+\d{2}/\d{2}/\d{2}" @@ -762,7 +762,19 @@ :account-number #"(?s)L\s+([0-9][A-Z0-9\s]+?)(?=\s{2,}|\n)" :total #"SHIPPED\s+[\d\.]+\s+TOTAL\s+([\d\.]+)"} :parser {:date [:clj-time "MM/dd/yy"] - :total [:trim-commas nil]}}]) + :total [:trim-commas nil]}} + + ;; Bonanza Produce Statement (multi-invoice) + {:vendor "Bonanza Produce" + :keywords [#"The perishable agricultural commodities" #"SPARKS, NEVADA"] + :extract {:invoice-number #"^\s+[0-9]{2}/[0-9]{2}/[0-9]{2}\s+([0-9]+)\s+INVOICE" + :customer-identifier #"(.*?)\s+RETURN" + :date #"^\s+([0-9]{2}/[0-9]{2}/[0-9]{2})" + :total #"^\s+[0-9]{2}/[0-9]{2}/[0-9]{2}\s+[0-9]+\s+INVOICE\s+([\d.]+)"} + :parser {:date [:clj-time "MM/dd/yy"] + :total [:trim-commas nil]} + :multi #"\n" + :multi-match? #"\s+[0-9]{2}/[0-9]{2}/[0-9]{2}\s+[0-9]+\s+INVOICE"}]) (def excel-templates [{:vendor "Mama Lu's Foods" diff --git a/test/clj/auto_ap/parse/templates_test.clj b/test/clj/auto_ap/parse/templates_test.clj index 673402e2..3313bcfb 100644 --- a/test/clj/auto_ap/parse/templates_test.clj +++ b/test/clj/auto_ap/parse/templates_test.clj @@ -32,3 +32,22 @@ (str (:customer-identifier result) " " (str/trim (:account-number result))))) ;; Total is parsed as string, not number (per current behavior) (is (= "23.22" (:total result))))))) + +(deftest parse-bonanza-produce-statement-13595522 + (testing "Should parse Bonanza Produce statement 13595522 with multiple invoices" + (let [pdf-file (io/file "dev-resources/13595522.pdf") + pdf-text (:out (clojure.java.shell/sh "pdftotext" "-layout" (str pdf-file) "-")) + results (sut/parse pdf-text)] + (is (some? 
results) "parse should return results") + (is (= 4 (count results)) "Should parse 4 invoices from statement") + (doseq [result results] + (is (= "Bonanza Produce" (:vendor-code result))) + (is (= "600 VISTA WAY" (:customer-identifier result)))) + (is (= "03876838" (:invoice-number (nth results 0)))) + (is (= "03877314" (:invoice-number (nth results 1)))) + (is (= "03878619" (:invoice-number (nth results 2)))) + (is (= "03879035" (:invoice-number (nth results 3)))) + (is (= "891.65" (:total (nth results 0)))) + (is (= "720.33" (:total (nth results 1)))) + (is (= "853.16" (:total (nth results 2)))) + (is (= "1066.60" (:total (nth results 3)))))))