(ns auto-ap.parse.excel
  "Template-driven extraction of invoice fields from the first sheet of an
  Excel workbook."
  (:require [auto-ap.parse.templates :as t]
            [clojure.string :as str]
            [dk.ative.docjure.spreadsheet :as d])
  (:import (org.apache.poi.ss.util CellAddress)))

(defn template-applies?
  "Returns true when every regex in the template's `:keywords` finds a match
  in `text`. A template without keywords applies to any text."
  [text {:keys [keywords]}]
  (every? #(re-find % text) keywords))

(defn- first-sheet
  "Returns the first sheet of workbook `wb`."
  [wb]
  (first (d/sheet-seq wb)))

(defn- cell-text
  "Reads `cell` and coerces the value to a string; a nil value becomes \"\"."
  [cell]
  (str (d/read-cell cell)))

(defn- offset-cell-text
  "Returns the string value of the cell located `offset-row` rows and
  `offset-column` columns away from the `anchor` cell on `sheet`."
  [sheet anchor offset-row offset-column]
  (let [address (.getAddress anchor)
        target  (CellAddress. (+ offset-row (.getRow address))
                              (+ offset-column (.getColumn address)))]
    ;; select-cell takes an A1-style reference, hence the string round trip.
    (cell-text (d/select-cell (str target) sheet))))

(defn- extract-field
  "Resolves one field spec `[regex offset-row offset-column extract-regex]`
  against `cells` (the cells of `sheet`).

  The first cell whose text matches `regex` is the anchor; the value is read
  from the cell at the given offset from it. When `extract-regex` is present,
  the result is the second element of its `re-find` result (capture group 1
  for a regex with groups); otherwise it is the whole cell text. Returns nil
  when no cell matches `regex`."
  [sheet cells [regex offset-row offset-column extract-regex]]
  ;; Pick the anchor first so the offset lookup runs exactly once, instead of
  ;; being mapped lazily over every matching cell.
  (when-let [anchor (first (filter #(re-find regex (cell-text %)) cells))]
    (let [value (offset-cell-text sheet anchor offset-row offset-column)]
      (if extract-regex
        (second (re-find extract-regex value))
        value))))

(defn extract
  "Builds an invoice map from workbook `wb` using a template.

  The template's `:vendor` becomes `:vendor-code`; each entry of its
  `:extract` map (key -> field spec) contributes one key whose value is
  resolved by `extract-field` on the workbook's first sheet. A nil template
  yields `{:vendor-code nil}`."
  [wb {field-specs :extract vendor :vendor}]
  (let [sheet (first-sheet wb)
        ;; Computed once and shared by every field rather than re-walking the
        ;; workbook per key.
        cells (d/cell-seq sheet)]
    (reduce-kv (fn [invoice k spec]
                 (assoc invoice k (extract-field sheet cells spec)))
               {:vendor-code vendor}
               field-specs)))

(defn parse-file
  "Loads the workbook from `file`, selects the first entry of
  `t/excel-templates` whose keywords all occur in the first sheet's text, and
  returns a one-element vector holding the invoice extracted with it.

  `filename` is accepted but not used. When no template applies, `extract`
  receives nil and the result is `[{:vendor-code nil}]`."
  [file filename]
  (let [wb       (d/load-workbook file)
        ;; All cell values of the first sheet joined by spaces, so template
        ;; keywords can be matched against the sheet as a whole.
        text     (->> (first-sheet wb)
                      d/cell-seq
                      (map d/read-cell)
                      (str/join " "))
        template (first (filter #(template-applies? text %) t/excel-templates))]
    [(extract wb template)]))