(ns auto-ap.parse.excel
  (:require [auto-ap.parse.templates :as t]
            [auto-ap.parse.util :as u]
            [clojure.string :as str]
            [dk.ative.docjure.spreadsheet :as d])
  (:import (org.apache.poi.ss.util CellAddress)))

(defn template-applies?
  "Returns true when every regex in the template's :keywords finds a match
  in `text`. A template whose :keywords is nil or empty applies to any text,
  because `every?` is vacuously true on an empty collection."
  [text {:keys [keywords]}]
  (every? #(re-find % text) keywords))

(defn- extract-field
  "Resolves one template field against the cells of `sheet`.

  `cells` is the cell seq of `sheet`. The field spec is
  [regex offset-row offset-column extract-regex]:

  * the first cell whose stringified value matches `regex` is the anchor;
  * the value is read from the cell `offset-row` rows and `offset-column`
    columns away from the anchor;
  * when `extract-regex` is given, `second` of its `re-find` result on that
    value is used (i.e. the first capture group when the regex has groups);
  * when `parse-spec` is non-nil, its first and second elements are handed
    to `u/parse-value` together with the raw result.

  Returns nil when no cell matches `regex`."
  [sheet cells parse-spec [regex offset-row offset-column extract-regex]]
  ;; `some` stops at the first matching cell, so only that one cell is ever
  ;; looked up and parsed. Mapping over every match and taking `first`
  ;; afterwards may process (and throw on) matches that are never used when
  ;; the underlying seq is chunked.
  (when-let [anchor (some #(when (re-find regex (str (d/read-cell %))) %) cells)]
    (let [address    (.getAddress anchor)
          ;; d/select-cell takes an A1-style reference string.
          target-ref (.toString (CellAddress. (+ offset-row (.getRow address))
                                              (+ offset-column (.getColumn address))))
          cell-value (str (d/read-cell (d/select-cell target-ref sheet)))
          raw-result (if extract-regex
                       (second (re-find extract-regex cell-value))
                       cell-value)]
      (if parse-spec
        (u/parse-value (first parse-spec) (second parse-spec) raw-result)
        raw-result))))

(defn extract
  "Extracts invoice data from workbook `wb` according to a template.

  The template map is destructured for :extract, :vendor and :parser.

  * When :extract is a function it is called as `(extract wb vendor)` and its
    result is returned unchanged.
  * Otherwise :extract is treated as a map of
    field-key -> [regex offset-row offset-column extract-regex]. Each field is
    resolved against the first sheet of `wb` (see `extract-field`), using
    `(get parser field-key)` as the optional parse spec. The result is a
    one-element vector holding a map of the extracted fields plus
    :vendor-code set to `vendor`.

  A field with no matching cell is associated with nil."
  [wb {:keys [extract vendor parser]}]
  (if (fn? extract)
    (extract wb vendor)
    ;; The first sheet and its cells do not depend on the field being
    ;; extracted, so compute them at most once. The delay keeps an empty or
    ;; nil :extract from touching the workbook at all.
    (let [sheet+cells (delay (let [sheet (first (d/sheet-seq wb))]
                               [sheet (d/cell-seq sheet)]))]
      [(reduce-kv (fn [invoice k field-spec]
                    (let [[sheet cells] @sheet+cells]
                      (assoc invoice k
                             (extract-field sheet cells (get parser k) field-spec))))
                  {:vendor-code vendor}
                  extract)])))

(defn parse-file
  "Loads `file` as a workbook, joins the values of every cell on its first
  sheet with spaces, and runs `extract` with the first entry of
  `t/excel-templates` whose keywords all match that text.

  The second argument is ignored.

  When no template matches, `extract` receives nil as the template and the
  result is `[{:vendor-code nil}]`."
  [file _]
  (let [wb   (d/load-workbook file)
        text (->> wb
                  (d/sheet-seq)
                  first
                  (d/cell-seq)
                  (map d/read-cell)
                  (str/join " "))]
    (->> t/excel-templates
         (filter (partial template-applies? text))
         first
         (extract wb))))