Files
integreat/src/clj/auto_ap/parse/excel.clj
2022-07-26 05:56:41 -07:00

55 lines
2.0 KiB
Clojure

(ns auto-ap.parse.excel
(:require [auto-ap.parse.templates :as t]
[auto-ap.parse.util :as u]
[clojure.string :as str]
[dk.ative.docjure.spreadsheet :as d])
(:import (org.apache.poi.ss.util CellAddress)))
(defn template-applies?
  "Returns true when every regex in the template's `:keywords` finds a match
  somewhere in `text`. A template with no `:keywords` (nil or empty) applies
  to any text, because `every?` is vacuously true on an empty collection."
  [text {:keys [keywords]}]
  (let [found-in-text? (fn [pattern]
                         (some? (re-find pattern text)))]
    (every? found-in-text? keywords)))
(defn extract
  "Builds invoice data from workbook `wb` according to a template.

  Template keys read here:
    :extract - either a function, or a map of field key ->
               [regex offset-row offset-column extract-regex]
    :vendor  - stored in the result under :vendor-code
    :parser  - optional map of field key -> two-element collection whose
               first and second items are handed to `u/parse-value`

  When :extract is a function it is called as (f wb vendor) and its result is
  returned unchanged. Otherwise the result is a one-element vector holding a
  map with :vendor-code plus one entry per field key.

  For each field, the first cell on the first sheet whose stringified value
  matches `regex` is located; the value is then read from the cell
  `offset-row` rows and `offset-column` columns away from it. If
  `extract-regex` is supplied, the second element of its `re-find` result is
  used (presumably the first capture group -- the regex is expected to have
  one). A field with no matching cell maps to nil."
  [wb {extract-spec :extract, :keys [vendor parser]}]
  (if (fn? extract-spec)
    (extract-spec wb vendor)
    (let [;; Memoised but still lazy: the sheet is only looked up once a field
          ;; rule actually needs it.
          sheet (delay (first (d/sheet-seq wb)))

          ;; Stringified value of the cell at the given offset from `cell`.
          offset-value
          (fn [cell offset-row offset-column]
            (let [address (.getAddress cell)
                  target  (CellAddress. (+ offset-row (.getRow address))
                                        (+ offset-column (.getColumn address)))]
              (str (d/read-cell (d/select-cell (.toString target) @sheet)))))

          ;; Value for a single field key, or nil when no cell matches.
          field-value
          (fn [k [regex offset-row offset-column extract-regex]]
            (let [parser-spec (get parser k)
                  label-cell? (fn [cell]
                                (re-find regex (str (d/read-cell cell))))
                  read-field  (fn [cell]
                                (let [cell-value (offset-value cell offset-row offset-column)
                                      raw-result (if extract-regex
                                                   (second (re-find extract-regex cell-value))
                                                   cell-value)]
                                  (if parser-spec
                                    (u/parse-value (first parser-spec)
                                                   (second parser-spec)
                                                   raw-result)
                                    raw-result)))]
              (first (map read-field
                          (filter label-cell? (d/cell-seq @sheet))))))]
      [(reduce-kv (fn [invoice k rule]
                    (assoc invoice k (field-value k rule)))
                  {:vendor-code vendor}
                  extract-spec)])))
(defn parse-file
  "Loads the workbook at `file`, joins the values of every cell on its first
  sheet into one space-separated string, and runs `extract` with the first
  entry of `t/excel-templates` for which `template-applies?` holds on that
  string. The second argument is ignored.

  If no template applies, `extract` is still invoked, with a nil template."
  [file _]
  (let [wb          (d/load-workbook file)
        first-sheet (first (d/sheet-seq wb))
        text        (str/join " " (map d/read-cell (d/cell-seq first-sheet)))
        template    (first (filter (fn [candidate]
                                     (template-applies? text candidate))
                                   t/excel-templates))]
    (extract wb template)))