Adds Cheetah
This commit is contained in:
@@ -1,16 +1,15 @@
|
||||
(ns auto-ap.parse
|
||||
(:require [auto-ap.parse.csv :as csv]
|
||||
[auto-ap.parse.excel :as excel]
|
||||
[auto-ap.parse.templates :as t]
|
||||
[auto-ap.parse.util :as u]
|
||||
[clj-fuzzy.metrics :as m]
|
||||
[clj-time.core :as time]
|
||||
[clj-time.format :as f]
|
||||
[clojure.java.shell :as sh]
|
||||
[clojure.set :as set]
|
||||
[clojure.string :as str]
|
||||
[clojure.tools.logging :as log]
|
||||
[auto-ap.logging :refer [info-event]]))
|
||||
(:require
|
||||
[auto-ap.logging :refer [info-event]]
|
||||
[auto-ap.parse.csv :as csv]
|
||||
[auto-ap.parse.excel :as excel]
|
||||
[auto-ap.parse.templates :as t]
|
||||
[auto-ap.parse.util :as u]
|
||||
[clj-fuzzy.metrics :as m]
|
||||
[clojure.java.shell :as sh]
|
||||
[clojure.set :as set]
|
||||
[clojure.string :as str]
|
||||
[clojure.tools.logging :as log]))
|
||||
|
||||
(def last-text (atom nil))
|
||||
|
||||
@@ -57,11 +56,11 @@
|
||||
(extract-template text)))
|
||||
|
||||
|
||||
(defmulti parse-file (fn [file filename] (.toLowerCase (last (str/split filename #"\." )))))
|
||||
(defmulti parse-file (fn [_ filename] (.toLowerCase (last (str/split filename #"\." )))))
|
||||
|
||||
(defmethod parse-file
|
||||
"pdf"
|
||||
[file filename]
|
||||
[file _]
|
||||
(-> (sh/sh "pdftotext" "-layout" file "-")
|
||||
:out
|
||||
parse))
|
||||
@@ -87,7 +86,7 @@
|
||||
(best-match clients invoice-client-name 0.25))
|
||||
([clients invoice-client-name threshold]
|
||||
(let [fuzzy-match (->> clients
|
||||
(mapcat (fn [{:keys [:db/id :client/matches :client/name] :as client :or {matches []}}]
|
||||
(mapcat (fn [{:keys [:client/matches :client/name] :as client :or {matches []}}]
|
||||
(map (fn [m]
|
||||
[client (m/jaccard (.toLowerCase invoice-client-name) (.toLowerCase m))])
|
||||
(conj matches name))))
|
||||
@@ -98,7 +97,7 @@
|
||||
word-set (set (filter (complement str/blank?) (str/split (.toLowerCase invoice-client-name) #"[\s:\-]" )))
|
||||
client-word-match (->> clients
|
||||
(map
|
||||
(fn [{:keys [:db/id :client/matches :client/name] :as client :or {matches []}}]
|
||||
(fn [{:keys [:client/matches :client/name] :as client :or {matches []}}]
|
||||
(let [client-words (-> #{}
|
||||
(into
|
||||
(mapcat
|
||||
@@ -119,7 +118,7 @@
|
||||
(mapcat (fn [{:keys [:location-match/location :location-match/matches]}]
|
||||
|
||||
(map (fn [match] [location match]) matches)))
|
||||
(filter (fn [[location match]]
|
||||
(filter (fn [[_ match]]
|
||||
(re-find (re-pattern (str "(?i)" match)) text)) )
|
||||
first
|
||||
first)
|
||||
@@ -127,15 +126,14 @@
|
||||
:client/location-matches
|
||||
(mapcat (fn [{:keys [:location-match/location :location-match/matches]}]
|
||||
(map (fn [match] [location match]) matches)))
|
||||
(filter (fn [[location match]] (re-find (re-pattern (str "(?i)" match)) full-text)) )
|
||||
(filter (fn [[_ match]] (re-find (re-pattern (str "(?i)" match)) full-text)) )
|
||||
first
|
||||
first)
|
||||
(:client/default-location client)
|
||||
(first (:client/locations client))))
|
||||
|
||||
#_{:clj-kondo/ignore [:clojure-lsp/unused-public-var]}
|
||||
(defn dbg-parse [v]
|
||||
(doto
|
||||
(map
|
||||
(fn [x] (dissoc x :full-text :text))
|
||||
(parse v))
|
||||
clojure.pprint/pprint ))
|
||||
(map
|
||||
(fn [x] (dissoc x :full-text :text))
|
||||
(parse v)))
|
||||
|
||||
Reference in New Issue
Block a user