Okay, I'm flummoxed.  Given the following definition:

(defn make-n-gram-fn [n]
  (fn [coll] (map vec (partition n 1 coll))))

I can do this:

(def bi-gram (make-n-gram-fn 2))

(bi-gram "abc")
([\a \b] [\b \c])

But, if I add the following:

; counts the number of indexes in a pair of collections where the
elements at the
; indexes match one another
(defn count-matching-elements [coll1 coll2]
  (reduce +
    (for [e1 coll1 e2 coll2]
      (if (= e1 e2) 1 0))))

; uses the matching element count to compute a simple similarity
between
; the two collections
(defn raw-coll-similarity [coll1 coll2]
  (let [maxlen (max (count coll1) (count coll2))]
    (/ (double (count-matching-elements coll1 coll2)) maxlen)))

; a more general collection similarity that applies the same
transformation to both
; collections, then applies the simpler coll similarity function
(defn coll-similarity [coll1 coll2 transform]
  (let [ncoll1 (transform coll1) ncoll2 (transform coll2)]
    (raw-coll-similarity ncoll1 ncoll2)))

; a returns a new similarity function that applies the provided
transform function
; before comparing a pair of collections
(defn make-coll-similarity-fn [coll-transform]
  (fn [coll1 coll2] coll-similarity [coll1 coll2 coll-transform]))

; makes an n-gram similarity function using the provided value for 'n'
(defn make-n-gram-similarity [n] (make-coll-similarity-fn (make-n-gram-
fn n)))

Then use my new similarity function generator:

(def bigram-similarity (make-coll-similarity-fn (make-n-gram-fn 2)))

(bigram-similarity "abcde" "abc")

I get the following:

["abcde" "abc" #<similarity$make_n_gram_fn__114$fn__116
org.ricercata.similarity$make_n_gram_fn__114$fn__...@219ba640>]

I must be missing something obvious, but I can't see it.
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google
Groups "Clojure" group.
To post to this group, send email to clojure@googlegroups.com
Note that posts from new members are moderated - please be patient with your 
first post.
To unsubscribe from this group, send email to
clojure+unsubscr...@googlegroups.com
For more options, visit this group at
http://groups.google.com/group/clojure?hl=en
-~----------~----~----~----~------~----~------~--~---

Reply via email to