Very cool. I actually cleaned up the code a little bit more this
morning trying to speed things up a bit. It's still not as fast as I'd
like, but I'm not up to speed on Clojure optimization either, so I
could be missing something.

Revised code:

(ns markov
  (use clojure.contrib.str-utils))

(defn flatten
  "Walks an arbitrarily nested sequential structure (lists, vectors,
  and so on) and returns a flat sequence of its non-sequential leaves
  in the order a depth-first walk visits them.  Nil, or any argument
  that is not itself sequential, yields an empty sequence."
  [x]
  (let [nodes (tree-seq sequential? seq x)]
    ;; The first node is always the root itself; skip it and keep only
    ;; the leaves.
    (remove sequential? (rest nodes))))

(defn rand-elt
  "Returns the element of s found at a randomly chosen index."
  [s]
  (let [size  (count s)
        index (rand-int size)]
    (nth s index)))

(defn clean
  "Strip characters from txt that are disruptive to Markov chains.
  First removes every : ; , ^ \" ( and ) character, then removes each
  apostrophe that is not immediately followed by text starting with
  d, t, ve, m, ll, s, de or re (so contractions such as don't keep
  their apostrophe).  Returns the cleaned string."
  [txt]
  ;; The docstring must precede the parameter vector; placed after it,
  ;; the string is just an evaluated-and-discarded expression.
  (let [without-punct (re-gsub #"[:;,^\"()]" "" txt)]
    (re-gsub #"'(?!(d|t|ve|m|ll|s|de|re))" "" without-punct)))

(defn chain-lengths
  "Return the set of key counts, one per element of markov-chain.
  Each element is passed to `keys` and the resulting key seq is
  counted, so the argument is treated as a collection of map-like
  elements.
  NOTE(review): the parameter name suggests a single chain map, but
  the code iterates over its elements -- confirm against callers."
  [markov-chain]
  (let [markov-keys (map keys markov-chain)]
    (set (for [x markov-keys] (count x)))))

(defn max-chain-length
  "Return the largest length reported by chain-lengths for
  markov-chain, or nil when there are no lengths to compare."
  [markov-chain]
  ;; (apply max ...) with no arguments throws, so guard the empty case.
  (when-let [lengths (seq (chain-lengths markov-chain))]
    (apply max lengths)))

(defn chain
  "Build a markov chain (a map) from a list of words.  Each key is a
  sliding window of the preceding words, `length` entries wide and
  padded with nil at the start; each value collects the words seen
  right after that window.  `length` defaults to 3."
  ([words]
   (chain words 3))
  ([words length]
   (let [step (fn [[table window] word]
                ;; Record `word` as a follower of the current window,
                ;; then slide the window forward by one word.
                [(assoc table window (cons word (get table window)))
                 (concat (rest window) [word])])
         [table last-window] (reduce step
                                     [{} (repeat length nil)]
                                     (map clean words))]
     ;; The window left after the final word is stored with an empty
     ;; follower list.
     (assoc table last-window []))))

(defn split-sentence
  "Split text into a sequence of non-empty fragments.  A boundary is
  any run of the characters , . ! ? ( ) or digits, together with any
  whitespace that immediately follows it; empty fragments are
  dropped."
  [text]
  ;; The docstring must precede the parameter vector to be attached to
  ;; the var's metadata.
  (filter seq (re-split #"[,.!?()\d]+\s*" text)))

(defn file-chain
  "Create a markov chain from the contents of a given file.  The file
  is read in full, split into sentence fragments with split-sentence,
  and each fragment is split on whitespace and turned into its own
  chain; the per-fragment chains are merged into one map.  `length`
  is the key size in words passed to `chain` and defaults to 3."
  ([file]
   (file-chain file 3))
  ([file length]
   (let [sentences (split-sentence (slurp file))
         ;; When two chains share a key, combine both follower
         ;; collections into a single flat sequence.
         flatten-list (fn [& x] (flatten (list x)))]
     (reduce (fn [markov-chain sentence]
               (merge-with flatten-list
                           markov-chain
                           ;; Forward `length`; previously it was
                           ;; dropped and the default of 3 always used.
                           (chain (re-split #"\s+" sentence) length)))
             {}
             sentences))))

(defn construct-sentence
  "Build a sentence from a markov chain structure.  Given a Markov
  chain (any size key), an optional Seed (the key to start from; a
  random key of the chain is used when it is nil) and an optional
  Proc (a function that picks the next word from a key's followers;
  defaults to rand-elt), returns the words joined by single spaces.
  Composition stops when the current key has no followers."
  ([markov-chain]
   (construct-sentence markov-chain nil rand-elt))
  ([markov-chain seed]
   (construct-sentence markov-chain seed rand-elt))
  ([markov-chain seed proc]
   (loop [words (or seed (rand-elt (keys markov-chain)))
          ;; Collect the words and join once at the end.  Joining
          ;; incrementally onto an empty initial sentence (a start key
          ;; made only of nils) produced a leading space.
          sentence (vec (filter identity words))]
     (let [followers (markov-chain words)]
       (if (seq followers)
         (let [word-new (proc followers)]
           (recur (concat (rest words) [word-new])
                  (conj sentence word-new)))
         (str-join " " sentence))))))


On Apr 24, 12:00 pm, Luke VanderHart <luke.vanderh...@gmail.com>
wrote:
> Cool... I actually did a Markov chain generator myself as one of my
> early Clojure projects. I posted about it at the DC Study group, here:
>
> http://groups.google.com/group/clojure-study-dc/browse_thread/thread/...
>
> It looks like yours is more succinct... I'll definitely have to take
> some time and compare our approaches.
>
> -Luke
>
> On Apr 24, 8:47 am, tmountain <tinymount...@gmail.com> wrote:
>
> > In an effort to learn more about Clojure, I decided to port a markov
> > text generator which a friend wrote in Python. After getting through a
> > few snags, I completed the program and decided to have some fun
> > feeding in some e-books downloaded from the Gutenberg project as
> > input. In this case, I chose Sherlock Holmes and Bram Stoker's Dracula
> > to create a bizarre mashup, which could be called Draclock Holmes or
> > something approximate. I had the program print out three-line snippets
> > of text, and some of the resulting text resembles a sort of absurd
> > poetry. I'd imagine if I let it churn and burn for a few hours, some
> > real gems could emerge.
>
> > acting in her interests
> > Mina's morning and evening hypnotic answer is unvaried
> > with devilish passion
>
> > she succeeded somewhat
> > swiftly and deftly
> > His look is a warning
>
> > together as we swept along
> > found myself lying on my bed trembling all over
> > Miss Stoner and I gazed at him in many tongues
>
> > my power to reward you for your services
> > common subject for conversation
> > throwing open another door
>
> > nine years in England
> > strong-faced old man
> > to mediaeval times
>
> > Here's the code. I'm new to Clojure, so I'm open to suggestions. It's
> > written in a purely functional non-destructive fashion; although, I'm
> > sure a few things could be improved.
>
> > (ns markov
> >   (use clojure.contrib.str-utils))
>
> > (defn rand-nth [coll]
> >   "return a random element from a collection"
> >   (nth (seq coll) (rand-int (count coll))))
>
> > (defn clean [txt]
> >   "clean given txt for symbols disruptive to markov chains"
> >   (let [new-txt (re-gsub #"[:;,^\"()]" "" txt)
> >         new-txt (re-gsub #"'(?!(d|t|ve|m|ll|s|de|re))" "" new-txt)]
> > new-txt))
>
> > (defn chain-lengths [markov-chain]
> >   "return a set of lengths for each element in the collection"
> >   (let [markov-keys (map keys markov-chain)]
> >     (set (for [x markov-keys] (count x)))))
>
> > (defn max-chain-length [markov-chain]
> >   "return the length lf the longest chain"
> >   (apply max (chain-lengths markov-chain)))
>
> > (defn flatten [x]
> >   "Flatten a collection"
> >   (let [s? #(instance? clojure.lang.Sequential %)]
> >     (filter (complement s?) (tree-seq s? seq x))))
>
> > (defn build-chain [markov-chain keychain words]
> >   "Builds a markov chain"
> >   (let [first-word (first words)]
> >     (if (seq words)
> >       (recur (assoc markov-chain keychain
> >                     (cons first-word (get markov-chain keychain)))
> >              (concat (rest keychain) [first-word])
> >              (rest words))
> >       (assoc markov-chain keychain []))))
>
> > (defn chain
> >   "Take a list of words and build a markov chain out of them.
> >   The length is the size of the key in number of words."
> >   ([words]
> >    (chain words 3))
> >   ([words length]
> >    (build-chain {} (for [x (range length)] nil) (map clean words))))
>
> > (defn split-sentence [text]
> >   "Convert a string to a collection on common boundaries"
> >   (filter seq (re-split #"[,.!?()\d]+\s*" text)))
>
> > (defn file-chain
> >   "Create a markov chain from the contents of a given file"
> >   ([file]
> >    (file-chain file 3))
> >   ([file length]
> >    (let [sentences (split-sentence (slurp file))
> >          flatten-list (fn [& x] (flatten (list x)))]
> >      (loop [markov-chain {} words sentences]
> >        (if (seq words)
> >          (recur (merge-with flatten-list
> >                             markov-chain
> >                             (chain (re-split #"\s+" (first words))))
> >                 (rest words))
> >          markov-chain)))))
>
> > (defn construct-sentence
> >    "Build a sentence from a markov chain structure.  Given a
> >    Markov chain (any size key),  Seed (to start the sentence) and
> >    Proc (a function for choosing the next word), returns a sentence
> >    composed until is reaches the end of a chain (an end of sentence)."
> >   ([markov-chain]
> >    (construct-sentence markov-chain nil rand-nth))
> >   ([markov-chain seed]
> >    (construct-sentence markov-chain seed rand-nth))
> >   ([markov-chain seed proc]
> >    (loop [words (if seed seed (rand-nth (keys markov-chain)))
> >           sentence (str-join " " (filter identity words))]
> >      (if (seq (markov-chain words))
> >        (let [word-new (proc (markov-chain words))]
> >          (recur (concat (rest words) [word-new])
> >                 (str-join " " (into [sentence] [word-new]))))
> >        sentence))))
>
> > Example usage:
>
> > (ns main (use markov))
> > (def markov (file-chain "draclock.txt"))
> > (doseq [x (range 100)]
> >   (doseq [x (range 3)] (println (construct-sentence markov)))
> >   (println))
>
> > Input files:
> > http://www.gutenberg.org/files/345/345.txt - dracula
> > http://www.gutenberg.org/dirs/etext99/advsh12.txt - sherlock holmes
>
> > I just cat them together to make draclock.txt ;-)
>
> > Cheers!
> > Travis
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"Clojure" group.
To post to this group, send email to clojure@googlegroups.com
To unsubscribe from this group, send email to 
clojure+unsubscr...@googlegroups.com
For more options, visit this group at 
http://groups.google.com/group/clojure?hl=en
-~----------~----~----~----~------~----~------~--~---

Reply via email to