Very cool. I actually cleaned up the code a little bit more this morning trying to speed things up a bit. It's still not as fast as I'd like, but I'm not up to speed on Clojure optimization either, so I could be missing something.
Revised code: (ns markov (use clojure.contrib.str-utils)) (defn flatten "Takes any nested combination of sequential things (lists, vectors, etc.) and returns their contents as a single, flat sequence. (flatten nil) returns nil." [x] (filter (complement sequential?) (rest (tree-seq sequential? seq x)))) (defn rand-elt "Return a random element of this seq" [s] (nth s (rand-int (count s)))) (defn clean [txt] "clean given txt for symbols disruptive to markov chains" (let [new-txt (re-gsub #"[:;,^\"()]" "" txt) new-txt (re-gsub #"'(?!(d|t|ve|m|ll|s|de|re))" "" new-txt)] new-txt)) (defn chain-lengths [markov-chain] "return a set of lengths for each element in the collection" (let [markov-keys (map keys markov-chain)] (set (for [x markov-keys] (count x))))) (defn max-chain-length [markov-chain] "return the length lf the longest chain" (apply max (chain-lengths markov-chain))) (defn chain "Take a list of words and build a markov chain out of them. The length is the size of the key in number of words." ([words] (chain words 3)) ([words length] (loop [markov-chain {} keychain (for [x (range length)] nil) words (map clean words)] (let [first-word (first words)] (if (seq words) (recur (assoc markov-chain keychain (cons first-word (get markov-chain keychain))) (concat (rest keychain) [first-word]) (rest words)) (assoc markov-chain keychain [])))))) (defn split-sentence [text] "Convert a string to a collection on common boundaries" (filter seq (re-split #"[,.!?()\d]+\s*" text))) (defn file-chain "Create a markov chain from the contents of a given file" ([file] (file-chain file 3)) ([file length] (let [sentences (split-sentence (slurp file)) flatten-list (fn [& x] (flatten (list x)))] (loop [markov-chain {} words sentences] (if (seq words) (recur (merge-with flatten-list markov-chain (chain (re-split #"\s+" (first words)))) (rest words)) markov-chain))))) (defn construct-sentence "Build a sentence from a markov chain structure. 
Given a Markov chain (any size key), Seed (to start the sentence) and Proc (a function for choosing the next word), returns a sentence composed until is reaches the end of a chain (an end of sentence)." ([markov-chain] (construct-sentence markov-chain nil rand-elt)) ([markov-chain seed] (construct-sentence markov-chain seed rand-elt)) ([markov-chain seed proc] (loop [words (if seed seed (rand-elt (keys markov-chain))) sentence (str-join " " (filter identity words))] (if (seq (markov-chain words)) (let [word-new (proc (markov-chain words))] (recur (concat (rest words) [word-new]) (str-join " " (into [sentence] [word-new])))) sentence)))) On Apr 24, 12:00 pm, Luke VanderHart <luke.vanderh...@gmail.com> wrote: > Cool... I actually did a Markov chain generator myself as one of my > early Clojure projects. I posted about it at the DC Study group, here: > > http://groups.google.com/group/clojure-study-dc/browse_thread/thread/... > > It looks like yours is more succinct... I'll definitely have to take > some time and compare our approaches. > > -Luke > > On Apr 24, 8:47 am, tmountain <tinymount...@gmail.com> wrote: > > > In an effort to learn more about Clojure, I decided to port a markov > > text generator which a friend wrote in Python. After getting through a > > few snags, I completed the program and decided to have some fun > > feeding in some e-books downloaded from the Gutenberg project as > > input. In this case, I chose Sherlock Holmes and Bram Stoker's Dracula > > to create a bizarre mashup, which could be called Draclock Holmes or > > something approximate. I had the program print out three-line snippits > > of text, and some of the resulting text resembles a sort of absurd > > poetry. I'd imagine if I let it churn and burn for a few hours, some > > real gems could emerge. 
> > > acting in her interests > > Mina's morning and evening hypnotic answer is unvaried > > with devilish passion > > > she succeeded somewhat > > swiftly and deftly > > His look is a warning > > > together as we swept along > > found myself lying on my bed trembling all over > > Miss Stoner and I gazed at him in many tongues > > > my power to reward you for your services > > common subject for conversation > > throwing open another door > > > nine years in England > > strong-faced old man > > to mediaeval times > > > Here's the code. I'm new to Clojure, so I'm open to suggestions. It's > > written in a purely functional non-destructive fashion; although, I'm > > sure a few things could be improved. > > > (ns markov > > (use clojure.contrib.str-utils)) > > > (defn rand-nth [coll] > > "return a random element from a collection" > > (nth (seq coll) (rand-int (count coll)))) > > > (defn clean [txt] > > "clean given txt for symbols disruptive to markov chains" > > (let [new-txt (re-gsub #"[:;,^\"()]" "" txt) > > new-txt (re-gsub #"'(?!(d|t|ve|m|ll|s|de|re))" "" new-txt)] > > new-txt)) > > > (defn chain-lengths [markov-chain] > > "return a set of lengths for each element in the collection" > > (let [markov-keys (map keys markov-chain)] > > (set (for [x markov-keys] (count x))))) > > > (defn max-chain-length [markov-chain] > > "return the length lf the longest chain" > > (apply max (chain-lengths markov-chain))) > > > (defn flatten [x] > > "Flatten a collection" > > (let [s? #(instance? clojure.lang.Sequential %)] > > (filter (complement s?) (tree-seq s? seq x)))) > > > (defn build-chain [markov-chain keychain words] > > "Builds a markov chain" > > (let [first-word (first words)] > > (if (seq words) > > (recur (assoc markov-chain keychain > > (cons first-word (get markov-chain keychain))) > > (concat (rest keychain) [first-word]) > > (rest words)) > > (assoc markov-chain keychain [])))) > > > (defn chain > > "Take a list of words and build a markov chain out of them. 
> > The length is the size of the key in number of words." > > ([words] > > (chain words 3)) > > ([words length] > > (build-chain {} (for [x (range length)] nil) (map clean words)))) > > > (defn split-sentence [text] > > "Convert a string to a collection on common boundaries" > > (filter seq (re-split #"[,.!?()\d]+\s*" text))) > > > (defn file-chain > > "Create a markov chain from the contents of a given file" > > ([file] > > (file-chain file 3)) > > ([file length] > > (let [sentences (split-sentence (slurp file)) > > flatten-list (fn [& x] (flatten (list x)))] > > (loop [markov-chain {} words sentences] > > (if (seq words) > > (recur (merge-with flatten-list > > markov-chain > > (chain (re-split #"\s+" (first words)))) > > (rest words)) > > markov-chain))))) > > > (defn construct-sentence > > "Build a sentence from a markov chain structure. Given a > > Markov chain (any size key), Seed (to start the sentence) and > > Proc (a function for choosing the next word), returns a sentence > > composed until is reaches the end of a chain (an end of sentence)." > > ([markov-chain] > > (construct-sentence markov-chain nil rand-nth)) > > ([markov-chain seed] > > (construct-sentence markov-chain seed rand-nth)) > > ([markov-chain seed proc] > > (loop [words (if seed seed (rand-nth (keys markov-chain))) > > sentence (str-join " " (filter identity words))] > > (if (seq (markov-chain words)) > > (let [word-new (proc (markov-chain words))] > > (recur (concat (rest words) [word-new]) > > (str-join " " (into [sentence] [word-new])))) > > sentence)))) > > > Example usage: > > > (ns main (use markov)) > > (def markov (file-chain "draclock.txt")) > > (doseq [x (range 100)] > > (doseq [x (range 3)] (println (construct-sentence markov))) > > (println)) > > > Input > > files:http://www.gutenberg.org/files/345/345.txt-draculahttp://www.gutenberg.org/dirs/etext99/advsh12.txt-sherlock > > holmes > > > I just cat them together to make draclock.txt ;-) > > > Cheers! 
> > Travis --~--~---------~--~----~------------~-------~--~----~ You received this message because you are subscribed to the Google Groups "Clojure" group. To post to this group, send email to clojure@googlegroups.com To unsubscribe from this group, send email to clojure+unsubscr...@googlegroups.com For more options, visit this group at http://groups.google.com/group/clojure?hl=en -~----------~----~----~----~------~----~------~--~---