Could you throw this on github, so we can easily follow along with improvements?
On Mar 22, 8:25 pm, "zoglma...@gmail.com" <zoglma...@gmail.com> wrote: > While playing around and implementing straight up Humman compression, > I wrote a handful of utilities to conveinently play with byte and bit > streams because I didn't see anything too helpful in the mmap and > duck_stream files. > > What I wrote would need to be changed to better work with the existing > code and to increase performance, but does anyone think that it would > be helpful to add these things for playing with binary data into > contrib? > > ; to exercise everything -- use this to copy a file -- obviously > change the files > (time (to-file "/Users/kaz/Desktop/Programming Clojure-copy.pdf" (bit- > to-byte-stream (byte-to-bit-stream (to-byte-stream "/Users/kaz/Desktop/ > Programming Clojure.pdf"))))) > > ; to play with Huffman compress/decompression > (compress-file "/Users/kaz/Desktop/onlisp.pdf" "/Users/kaz/Desktop/ > onlisp.pdf.compress") > (uncompress-file "/Users/kaz/Desktop/onlisp.pdf.compress" "/Users/kaz/ > Desktop/onlisp2.pdf") > > ;-------------------------------------------------------------------------- > ---------------------- > ;-------------- setup files to act like streams at the bit level > ------------- > ;-------------------------------------------------------------------------- > ---------------------- > (defn array-to-list [arr list-size] > (loop [index 0 accum []] > (if (or (= index (alength arr)) (= index list-size)) > accum > (recur (inc index) (conj accum (aget arr index)))))) > > (defn list-to-array [l array-type arr-size] > (let [arr-size (min arr-size (count l)) > arr (make-array array-type arr-size)] > (loop [index 0 l l] > (if (= index arr-size) > arr > (do > (aset arr index (first l)) > (recur (inc index) (rest l))))))) > > (defn num-to-bits [num size] > (loop [num num size size accum nil] > (if (= size 0) > accum > (let [next-n (bit-shift-right num 1) > next-bit (bit-xor num (bit-shift-left next-n 1))] > (recur next-n (dec size) (cons next-bit accum)))))) > > (import '(java.io FileInputStream FileOutputStream)) > (defn to-byte-stream [filename] > (let [bufsize 65536 > is (FileInputStream. filename) > read-buf > (fn [] > (let [buf (make-array (Byte/TYPE) bufsize) > num-read (.read is buf 0 bufsize)] > (if (= num-read -1) > (do > (.close is) > nil) > (array-to-list buf num-read)))) > read-all > (fn read-all [] > (let [next-buf (read-buf)] > (if (nil? next-buf) > nil > (lazy-cat next-buf (read-all)))))] > (read-all))) > > (defn byte-to-bit-stream [l] > (if (nil? l) > nil > (lazy-cat (num-to-bits (first l) 8) (byte-to-bit-stream (rest > l))))) > > (defn bits-to-num [l num-bit-size] > (let [cnt (count (take num-bit-size l))] > (if (< cnt num-bit-size) > (bits-to-num (lazy-cat l (take (- num-bit-size cnt) (repeat > 0))) num-bit-size) > (loop [l l size num-bit-size accum 0] > (if (= size 0) > (list accum l) > (recur (rest l) (dec size) (bit-or (bit-shift-left accum 1) > (first l)))))))) > > (defn bit-to-byte-stream [l] > (if (nil? l) > nil > (let [[next-byte rst] (bits-to-num l 8)] > (lazy-cons next-byte (bit-to-byte-stream rst))))) > > (defn to-file [filename byte-stream] > (let [os (FileOutputStream. filename)] > (loop [bytes byte-stream bytes-written 0] > (if (nil? bytes) > (do > (.close os) > bytes-written) > (let [[bytes rest-bytes] (split-at 65536 bytes) > buf (list-to-array (map byte bytes) (Byte/TYPE) 65536) > wrote-now (alength buf)] > (do > (.write os buf 0 wrote-now) > (recur rest-bytes (+ bytes-written wrote-now)))))))) > > ;-------------------------------------------------------------------------- > ---------------------- > ;-------------- Using byte/bit streams to do huffman ------------- > ;-------------------------------------------------------------------------- > ---------------------- > > (defn get-occurances [l] > (loop [l l accum {}] > (if (empty? l) > accum > (let [next (first l) > current-count (if (nil? (accum next)) 0 (accum next)) > next-accum (assoc accum next (inc current-count))] > (recur (rest l) next-accum))))) > > (defn tree-has-children? [tree] > (and (not (nil? tree)) (or (not (nil? (tree :left-child))) (not (nil? > (tree :right-child)))))) > (defn tree-has-lchild? [tree] > (and (not (nil? tree)) (not (nil? (tree :left-child))))) > (defn tree-has-rchild? [tree] > (and (not (nil? tree)) (not (nil? (tree :right-child))))) > (defn tree-count [tree] > (if (nil? tree) > 0 > (+ 1 > (if (tree-has-lchild? tree) (tree-count (tree :left-child)) 0) > (if (tree-has-rchild? tree) (tree-count (tree :right-child)) > 0)))) > > (defn get-hufftree [occurances] > (let [build (fn build [occurances] > (let [sorted-occurances (sort-by (fn [[tree val num-occur]] > num- > occur) occurances)] > (cond (nil? sorted-occurances) > nil > (nil? (frest sorted-occurances)) > (let [[tree val occur] (first sorted-occurances)] > (if (nil? tree) > {:type :leaf :data val :num-occurances occur} > tree)) > :else > (let [[tree1 val1 occur1] (first sorted-occurances) > tree1 (if (nil? tree1) {:type :leaf :data val1 > :num- > occurances occur1} tree1) > [tree2 val2 occur2] (frest sorted-occurances) > tree2 (if (nil? tree2) {:type :leaf :data val2 > :num- > occurances occur2} tree2) > combined-occur (+ occur1 occur2) > tree1-cnt (tree-count tree1) > tree2-cnt (tree-count tree2) > left-tree (cond (and (= tree1-cnt tree2-cnt) (= > :leaf > (tree1 :type)) (= :leaf (tree2 :type))) > (if (> occur1 occur2) > tree2 > tree1) > (> tree1-cnt tree2-cnt) > tree1 > :else > tree2) > right-tree (if (= left-tree tree1) tree2 tree1) > combined-tree {:type :node > :num-occurances combined-occur > :left-child left-tree > :right-child right-tree} > lst (cons (list combined-tree nil > combined-occur) (rrest > sorted-occurances))] > (build lst)))))] > (build (map #(cons nil %1) occurances)))) > > (defn print-tree [tree] > (if (nil? tree) > (println "tree is nil") > (do > (println "data" (tree :data)) > (println "--right-child--") > (print-tree (tree :right-child)) > (println "--left-child--") > (print-tree (tree :left-child))))) > > (defn map-tree-preorder [f tree] > (cond (nil? tree) > nil > (= :leaf (tree :type)) > (list (f tree)) > :else > (let [accum-1 (map-tree-preorder f (tree :left-child)) > accum-2 (lazy-cat accum-1 (map-tree-preorder f (tree :right- > child))) > accum-3 (lazy-cat accum-2 (list (f tree)))] > accum-3))) > > (defn tree-pre-index [root tree] > (loop [l (map-tree-preorder identity root) cnt 0] > (cond (nil? l) > nil > (= (first l) tree) > cnt > :else > (recur (rest l) (inc cnt))))) > > (defn hufftree-to-bits [htree] > (let [leaf-as-bits > (fn [t] > (lazy-cat (list 1) (num-to-bits (t :data) 9))) > node-as-bits > (fn [t] > (lazy-cat > (list 0) > (num-to-bits (tree-pre-index htree (t :left-child)) 9) > (num-to-bits (tree-pre-index htree (t :right-child)) 9)))] > (reduce concat nil (map-tree-preorder #(if (= :leaf (%1 :type)) (leaf- > as-bits %1) (node-as-bits %1)) htree)) > )) > > (defn bits-to-hufftree [bits num-nodes] > (loop [accum nil bits bits num-nodes num-nodes] > (if (= 0 num-nodes) > (list (last accum) bits) > (let [[[next-bit] rest-bits] (split-at 1 bits)] > (if (= 1 next-bit) > (let [[data rest-bits] (bits-to-num rest-bits 9) > leaf {:type :leaf :data data :pre-index (count accum)}] > (recur (lazy-cat accum (list leaf)) rest-bits (dec num-nodes))) > (let [[left-child-index rest-bits] (bits-to-num rest-bits 9) > left-child (nth accum left-child-index) > [right-child-index rest-bits] (bits-to-num rest-bits 9) > right-child (nth accum right-child-index) > node {:type :node :pre-index (count accum) :right-child right- > child :left-child left-child}] > (recur (lazy-cat accum (list node)) rest-bits (dec num- > nodes)))))))) > > (defn applyHuffForEntry [tree entry] > (let [path (reverse > ((fn ahfe [tree path] > (cond (nil? tree) > nil > (= (tree :data) entry) > path > :else > (let [left-path (ahfe (tree :left-child) (cons 0 path))] > (if (nil? left-path) > (ahfe (tree :right-child) (cons 1 path)) > left-path)))) tree nil))] > (if (nil? path) > (throw (IllegalArgumentException. "entry not found!")) > path))) > > (defn unapplyHuffForEntry [tree bits] > (loop [tree tree bits bits] > (let [next-bit (first bits) > right-child (if (nil? tree) nil (tree :right-child)) > left-child (if (nil? tree) nil (tree :left-child))] > (cond (nil? tree) > (throw (IllegalArgumentException. "Tree must not be null!")) > (= 1 next-bit) > (if (= :leaf (right-child :type)) > (list (right-child :data) (rest bits)) > (recur right-child (rest bits))) > (= 0 next-bit) > (if (= :leaf (left-child :type)) > (list (left-child :data) (rest bits)) > (recur left-child (rest bits))))))) > > (defn applyHuffForEntries [tree entries] > (if (nil? entries) > nil > (lazy-cat (applyHuffForEntry tree (first entries)) > (applyHuffForEntries tree (rest entries))))) > > (defn unapplyHuffForEntries [tree bits num-expect] > (cond > (= num-expect 0) > nil > (nil? bits) > (throw (IllegalArgumentException. (str "Too few bits. Still > expect: " num-expect))) > :else > (let [[data remaining-bits] (unapplyHuffForEntry tree bits)] > (lazy-cons data (unapplyHuffForEntries tree remaining-bits (dec num- > expect)))))) > > (defn compress-file [in-filename out-filename] > (let [contents (to-byte-stream in-filename) ;bad to hold in memory > byte-count (count contents) > huff-tree (get-hufftree (get-occurances contents)) > num-nodes (tree-count huff-tree) > huff-tree-bits (hufftree-to-bits huff-tree) > compress-bits (applyHuffForEntries huff-tree contents) > file-bits (lazy-cat (num-to-bits byte-count 63) (num-to-bits num- > nodes 9) huff-tree-bits compress-bits)] > (do > ; (println "original num bits" (* byte-count 8)) > ; (println "num-nodes" count) > ; (print-hufftree huff-tree) > ; (println "size huff-tree-bits" (count huff-tree-bits)) > ; (println "size compress-bits" (count compress-bits)) > (to-file out-filename (bit-to-byte-stream file-bits)) > ))) > > (defn uncompress-file [in-filename out-filename] > (let [bit-contents (byte-to-bit-stream (to-byte-stream in-filename)) > [byte-count rest-bits] (bits-to-num bit-contents 63) > [num-nodes rest-bits] (bits-to-num rest-bits 9) > [huff-tree rest-bits] (bits-to-hufftree rest-bits num-nodes) > uncompress-bytes (unapplyHuffForEntries huff-tree rest-bits byte- > count)] > (do > (to-file out-filename uncompress-bytes)))) --~--~---------~--~----~------------~-------~--~----~ You received this message because you are subscribed to the Google Groups "Clojure" group. To post to this group, send email to clojure@googlegroups.com To unsubscribe from this group, send email to clojure+unsubscr...@googlegroups.com For more options, visit this group at http://groups.google.com/group/clojure?hl=en -~----------~----~----~----~------~----~------~--~---