Finally, I have made it ;)Trick was in more threads . For some reason if I run 
64 (sweet spot) threads program runsfaster both with -threaded and without 
;)Other trick is that I don't convert to uppercase (shaves second) rather pack 
nucleotidein 64 bit int.Program runs 30% faster multithreaded (scales better) 
than current entry, and consumes 50% less memory,and is shorter.If someone can 
see some improvement please post, otherwise I will contribute this program.

{-# Language BangPatterns #-}---- The Computer Language Benchmarks Game-- 
http://benchmarksgame.alioth.debian.org/---- Contributed by Branimir 
Maksimovic--import Data.Bitsimport Data.Charimport Data.Intimport 
Data.Listimport Data.IORefimport Data.Array.Unboxedimport Data.Array.Baseimport 
qualified Data.HashTable.IO as Himport Data.Hashableimport qualified 
Data.ByteString.Char8 as Simport Control.Concurrentimport Text.Printf
main = do    s <- S.getContents    let        (_,subs) = S.breakSubstring 
(S.pack ">THREE") s        content = (S.filter ((/=) '\n') .                    
S.dropWhile ((/=) '\n')) subs     mapM_ (execute content) actions
                    data Actions = I Int | S Stringactions = [I 1,I 2,          
 S "GGT",S "GGTA",S "GGTATT",S "GGTATTTTAATT",S "GGTATTTTAATTTATAGT"]execute 
content (I i) = writeFrequencies content iexecute content (S s) = writeCount 
content s
writeFrequencies :: S.ByteString -> Int -> IO ()writeFrequencies input size = 
do    ht <- tcalculate input size    lst <- H.foldM (\lst (k,v)->do         v' 
<- readIORef v        return $ insertBy (\(_,x) (_,y)->y `compare` x) (k,v') 
lst) [] ht    let sum = fromIntegral ((S.length input) + 1 - size)    mapM_ 
(\(k,v)-> do        printf "%s %.3f\n"             (toString k) ((100 * 
(fromIntegral v)/sum)::Double)) lst    putChar '\n'
writeCount :: S.ByteString -> String -> IO ()writeCount input string = do    
let size = length string    ht <- tcalculate input size    let k = T (toNum 
(S.pack string) 0 size) size    res <- H.lookup ht k    case res of         
Nothing -> putStrLn $ string ++ " not found..."        Just v -> do            
r <- readIORef v            printf "%d\t%s\n" r string
tcalculate :: S.ByteString -> Int -> IO HMtcalculate input size = do    let     
    l = [0..63]        actions = map (\i -> (calculate input i size (length 
l))) l    vars <- mapM (\action -> do                    var <- newEmptyMVar    
                forkIO $ do                        answer <- action             
           putMVar var answer                    return var) actions    result 
<- newTable :: IO HM    results <- mapM takeMVar vars    mapM_ (\ht -> H.foldM 
(\lst (k,v) -> do                             res <- H.lookup lst k             
               case res of                                Nothing -> do         
                           r1 <- readIORef v                                    
r2 <- newIORef r1                                    H.insert lst k r2          
                      Just v1 -> do                                    r1 <- 
readIORef v1                                    r2 <- readIORef v               
                     writeIORef v1 (r1+r2)                            return 
lst) result ht) results    return result
calculate :: S.ByteString -> Int -> Int -> Int -> IO HM    calculate input beg 
size incr = do    !ht <- newTable :: IO HM    let        calculate' i          
| i >= ((S.length input)+1 - size) = return ht         | otherwise = do         
   res <- H.lookup ht k            case res of                Nothing -> do     
               !r <- newIORef 1                    H.insert ht k r              
  Just v -> do                    !r <- readIORef v                    
writeIORef v (r+1)            calculate' (i+incr)                where k = T 
(toNum input i size) size    calculate' beg
toNum :: S.ByteString -> Int -> Int -> Int64toNum s beg size = toNum' 0 size    
where        toNum' v 0 = v        toNum' v i = toNum' ((v `shiftL` 2) .|.      
               (toNumA `unsafeAt` (ord (S.index s (beg+i-1))))) (i-1)
toString :: T -> StringtoString (T v s) = toString' v s    where        
toString' v 0 = []        toString' v i = case v.&.3 of                        
0 -> 'A'                        1 -> 'C'                        2 -> 'T'        
                3 -> 'G'                      : toString' (v `shiftR` 2) (i-1)
toNumA :: UArray Int Int64toNumA = array (0,255) [(ord 'a',0),(ord 'c',1),(ord 
't',2),(ord 'g',3),            (ord 'A',0),(ord 'C',1),(ord 'T',2),(ord 'G',3)]
data T = T !Int64 !Intinstance Eq T where    (T a _) == (T b _) = a == 
binstance Hashable T where    hashWithSalt _ (T a _) = fromIntegral a
type HM = H.BasicHashTable T (IORef Int)newTable = H.new

Date: Sun, 24 Mar 2013 20:12:57 +0100
Subject: Re: [Haskell-cafe] my take at knucleotide
From: g...@gregorycollins.net
To: bm...@hotmail.com
CC: haskell-cafe@haskell.org

What happens to performance if you compile and link with "cabal install 
--constraint='hashable < 1.2'" ?
G


                                          

_______________________________________________
Haskell-Cafe mailing list
Haskell-Cafe@haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe                            
          
_______________________________________________
Haskell-Cafe mailing list
Haskell-Cafe@haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe

Reply via email to