lists: > The code below is using way more RAM than it should. It seems to only > take so long when I build the 'programs' list - the actual > reading/parsing is fast. For a 5MB input file, it's using 50MB of RAM! > Any idea how to combat this? > > Thanks, > Lyle > > {-# OPTIONS_GHC -fglasgow-exts #-} > > -- linear_importer.hs > > import Control.Monad (unless) > import Data.ByteString.Char8 (ByteString) > import qualified Data.ByteString.Char8 as BS > import Text.Regex as RE > import Data.Time.Calendar > import Data.Time.LocalTime > import System.Environment > > -- Consider adding strictness as necessary for performance. > > -- STB time channel > data ChannelChange = ChannelChange Int LocalTime Int > deriving Show > > -- channel program_no start > end network_no > data ScheduleProgram = ScheduleProgram Int Int LocalTime > LocalTime (Maybe Int) > deriving Show > > main = do > fileNames <- getArgs > ensure (length fileNames == 2) usageMessage > let [eventFileName,programFileName] = fileNames > putStrLn ("Reading program schedule file from '" ++ programFileName > ++ "'...") > text <- BS.readFile programFileName > programs <- sequence $ map parseScheduleProgram (BS.lines text) > print (take 20 programs) > return () > > usageMessage = "Usage: linear_importer <channel change file> <schedule > file>" > > parseScheduleProgram :: ByteString -> IO ScheduleProgram > parseScheduleProgram s = do > let fields = BS.split '|' s > ensure (length fields == 7) ("Wrong number of fields in schedule > program: " ++ BS.unpack s) > let > [_,channelNoText,programNoText,_,startTimeText,endTimeText,networkNoText] > = fields > let channelNo = read $ BS.unpack channelNoText > programNo = read $ BS.unpack programNoText > startTime <- parseProgramTime startTimeText > endTime <- parseProgramTime endTimeText > let networkNo = if BS.null networkNoText then Nothing else Just > (read (BS.unpack networkNoText)) > return $ ScheduleProgram channelNo programNo startTime endTime networkNo > > parseProgramTime :: ByteString -> IO LocalTime > parseProgramTime s = do > let parts = BS.split 'T' s > ensure (length parts == 2) > ("Expected exactly one T in eventChannelChange time: " ++ > BS.unpack s) > let [datePart,timePart] = parts > ensure (BS.length datePart == 8) > ("Expected 8 digits in date part of eventChannelChange time: " > ++ BS.unpack s) > let (yearPart, monthDayPart) = BS.splitAt 4 datePart > (monthPart, dayPart) = BS.splitAt 2 monthDayPart > year = read $ BS.unpack yearPart > month = read $ BS.unpack monthPart > day = read $ BS.unpack dayPart > let date = fromGregorian year month day > ensure (BS.length timePart == 6) > ("Expected 6 digits in time part of eventChannelChange time: " > ++ BS.unpack s) > let (hoursPart,minutesSecondsPart) = BS.splitAt 2 timePart > (minutesPart,secondsPart) = BS.splitAt 2 minutesSecondsPart > hours = read $ BS.unpack hoursPart > minutes = read $ BS.unpack minutesPart > seconds = read $ BS.unpack secondsPart > let time = TimeOfDay hours minutes (fromInteger seconds) > return (LocalTime date time)
Argh, all those: read .unpacks are going to be painful. Consider using Data.ByteString.Char8.readInt/Integer -- Don _______________________________________________ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe