On 2012-01-20 23:44, Gwern Branwen wrote:
On Fri, Jan 20, 2012 at 1:57 PM, Twan van Laarhoven<[email protected]>  wrote:
Here is some example code (untested):

Well, you're right that it doesn't work. I tried to fix the crucial
function, 'atLeastThisManyDescendants', but it's missing something
because varying parts doesn't much affect the results when I try it
out on example input - it either returns everything or nothing, it
seems:
atLeastThisManyDescendants :: Int -> Trie a -> [CommonPrefix a]
atLeastThisManyDescendants minD trie@(Trie l d t')
   | d < minD = []
   | null forChildren = [Prefix [] trie]
   | otherwise = forChildren
 where
   forChildren = [ Prefix (x:pfx) nms
                 | (x,t) <- Map.toList t'
                 , Prefix pfx nms <- atLeastThisManyDescendants l t ]

It should be "atLeastThisManyDescendants minD t", minD is a threshold for the minimum numer of descendants, and it stays the same in the recursive call.

That's what you get for not testing your code :)

With the correct function I get a result like:

    *Main> mapM_ (print . prefix) $ atLeastThisManyDescendants 4 test1
    "gumi-"
    "luka-"
    "miku-a"
    "miku-h"
    "miku-m"
    "miku-n"
    "miku-p"
    "miku-r"
    "miku-s"
    "miku-t"
    "rin-"

Notice that there are lots of "miku-X" prefixes found. This is probably not what you want. What exactly do you want the algorithm to do? For example, "" is obviously a prefix of every string, but it is not very long. On the other hand, each string is a prefix of itself, but that prefix is shared by only one string (usually).

By the way, the sort and compare adjacent pairs approach corresponds to "atLeastThisManyDescendants 2".

Twan
import qualified Data.Map as Map

-- A trie datatype
data Trie a = Trie { numLeafs, numDescendant :: !Int
                   , children :: Map.Map a (Trie a) }

instance (Show a) => Show (Trie a) where
    showsPrec _ t = showString "fromList " . shows (toList t)

-- The empty trie
empty :: Trie a
empty = Trie 0 0 Map.empty

-- A trie that contains a single string
singleton :: Ord a => [a] -> Trie a
singleton [] = Trie 1 1 Map.empty
singleton (x:xs) = Trie 0 1 (Map.singleton x (singleton xs))

-- Merge two tries
merge :: Ord a => Trie a -> Trie a -> Trie a
merge (Trie l d c) (Trie l' d' c')
    = Trie (l+l') (d+d') (Map.unionWith merge c c')

fromList :: Ord a => [[a]] -> Trie a
fromList = foldr merge empty . map singleton

toList :: Trie a -> [[a]]
toList (Trie l _ c)
    = replicate l []
    ++ [ x:xs | (x,t) <- Map.toList c, xs <- toList t ]

data CommonPrefix a = Prefix { prefix :: [a], names :: Trie a }

instance (Show a) => Show (CommonPrefix a) where
    showsPrec _ (Prefix p ns) = shows p . showString " ++ " . shows (toList ns)

-- Find prefixes that have at least minD descendants.
-- when there is a prefix xs with >=minD descendants, then shorter prefixes 
will not be returned
atLeastThisManyDescendants :: Int -> Trie a -> [CommonPrefix a]
atLeastThisManyDescendants minD trie@(Trie l d c)
    | d < minD = [] -- too few descendants
    | null forChildren = [Prefix [] trie] -- all longer prefixes have too few 
descendants, but this prefix doesn't
    | otherwise = forChildren -- there are longer prefixes with enough 
descendants, return them
  where
    forChildren = [ Prefix (x:pfx) names
                  | (x,t) <- Map.toList c
                  , Prefix pfx names <- atLeastThisManyDescendants minD t ]

test1 = fromList
  ["chorus-kiminoshiranaimonogatari.ogg"
  ,"chorus-mrmusic.ogg"
  ,"choucho-lastnightgoodnight.ogg"
  ,"dylanislame-aikotoba.ogg"
  ,"electriclove-エレクトリック・ラブ-korskremix.ogg"
  ,"gumi-bacon8-justhangingaround.ogg"
  ,"gumi-iapologizetoyou.ogg"
  ,"gumi-montblanc.ogg"
  ,"gumi-mozaikrole.ogg"
  ,"gumi-ハッピーシンセサイザ.ogg"
  ,"gumi-showasengirl.ogg"
  ,"gumi-sweetfloatflatsスイートフロートアパート.ogg"
  ,"gumi-timewarpedafterchoppingmystagbeetle.ogg"
  ,"gumi-オリジナル曲-付きホシメグリ.ogg"
  ,"gumi-ミクオリジナル親友.ogg"
  ,"kaito-byakkoyano.ogg"
  ,"kaito-flowertail.ogg"
  ,"kasaneteto-tam-ochamekinou重音テト吹っ切れたおちゃめ機能.ogg"
  ,"len-crime-timetosaygoodbye.ogg"
  ,"len-fire◎flower.ogg"
  ,"len-ponponpon.ogg"
  ,"lily-prototype.ogg"
  ,"luka-apolxcore-waitingforyou.ogg"
  ,"luka-dimトロイ.ogg"
  ,"luka-dion-myheartwillgoon.ogg"
  ,"luka-dirgefilozofio-dirgeasleepinjesus.ogg"
  ,"luka-アゴアニキ-doubelariatダブルラリアット.ogg"
  ,"luka-emon-heartbeats.ogg"
  ,"luka-emonloid3-ハローハロー.ogg"
  ,"luka-everybreathyoutake.ogg"
  ,"luka-オリジナル-garden.ogg"
  ,"luka-justbefriends.ogg"
  ,"lukameiko-gemini.ogg"
  ,"luka-milkyway.ogg"
  ,"luka-やみくろ-かいぎ.ogg"
  ,"luka-tic-tick.ogg"
  ,"luka-torinouta.ogg"
  ,"luka-zeijakukei-shounenshoujo.ogg"
  ,"luka-勝手にアニメ-nologic-作ってみた.ogg"
  ,"luka-駄目人間.ogg"
  ,"meiko-artemis-awake.ogg"
  ,"miku-9ronicleプラチナ.ogg"
  ,"miku-acolorlinkingworld-この世界の下で.ogg"
  ,"miku-acolorlinkingworld-青い花.ogg"
  ,"miku-a+jugos-lullabyforkindness.ogg"
  ,"miku-akayaka-beacon.ogg"
  ,"miku-akayakap-sunrise.ogg"
  ,"miku-aoihana.ogg"
  ,"miku-arabianresponse.ogg"
  ,"miku-avtechno-tear.ogg"
  ,"miku-こえをきかせてcicci.ogg"
  ,"miku-cleantears-remind2011natsu-greenhillzonecrystiararemix.ogg"
  ,"miku-cleantears-remind2011natsu-夏影summerwindremix.ogg"
  ,"miku-clocklockworks.ogg"
  ,"miku-dancedancevol2-runner.ogg"
  ,"miku-daniwellp-chaoticuniverse.ogg"
  ,"miku-dixieflatline-shinonomescrumble.ogg"
  ,"miku-electricloveエレクトリックラヴ.ogg"
  ,"miku-elegumitokyo-kissmebaby.ogg"
  ,"miku-galaxyodyssey-cryingirl.ogg"
  ,"miku-galaxyodyssey-galaxyspacelines.ogg"
  ,"miku-hakamairi.ogg"
  ,"miku-haruna.ogg"
  ,"miku-heartshooter.ogg"
  ,"miku-hoshikuzutokakera.ogg"
  ,"miku-innes.ogg"
  ,"miku-innocence初音ミク.ogg"
  ,"miku-jemappelle-motion-likeyou.ogg"
  ,"miku-jemappelle-motion-ohwell.ogg"
  ,"miku-jevannip-myfavoritesummer.ogg"
  ,"miku-kakokyuudance-過呼吸ダンス.ogg"
  ,"miku-kz-packaged.ogg"
  ,"miku-kz-tellyourworld.ogg"
  ,"miku-lastscene.ogg"
  ,"miku-lostmemories付き-初音ミク.ogg"
  ,"miku-lovelyday.ogg"
  ,"miku-いいわけlove_song.ogg"
  ,"mikulukagumi-prayfor.ogg"
  ,"miku-maple-初音ミク楓-オリジナル曲.ogg"
  ,"miku-more1.5.ogg"
  ,"[email protected]"
  ,"[email protected]"
  ,"miku-nana-ボーナストラック-ハッピー般若コア.ogg"
  ,"miku-nekomimiswitch.ogg"
  ,"miku-nightrainbow.ogg"
  ,"miku-noyounome.ogg"
  ,"miku-むかしむかしのきょうのぼくオリジナル.ogg"
  ,"miku-pandolistp-neverendinghammertime.ogg"
  ,"miku-ジラートP-birthdayofeden-deepsleep.ogg"
  ,"miku-ジラートP-birthdayofeden-水中読書.ogg"
  ,"miku-plustellia-dear.ogg"
  ,"miku-plustellia-壁の彩度-crazygirl.ogg"
  ,"miku-plustellia-壁の彩度-discoradio.ogg"
  ,"miku-ぽわぽわP-ストロボライト.ogg"
  ,"miku-rabbitforgets.ogg"
  ,"miku-re:package-lastnightgoodnight.ogg"
  ,"miku-re:package-ourmusic.ogg"
  ,"miku-re:package-sutorobonaitsu.ogg"
  ,"miku-rollinggirl.ogg"
  ,"miku-ryo-メルト-melt.ogg"
  ,"miku-senseiniitteyaro.ogg"
  ,"miku-sevencolors-レモネード.ogg"
  ,"miku-shoukinosatadenia.ogg"
  ,"miku-stratosphere.ogg"
  ,"miku-supernova.ogg"
  ,"miku-tam-lastnightgoodnight.ogg"
  ,"miku-tanatofobia.ogg"
  ,"miku-thearmyforyourenvy-スーパー・ノヴァ.ogg"
  ,"miku-theendlesslove.ogg"
  ,"miku-tinyparadise-snowflake.ogg"
  ,"miku-tinyparadise-tinyparadise.ogg"
  ,"miku-unfragment.ogg"
  ,"miku-worldismine-ルドイズマイン.ogg"
  ,"miku-yakiimo.ogg"
  ,"miku-文学少年の憂鬱-オリジナル.ogg"
  ,"miku-カラフルポップビートオリジナル曲.ogg"
  ,"miku-杯本選life.ogg"
  ,"miku-杯本選初音ミクどういうことなのダンス.ogg"
  ,"miku-般若心経beautyfloor-buddhamix.ogg"
  ,"miku-般若心経ポップ.ogg"
  ,"niconicochorus-blackrockshooter.ogg"
  ,"niconicochorus-justbefriends.ogg"
  ,"rin-dixieflatline-gemini.ogg"
  ,"rin-elegumitokyo-二人、恋してgirlsside.ogg"
  ,"rin-helloworld.ogg"
  ,"rin-jutenija.ogg"
  ,"rin-lastnightgoodnight.ogg"
  ,"rin-ripples-evergreen.ogg"
  ,"rin-っ´ω`c.ogg"
  ,"rollinggirl-piano.ogg"
  ,"seeu-gagain-따라리라ddadada.ogg"
  ,"utau-雪歌ユフbeyondオリジナル曲.ogg"
  ,"yuki-discochocolatheque.ogg"
  ,"yuki-shouwasenhosiga^ru.ogg"
  ,"yuki-shouwasenhosiga^ru.ogg"]

_______________________________________________
Haskell-Cafe mailing list
[email protected]
http://www.haskell.org/mailman/listinfo/haskell-cafe

Reply via email to