On 2012-01-20 23:44, Gwern Branwen wrote:
On Fri, Jan 20, 2012 at 1:57 PM, Twan van Laarhoven<[email protected]> wrote:
Here is some example code (untested):
Well, you're right that it doesn't work. I tried to fix the crucial
function, 'atLeastThisManyDescendants', but it's missing something
because varying parts doesn't much affect the results when I try it
out on example input - it either returns everything or nothing, it
seems:
atLeastThisManyDescendants :: Int -> Trie a -> [CommonPrefix a]
atLeastThisManyDescendants minD trie@(Trie l d t')
| d < minD = []
| null forChildren = [Prefix [] trie]
| otherwise = forChildren
where
forChildren = [ Prefix (x:pfx) nms
| (x,t) <- Map.toList t'
, Prefix pfx nms <- atLeastThisManyDescendants l t ]
It should be "atLeastThisManyDescendants minD t", minD is a threshold for the
minimum numer of descendants, and it stays the same in the recursive call.
That's what you get for not testing your code :)
With the correct function I get a result like:
*Main> mapM_ (print . prefix) $ atLeastThisManyDescendants 4 test1
"gumi-"
"luka-"
"miku-a"
"miku-h"
"miku-m"
"miku-n"
"miku-p"
"miku-r"
"miku-s"
"miku-t"
"rin-"
Notice that there are lots of "miku-X" prefixes found. This is probably not what
you want. What exactly do you want the algorithm to do? For example, "" is
obviously a prefix of every string, but it is not very long. On the other hand,
each string is a prefix of itself, but that prefix is shared by only one string
(usually).
By the way, the sort and compare adjacent pairs approach corresponds to
"atLeastThisManyDescendants 2".
Twan
import qualified Data.Map as Map
-- A trie datatype
data Trie a = Trie { numLeafs, numDescendant :: !Int
, children :: Map.Map a (Trie a) }
instance (Show a) => Show (Trie a) where
showsPrec _ t = showString "fromList " . shows (toList t)
-- The empty trie
empty :: Trie a
empty = Trie 0 0 Map.empty
-- A trie that contains a single string
singleton :: Ord a => [a] -> Trie a
singleton [] = Trie 1 1 Map.empty
singleton (x:xs) = Trie 0 1 (Map.singleton x (singleton xs))
-- Merge two tries
merge :: Ord a => Trie a -> Trie a -> Trie a
merge (Trie l d c) (Trie l' d' c')
= Trie (l+l') (d+d') (Map.unionWith merge c c')
fromList :: Ord a => [[a]] -> Trie a
fromList = foldr merge empty . map singleton
toList :: Trie a -> [[a]]
toList (Trie l _ c)
= replicate l []
++ [ x:xs | (x,t) <- Map.toList c, xs <- toList t ]
data CommonPrefix a = Prefix { prefix :: [a], names :: Trie a }
instance (Show a) => Show (CommonPrefix a) where
showsPrec _ (Prefix p ns) = shows p . showString " ++ " . shows (toList ns)
-- Find prefixes that have at least minD descendants.
-- when there is a prefix xs with >=minD descendants, then shorter prefixes
will not be returned
atLeastThisManyDescendants :: Int -> Trie a -> [CommonPrefix a]
atLeastThisManyDescendants minD trie@(Trie l d c)
| d < minD = [] -- too few descendants
| null forChildren = [Prefix [] trie] -- all longer prefixes have too few
descendants, but this prefix doesn't
| otherwise = forChildren -- there are longer prefixes with enough
descendants, return them
where
forChildren = [ Prefix (x:pfx) names
| (x,t) <- Map.toList c
, Prefix pfx names <- atLeastThisManyDescendants minD t ]
test1 = fromList
["chorus-kiminoshiranaimonogatari.ogg"
,"chorus-mrmusic.ogg"
,"choucho-lastnightgoodnight.ogg"
,"dylanislame-aikotoba.ogg"
,"electriclove-ã¨ã¬ã¯ããªãã¯ã»ã©ã-korskremix.ogg"
,"gumi-bacon8-justhangingaround.ogg"
,"gumi-iapologizetoyou.ogg"
,"gumi-montblanc.ogg"
,"gumi-mozaikrole.ogg"
,"gumi-ãããã¼ã·ã³ã»ãµã¤ã¶.ogg"
,"gumi-showasengirl.ogg"
,"gumi-sweetfloatflatsã¹ã¤ã¼ãããã¼ãã¢ãã¼ã.ogg"
,"gumi-timewarpedafterchoppingmystagbeetle.ogg"
,"gumi-ãªãªã¸ãã«æ²-ä»ããã·ã¡ã°ãª.ogg"
,"gumi-ãã¯ãªãªã¸ãã«è¦ªå.ogg"
,"kaito-byakkoyano.ogg"
,"kaito-flowertail.ogg"
,"kasaneteto-tam-ochamekinouéé³ããå¹ã£åãããã¡ããæ©è½.ogg"
,"len-crime-timetosaygoodbye.ogg"
,"len-fireâflower.ogg"
,"len-ponponpon.ogg"
,"lily-prototype.ogg"
,"luka-apolxcore-waitingforyou.ogg"
,"luka-dimããã¤.ogg"
,"luka-dion-myheartwillgoon.ogg"
,"luka-dirgefilozofio-dirgeasleepinjesus.ogg"
,"luka-ã¢ã´ã¢ãã-doubelariatããã«ã©ãªã¢ãã.ogg"
,"luka-emon-heartbeats.ogg"
,"luka-emonloid3-ããã¼ããã¼.ogg"
,"luka-everybreathyoutake.ogg"
,"luka-ãªãªã¸ãã«-garden.ogg"
,"luka-justbefriends.ogg"
,"lukameiko-gemini.ogg"
,"luka-milkyway.ogg"
,"luka-ãã¿ãã-ããã.ogg"
,"luka-tic-tick.ogg"
,"luka-torinouta.ogg"
,"luka-zeijakukei-shounenshoujo.ogg"
,"luka-åæã«ã¢ãã¡-nologic-ä½ã£ã¦ã¿ã.ogg"
,"luka-é§ç®äººé.ogg"
,"meiko-artemis-awake.ogg"
,"miku-9ronicleãã©ãã.ogg"
,"miku-acolorlinkingworld-ãã®ä¸çã®ä¸ã§.ogg"
,"miku-acolorlinkingworld-éãè±.ogg"
,"miku-a+jugos-lullabyforkindness.ogg"
,"miku-akayaka-beacon.ogg"
,"miku-akayakap-sunrise.ogg"
,"miku-aoihana.ogg"
,"miku-arabianresponse.ogg"
,"miku-avtechno-tear.ogg"
,"miku-ããããããã¦cicci.ogg"
,"miku-cleantears-remind2011natsu-greenhillzonecrystiararemix.ogg"
,"miku-cleantears-remind2011natsu-å¤å½±summerwindremix.ogg"
,"miku-clocklockworks.ogg"
,"miku-dancedancevol2-runner.ogg"
,"miku-daniwellp-chaoticuniverse.ogg"
,"miku-dixieflatline-shinonomescrumble.ogg"
,"miku-electricloveã¨ã¬ã¯ããªãã¯ã©ã´.ogg"
,"miku-elegumitokyo-kissmebaby.ogg"
,"miku-galaxyodyssey-cryingirl.ogg"
,"miku-galaxyodyssey-galaxyspacelines.ogg"
,"miku-hakamairi.ogg"
,"miku-haruna.ogg"
,"miku-heartshooter.ogg"
,"miku-hoshikuzutokakera.ogg"
,"miku-innes.ogg"
,"miku-innocenceåé³ãã¯.ogg"
,"miku-jemappelle-motion-likeyou.ogg"
,"miku-jemappelle-motion-ohwell.ogg"
,"miku-jevannip-myfavoritesummer.ogg"
,"miku-kakokyuudance-éå¼å¸ãã³ã¹.ogg"
,"miku-kz-packaged.ogg"
,"miku-kz-tellyourworld.ogg"
,"miku-lastscene.ogg"
,"miku-lostmemoriesä»ã-åé³ãã¯.ogg"
,"miku-lovelyday.ogg"
,"miku-ããããlove_song.ogg"
,"mikulukagumi-prayfor.ogg"
,"miku-maple-åé³ãã¯æ¥-ãªãªã¸ãã«æ².ogg"
,"miku-more1.5.ogg"
,"[email protected]"
,"[email protected]"
,"miku-nana-ãã¼ãã¹ãã©ãã¯-ãããã¼è¬è¥ã³ã¢.ogg"
,"miku-nekomimiswitch.ogg"
,"miku-nightrainbow.ogg"
,"miku-noyounome.ogg"
,"miku-ããããããã®ãããã®ã¼ããªãªã¸ãã«.ogg"
,"miku-pandolistp-neverendinghammertime.ogg"
,"miku-ã¸ã©ã¼ãP-birthdayofeden-deepsleep.ogg"
,"miku-ã¸ã©ã¼ãP-birthdayofeden-æ°´ä¸èªæ¸.ogg"
,"miku-plustellia-dear.ogg"
,"miku-plustellia-å£ã®å½©åº¦-crazygirl.ogg"
,"miku-plustellia-å£ã®å½©åº¦-discoradio.ogg"
,"miku-ã½ãã½ãP-ã¹ãããã©ã¤ã.ogg"
,"miku-rabbitforgets.ogg"
,"miku-re:package-lastnightgoodnight.ogg"
,"miku-re:package-ourmusic.ogg"
,"miku-re:package-sutorobonaitsu.ogg"
,"miku-rollinggirl.ogg"
,"miku-ryo-ã¡ã«ã-melt.ogg"
,"miku-senseiniitteyaro.ogg"
,"miku-sevencolors-ã¬ã¢ãã¼ã.ogg"
,"miku-shoukinosatadenia.ogg"
,"miku-stratosphere.ogg"
,"miku-supernova.ogg"
,"miku-tam-lastnightgoodnight.ogg"
,"miku-tanatofobia.ogg"
,"miku-thearmyforyourenvy-ã¹ã¼ãã¼ã»ãã´ã¡.ogg"
,"miku-theendlesslove.ogg"
,"miku-tinyparadise-snowflake.ogg"
,"miku-tinyparadise-tinyparadise.ogg"
,"miku-unfragment.ogg"
,"miku-worldismine-ã«ãã¤ãºãã¤ã³.ogg"
,"miku-yakiimo.ogg"
,"miku-æå¦å°å¹´ã®æé¬±-ãªãªã¸ãã«.ogg"
,"miku-ã«ã©ãã«ããããã¼ããªãªã¸ãã«æ².ogg"
,"miku-æ¯æ¬é¸life.ogg"
,"miku-æ¯æ¬é¸åé³ãã¯ã©ããããã¨ãªã®ãã³ã¹.ogg"
,"miku-è¬è¥å¿çµbeautyfloor-buddhamix.ogg"
,"miku-è¬è¥å¿çµããã.ogg"
,"niconicochorus-blackrockshooter.ogg"
,"niconicochorus-justbefriends.ogg"
,"rin-dixieflatline-gemini.ogg"
,"rin-elegumitokyo-äºäººãæãã¦girlsside.ogg"
,"rin-helloworld.ogg"
,"rin-jutenija.ogg"
,"rin-lastnightgoodnight.ogg"
,"rin-ripples-evergreen.ogg"
,"rin-ã£Â´Ïï½c.ogg"
,"rollinggirl-piano.ogg"
,"seeu-gagain-ë°ë¼ë¦¬ë¼ddadada.ogg"
,"utau-éªæã¦ãbeyondãªãªã¸ãã«æ².ogg"
,"yuki-discochocolatheque.ogg"
,"yuki-shouwasenhosiga^ru.ogg"
,"yuki-shouwasenhosiga^ru.ogg"]
_______________________________________________
Haskell-Cafe mailing list
[email protected]
http://www.haskell.org/mailman/listinfo/haskell-cafe