Sumitava Mukherjee wrote:
I need to randomly sample from a list where all choices have weights
attached to them. The probability of them being choosen is dependent
on the weights.
I am not sure why everybody is normalizing by dividing the weights.
This isa possibility (I had fun writing it).

def sample_without_replacement(choices, weights):
    '''Yield elements sampled w/o replacement by weighting'''
    if len(weights) != len(choices):
        raise ValueError('%d choices, but %d weights?' % (
            len(choices), len(weights)))
    if min(weights) < 0:
        raise ValueError('Negative weights?: %s' % (
                    [(i, w) for i, w in enumerate(weights) if w < 0]))

    # look at only non-zero probabilities
    combined = [(w, v) for w, v in zip(weights, choices) if w > 0]

    # Go from highest probability down to reduce expected traversal
    combined.sort(key=operator.itemgetter(0), reverse=True)

    total = sum(w for w, v in combined) # sum(weights) also works
    while combined:
        spot = sample = random.random() * total
        for n, (weight, choice) in enumerate(combined):
            spot -= weight
            if spot <= 0:
                break
        else:
            # n, (weight, choice) = 0, combined[0] # Highest probability
            raise ValueError('%f left after choosing %f/%f?: %s' % (
                                spot, sample, total, combined))
        yield choice
        total -= weight
        if weight > total * 256: # arbitrary choice for recalculation
            # precision affected, rebuild
            total = sum(w for w, v in combined)
        del combined[n]
    raise ValueError('Samplng more than %d without replacement?' % (
                      sum(1 for w in weights if w > 0)))

for n in range(10):
    gen = sample_without_replacement('abcdef', [32,16,8,4,2,1])
    print gen.next(), gen.next(), gen.next()

--Scott David Daniels
scott.dani...@acm.org
--
http://mail.python.org/mailman/listinfo/python-list

Reply via email to