On Wed, May 31, 2006 at 11:41:27AM -0700, David Miller ([EMAIL PROTECTED]) 
wrote:
> > >   Worse: he folded the jenkins algorith result with
> > > 
> > >    h ^= h >> 16;
> > >    h ^= h >> 8;
> > > 
> > >   Destroying the coverage of the function.
> > 
> > It was done to simulate socket code which uses the same folding.
> > Leaving 32bit space is just wrong, consider hash table size with that
> > index.
> 
> You absolutely show not do this shifting on the jenkins hash
> result, you destroy the distribution entirely.  Just mask
> it with the hash mask and that's all you need to do.
> 
> Brian is right, this is absolutely critical to using the Jenkins hash
> correctly.  You're "unmixing" the bits it worked so hard to mix.

That is wrong. And I have a code and picture to show that, 
and you dont - prove me wrong :)

Attached code and picture which shows _exactly_ the same distribution for
folded and not folded Jenkins hash distribution, and it's artifact
compared to XOR hash.

Fairly distributed values being XORed produce still fairly distributed
value.

-- 
        Evgeniy Polyakov

Attachment: hash_comparison.png
Description: PNG image

#include <stdlib.h>
#include <stdio.h>
#include <errno.h>

typedef unsigned int u32;
typedef unsigned short u16;
typedef unsigned char u8;

typedef unsigned int __u32;
typedef unsigned short __u16;
typedef unsigned char __u8;

#include "jhash.h"

struct hash_entry
{
        unsigned long           counter;
};

static inline num2ip(__u8 a1, __u8 a2, __u8 a3, __u8 a4)
{
        __u32 a = 0;

        a |= a1;
        a << 8;
        a |= a2;
        a << 8;
        a |= a3;
        a << 8;
        a |= a4;

        return a;
}

static inline __u8 get_random_byte(void)
{
        return 1 + (int) (255.0 * (rand() / (RAND_MAX + 1.0)));
}

static inline __u16 get_random_word(void)
{
        return 1 + (int) (65536.0 * (rand() / (RAND_MAX + 1.0)));
}

unsigned int hash_addr(__u32 faddr, __u16 fport, __u32 laddr, __u16 lport)
{
        unsigned int h = (laddr ^ lport) ^ (faddr ^ fport);
        h ^= h >> 16;
        h ^= h >> 8;
        return h;
}

unsigned int hash_addr1(__u32 faddr, __u16 fport, __u32 laddr, __u16 lport)
{
        u32 ports;
        unsigned int h;

        ports = lport;
        ports <<= 16;
        ports |= fport;

        h = jhash_2words(faddr, laddr, ports);
        //h ^= h >> 16;
        //h ^= h >> 8;

        return h;
}

int main()
{
        struct hash_entry *table;
        __u32 saddr, daddr;
        __u16 sport, dport;
        unsigned int hash, i, *results;
        unsigned int hash_size = 65536, iter_num = 100;

        table = malloc(hash_size * sizeof(struct hash_entry));
        if (!table)
                return -ENOMEM;
        
        results = malloc(hash_size * sizeof(unsigned int));
        if (!results)
                return -ENOMEM;

        for (i=0; i<hash_size; ++i) {
                results[i] = 0;
                table[i].counter = 0;
        }

        srand(time(NULL));

        daddr = num2ip(192, 168, 0, 1);
        dport = htons(80);
        
        for (i=0; i<hash_size*iter_num; ++i) {
                saddr = num2ip(get_random_byte(), get_random_byte(), 
get_random_byte(), get_random_byte());
                sport = get_random_word();

                hash = hash_addr(saddr, sport, daddr, dport);
                hash &= (hash_size - 1);

                table[hash].counter++;
        }

        for (i=0; i<hash_size; ++i)
                results[table[i].counter]++;
        
        for (i=0; i<hash_size; ++i)
                printf("%u %u\n", i, results[i]);
                //printf("%u %u\n", i, table[i].counter);
}

Reply via email to