Out of pseudo-morbid curiousity I was looking at inet_ehashfn, which looks
rather like this:
static inline int inet_ehashfn_orig(const __u32 laddr, const __u16 lport,
const __u32 faddr, const __u16 fport,
const int ehash_size)
{
int h = (laddr ^ lport) ^ (faddr ^ fport);
h ^= h >> 16;
h ^= h >> 8;
return h & (ehash_size - 1);
}
What are the two lines xoring h with shifts of itself meant to be doing?
I wrote a small test program and it does not seem that removing those two lines
makes much of a difference in the resulting hash:
$ ./hash `expr 128 \* 1024` 192.168.1.1 80 255.255.255.255 49152 131072
[ first arg sets size of hash table, second is the laddr, third is the lport,
fourth is the faddr -- -1 means generate random IPs, fifth is the starting fport
number and sixth is the number of things to stick into the hash...]
Hello, about to run the hash functions with:
size 131072 laddr c0a80101 lport 80 faddr ffffffff fport 49152 to 65535
131072 remotes
The orig hash went from 0 to 131071 max chain 8 used 82724 buckets
The no h shift hash went from 0 to 131071 max chain 8 used 82903 buckets
$ ./hash `expr 64 \* 1024` 192.168.1.1 80 255.255.255.255 49152 131072
Hello, about to run the hash functions with:
size 65536 laddr c0a80101 lport 80 faddr ffffffff fport 49152 to 65535
131072 remotes
The orig hash went from 0 to 65535 max chain 10 used 56637 buckets
The no h shift hash went from 0 to 65535 max chain 10 used 56697 buckets
$ ./hash `expr 32 \* 1024` 192.168.1.1 80 255.255.255.255 49152 131072
Hello, about to run the hash functions with:
size 32768 laddr c0a80101 lport 80 faddr ffffffff fport 49152 to 65535
131072 remotes
The orig hash went from 0 to 32767 max chain 13 used 32149 buckets
The no h shift hash went from 0 to 32767 max chain 14 used 32145 buckets
One thing I did notice is that if both the laddr and faddr are in the same
subnet, the hash is basically bounded by the client port numbers:
$ ./hash `expr 32 \* 1024` 192.168.1.1 80 192.168.1.2 49152 131072
Hello, about to run the hash functions with:
size 32768 laddr c0a80101 lport 80 faddr c0a80102 fport 49152 to 65535
131072 remotes
The orig hash went from 16384 to 32767 max chain 9 used 16383 buckets
The no h shift hash went from 16384 to 32767 max chain 9 used 16383 buckets
$ ./hash `expr 64 \* 1024` 192.168.1.1 80 192.168.1.2 49152 131072
Hello, about to run the hash functions with:
size 65536 laddr c0a80101 lport 80 faddr c0a80102 fport 49152 to 65535
131072 remotes
The orig hash went from 49152 to 65535 max chain 9 used 16383 buckets
The no h shift hash went from 49152 to 65535 max chain 9 used 16383 buckets
$ ./hash `expr 128 \* 1024` 192.168.1.1 80 192.168.1.2 49152 131072
Hello, about to run the hash functions with:
size 131072 laddr c0a80101 lport 80 faddr c0a80102 fport 49152 to 65535
131072 remotes
The orig hash went from 49152 to 65535 max chain 9 used 16383 buckets
The no h shift hash went from 49152 to 65535 max chain 9 used 16383 buckets
The test program isn't quite smart enough to create N IPs in the same subnet
though. There are a couple of other swags in the test program but they didn't
seem to work terribly well.
Anyhow, assuming my test program isn't fubar, are those "h shifts" really
necessary in the hash function?
rick jones
$ cat hash.c
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#include <malloc.h>
#define __u32 uint32_t
#define __u16 uint16_t
int inet_ehashfn_orig(const __u32 laddr, const __u16 lport,
const __u32 faddr, const __u16 fport,
const int ehash_size)
{
int h = (laddr ^ lport) ^ (faddr ^ fport);
h ^= h >> 16;
h ^= h >> 8;
return h & (ehash_size - 1);
}
int inet_ehashfn_shift_port(const __u32 laddr, const __u16 lport,
const __u32 faddr, const __u16 fport,
const int ehash_size)
{
int h = (laddr ^ lport) ^ (faddr ^ (fport << 16));
h ^= h >> 16;
h ^= h >> 8;
return h & (ehash_size - 1);
}
int inet_ehashfn_no_h_shift(const __u32 laddr, const __u16 lport,
const __u32 faddr, const __u16 fport,
const int ehash_size)
{
int h = (laddr ^ lport) ^ (faddr ^ fport);
return h & (ehash_size - 1);
}
int inet_ehashfn_both(const __u32 laddr, const __u16 lport,
const __u32 faddr, const __u16 fport,
const int ehash_size)
{
int h = (laddr ^ lport) ^ (faddr ^ (fport << 16));
return h & (ehash_size - 1);
}
main(int argc, char *argv[]) {
int32_t hash_size;
uint32_t laddr,faddr;
uint16_t lport,fport;
int32_t min_hash,max_hash,max_len,buckets_used;
int32_t i, hash;
int32_t num_entries;
int *orig_hash_table;
int *shift_port_hash_table;
int *no_h_shift_table;
int *both_table;
int *random_ips;
unsigned short *random_ports;
hash_size = atoi(argv[1]);
laddr = inet_addr(argv[2]);
lport = atoi(argv[3]);
faddr = inet_addr(argv[4]);
fport = atoi(argv[5]);
num_entries = atoi(argv[6]);
orig_hash_table = malloc(hash_size * sizeof(int));
shift_port_hash_table = malloc(hash_size * sizeof(int));
no_h_shift_table = malloc(hash_size * sizeof(int));
both_table = malloc(hash_size * sizeof(int));
random_ips = malloc(num_entries * sizeof(int));
random_ports = malloc(num_entries * sizeof(short));
printf("Hello, about to run the hash functions with:\n");
printf("\t size %d laddr %x lport %d faddr %x fport %d to 65535\n",
hash_size, laddr, lport, faddr, fport);
printf("\t %d remotes\n",num_entries);
srand(getpid());
for (i = 0; i< num_entries; i++) {
if (faddr == -1) {
random_ips[i] = random();
random_ports[i] = (random() % (65535 - fport)) + fport;
}
else {
random_ips[i] = faddr;
random_ports[i] = (i % (65535 - fport)) + fport;
}
}
for (i = 0; i < hash_size; i++) {
orig_hash_table[i] = 0;
}
for (i = 0; i < num_entries; i++) {
hash = inet_ehashfn_orig(laddr,
lport,
random_ips[i],
random_ports[i],
hash_size);
orig_hash_table[hash]++;
}
buckets_used = 0;
min_hash = INT_MAX;
max_hash = INT_MIN;
max_len = INT_MIN;
for (i = 0; i < hash_size; i ++) {
if (orig_hash_table[i]) {
buckets_used++;
if (i < min_hash) min_hash = i;
if (i > max_hash) max_hash = i;
if (orig_hash_table[i] > max_len) max_len = orig_hash_table[i];
}
}
printf("The orig hash went from %d to %d max chain %d used %d buckets\n",
min_hash, max_hash, max_len, buckets_used);
for (i = 0; i < hash_size; i++) {
no_h_shift_table[i] = 0;
}
for (i = 0; i < num_entries; i++) {
hash = inet_ehashfn_no_h_shift(laddr,
lport,
random_ips[i],
random_ports[i],
hash_size);
no_h_shift_table[hash]++;
}
buckets_used = 0;
min_hash = INT_MAX;
max_hash = INT_MIN;
max_len = INT_MIN;
for (i = 0; i < hash_size; i ++) {
if (no_h_shift_table[i]) {
buckets_used++;
if (i < min_hash) min_hash = i;
if (i > max_hash) max_hash = i;
if (no_h_shift_table[i] > max_len) max_len = no_h_shift_table[i];
}
}
printf("The no h shift hash went from %d to %d max chain %d used %d
buckets\n",
min_hash, max_hash, max_len, buckets_used);
}
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html