Here's the code, but I fail see to the point if the goal is compare IntrusiveHashMap to some theoretical hash map rather than the actual alternative hash maps that have been proposed. I thought your specific recommendation was to use std::unordered_map instead, so that's what I tested against. Also, wasn't your original objection to having to write and maintain our own hash containers? But now we're going to write our own "better designed" ones? I think this is the point at which I give up, where I have to beat unspecified alternate implementations and meet unspecified performance goals.
1. TEST_CASE("IntrusiveHashMapPerf", "[IntrusiveHashMap][performance]") 2. { 3. // Force these so I can easily change the set of tests. 4. auto start = std::chrono::high_resolution_clock::now(); 5. auto delta = std::chrono::high_resolution_clock::now() - start; 6. constexpr int N_LOOPS = 1000; 7. constexpr int N = 1009; // prime > N_LOOPS 8. 9. std::vector<const char *> strings; 10. 11. std::uniform_int_distribution<short> char_gen {'a', 'z'}; 12. std::uniform_int_distribution<short> length_gen { 20, 40 }; 13. std::minstd_rand randu; 14. 15. Map ihm; 16. std::unordered_map<std::string, Thing> um; 17. 18. strings.reserve(N); 19. for ( int i = 0 ; i < N ; ++i ) { 20. auto len = length_gen(randu); 21. char *s = static_cast<char *>(malloc(len + 1)); 22. for (decltype(len) j = 0; j < len; ++j) { 23. s[j] = char_gen(randu); 24. } 25. s[len] = 0; 26. strings.push_back(s); 27. } 28. 29. // Fill the IntrusiveHashMap 30. start = std::chrono::high_resolution_clock::now(); 31. for (int i = 0; i < N ; ++i) { 32. auto thing = new Thing{strings[i], i}; 33. ihm.insert(thing); 34. } 35. delta = std::chrono::high_resolution_clock::now() - start; 36. std::cout << "IHM populate " << delta.count() << "ns or " << std::chrono::duration_cast<std::chrono::milliseconds>(delta).count() 37. << "ms" << std::endl; 38. 39. // Do some lookups 40. start = std::chrono::high_resolution_clock::now(); 41. for ( int i = 0 ; i < N_LOOPS ; ++i ) { 42. for (int j = 0, idx = i; j < N; ++j, idx = (idx + i) % N) { 43. ihm.find(strings[idx]); 44. } 45. } 46. delta = std::chrono::high_resolution_clock::now() - start; 47. std::cout << "IHM find " << delta.count() << "ns or " << std::chrono::duration_cast<std::chrono::milliseconds>(delta).count() 48. << "ms" << std::endl; 49. 50. // Fill the std::unordered_map 51. start = std::chrono::high_resolution_clock::now(); 52. for (int i = 0; i < N ; ++i) { 53. um.emplace(strings[i], Thing{strings[i], i}); 54. } 55. delta = std::chrono::high_resolution_clock::now() - start; 56. std::cout << "UM populate " << delta.count() << "ns or " << std::chrono::duration_cast<std::chrono::milliseconds>(delta).count() 57. << "ms" << std::endl; 58. 59. // Do some lookups 60. start = std::chrono::high_resolution_clock::now(); 61. for ( int i = 0 ; i < N_LOOPS ; ++i ) { 62. for (int j = 0, idx = i; j < N; ++j, idx = (idx + i) % N) { 63. um.find(strings[idx]); 64. } 65. } 66. delta = std::chrono::high_resolution_clock::now() - start; 67. std::cout << "UM find " << delta.count() << "ns or " << std::chrono::duration_cast<std::chrono::milliseconds>(delta).count() 68. << "ms" << std::endl; 69. 70. // Test double indexing 71. 72. std::vector<Thing2> things; 73. things.reserve(N); 74. for ( int i = 0 ; i < N ; ++i ) { 75. things.emplace_back(strings[i], i); 76. } 77. 78. std::unordered_map<std::string, Thing2*> um_by_name; 79. std::unordered_map<int, Thing2*> um_by_n; 80. 81. IntrusiveHashMap<Thing2::LinkByName> ihm_by_name; 82. IntrusiveHashMap<Thing2::LinkByN> ihm_by_n; 83. 84. // Fill the IntrusiveHashMap 85. start = std::chrono::high_resolution_clock::now(); 86. for (int i = 0; i < N ; ++i) { 87. ihm_by_name.insert(&things[i]); 88. ihm_by_n.insert(&things[i]); 89. } 90. delta = std::chrono::high_resolution_clock::now() - start; 91. std::cout << "IHM2 populate " << delta.count() << "ns or " << std::chrono::duration_cast<std::chrono::milliseconds>(delta).count() 92. << "ms" << std::endl; 93. 94. // Do some lookups 95. start = std::chrono::high_resolution_clock::now(); 96. for ( int i = 0 ; i < N_LOOPS ; ++i ) { 97. for (int j = 0, idx = i; j < N; ++j, idx = (idx + i) % N) { 98. Thing2 * thing = ihm_by_n.find(idx); 99. ihm_by_name.iterator_for(thing); 100. } 101. } 102. delta = std::chrono::high_resolution_clock::now() - start; 103. std::cout << "IHM2 find " << delta.count() << "ns or " << std::chrono::duration_cast<std::chrono::milliseconds>(delta).count() 104. << "ms" << std::endl; 105. 106. // Fill the std::unordered_map 107. start = std::chrono::high_resolution_clock::now(); 108. for (int i = 0; i < N ; ++i) { 109. um_by_n.emplace(things[i]._n, &things[i]); 110. um_by_name.emplace(things[i]._payload, &things[i]); 111. } 112. delta = std::chrono::high_resolution_clock::now() - start; 113. std::cout << "UM2 populate " << delta.count() << "ns or " << std::chrono::duration_cast<std::chrono::milliseconds>(delta).count() 114. << "ms" << std::endl; 115. 116. // Do some lookups 117. start = std::chrono::high_resolution_clock::now(); 118. for ( int i = 0 ; i < N_LOOPS ; ++i ) { 119. for (int j = 0, idx = i; j < N; ++j, idx = (idx + i) % N) { 120. um_by_name.find(strings[idx]); 121. um_by_n.find(idx); 122. } 123. } 124. delta = std::chrono::high_resolution_clock::now() - start; 125. std::cout << "UM2 find " << delta.count() << "ns or " << std::chrono::duration_cast<std::chrono::milliseconds>(delta).count() 126. << "ms" << std::endl; 127. 128. }; <https://apaste.info/> I think you'll find, though, that even the best designed hash map must still compute a hash to do a lookup and that is going to take more computation than IntrusiveHashMap::iterator_for. If you mean it would slower than that but still fast enough to not be a performance issue, perhaps you could elucidate what your threshold for "not a performance hit" is. On Fri, Aug 24, 2018 at 12:46 PM Bryan Call <bc...@apache.org> wrote: > I stated that a better designed hash table (not a chained hash table) > would have much better performance and that a double find would not be a > performance hit. The benchmark was done with two chained hash tables, > which are known not to have great performance. > > Can you post the source code of the benchmark? > >