sw/inc/densebplustree.cxx | 79 ++++++++++++++++++++++--------------- sw/inc/densebplustree.hxx | 4 - sw/qa/core/densebplustree-test.cxx | 27 ++++++++++++ 3 files changed, 76 insertions(+), 34 deletions(-)
New commits: commit a16616044711b2af07284edbb6b49facccc5fa20 Author: Jan Holesovsky <ke...@suse.cz> Date: Thu Jan 31 21:06:42 2013 +0100 Dense B+ tree: Don't initilize NodeWithIndex in the default constructor. It is not necessary, and valgrind suggests it takes us some time. Indeed: BigPtrArray - append: 45 msec BigPtrArray - insert at front: 6580 msec BigPtrArray - insert in the middle: 3081 msec DenseBPlusTree - append: 47 msec DenseBPlusTree - insert at front: 167 msec DenseBPlusTree - insert in the middle: 87 msec I am happy now, I do not think I can do it any better. Change-Id: If7c6882daf712af37db4b43c13ab6aedb0086da0 diff --git a/sw/inc/densebplustree.hxx b/sw/inc/densebplustree.hxx index be3252a..90cb400 100644 --- a/sw/inc/densebplustree.hxx +++ b/sw/inc/densebplustree.hxx @@ -96,7 +96,7 @@ private: DBPTreeNode< Key, Value > *pNode; Key nIndex; - NodeWithIndex() : pNode( NULL ), nIndex( 0 ) {} + NodeWithIndex() {} NodeWithIndex( DBPTreeNode< Key, Value > *p, Key n ) : pNode( p ), nIndex( n ) {} }; commit 2bcd2598ad45738079889752577fa1cb6f7e0c93 Author: Jan Holesovsky <ke...@suse.cz> Date: Thu Jan 31 20:23:20 2013 +0100 Dense B+ tree: Use binary search when searching in the Keys inside nodes. Also started measuring one more case - inserting in the middle. Before this change, it took about 100 msec. BigPtrArray - append: 45 msec BigPtrArray - insert at front: 6510 msec BigPtrArray - insert in the middle: 3043 msec DenseBPlusTree - append: 48 msec DenseBPlusTree - insert at front: 167 msec DenseBPlusTree - insert in the middle: 90 msec Change-Id: I2cc0a151b26931d90c8915a6ba879cf0e386b3b2 diff --git a/sw/inc/densebplustree.cxx b/sw/inc/densebplustree.cxx index a186546..4ea3892 100644 --- a/sw/inc/densebplustree.cxx +++ b/sw/inc/densebplustree.cxx @@ -289,13 +289,28 @@ typename DenseBPlusTree< Key, Value >::NodeWithIndex DenseBPlusTree< Key, Value DBPTreeNode< Key, Value > *pNode = m_pRoot; rParentsLength = 0; - // recursion is nice for the alg. description, but for implementation, we - // want to unwind it + // traverse from the root to the leaves while ( pNode->m_bIsInternal ) { - int i = 0; - while ( i < pNode->m_nUsed - 1 && pNode->m_pKeys[ i ] <= nPos ) - ++i; + int i; + if ( pNode->m_nUsed < 2 || nPos < pNode->m_pKeys[ 0 ] ) // nPos too small, we continue leftmost + i = 0; + else if ( pNode->m_pKeys[ pNode->m_nUsed - 2 ] <= nPos ) // nPos is too big, continue rightmost + i = pNode->m_nUsed - 1; + else + { + // binary search, the values are ordered + i = 1; + int max = pNode->m_nUsed - 2; + while ( i < max ) + { + int pivot = i + ( max - i ) / 2; + if ( pNode->m_pKeys[ pivot ] <= nPos ) + i = pivot + 1; + else + max = pivot; + } + } // m_pKeys in children are relative if ( i > 0 ) diff --git a/sw/qa/core/densebplustree-test.cxx b/sw/qa/core/densebplustree-test.cxx index 0ea0d8e..38e9501 100644 --- a/sw/qa/core/densebplustree-test.cxx +++ b/sw/qa/core/densebplustree-test.cxx @@ -70,6 +70,13 @@ int main( int, char** ) print_time( "BigPtrArray - insert at front", tv_before, tv_after ); gettimeofday( &tv_before, NULL ); + BigPtrArray bparr3; + for ( int i = 0; i < 1000000; i++ ) + bparr3.Insert( new BigPtrEntryMock(i), bparr3.Count() / 2 ); + gettimeofday( &tv_after, NULL ); + print_time( "BigPtrArray - insert in the middle", tv_before, tv_after ); + + gettimeofday( &tv_before, NULL ); DenseBPlusTree< int, BigPtrEntryMock* > aTest; for ( int i = 0; i < 1000000; ++i ) aTest.Insert( new BigPtrEntryMock(i), aTest.Count() ); @@ -82,6 +89,13 @@ int main( int, char** ) aTest2.Insert( new BigPtrEntryMock(i), 0 ); gettimeofday( &tv_after, NULL ); print_time( "DenseBPlusTree - insert at front", tv_before, tv_after ); + + gettimeofday( &tv_before, NULL ); + DenseBPlusTree< int, BigPtrEntryMock* > aTest3; + for ( int i = 0; i < 1000000; ++i ) + aTest3.Insert( new BigPtrEntryMock(i), aTest3.Count() / 2 ); + gettimeofday( &tv_after, NULL ); + print_time( "DenseBPlusTree - insert in the middle", tv_before, tv_after ); #endif #if 0 commit 40605ecc1faa02939e5e03abecdadbaea6afbe12 Author: Jan Holesovsky <ke...@suse.cz> Date: Thu Jan 31 18:59:22 2013 +0100 Dense B+ tree: Avoid some unnecessary (and actually wrong) memcpy's. No performance impact - too rare operation. Change-Id: I0b7095b7b369753ed54c3a72f52f8d9cf95f26e7 diff --git a/sw/inc/densebplustree.cxx b/sw/inc/densebplustree.cxx index cebf95d..a186546 100644 --- a/sw/inc/densebplustree.cxx +++ b/sw/inc/densebplustree.cxx @@ -357,31 +357,23 @@ DBPTreeNode< Key, Value >* DenseBPlusTree< Key, Value >::splitNode( DBPTreeNode< { aParent.pNode->insert( aParent.nIndex + 1, offset, pNewNode ); - memcpy( pNewParents, pParents, sizeof( pParents[ 0 ] ) * nParentsLength ); + memcpy( pNewParents, pParents, sizeof( pParents[ 0 ] ) * ( nParentsLength - 1 ) ); rNewParentsLength = nParentsLength; pNewParents[ rNewParentsLength - 1 ] = aParent; } else { - NodeWithIndex pRetParents[ m_nDepth ]; - int nRetParentsLength; - DBPTreeNode< Key, Value > *pNewParent = splitNode( aParent.pNode, bIsAppend, pParents, nParentsLength - 1, pRetParents, nRetParentsLength ); + DBPTreeNode< Key, Value > *pNewParent = splitNode( aParent.pNode, bIsAppend, pParents, nParentsLength - 1, pNewParents, rNewParentsLength ); if ( aParent.nIndex <= aParent.pNode->m_nUsed ) { aParent.pNode->insert( aParent.nIndex + 1, offset, pNewNode ); - - memcpy( pNewParents, pParents, sizeof( pParents[ 0 ] ) * nParentsLength ); - rNewParentsLength = nParentsLength; - pNewParents[ rNewParentsLength - 1 ] = aParent; + pNewParents[ rNewParentsLength++ ] = aParent; } else { pNewParent->insert( aParent.nIndex - aParent.pNode->m_nUsed + 1, offset, pNewNode ); - - memcpy( pNewParents, pParents, sizeof( pParents[ 0 ] ) * nParentsLength ); - rNewParentsLength = nParentsLength; - pNewParents[ rNewParentsLength - 1 ] = NodeWithIndex( pNewParent, aParent.nIndex - aParent.pNode->m_nUsed + 1 ); + pNewParents[ rNewParentsLength++ ] = NodeWithIndex( pNewParent, aParent.nIndex - aParent.pNode->m_nUsed + 1 ); } } } diff --git a/sw/qa/core/densebplustree-test.cxx b/sw/qa/core/densebplustree-test.cxx index e270d09..0ea0d8e 100644 --- a/sw/qa/core/densebplustree-test.cxx +++ b/sw/qa/core/densebplustree-test.cxx @@ -52,6 +52,7 @@ void print_time( const char* msg, const struct timeval &before, const struct tim int main( int, char** ) { +#if 1 struct timeval tv_before, tv_after; gettimeofday( &tv_before, NULL ); @@ -81,6 +82,18 @@ int main( int, char** ) aTest2.Insert( new BigPtrEntryMock(i), 0 ); gettimeofday( &tv_after, NULL ); print_time( "DenseBPlusTree - insert at front", tv_before, tv_after ); +#endif + +#if 0 + DenseBPlusTree< int, int > aNumbers; + aNumbers.Insert( 20, 0 ); + aNumbers.Insert( 10, 0 ); + aNumbers.Insert( 30, 2 ); + aNumbers.Insert( 1000, 3 ); + for ( int i = 0; i < 100; ++i ) + aNumbers.Insert( i, 3 );//aNumbers.Count() ); + aNumbers.dump(); +#endif return 0; } commit 509d83a3f7ddbff57ceb11a91f5aa210d14f079a Author: Jan Holesovsky <ke...@suse.cz> Date: Thu Jan 31 18:40:20 2013 +0100 Dense B+ tree: Produce more packed trees with append()s. Normally we split the nodes half/half; but in the append() case, it is much more probable that the next operation will be an append() too, so let most of the data (children pointers or values) in the original node, and transfer just 2 to the newly created one (so that there is still space for appends in the middle without having to grow the tree). BigPtrArray - append: 46 msec BigPtrArray - insert at front: 6661 msec DenseBPlusTree - append: 50 msec DenseBPlusTree - insert at front: 169 msec Change-Id: I7e492abad8238d40e79607e354b0bdee9bd386a4 diff --git a/sw/inc/densebplustree.cxx b/sw/inc/densebplustree.cxx index c936ad8..cebf95d 100644 --- a/sw/inc/densebplustree.cxx +++ b/sw/inc/densebplustree.cxx @@ -106,38 +106,46 @@ struct DBPTreeNode /** Split node, and make the original one smaller. @return relative key shift of the node. + @param bIsAppend in case we are appending, we deliberately keep most of the data untouched, creating as empty node as possible */ - int copyFromSplitNode( DBPTreeNode *pNode ) + int copyFromSplitNode( DBPTreeNode *pNode, bool bIsAppend ) { + assert( sOrder > 2 ); assert( pNode->m_nUsed == sOrder ); - const int sKeep = sOrder / 2; + // we optimize for the case of appending + // it is expected that we first create the entire structure (so want + // it to be dense from the space point of view), but when performing + // later, the distribution has to be 'fair', because the access is + // more or less random + int nHowMuchKeep = bIsAppend? sOrder - 2: sOrder / 2; + int offset = 0; m_bIsInternal = pNode->m_bIsInternal; if ( m_bIsInternal ) { - for ( int i = sKeep; i < pNode->m_nUsed; ++i ) - m_pChildren[ i - sKeep ] = pNode->m_pChildren[ i ]; + for ( int i = nHowMuchKeep; i < pNode->m_nUsed; ++i ) + m_pChildren[ i - nHowMuchKeep ] = pNode->m_pChildren[ i ]; // we have to 'relativize' the keys - offset = pNode->m_pKeys[ sKeep - 1 ]; - for ( int i = sKeep; i < pNode->m_nUsed - 1; ++i ) - m_pKeys[ i - sKeep ] = pNode->m_pKeys[ i ] - offset; + offset = pNode->m_pKeys[ nHowMuchKeep - 1 ]; + for ( int i = nHowMuchKeep; i < pNode->m_nUsed - 1; ++i ) + m_pKeys[ i - nHowMuchKeep ] = pNode->m_pKeys[ i ] - offset; } else { - for ( int i = sKeep; i < pNode->m_nUsed; ++i ) - m_pValues[ i - sKeep ] = pNode->m_pValues[ i ]; + for ( int i = nHowMuchKeep; i < pNode->m_nUsed; ++i ) + m_pValues[ i - nHowMuchKeep ] = pNode->m_pValues[ i ]; - offset = sKeep; + offset = nHowMuchKeep; m_pNext = pNode->m_pNext; pNode->m_pNext = this; } - m_nUsed = pNode->m_nUsed - sKeep; - pNode->m_nUsed = sKeep; + m_nUsed = pNode->m_nUsed - nHowMuchKeep; + pNode->m_nUsed = nHowMuchKeep; return offset; } @@ -179,7 +187,7 @@ void DenseBPlusTree< Key, Value >::Insert( const Value& rValue, Key nPos ) { NodeWithIndex pNewParents[ m_nDepth ]; int nNewParentsLength; - DBPTreeNode< Key, Value > *pNewLeaf = splitNode( aLeaf.pNode, pParents, nParentsLength, pNewParents, nNewParentsLength ); + DBPTreeNode< Key, Value > *pNewLeaf = splitNode( aLeaf.pNode, nPos == m_nCount, pParents, nParentsLength, pNewParents, nNewParentsLength ); if ( aLeaf.nIndex <= aLeaf.pNode->m_nUsed ) aLeaf.pNode->insert( aLeaf.nIndex, rValue ); @@ -314,12 +322,12 @@ void DenseBPlusTree< Key, Value >::shiftNodes( const NodeWithIndex pParents[], i } template < class Key, class Value > -DBPTreeNode< Key, Value >* DenseBPlusTree< Key, Value >::splitNode( DBPTreeNode< Key, Value > *pNode, const NodeWithIndex pParents[], int nParentsLength, NodeWithIndex pNewParents[], int &rNewParentsLength ) +DBPTreeNode< Key, Value >* DenseBPlusTree< Key, Value >::splitNode( DBPTreeNode< Key, Value > *pNode, bool bIsAppend, const NodeWithIndex pParents[], int nParentsLength, NodeWithIndex pNewParents[], int &rNewParentsLength ) { assert( pNode->m_nUsed == sOrder ); DBPTreeNode< Key, Value > *pNewNode = new DBPTreeNode< Key, Value >; - int offset = pNewNode->copyFromSplitNode( pNode ); + int offset = pNewNode->copyFromSplitNode( pNode, bIsAppend ); // update the last leaf if necessary if ( pNode == m_pLastLeaf ) @@ -357,7 +365,7 @@ DBPTreeNode< Key, Value >* DenseBPlusTree< Key, Value >::splitNode( DBPTreeNode< { NodeWithIndex pRetParents[ m_nDepth ]; int nRetParentsLength; - DBPTreeNode< Key, Value > *pNewParent = splitNode( aParent.pNode, pParents, nParentsLength - 1, pRetParents, nRetParentsLength ); + DBPTreeNode< Key, Value > *pNewParent = splitNode( aParent.pNode, bIsAppend, pParents, nParentsLength - 1, pRetParents, nRetParentsLength ); if ( aParent.nIndex <= aParent.pNode->m_nUsed ) { diff --git a/sw/inc/densebplustree.hxx b/sw/inc/densebplustree.hxx index 027e2d8..be3252a 100644 --- a/sw/inc/densebplustree.hxx +++ b/sw/inc/densebplustree.hxx @@ -123,7 +123,7 @@ private: void shiftNodes( const NodeWithIndex pParents[], int nParentsLength, int nHowMuch ); /// Split the node, and adjust parents accordingly. - DBPTreeNode< Key, Value >* splitNode( DBPTreeNode< Key, Value > *pNode, const NodeWithIndex pParents[], int nParentsLength, NodeWithIndex *pNewParents, int &rNewParentsLength ); + DBPTreeNode< Key, Value >* splitNode( DBPTreeNode< Key, Value > *pNode, bool bIsAppend, const NodeWithIndex pParents[], int nParentsLength, NodeWithIndex *pNewParents, int &rNewParentsLength ); }; // include the implementation _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits