> -----Original Message----- > From: stef...@apache.org [mailto:stef...@apache.org] > Sent: zondag 13 april 2014 06:41 > To: comm...@subversion.apache.org > Subject: svn commit: r1586922 - in /subversion/trunk/subversion: > include/private/svn_io_private.h include/svn_types.h libsvn_subr/io.c > libsvn_subr/stream.c > > Author: stefan2 > Date: Sun Apr 13 04:40:40 2014 > New Revision: 1586922 > > URL: http://svn.apache.org/r1586922 > Log: > Speed up file / stream comparison, i.e. minimize the processing overhead > for finding the first mismatch. > > The approach is two-sided. Instead of fetching SVN__STREAM_CHUNK_SIZE > from all sources before comparing data, we start with a much lower 4kB > and increase the chunk size until we reach SVN__STREAM_CHUNK_SIZE > while > making sure that all reads are naturally aligned. So, we quickly find > mismatches near the beginning of the file. > > On the other end side, we bump the SVN__STREAM_CHUNK_SIZE to 64kB > which > gives better throughput for longer distances to the first mismatch - > without causing ill effects in APR's memory management. > > * subversion/include/svn_types.h > (SVN__STREAM_CHUNK_SIZE): Bump to 64k and add some documentation > on > the general restrictions for future changes.
This reverts recent changes to reduce server side load... (see config file behavior changes, recently discussed on dev@s.a.o") Please revert this and use a different variable... And I really wonder how you determined that the rest of the patch is going to help *for all users* both client and server side. Bert > > * subversion/include/private/svn_io_private.h > (svn_io__next_chunk_size): New utility function generating the new > read block size sequence. > > * subversion/libsvn_subr/io.c > (svn_io__next_chunk_size): Implement. > (contents_identical_p, > contents_three_identical_p): Let the new utility determine the read > block size. > > * subversion/libsvn_subr/stream.c > (svn_stream_contents_same2): Ditto. > > Modified: > subversion/trunk/subversion/include/private/svn_io_private.h > subversion/trunk/subversion/include/svn_types.h > subversion/trunk/subversion/libsvn_subr/io.c > subversion/trunk/subversion/libsvn_subr/stream.c > > Modified: subversion/trunk/subversion/include/private/svn_io_private.h > URL: > http://svn.apache.org/viewvc/subversion/trunk/subversion/include/private > /svn_io_private.h?rev=1586922&r1=1586921&r2=1586922&view=diff > ========================================================== > ==================== > --- subversion/trunk/subversion/include/private/svn_io_private.h (original) > +++ subversion/trunk/subversion/include/private/svn_io_private.h Sun Apr > 13 04:40:40 2014 > @@ -72,6 +72,14 @@ svn_io__is_finfo_read_only(svn_boolean_t > apr_finfo_t *file_info, > apr_pool_t *pool); > > +/** Given that @a total_read bytes have already been read from a file or > + * stream, return a suggestion for the size of the next block to process. > + * This value will be <= #SVN__STREAM_CHUNK_SIZE. > + * > + * @since New in 1.9. > + */ > +apr_size_t > +svn_io__next_chunk_size(apr_off_t total_read); > > /** Buffer test handler function for a generic stream. @see svn_stream_t > * and svn_stream__is_buffered(). > > Modified: subversion/trunk/subversion/include/svn_types.h > URL: > http://svn.apache.org/viewvc/subversion/trunk/subversion/include/svn_ty > pes.h?rev=1586922&r1=1586921&r2=1586922&view=diff > ========================================================== > ==================== > --- subversion/trunk/subversion/include/svn_types.h (original) > +++ subversion/trunk/subversion/include/svn_types.h Sun Apr 13 04:40:40 > 2014 > @@ -1142,8 +1142,12 @@ typedef svn_error_t *(*svn_commit_callba > * > * NOTE: This is an internal macro, put here for convenience. > * No public API may depend on the particular value of this macro. > + * > + * NOTE: The implementation assumes that this is a power of two >= 4k. > + * Moreover, it should be less than 80kB to prevent memory > + * fragmentation in the APR memory allocator. > */ > -#define SVN__STREAM_CHUNK_SIZE 16384 > +#define SVN__STREAM_CHUNK_SIZE 0x10000 > #endif > > /** The maximum amount we can ever hold in memory. */ > > Modified: subversion/trunk/subversion/libsvn_subr/io.c > URL: > http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/io. > c?rev=1586922&r1=1586921&r2=1586922&view=diff > ========================================================== > ==================== > --- subversion/trunk/subversion/libsvn_subr/io.c (original) > +++ subversion/trunk/subversion/libsvn_subr/io.c Sun Apr 13 04:40:40 2014 > @@ -63,6 +63,7 @@ > #include "svn_config.h" > #include "svn_private_config.h" > #include "svn_ctype.h" > +#include "svn_sorts.h" > > #include "private/svn_atomic.h" > #include "private/svn_io_private.h" > @@ -4501,6 +4502,17 @@ svn_io_read_version_file(int *version, > } > > > +apr_size_t > +svn_io__next_chunk_size(apr_off_t total_read) > +{ > + /* Started with total_read===, this will generate a sequence ensuring > + aligned access with increasing block size up to > SVN__STREAM_CHUNK_SIZE: > + 4k@ offset 0, 4k@ offset 4k, 8k@ offset 8k, 16k@ offset 16k etc. > + */ > + return total_read ? (apr_size_t)MIN(total_read, > SVN__STREAM_CHUNK_SIZE) > + : (apr_size_t)4096; > +} > + > > /* Do a byte-for-byte comparison of FILE1 and FILE2. */ > static svn_error_t * > @@ -4517,6 +4529,7 @@ contents_identical_p(svn_boolean_t *iden > apr_file_t *file2_h; > svn_boolean_t eof1 = FALSE; > svn_boolean_t eof2 = FALSE; > + apr_off_t total_read = 0; > > SVN_ERR(svn_io_file_open(&file1_h, file1, APR_READ, APR_OS_DEFAULT, > pool)); > @@ -4532,14 +4545,17 @@ contents_identical_p(svn_boolean_t *iden > *identical_p = TRUE; /* assume TRUE, until disproved below */ > while (!err && !eof1 && !eof2) > { > + apr_size_t to_read = svn_io__next_chunk_size(total_read); > + total_read += to_read; > + > err = svn_io_file_read_full2(file1_h, buf1, > - SVN__STREAM_CHUNK_SIZE, &bytes_read1, > + to_read, &bytes_read1, > &eof1, pool); > if (err) > break; > > err = svn_io_file_read_full2(file2_h, buf2, > - SVN__STREAM_CHUNK_SIZE, &bytes_read2, > + to_read, &bytes_read2, > &eof2, pool); > if (err) > break; > @@ -4585,6 +4601,7 @@ contents_three_identical_p(svn_boolean_t > svn_boolean_t eof1 = FALSE; > svn_boolean_t eof2 = FALSE; > svn_boolean_t eof3 = FALSE; > + apr_off_t total_read = 0; > > SVN_ERR(svn_io_file_open(&file1_h, file1, APR_READ, APR_OS_DEFAULT, > scratch_pool)); > @@ -4621,6 +4638,9 @@ contents_three_identical_p(svn_boolean_t > apr_size_t bytes_read1, bytes_read2, bytes_read3; > svn_boolean_t read_1, read_2, read_3; > > + apr_size_t to_read = svn_io__next_chunk_size(total_read); > + total_read += to_read; > + > read_1 = read_2 = read_3 = FALSE; > > /* As long as a file is not at the end yet, and it is still > @@ -4628,8 +4648,8 @@ contents_three_identical_p(svn_boolean_t > if (!eof1 && (*identical_p12 || *identical_p13)) > { > err = svn_io_file_read_full2(file1_h, buf1, > - SVN__STREAM_CHUNK_SIZE, &bytes_read1, > - &eof1, scratch_pool); > + to_read, &bytes_read1, > + &eof1, scratch_pool); > if (err) > break; > read_1 = TRUE; > @@ -4638,8 +4658,8 @@ contents_three_identical_p(svn_boolean_t > if (!eof2 && (*identical_p12 || *identical_p23)) > { > err = svn_io_file_read_full2(file2_h, buf2, > - SVN__STREAM_CHUNK_SIZE, &bytes_read2, > - &eof2, scratch_pool); > + to_read, &bytes_read2, > + &eof2, scratch_pool); > if (err) > break; > read_2 = TRUE; > @@ -4648,8 +4668,8 @@ contents_three_identical_p(svn_boolean_t > if (!eof3 && (*identical_p13 || *identical_p23)) > { > err = svn_io_file_read_full2(file3_h, buf3, > - SVN__STREAM_CHUNK_SIZE, &bytes_read3, > - &eof3, scratch_pool); > + to_read, &bytes_read3, > + &eof3, scratch_pool); > if (err) > break; > read_3 = TRUE; > > Modified: subversion/trunk/subversion/libsvn_subr/stream.c > URL: > http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/str > eam.c?rev=1586922&r1=1586921&r2=1586922&view=diff > ========================================================== > ==================== > --- subversion/trunk/subversion/libsvn_subr/stream.c (original) > +++ subversion/trunk/subversion/libsvn_subr/stream.c Sun Apr 13 04:40:40 > 2014 > @@ -585,14 +585,20 @@ svn_stream_contents_same2(svn_boolean_t > { > char *buf1 = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE); > char *buf2 = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE); > - apr_size_t bytes_read1 = SVN__STREAM_CHUNK_SIZE; > - apr_size_t bytes_read2 = SVN__STREAM_CHUNK_SIZE; > + apr_size_t to_read = 0; > + apr_size_t bytes_read1 = 0; > + apr_size_t bytes_read2 = 0; > + apr_off_t total_read = 0; > svn_error_t *err = NULL; > > *same = TRUE; /* assume TRUE, until disproved below */ > - while (bytes_read1 == SVN__STREAM_CHUNK_SIZE > - && bytes_read2 == SVN__STREAM_CHUNK_SIZE) > + while (bytes_read1 == to_read && bytes_read2 == to_read) > { > + to_read = svn_io__next_chunk_size(total_read); > + bytes_read1 = to_read; > + bytes_read2 = to_read; > + total_read += to_read; > + > err = svn_stream_read_full(stream1, buf1, &bytes_read1); > if (err) > break; >