On Mon, Dec 20, 2010 at 2:16 PM, Johan Corveleyn <jcor...@gmail.com> wrote: > On Mon, Dec 20, 2010 at 11:03 AM, Julian Foad <julian.f...@wandisco.com> > wrote: >> On Mon, 2010-12-20, Johan Corveleyn wrote: >>> New macro version (increment only, decrement is similar): >>> [[[ >>> /* For all files in the FILE array, increment the curp pointer. If a file >>> * points before the beginning of file, let it point at the first byte >>> again. >>> * If the end of the current chunk is reached, read the next chunk in the >>> * buffer and point curp to the start of the chunk. If EOF is reached, set >>> * curp equal to endp to indicate EOF. */ >>> #define increment_pointers(all_files, files_len, pool) >>> \ >>> do { >>> \ >>> int i; >>> \ >>> >>> \ >>> for (i = 0; i < files_len; i++) >>> \ >>> { >>> \ >>> if (all_files[i]->chunk == -1) /* indicates before beginning of file >>> */\ >>> all_files[i]->chunk = 0; /* point to beginning of file again */ >>> \ >>> else if (all_files[i]->curp != all_files[i]->endp - 1) >>> \ >>> all_files[i]->curp++; >>> \ >> >> Hi Johan. >> >> Here you are having to test for two special cases every time: chunk==-1 >> and curp==endp-1. I would suggest changing the specification of "before >> beginning of file" to include the promise that curp==endp-1, so that you >> don't have to use a separate test here and can instead test for this >> special case within increment_chunk(). > > Good idea. I'll try this tonight ...
Ok, this worked pretty well. It's a little bit faster (about 1 second, which seems right intuitively). One style question though: should these macros be all upper case (INCREMENT_POINTERS and DECREMENT_POINTERS)? I guess so. The code now looks as follows (in attachment a patch relative to diff-optimizations-bytes branch): [[[ /* For all files in the FILE array, increment the curp pointer. If a file * points before the beginning of file, let it point at the first byte again. * If the end of the current chunk is reached, read the next chunk in the * buffer and point curp to the start of the chunk. If EOF is reached, set * curp equal to endp to indicate EOF. */ #define increment_pointers(all_files, files_len, pool) \ do { \ int i; \ \ for (i = 0; i < files_len; i++) \ { \ if (all_files[i]->curp < all_files[i]->endp - 1) \ all_files[i]->curp++; \ else \ SVN_ERR(increment_chunk(all_files[i], pool)); \ } \ } while (0) /* For all files in the FILE array, decrement the curp pointer. If the * start of a chunk is reached, read the previous chunk in the buffer and * point curp to the last byte of the chunk. If the beginning of a FILE is * reached, set chunk to -1 to indicate BOF. */ #define decrement_pointers(all_files, files_len, pool) \ do { \ int i; \ \ for (i = 0; i < files_len; i++) \ { \ if (all_files[i]->curp > all_files[i]->buffer) \ all_files[i]->curp--; \ else \ SVN_ERR(decrement_chunk(all_files[i], pool)); \ } \ } while (0) static svn_error_t * increment_chunk(struct file_info *file, apr_pool_t *pool) { apr_off_t length; apr_off_t last_chunk = offset_to_chunk(file->size); if (file->chunk == -1) { /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */ file->chunk = 0; file->curp = file->buffer; } else if (file->chunk == last_chunk) { /* We are at the last chunk. Indicate EOF by setting curp == endp. */ file->curp = file->endp; } else { /* There are still chunks left. Read next chunk and reset pointers. */ file->chunk++; length = file->chunk == last_chunk ? offset_in_chunk(file->size) : CHUNK_SIZE; SVN_ERR(read_chunk(file->file, file->path, file->buffer, length, chunk_to_offset(file->chunk), pool)); file->endp = file->buffer + length; file->curp = file->buffer; } return SVN_NO_ERROR; } static svn_error_t * decrement_chunk(struct file_info *file, apr_pool_t *pool) { if (file->chunk == 0) { /* We are already at the first chunk. Indicate BOF (Beginning Of File) by setting chunk = -1 and curp = endp - 1. Both conditions are important. They help the increment step to catch the BOF situation in an efficient way. */ file->chunk--; file->curp = file->endp - 1; } else { /* Read previous chunk and reset pointers. */ file->chunk--; SVN_ERR(read_chunk(file->file, file->path, file->buffer, CHUNK_SIZE, chunk_to_offset(file->chunk), pool)); file->endp = file->buffer + CHUNK_SIZE; file->curp = file->endp - 1; } return SVN_NO_ERROR; } ]]] Cheers, -- Johan
Index: subversion/libsvn_diff/diff_file.c =================================================================== --- subversion/libsvn_diff/diff_file.c (revision 1050737) +++ subversion/libsvn_diff/diff_file.c (working copy) @@ -252,76 +252,99 @@ datasource_open(void *baton, svn_diff_datasource_e * If the end of the current chunk is reached, read the next chunk in the * buffer and point curp to the start of the chunk. If EOF is reached, set * curp equal to endp to indicate EOF. */ -static svn_error_t * -increment_pointers(struct file_info *file[], int file_len, apr_pool_t *pool) -{ - int i; +#define increment_pointers(all_files, files_len, pool) \ + do { \ + int i; \ + \ + for (i = 0; i < files_len; i++) \ + { \ + if (all_files[i]->curp < all_files[i]->endp - 1) \ + all_files[i]->curp++; \ + else \ + SVN_ERR(increment_chunk(all_files[i], pool)); \ + } \ + } while (0) - for (i = 0; i < file_len; i++) - if (file[i]->chunk == -1) /* indicates before beginning of file */ - { - file[i]->chunk = 0; /* point to beginning of file again */ - } - else if (file[i]->curp == file[i]->endp - 1) - { - apr_off_t last_chunk = offset_to_chunk(file[i]->size); - if (file[i]->chunk == last_chunk) - { - file[i]->curp++; /* curp == endp signals end of file */ - } - else - { - apr_off_t length; - file[i]->chunk++; - length = file[i]->chunk == last_chunk ? - offset_in_chunk(file[i]->size) : CHUNK_SIZE; - SVN_ERR(read_chunk(file[i]->file, file[i]->path, file[i]->buffer, - length, chunk_to_offset(file[i]->chunk), - pool)); - file[i]->endp = file[i]->buffer + length; - file[i]->curp = file[i]->buffer; - } - } - else - { - file[i]->curp++; - } - return SVN_NO_ERROR; -} - /* For all files in the FILE array, decrement the curp pointer. If the * start of a chunk is reached, read the previous chunk in the buffer and * point curp to the last byte of the chunk. If the beginning of a FILE is * reached, set chunk to -1 to indicate BOF. */ +#define decrement_pointers(all_files, files_len, pool) \ + do { \ + int i; \ + \ + for (i = 0; i < files_len; i++) \ + { \ + if (all_files[i]->curp > all_files[i]->buffer) \ + all_files[i]->curp--; \ + else \ + SVN_ERR(decrement_chunk(all_files[i], pool)); \ + } \ + } while (0) + + static svn_error_t * -decrement_pointers(struct file_info *file[], int file_len, apr_pool_t *pool) +increment_chunk(struct file_info *file, apr_pool_t *pool) { - int i; + apr_off_t length; + apr_off_t last_chunk = offset_to_chunk(file->size); - for (i = 0; i < file_len; i++) - if (file[i]->curp == file[i]->buffer) - { - if (file[i]->chunk == 0) - file[i]->chunk--; /* chunk == -1 signals beginning of file */ - else - { - file[i]->chunk--; - SVN_ERR(read_chunk(file[i]->file, file[i]->path, file[i]->buffer, - CHUNK_SIZE, chunk_to_offset(file[i]->chunk), - pool)); - file[i]->endp = file[i]->buffer + CHUNK_SIZE; - file[i]->curp = file[i]->endp - 1; - } - } - else - { - file[i]->curp--; - } + if (file->chunk == -1) + { + /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */ + file->chunk = 0; + file->curp = file->buffer; + } + else if (file->chunk == last_chunk) + { + /* We are at the last chunk. Indicate EOF by setting curp == endp. */ + file->curp = file->endp; + } + else + { + /* There are still chunks left. Read next chunk and reset pointers. */ + file->chunk++; + length = file->chunk == last_chunk ? + offset_in_chunk(file->size) : CHUNK_SIZE; + SVN_ERR(read_chunk(file->file, file->path, file->buffer, + length, chunk_to_offset(file->chunk), + pool)); + file->endp = file->buffer + length; + file->curp = file->buffer; + } + + return SVN_NO_ERROR; +} + +static svn_error_t * +decrement_chunk(struct file_info *file, apr_pool_t *pool) +{ + if (file->chunk == 0) + { + /* We are already at the first chunk. Indicate BOF (Beginning Of File) + by setting chunk = -1 and curp = endp - 1. Both conditions are + important. They help the increment step to catch the BOF situation + in an efficient way. */ + file->chunk--; + file->curp = file->endp - 1; + } + else + { + /* Read previous chunk and reset pointers. */ + file->chunk--; + SVN_ERR(read_chunk(file->file, file->path, file->buffer, + CHUNK_SIZE, chunk_to_offset(file->chunk), + pool)); + file->endp = file->buffer + CHUNK_SIZE; + file->curp = file->endp - 1; + } + return SVN_NO_ERROR; } + /* Check whether one of the FILEs has its pointers 'before' the beginning of * the file (this can happen while scanning backwards). This is the case if * one of them has chunk == -1. */