On Mon, Dec 20, 2010 at 2:16 PM, Johan Corveleyn <jcor...@gmail.com> wrote:
> On Mon, Dec 20, 2010 at 11:03 AM, Julian Foad <julian.f...@wandisco.com> 
> wrote:
>> On Mon, 2010-12-20, Johan Corveleyn wrote:
>>> New macro version (increment only, decrement is similar):
>>> [[[
>>> /* For all files in the FILE array, increment the curp pointer.  If a file
>>>  * points before the beginning of file, let it point at the first byte 
>>> again.
>>>  * If the end of the current chunk is reached, read the next chunk in the
>>>  * buffer and point curp to the start of the chunk.  If EOF is reached, set
>>>  * curp equal to endp to indicate EOF. */
>>> #define increment_pointers(all_files, files_len, pool)                      
>>>  \
>>>   do {                                                                      
>>>  \
>>>     int i;                                                                  
>>>  \
>>>                                                                             
>>>  \
>>>     for (i = 0; i < files_len; i++)                                         
>>>  \
>>>     {                                                                       
>>>  \
>>>       if (all_files[i]->chunk == -1) /* indicates before beginning of file 
>>> */\
>>>         all_files[i]->chunk = 0;     /* point to beginning of file again */ 
>>>  \
>>>       else if (all_files[i]->curp != all_files[i]->endp - 1)                
>>>  \
>>>         all_files[i]->curp++;                                               
>>>  \
>>
>> Hi Johan.
>>
>> Here you are having to test for two special cases every time: chunk==-1
>> and curp==endp-1.  I would suggest changing the specification of "before
>> beginning of file" to include the promise that curp==endp-1, so that you
>> don't have to use a separate test here and can instead test for this
>> special case within increment_chunk().
>
> Good idea. I'll try this tonight ...

Ok, this worked pretty well. It's a little bit faster (about 1 second,
which seems right intuitively).

One style question though: should these macros be all upper case
(INCREMENT_POINTERS and DECREMENT_POINTERS)? I guess so.

The code now looks as follows (in attachment a patch relative to
diff-optimizations-bytes branch):

[[[
/* For all files in the FILE array, increment the curp pointer.  If a file
 * points before the beginning of file, let it point at the first byte again.
 * If the end of the current chunk is reached, read the next chunk in the
 * buffer and point curp to the start of the chunk.  If EOF is reached, set
 * curp equal to endp to indicate EOF. */
#define increment_pointers(all_files, files_len, pool)                       \
  do {                                                                       \
    int i;                                                                   \
                                                                             \
    for (i = 0; i < files_len; i++)                                          \
    {                                                                        \
      if (all_files[i]->curp < all_files[i]->endp - 1)                       \
        all_files[i]->curp++;                                                \
      else                                                                   \
        SVN_ERR(increment_chunk(all_files[i], pool));                        \
    }                                                                        \
  } while (0)


/* For all files in the FILE array, decrement the curp pointer.  If the
 * start of a chunk is reached, read the previous chunk in the buffer and
 * point curp to the last byte of the chunk.  If the beginning of a FILE is
 * reached, set chunk to -1 to indicate BOF. */
#define decrement_pointers(all_files, files_len, pool)                       \
  do {                                                                       \
    int i;                                                                   \
                                                                             \
    for (i = 0; i < files_len; i++)                                          \
    {                                                                        \
      if (all_files[i]->curp > all_files[i]->buffer)                         \
        all_files[i]->curp--;                                                \
      else                                                                   \
        SVN_ERR(decrement_chunk(all_files[i], pool));                        \
    }                                                                        \
  } while (0)


static svn_error_t *
increment_chunk(struct file_info *file, apr_pool_t *pool)
{
  apr_off_t length;
  apr_off_t last_chunk = offset_to_chunk(file->size);

  if (file->chunk == -1)
    {
      /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
      file->chunk = 0;
      file->curp = file->buffer;
    }
  else if (file->chunk == last_chunk)
    {
      /* We are at the last chunk. Indicate EOF by setting curp == endp. */
      file->curp = file->endp;
    }
  else
    {
      /* There are still chunks left. Read next chunk and reset pointers. */
      file->chunk++;
      length = file->chunk == last_chunk ?
        offset_in_chunk(file->size) : CHUNK_SIZE;
      SVN_ERR(read_chunk(file->file, file->path, file->buffer,
                         length, chunk_to_offset(file->chunk),
                         pool));
      file->endp = file->buffer + length;
      file->curp = file->buffer;
    }

  return SVN_NO_ERROR;
}


static svn_error_t *
decrement_chunk(struct file_info *file, apr_pool_t *pool)
{
  if (file->chunk == 0)
    {
      /* We are already at the first chunk. Indicate BOF (Beginning Of File)
         by setting chunk = -1 and curp = endp - 1. Both conditions are
         important. They help the increment step to catch the BOF situation
         in an efficient way. */
      file->chunk--;
      file->curp = file->endp - 1;
    }
  else
    {
      /* Read previous chunk and reset pointers. */
      file->chunk--;
      SVN_ERR(read_chunk(file->file, file->path, file->buffer,
                         CHUNK_SIZE, chunk_to_offset(file->chunk),
                         pool));
      file->endp = file->buffer + CHUNK_SIZE;
      file->curp = file->endp - 1;
    }

  return SVN_NO_ERROR;
}
]]]

Cheers,
-- 
Johan
Index: subversion/libsvn_diff/diff_file.c
===================================================================
--- subversion/libsvn_diff/diff_file.c  (revision 1050737)
+++ subversion/libsvn_diff/diff_file.c  (working copy)
@@ -252,76 +252,99 @@ datasource_open(void *baton, svn_diff_datasource_e
  * If the end of the current chunk is reached, read the next chunk in the
  * buffer and point curp to the start of the chunk.  If EOF is reached, set
  * curp equal to endp to indicate EOF. */
-static svn_error_t *
-increment_pointers(struct file_info *file[], int file_len, apr_pool_t *pool)
-{
-  int i;
+#define increment_pointers(all_files, files_len, pool)                       \
+  do {                                                                       \
+    int i;                                                                   \
+                                                                             \
+    for (i = 0; i < files_len; i++)                                          \
+    {                                                                        \
+      if (all_files[i]->curp < all_files[i]->endp - 1)                       \
+        all_files[i]->curp++;                                                \
+      else                                                                   \
+        SVN_ERR(increment_chunk(all_files[i], pool));                        \
+    }                                                                        \
+  } while (0)
 
-  for (i = 0; i < file_len; i++)
-    if (file[i]->chunk == -1) /* indicates before beginning of file */
-      {
-        file[i]->chunk = 0; /* point to beginning of file again */
-      }
-    else if (file[i]->curp == file[i]->endp - 1)
-      {
-        apr_off_t last_chunk = offset_to_chunk(file[i]->size);
-        if (file[i]->chunk == last_chunk)
-          {
-            file[i]->curp++; /* curp == endp signals end of file */
-          }
-        else
-          {
-            apr_off_t length;
-            file[i]->chunk++;
-            length = file[i]->chunk == last_chunk ? 
-              offset_in_chunk(file[i]->size) : CHUNK_SIZE;
-            SVN_ERR(read_chunk(file[i]->file, file[i]->path, file[i]->buffer,
-                               length, chunk_to_offset(file[i]->chunk),
-                               pool));
-            file[i]->endp = file[i]->buffer + length;
-            file[i]->curp = file[i]->buffer;
-          }
-      }
-    else
-      {
-        file[i]->curp++;
-      }
 
-  return SVN_NO_ERROR;
-}
-
 /* For all files in the FILE array, decrement the curp pointer.  If the
  * start of a chunk is reached, read the previous chunk in the buffer and
  * point curp to the last byte of the chunk.  If the beginning of a FILE is
  * reached, set chunk to -1 to indicate BOF. */
+#define decrement_pointers(all_files, files_len, pool)                       \
+  do {                                                                       \
+    int i;                                                                   \
+                                                                             \
+    for (i = 0; i < files_len; i++)                                          \
+    {                                                                        \
+      if (all_files[i]->curp > all_files[i]->buffer)                         \
+        all_files[i]->curp--;                                                \
+      else                                                                   \
+        SVN_ERR(decrement_chunk(all_files[i], pool));                        \
+    }                                                                        \
+  } while (0)
+
+
 static svn_error_t *
-decrement_pointers(struct file_info *file[], int file_len, apr_pool_t *pool)
+increment_chunk(struct file_info *file, apr_pool_t *pool)
 {
-  int i;
+  apr_off_t length;
+  apr_off_t last_chunk = offset_to_chunk(file->size);
 
-  for (i = 0; i < file_len; i++)
-    if (file[i]->curp == file[i]->buffer)
-      {
-        if (file[i]->chunk == 0)
-          file[i]->chunk--; /* chunk == -1 signals beginning of file */
-        else
-          {
-            file[i]->chunk--;
-            SVN_ERR(read_chunk(file[i]->file, file[i]->path, file[i]->buffer,
-                               CHUNK_SIZE, chunk_to_offset(file[i]->chunk),
-                               pool));
-            file[i]->endp = file[i]->buffer + CHUNK_SIZE;
-            file[i]->curp = file[i]->endp - 1;
-          }
-      }
-    else
-      {
-        file[i]->curp--;
-      }
+  if (file->chunk == -1)
+    {
+      /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
+      file->chunk = 0;
+      file->curp = file->buffer;
+    }
+  else if (file->chunk == last_chunk)
+    {
+      /* We are at the last chunk. Indicate EOF by setting curp == endp. */
+      file->curp = file->endp;
+    }
+  else
+    {
+      /* There are still chunks left. Read next chunk and reset pointers. */
+      file->chunk++;
+      length = file->chunk == last_chunk ? 
+        offset_in_chunk(file->size) : CHUNK_SIZE;
+      SVN_ERR(read_chunk(file->file, file->path, file->buffer,
+                         length, chunk_to_offset(file->chunk),
+                         pool));
+      file->endp = file->buffer + length;
+      file->curp = file->buffer;
+    }
+  
+  return SVN_NO_ERROR;
+}
 
+
+static svn_error_t *
+decrement_chunk(struct file_info *file, apr_pool_t *pool)
+{
+  if (file->chunk == 0)
+    {
+      /* We are already at the first chunk. Indicate BOF (Beginning Of File)
+         by setting chunk = -1 and curp = endp - 1. Both conditions are
+         important. They help the increment step to catch the BOF situation
+         in an efficient way. */
+      file->chunk--; 
+      file->curp = file->endp - 1;
+    }
+  else
+    {
+      /* Read previous chunk and reset pointers. */
+      file->chunk--;
+      SVN_ERR(read_chunk(file->file, file->path, file->buffer,
+                         CHUNK_SIZE, chunk_to_offset(file->chunk),
+                         pool));
+      file->endp = file->buffer + CHUNK_SIZE;
+      file->curp = file->endp - 1;
+    }
+  
   return SVN_NO_ERROR;
 }
 
+
 /* Check whether one of the FILEs has its pointers 'before' the beginning of
  * the file (this can happen while scanning backwards). This is the case if
  * one of them has chunk == -1. */

Reply via email to