Hi,

Here is a second iteration of the patch. It now passes make check.

Differences from the previous version are:
- Support for \r eol-style (\n and \r\n were already ok).
- The number of prefix_lines is now passed to svn_diff__lcs, so it can
use that value to set the position offset of the "EOF" marker
correctly, in case one of the two files has become empty after skipping
the prefix. This fixes the crashes in blame_tests.py 2 and 7.

The patch is pretty big, so please let me know if I should split it up
to make it more reviewable (I could easily split it up between the
prefix-finding and the suffix-finding, at the cost of losing the
overview of the entire algorithm).

Still to do:
- Think about why results are sometimes different (because of the
eliminated suffix, the LCS can sometimes be slightly different), and
what can be done about it.
- Generalize for more than 2 datasources (for diff3 and diff4).
- Rev svn_diff_fns_t and maybe other stuff I've changed in the public API.
- Add support for -x-b, -x-w, and -x--ignore-eol-style options.

But I'd like to do those things in follow-up patches, after this one
has been reviewed and digested a little bit. So at this point: review,
feedback, ... very welcome :-).

Log message:
[[[
Make svn_diff_diff skip identical prefix and suffix to make diff and blame
faster.

* subversion/include/svn_diff.h
  (svn_diff_fns_t): Added new function types datasources_open and
   get_prefix_lines to the vtable.

* subversion/libsvn_diff/diff_memory.c
  (datasources_open): New function (does nothing).
  (get_prefix_lines): New function (does nothing).
  (svn_diff__mem_vtable): Added new functions datasources_open and
   get_prefix_lines.

* subversion/libsvn_diff/diff_file.c
  (svn_diff__file_baton_t): Added members prefix_lines, suffix_start_chunk[4]
   and suffix_offset_in_chunk[4].
  (increment_pointer_or_chunk, decrement_pointer_or_chunk): New functions.
  (find_identical_prefix, find_identical_suffix): New functions.
  (datasources_open): New function, to open both datasources and find their
   identical prefix and suffix.
  (get_prefix_lines): New function.
  (datasource_get_next_token): Stop at start of identical suffix.
  (svn_diff__file_vtable): Added new functions datasources_open and
   get_prefix_lines.

* subversion/libsvn_diff/diff.h
  (svn_diff__get_tokens): Added argument "datasource_opened", to indicate that
   the datasource was already opened.

* subversion/libsvn_diff/token.c
  (svn_diff__get_tokens): Added argument "datasource_opened". Only open the
   datasource if datasource_opened is FALSE. Set the starting offset of the
   position list to the number of prefix lines.

* subversion/libsvn_diff/lcs.c
  (svn_diff__lcs): Added argument "prefix_lines". Use this to correctly set
   the offset of the sentinel position for EOF, even if one of the files
   became empty after eliminating the identical prefix.

* subversion/libsvn_diff/diff.c
  (svn_diff__diff): Add a chunk of "common" diff for identical prefix.
  (svn_diff_diff): Use new function datasources_open, to open original and
   modified at once, and find their identical prefix and suffix. Pass
   prefix_lines to svn_diff__lcs and to svn_diff__diff.

* subversion/libsvn_diff/diff3.c
  (svn_diff_diff3): Pass datasource_opened = FALSE to svn_diff__get_tokens.
   Pass prefix_lines = 0 to svn_diff__lcs.

* subversion/libsvn_diff/diff4.c
  (svn_diff_diff4): Pass datasource_opened = FALSE to svn_diff__get_tokens.
   Pass prefix_lines = 0 to svn_diff__lcs.
]]]

Cheers,
-- 
Johan
Index: subversion/include/svn_diff.h
===================================================================
--- subversion/include/svn_diff.h       (revision 1003326)
+++ subversion/include/svn_diff.h       (working copy)
@@ -112,6 +112,11 @@ typedef struct svn_diff_fns_t
   svn_error_t *(*datasource_open)(void *diff_baton,
                                   svn_diff_datasource_e datasource);
 
+  /** Open the datasources of type @a datasource0 and @a datasource1. */
+  svn_error_t *(*datasources_open)(void *diff_baton, apr_off_t *prefix_lines,
+                                   svn_diff_datasource_e datasource0,
+                                   svn_diff_datasource_e datasource1);
+
   /** Close the datasource of type @a datasource. */
   svn_error_t *(*datasource_close)(void *diff_baton,
                                    svn_diff_datasource_e datasource);
@@ -124,6 +129,9 @@ typedef struct svn_diff_fns_t
                                             void *diff_baton,
                                             svn_diff_datasource_e datasource);
 
+  /** Get the number of identical prefix lines from the @a diff_baton. */
+  apr_off_t (*get_prefix_lines)(void *diff_baton);
+
   /** A function for ordering the tokens, resembling 'strcmp' in functionality.
    * @a compare should contain the return value of the comparison:
    * If @a ltoken and @a rtoken are "equal", return 0.  If @a ltoken is
Index: subversion/libsvn_diff/diff_file.c
===================================================================
--- subversion/libsvn_diff/diff_file.c  (revision 1003326)
+++ subversion/libsvn_diff/diff_file.c  (working copy)
@@ -77,6 +77,10 @@ typedef struct svn_diff__file_baton_t
   char *curp[4];
   char *endp[4];
 
+  apr_off_t prefix_lines;
+  int suffix_start_chunk[4];
+  apr_off_t suffix_offset_in_chunk[4];
+
   /* List of free tokens that may be reused. */
   svn_diff__file_token_t *tokens;
 
@@ -233,7 +237,385 @@ datasource_open(void *baton, svn_diff_datasource_e
                     curp, length, 0, file_baton->pool);
 }
 
+static svn_error_t *
+increment_pointer_or_chunk(svn_diff__file_baton_t *file_baton,
+                           char **curp, char **endp, int *chunk_number,
+                           char *buffer, apr_off_t last_chunk_number, int idx)
+{
+  apr_off_t length;
 
+  if ((*curp) == (*endp) - 1)
+    {
+      if (*chunk_number == last_chunk_number)
+        (*curp)++; /* *curp == *endp with last chunk signals end of file */
+      else
+        {
+          (*chunk_number)++;
+          length = *chunk_number == last_chunk_number ?
+            offset_in_chunk(file_baton->size[idx]) : CHUNK_SIZE;
+          SVN_ERR(read_chunk(file_baton->file[idx],
+                             file_baton->path[idx],
+                             buffer, length,
+                             chunk_to_offset(*chunk_number),
+                             file_baton->pool));
+          *endp = buffer + length;
+          *curp = buffer;
+        }
+    }
+  else
+    {
+      (*curp)++;
+    }
+
+  return SVN_NO_ERROR;
+}
+
+static svn_error_t *
+decrement_pointer_or_chunk(svn_diff__file_baton_t *file_baton,
+                           char **curp, char **endp, int *chunk_number,
+                           char *buffer, int idx)
+{
+  if (*curp == buffer)
+    {
+      if (*chunk_number == 0)
+        (*chunk_number)--; /* *chunk_number == -1 signals beginning of file */
+      else
+        {
+          (*chunk_number)--;
+          SVN_ERR(read_chunk(file_baton->file[idx],
+                             file_baton->path[idx],
+                             buffer, CHUNK_SIZE,
+                             chunk_to_offset(*chunk_number),
+                             file_baton->pool));
+          *endp = buffer + CHUNK_SIZE;
+          *curp = *endp - 1;
+        }
+    }
+  else
+    {
+      (*curp)--;
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Find the identical prefix for idx0 and idx1, counting number of lines.
+ * After this function is finished, the buffers, chunks, curp's and endp's 
+ * of the file_baton are set to point at the first byte after the prefix. */
+static svn_error_t *
+find_identical_prefix(svn_diff__file_baton_t *file_baton,
+                      svn_boolean_t *at_least_one_end_reached,
+                      apr_off_t *prefix_lines,
+                      int idx0, int idx1)
+{
+  apr_off_t last_chunk0, last_chunk1;
+  svn_boolean_t had_cr = FALSE;
+
+  last_chunk0 = offset_to_chunk(file_baton->size[idx0]);
+  last_chunk1 = offset_to_chunk(file_baton->size[idx1]);
+
+  *at_least_one_end_reached = FALSE;
+  *prefix_lines = 0;
+  while (*file_baton->curp[idx0] == *file_baton->curp[idx1] 
+         && !*at_least_one_end_reached)
+    {
+      /* ### TODO: see if we can take advantage of 
+         diff options like ignore_eol_style or ignore_space. */
+      if (*file_baton->curp[idx0] == '\r')
+        {
+          (*prefix_lines)++;
+          had_cr = TRUE;
+        }
+      else if (*file_baton->curp[idx0] == '\n' && !had_cr)
+        {
+          (*prefix_lines)++;
+          had_cr = FALSE;
+        }
+      else 
+        {
+          had_cr = FALSE;
+        }
+
+      SVN_ERR(increment_pointer_or_chunk(file_baton,
+                                         &file_baton->curp[idx0],
+                                         &file_baton->endp[idx0], 
+                                         &file_baton->chunk[idx0],
+                                         file_baton->buffer[idx0],
+                                         last_chunk0, idx0));
+      SVN_ERR(increment_pointer_or_chunk(file_baton,
+                                         &file_baton->curp[idx1],
+                                         &file_baton->endp[idx1],
+                                         &file_baton->chunk[idx1],
+                                         file_baton->buffer[idx1],
+                                         last_chunk1, idx1));
+      *at_least_one_end_reached = 
+        file_baton->curp[idx0] == file_baton->endp[idx0] 
+        || file_baton->curp[idx1] == file_baton->endp[idx1];
+    }
+
+  /* If both files reached their end (i.e. are fully identical), we're done */
+  if (file_baton->curp[idx0] == file_baton->endp[idx0] 
+        && file_baton->curp[idx1] == file_baton->endp[idx1])
+    {
+      file_baton->prefix_lines = *prefix_lines;
+      return SVN_NO_ERROR;
+    }
+
+  if (had_cr && (*file_baton->curp[idx0] == '\n' 
+                 || *file_baton->curp[idx1] == '\n'))
+    {
+      /* We ended in the middle of a \r\n for one file, but \r for the other.
+         Back up one byte, so the next loop will back up the entire line. And 
+         decrement *prefix_lines, since we counted one too many for the \r. */
+      (*prefix_lines)--;
+      SVN_ERR(decrement_pointer_or_chunk(file_baton,
+                                         &file_baton->curp[idx0],
+                                         &file_baton->endp[idx0],
+                                         &file_baton->chunk[idx0], 
+                                         file_baton->buffer[idx0],
+                                         idx0));
+      SVN_ERR(decrement_pointer_or_chunk(file_baton,
+                                         &file_baton->curp[idx1],
+                                         &file_baton->endp[idx1],
+                                         &file_baton->chunk[idx1], 
+                                         file_baton->buffer[idx1],
+                                         idx1));      
+    }
+
+  /* Back up to the last eol sequence (\n, \r\n or \r) */
+  do
+    {
+      SVN_ERR(decrement_pointer_or_chunk(file_baton,
+                                         &file_baton->curp[idx0],
+                                         &file_baton->endp[idx0],
+                                         &file_baton->chunk[idx0], 
+                                         file_baton->buffer[idx0],
+                                         idx0));
+      SVN_ERR(decrement_pointer_or_chunk(file_baton,
+                                         &file_baton->curp[idx1],
+                                         &file_baton->endp[idx1],
+                                         &file_baton->chunk[idx1], 
+                                         file_baton->buffer[idx1],
+                                         idx1));
+    } while (*file_baton->curp[idx0] != '\n'
+             && *file_baton->curp[idx0] != '\r'
+             && file_baton->chunk[idx0] != -1 
+             && file_baton->chunk[idx1] != -1);
+
+  /* Slide one byte forward, to point past the eol sequence */
+  if (file_baton->chunk[idx0] == -1)
+    file_baton->chunk[idx0] = 0; /* point to beginning of file again */
+  else
+    SVN_ERR(increment_pointer_or_chunk(file_baton,
+                                       &file_baton->curp[idx0],
+                                       &file_baton->endp[idx0],
+                                       &file_baton->chunk[idx0],
+                                       file_baton->buffer[idx0],
+                                       last_chunk0, idx0));
+  if (file_baton->chunk[idx1] == -1)
+    file_baton->chunk[idx1] = 0; /* point to beginning of file again */
+  else
+    SVN_ERR(increment_pointer_or_chunk(file_baton,
+                                       &file_baton->curp[idx1],
+                                       &file_baton->endp[idx1],
+                                       &file_baton->chunk[idx1],
+                                       file_baton->buffer[idx1],
+                                       last_chunk1, idx1));
+
+  file_baton->prefix_lines = *prefix_lines;
+  return SVN_NO_ERROR;
+}
+
+/* Find the identical suffix for idx0 and idx1. Before this function is called
+ * the file_baton's curp's and chunks should be positioned right after the 
+ * identical prefix (which is the case after find_identical_prefix),
+ * so we can determine where suffix scanning should ultimately stop. */
+static svn_error_t *
+find_identical_suffix(svn_diff__file_baton_t *file_baton,
+                      int idx0, int idx1)
+{
+  char *suffix_buffer0, *suffix_buffer1;
+  int suffix_chunk0, suffix_chunk1;
+  apr_off_t length0, length1;
+  apr_off_t last_chunk0, last_chunk1;
+  apr_off_t suffix_min_offset0;
+  apr_off_t suffix_min_chunk0;
+  char *curp0, *curp1;
+  char *endp0, *endp1;
+
+  last_chunk0 = offset_to_chunk(file_baton->size[idx0]);
+  last_chunk1 = offset_to_chunk(file_baton->size[idx1]);
+
+  /* Position everything at last chunk, pointer to last byte */
+  suffix_buffer0 = apr_palloc(file_baton->pool, 
+    (apr_size_t) (file_baton->size[idx0] > CHUNK_SIZE ? 
+                   CHUNK_SIZE : file_baton->size[idx0]));
+  suffix_chunk0 = last_chunk0;
+  length0 = file_baton->size[idx0] % CHUNK_SIZE;
+  SVN_ERR(read_chunk(file_baton->file[idx0], file_baton->path[idx0],
+                     suffix_buffer0, length0,
+                     chunk_to_offset(suffix_chunk0),
+                     file_baton->pool));
+  endp0 = suffix_buffer0 + length0;
+  curp0 = endp0 - 1;
+
+  suffix_buffer1 = apr_palloc(file_baton->pool, 
+    (apr_size_t) (file_baton->size[idx1] > CHUNK_SIZE ?
+                   CHUNK_SIZE : file_baton->size[idx1]));
+  suffix_chunk1 = last_chunk1;
+  length1 = file_baton->size[idx1] % CHUNK_SIZE;
+  SVN_ERR(read_chunk(file_baton->file[idx1], file_baton->path[idx1],
+                     suffix_buffer1, length1,
+                     chunk_to_offset(suffix_chunk1),
+                     file_baton->pool));
+  endp1 = suffix_buffer1 + length1;
+  curp1 = endp1 - 1;
+
+  /* Get the chunk and pointer offset at which we should stop scanning 
+   * backward for the identical suffix. This is just past the prefix. */
+  suffix_min_chunk0 = file_baton->chunk[idx0];
+  suffix_min_offset0 = file_baton->curp[idx0] - file_baton->buffer[idx0];
+  if (file_baton->size[idx0] > file_baton->size[idx1])
+    {
+      suffix_min_chunk0 += 
+        (file_baton->size[idx0] - file_baton->size[idx1]) / CHUNK_SIZE;
+      suffix_min_offset0 += 
+        (file_baton->size[idx0] - file_baton->size[idx1]) % CHUNK_SIZE;
+    }
+
+  /* Scan backwards until mismatch or until we are where the prefix ended */
+  while (*curp0 == *curp1 && suffix_chunk0 != -1 && suffix_chunk1 != -1
+         && !(suffix_chunk0 == suffix_min_chunk0 
+              && (curp0 - suffix_buffer0) == suffix_min_offset0))
+    {
+      SVN_ERR(decrement_pointer_or_chunk(file_baton, &curp0, &endp0,
+                                         &suffix_chunk0, suffix_buffer0,
+                                         idx0));
+      SVN_ERR(decrement_pointer_or_chunk(file_baton, &curp1, &endp1,
+                                         &suffix_chunk1, suffix_buffer1,
+                                         idx1));
+    }
+
+  /* Slide one byte forward, to point at the first byte of common suffix */
+  if (suffix_chunk0 == -1)
+    suffix_chunk0 = 0; /* point to beginning of file again */
+  else
+    SVN_ERR(increment_pointer_or_chunk(file_baton, &curp0, &endp0, 
+                                       &suffix_chunk0, suffix_buffer0,
+                                       last_chunk0, idx0));
+  if (suffix_chunk1 == -1)
+    suffix_chunk1 = 0; /* point to beginning of file again */
+  else
+    SVN_ERR(increment_pointer_or_chunk(file_baton, &curp1, &endp1, 
+                                       &suffix_chunk1, suffix_buffer1,
+                                       last_chunk1, idx1));
+
+  /* Skip until we find an eol sequence (\n, \r\n or \r), or until at least
+     one file reaches its end. */
+  while (*curp0 != '\n' && *curp0 != '\r'
+         && !(curp0 == endp0 || curp1 == endp1))
+    {
+      SVN_ERR(increment_pointer_or_chunk(file_baton, &curp0, &endp0, 
+                                         &suffix_chunk0, suffix_buffer0,
+                                         last_chunk0, idx0));
+      SVN_ERR(increment_pointer_or_chunk(file_baton, &curp1, &endp1, 
+                                         &suffix_chunk1, suffix_buffer1,
+                                         last_chunk1, idx1));
+    }
+
+  /* Slide one or two more bytes, to point past the eol. */
+  if (*curp0 == '\r')
+    {
+      SVN_ERR(increment_pointer_or_chunk(file_baton, &curp0, &endp0,
+                                         &suffix_chunk0, suffix_buffer0,
+                                         last_chunk0, idx0));
+      SVN_ERR(increment_pointer_or_chunk(file_baton, &curp1, &endp1,
+                                         &suffix_chunk1, suffix_buffer1,
+                                         last_chunk1, idx1));
+    }
+  if (*curp0 == '\n')
+    {
+      SVN_ERR(increment_pointer_or_chunk(file_baton, &curp0, &endp0,
+                                         &suffix_chunk0, suffix_buffer0,
+                                         last_chunk0, idx0));
+      SVN_ERR(increment_pointer_or_chunk(file_baton, &curp1, &endp1,
+                                         &suffix_chunk1, suffix_buffer1,
+                                         last_chunk1, idx1));
+    }
+
+  file_baton->suffix_start_chunk[idx0] = suffix_chunk0;
+  file_baton->suffix_start_chunk[idx1] = suffix_chunk1;
+  file_baton->suffix_offset_in_chunk[idx0] = curp0 - suffix_buffer0;
+  file_baton->suffix_offset_in_chunk[idx1] = curp1 - suffix_buffer1;
+
+  return SVN_NO_ERROR;
+}
+
+/* Implements svn_diff_fns_t::datasources_open */
+static svn_error_t *
+datasources_open(void *baton, apr_off_t *prefix_lines,
+                 svn_diff_datasource_e datasource0, 
+                 svn_diff_datasource_e datasource1)
+{
+  svn_diff__file_baton_t *file_baton = baton;
+  int idx0, idx1;
+  apr_finfo_t finfo0, finfo1;
+  apr_off_t length0, length1;
+  svn_boolean_t at_least_one_end_reached;
+
+  /* Open datasource0 and read first chunk */
+  idx0 = datasource_to_index(datasource0);
+  SVN_ERR(svn_io_file_open(&file_baton->file[idx0], file_baton->path[idx0],
+                           APR_READ, APR_OS_DEFAULT, file_baton->pool));
+  SVN_ERR(svn_io_file_info_get(&finfo0, APR_FINFO_SIZE,
+                               file_baton->file[idx0], file_baton->pool));
+  file_baton->size[idx0] = finfo0.size;
+  length0 = (apr_off_t) (finfo0.size > CHUNK_SIZE ? CHUNK_SIZE : finfo0.size);
+  file_baton->buffer[idx0] = apr_palloc(file_baton->pool, (apr_size_t) length0);
+  SVN_ERR(read_chunk(file_baton->file[idx0], file_baton->path[idx0],
+                     file_baton->buffer[idx0], length0, 0, file_baton->pool));
+  file_baton->endp[idx0] = file_baton->buffer[idx0] + length0;
+  file_baton->curp[idx0] = file_baton->buffer[idx0];
+
+  /* Open datasource1 and read first chunk */
+  idx1 = datasource_to_index(datasource1);
+  SVN_ERR(svn_io_file_open(&file_baton->file[idx1], file_baton->path[idx1],
+                           APR_READ, APR_OS_DEFAULT, file_baton->pool));
+  SVN_ERR(svn_io_file_info_get(&finfo1, APR_FINFO_SIZE,
+                               file_baton->file[idx1], file_baton->pool));
+  file_baton->size[idx1] = finfo1.size;
+  length1 = (apr_off_t) (finfo1.size > CHUNK_SIZE ? CHUNK_SIZE : finfo1.size);
+  file_baton->buffer[idx1] = apr_palloc(file_baton->pool, (apr_size_t) length1);
+  SVN_ERR(read_chunk(file_baton->file[idx1], file_baton->path[idx1],
+                     file_baton->buffer[idx1], length1, 0, file_baton->pool));
+  file_baton->endp[idx1] = file_baton->buffer[idx1] + length1;
+  file_baton->curp[idx1] = file_baton->buffer[idx1];
+
+  if (length0 == 0 || length1 == 0)
+    /* There will not be any identical prefix/suffix, so we're done. */
+    return SVN_NO_ERROR;
+
+  SVN_ERR(find_identical_prefix(file_baton, &at_least_one_end_reached,
+                                prefix_lines, idx0, idx1));
+
+  if (at_least_one_end_reached)
+    /* At least one file consisted totally of identical prefix, 
+     * so there will be no identical suffix. We're done. */
+    return SVN_NO_ERROR;
+
+  SVN_ERR(find_identical_suffix(file_baton, idx0, idx1));
+
+  return SVN_NO_ERROR;
+}
+
+static apr_off_t
+get_prefix_lines(void *baton)
+{
+  svn_diff__file_baton_t *file_baton = baton;
+
+  return file_baton->prefix_lines;
+}
+
 /* Implements svn_diff_fns_t::datasource_close */
 static svn_error_t *
 datasource_close(void *baton, svn_diff_datasource_e datasource)
@@ -277,6 +659,11 @@ datasource_get_next_token(apr_uint32_t *hash, void
       return SVN_NO_ERROR;
     }
 
+  if (file_baton->suffix_start_chunk[idx] || file_baton->suffix_offset_in_chunk[idx])
+    if (file_baton->chunk[idx] == file_baton->suffix_start_chunk[idx]
+        && (curp - file_baton->buffer[idx]) == file_baton->suffix_offset_in_chunk[idx])
+      return SVN_NO_ERROR;
+
   /* Get a new token */
   file_token = file_baton->tokens;
   if (file_token)
@@ -526,8 +913,10 @@ token_discard_all(void *baton)
 static const svn_diff_fns_t svn_diff__file_vtable =
 {
   datasource_open,
+  datasources_open,
   datasource_close,
   datasource_get_next_token,
+  get_prefix_lines,
   token_compare,
   token_discard,
   token_discard_all
Index: subversion/libsvn_diff/diff_memory.c
===================================================================
--- subversion/libsvn_diff/diff_memory.c        (revision 1003326)
+++ subversion/libsvn_diff/diff_memory.c        (working copy)
@@ -95,7 +95,23 @@ datasource_open(void *baton, svn_diff_datasource_e
   return SVN_NO_ERROR;
 }
 
+/* Implements svn_diff_fns_t::datasources_open */
+static svn_error_t *
+datasources_open(void *baton, apr_off_t *prefix_lines,
+                 svn_diff_datasource_e datasource0, 
+                 svn_diff_datasource_e datasource1)
+{
+  /* Do nothing: everything is already there and initialized to 0 */
+  return SVN_NO_ERROR;
+}
 
+/* Implements svn_diff_fns_t::get_prefix_lines */
+static apr_off_t
+get_prefix_lines(void *baton)
+{
+  return 0;
+}
+
 /* Implements svn_diff_fns_t::datasource_close */
 static svn_error_t *
 datasource_close(void *baton, svn_diff_datasource_e datasource)
@@ -189,8 +205,10 @@ token_discard_all(void *baton)
 static const svn_diff_fns_t svn_diff__mem_vtable =
 {
   datasource_open,
+  datasources_open,
   datasource_close,
   datasource_get_next_token,
+  get_prefix_lines,
   token_compare,
   token_discard,
   token_discard_all
Index: subversion/libsvn_diff/token.c
===================================================================
--- subversion/libsvn_diff/token.c      (revision 1003326)
+++ subversion/libsvn_diff/token.c      (working copy)
@@ -139,6 +139,7 @@ svn_diff__get_tokens(svn_diff__position_t **positi
                      void *diff_baton,
                      const svn_diff_fns_t *vtable,
                      svn_diff_datasource_e datasource,
+                     svn_boolean_t datasource_opened,
                      apr_pool_t *pool)
 {
   svn_diff__position_t *start_position;
@@ -152,10 +153,11 @@ svn_diff__get_tokens(svn_diff__position_t **positi
   *position_list = NULL;
 
 
-  SVN_ERR(vtable->datasource_open(diff_baton, datasource));
+  if (!datasource_opened)
+    SVN_ERR(vtable->datasource_open(diff_baton, datasource));
 
   position_ref = &start_position;
-  offset = 0;
+  offset = vtable->get_prefix_lines(diff_baton);
   hash = 0; /* The callback fn doesn't need to touch it per se */
   while (1)
     {
Index: subversion/libsvn_diff/lcs.c
===================================================================
--- subversion/libsvn_diff/lcs.c        (revision 1003326)
+++ subversion/libsvn_diff/lcs.c        (working copy)
@@ -163,6 +163,7 @@ svn_diff__lcs_reverse(svn_diff__lcs_t *lcs)
 svn_diff__lcs_t *
 svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */
               svn_diff__position_t *position_list2, /* pointer to tail (ring) */
+              apr_off_t prefix_lines,
               apr_pool_t *pool)
 {
   int idx;
@@ -180,9 +181,11 @@ svn_diff__lcs(svn_diff__position_t *position_list1
    */
   lcs = apr_palloc(pool, sizeof(*lcs));
   lcs->position[0] = apr_pcalloc(pool, sizeof(*lcs->position[0]));
-  lcs->position[0]->offset = position_list1 ? position_list1->offset + 1 : 1;
+  lcs->position[0]->offset = position_list1 ? 
+    position_list1->offset + 1 : prefix_lines + 1;
   lcs->position[1] = apr_pcalloc(pool, sizeof(*lcs->position[1]));
-  lcs->position[1]->offset = position_list2 ? position_list2->offset + 1 : 1;
+  lcs->position[1]->offset = position_list2 ?
+    position_list2->offset + 1 : prefix_lines + 1;
   lcs->length = 0;
   lcs->refcount = 1;
   lcs->next = NULL;
Index: subversion/libsvn_diff/diff.h
===================================================================
--- subversion/libsvn_diff/diff.h       (revision 1003326)
+++ subversion/libsvn_diff/diff.h       (working copy)
@@ -91,6 +91,7 @@ typedef enum svn_diff__normalize_state_t
 svn_diff__lcs_t *
 svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */
               svn_diff__position_t *position_list2, /* pointer to tail (ring) */
+              apr_off_t prefix_lines,
               apr_pool_t *pool);
 
 
@@ -111,6 +112,7 @@ svn_diff__get_tokens(svn_diff__position_t **positi
                      void *diff_baton,
                      const svn_diff_fns_t *vtable,
                      svn_diff_datasource_e datasource,
+                     svn_boolean_t datasource_opened,
                      apr_pool_t *pool);
 
 
Index: subversion/libsvn_diff/diff.c
===================================================================
--- subversion/libsvn_diff/diff.c       (revision 1003326)
+++ subversion/libsvn_diff/diff.c       (working copy)
@@ -43,6 +43,22 @@ svn_diff__diff(svn_diff__lcs_t *lcs,
   svn_diff_t *diff;
   svn_diff_t **diff_ref = &diff;
 
+  if (want_common && (original_start > 1))
+    {
+      /* we have a prefix to skip */
+      (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
+
+      (*diff_ref)->type = svn_diff__type_common;
+      (*diff_ref)->original_start = 0;
+      (*diff_ref)->original_length = original_start - 1;
+      (*diff_ref)->modified_start = 0;
+      (*diff_ref)->modified_length = modified_start - 1;
+      (*diff_ref)->latest_start = 0;
+      (*diff_ref)->latest_length = 0;
+
+      diff_ref = &(*diff_ref)->next;
+    }
+
   while (1)
     {
       if (original_start < lcs->position[0]->offset
@@ -108,6 +124,7 @@ svn_diff_diff(svn_diff_t **diff,
   svn_diff__lcs_t *lcs;
   apr_pool_t *subpool;
   apr_pool_t *treepool;
+  apr_off_t prefix_lines = 0;
 
   *diff = NULL;
 
@@ -116,17 +133,22 @@ svn_diff_diff(svn_diff_t **diff,
 
   svn_diff__tree_create(&tree, treepool);
 
+  SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines,
+    svn_diff_datasource_original, svn_diff_datasource_modified));
+
   /* Insert the data into the tree */
   SVN_ERR(svn_diff__get_tokens(&position_list[0],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_original,
+                               TRUE,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[1],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_modified,
+                               TRUE,
                                subpool));
 
   /* The cool part is that we don't need the tokens anymore.
@@ -139,10 +161,10 @@ svn_diff_diff(svn_diff_t **diff,
   svn_pool_destroy(treepool);
 
   /* Get the lcs */
-  lcs = svn_diff__lcs(position_list[0], position_list[1], subpool);
+  lcs = svn_diff__lcs(position_list[0], position_list[1], prefix_lines, subpool);
 
   /* Produce the diff */
-  *diff = svn_diff__diff(lcs, 1, 1, TRUE, pool);
+  *diff = svn_diff__diff(lcs, prefix_lines + 1, prefix_lines + 1, TRUE, pool);
 
   /* Get rid of all the data we don't have a use for anymore */
   svn_pool_destroy(subpool);
Index: subversion/libsvn_diff/diff3.c
===================================================================
--- subversion/libsvn_diff/diff3.c      (revision 1003326)
+++ subversion/libsvn_diff/diff3.c      (working copy)
@@ -173,7 +173,7 @@ svn_diff__resolve_conflict(svn_diff_t *hunk,
         position[1]->next = start_position[1];
       }
 
-    *lcs_ref = svn_diff__lcs(position[0], position[1],
+    *lcs_ref = svn_diff__lcs(position[0], position[1], 0,
                              subpool);
 
     /* Fix up the EOF lcs element in case one of
@@ -267,18 +267,21 @@ svn_diff_diff3(svn_diff_t **diff,
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_original,
+                               FALSE,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[1],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_modified,
+                               FALSE,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[2],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_latest,
+                               FALSE,
                                subpool));
 
   /* Get rid of the tokens, we don't need them to calc the diff */
@@ -289,9 +292,9 @@ svn_diff_diff3(svn_diff_t **diff,
   svn_pool_destroy(treepool);
 
   /* Get the lcs for original-modified and original-latest */
-  lcs_om = svn_diff__lcs(position_list[0], position_list[1],
+  lcs_om = svn_diff__lcs(position_list[0], position_list[1], 0,
                          subpool);
-  lcs_ol = svn_diff__lcs(position_list[0], position_list[2],
+  lcs_ol = svn_diff__lcs(position_list[0], position_list[2], 0,
                          subpool);
 
   /* Produce a merged diff */
Index: subversion/libsvn_diff/diff4.c
===================================================================
--- subversion/libsvn_diff/diff4.c      (revision 1003326)
+++ subversion/libsvn_diff/diff4.c      (working copy)
@@ -194,24 +194,28 @@ svn_diff_diff4(svn_diff_t **diff,
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_original,
+                               FALSE,
                                subpool2));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[1],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_modified,
+                               FALSE,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[2],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_latest,
+                               FALSE,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[3],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_ancestor,
+                               FALSE,
                                subpool2));
 
   /* Get rid of the tokens, we don't need them to calc the diff */
@@ -222,7 +226,7 @@ svn_diff_diff4(svn_diff_t **diff,
   svn_pool_clear(subpool3);
 
   /* Get the lcs for original - latest */
-  lcs_ol = svn_diff__lcs(position_list[0], position_list[2], subpool3);
+  lcs_ol = svn_diff__lcs(position_list[0], position_list[2], 0, subpool3);
   diff_ol = svn_diff__diff(lcs_ol, 1, 1, TRUE, pool);
 
   svn_pool_clear(subpool3);
@@ -243,7 +247,7 @@ svn_diff_diff4(svn_diff_t **diff,
   /* Get the lcs for common ancestor - original
    * Do reverse adjustements
    */
-  lcs_adjust = svn_diff__lcs(position_list[3], position_list[2], subpool3);
+  lcs_adjust = svn_diff__lcs(position_list[3], position_list[2], 0, subpool3);
   diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3);
   adjust_diff(diff_ol, diff_adjust);
 
@@ -252,7 +256,7 @@ svn_diff_diff4(svn_diff_t **diff,
   /* Get the lcs for modified - common ancestor
    * Do forward adjustments
    */
-  lcs_adjust = svn_diff__lcs(position_list[1], position_list[3], subpool3);
+  lcs_adjust = svn_diff__lcs(position_list[1], position_list[3], 0, subpool3);
   diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3);
   adjust_diff(diff_ol, diff_adjust);
 

Reply via email to