Testing equality between svnrdump and svnadmin dump

Julian Foad Fri, 09 Jan 2015 10:21:07 -0800

For Your Interest,

I am having some success with testing for equality between the outputs of 
svnrdump and svnadmin dump.


The attached patch makes the test suite run both 'svnadmin dump' and 'svnrdump 
dump' on the sandbox repository  (if created) after each test, and compare the 
results. This is just one way of getting access to a moderately wide range of 
different repository contents. It's not the only way.

The few differences I have found seem to be already known (but not necessarily 
adequately addressed). These include: svnrdump always sets 'Prop-delta: true' 
even when unnecessary; svnrdump doesn't output some of the checksum headers 
that svnadmin does; they put different numbers of blank lines between sections 
in some cases.

In its current state, about 4 or 5 tests fail. A few of these are svnadmin 
tests that deliberately create an invalid repo, that then fails to dump. At 
least one other fail appears to be a bug in the test suite's dumpfile parser. 
That parser needs some more work. For now, the patch changes the test suite's 
parsing of headers in ways that make it more generic (happier to read anything) 
and does less checking of the expected sequence (so in this respect it reduces 
an aspect of the test coverage that is used for some existing tests).

I'm not planning to commit this in its current state.

Mostly I'm planning to use it to help me check for mistakes while I deduplicate 
the dumpfile handling code in svnadmin, svnrdump and svndumpfilter.


- Julian

* subversion/tests/cmdline/svntest/sandbox.py

* subversion/tests/cmdline/svntest/testcase.py
  (FunctionTestCase): 

* subversion/tests/cmdline/svntest/verify.py

* subversion/tests/libsvn_fs_fs/fs-fs-pack-test.c
  (id_parser_test): 

--This line, and those below, will be ignored--

Index: subversion/tests/cmdline/svntest/sandbox.py
===================================================================
--- subversion/tests/cmdline/svntest/sandbox.py	(revision 1650306)
+++ subversion/tests/cmdline/svntest/sandbox.py	(working copy)
@@ -23,12 +23,13 @@
 
 import os
 import shutil
 import copy
 import urllib
 import logging
+import re
 
 import svntest
 
 logger = logging.getLogger()
 
 
@@ -43,12 +44,14 @@ class Sandbox:
     self.test_paths = []
 
     self._set_name("%s-%d" % (module, idx))
     # This flag is set to True by build() and returned by is_built()
     self._is_built = False
 
+    self.was_cwd = os.getcwd()
+
   def _set_name(self, name, read_only=False):
     """A convenience method for renaming a sandbox, useful when
     working with multiple repositories in the same unit test."""
     if not name is None:
       self.name = name
     self.read_only = read_only
@@ -378,12 +381,48 @@ class Sandbox:
     _, output, _ = svntest.actions.run_and_verify_svnlook(
                      None, svntest.verify.AnyOutput, [],
                      'youngest', self.repo_dir)
     youngest = int(output[0])
     return youngest
 
+  def verify(self):
+    """Do additional testing that should hold for any sandbox, such as
+       verifying that the repository can be dumped.
+    """
+    svnrdump_headers_missing = re.compile(
+        "Text-content-sha1: .*|Text-copy-source-md5: .*|"
+        "Text-copy-source-sha1: .*|Text-delta-base-sha1: .*"
+    )
+    svnrdump_headers_always = re.compile(
+        "Prop-delta: .*"
+    )
+
+    if self.is_built() and not self.read_only:
+      # verify that we can in fact dump the repo
+      # (except for the few tests that deliberately corrupt the repo)
+      os.chdir(self.was_cwd)
+      if os.path.exists(self.repo_dir):
+        print("Testing dump...")
+        svnadmin_dumpfile = svntest.actions.run_and_verify_dump(self.repo_dir,
+                                                          deltas=True)
+        svnrdump_dumpfile = svntest.actions.run_and_verify_svnrdump(
+                        None, svntest.verify.AnyOutput, [], 0,
+                        'dump', '-q', self.repo_url)
+        svnadmin_dumpfile = [l for l in svnadmin_dumpfile
+                             if not svnrdump_headers_missing.match(l)]
+        svnadmin_dumpfile = [l for l in svnadmin_dumpfile
+                             if not svnrdump_headers_always.match(l)]
+        svnrdump_dumpfile = [l for l in svnrdump_dumpfile
+                             if not svnrdump_headers_always.match(l)]
+        svntest.verify.compare_dump_files(None, None,
+                                          svnadmin_dumpfile, svnrdump_dumpfile)
+
+    else:
+      print("NOT testing dump: is_built=%d, read_only=%d"
+            % (self.is_built(), self.read_only))
+
 def is_url(target):
   return (target.startswith('^/')
           or target.startswith('file://')
           or target.startswith('http://')
           or target.startswith('https://')
           or target.startswith('svn://')
Index: subversion/tests/cmdline/svntest/testcase.py
===================================================================
--- subversion/tests/cmdline/svntest/testcase.py	(revision 1650306)
+++ subversion/tests/cmdline/svntest/testcase.py	(working copy)
@@ -170,13 +170,15 @@ class FunctionTestCase(TestCase):
     function was defined."""
 
     filename = self.func.func_code.co_filename
     return os.path.splitext(os.path.basename(filename))[0]
 
   def run(self, sandbox):
-    return self.func(sandbox)
+    result = self.func(sandbox)
+    sandbox.verify()
+    return result
 
 
 class _XFail(TestCase):
   """A test that is expected to fail, if its condition is true."""
 
   _result_map = {
Index: subversion/tests/cmdline/svntest/verify.py
===================================================================
--- subversion/tests/cmdline/svntest/verify.py	(revision 1650306)
+++ subversion/tests/cmdline/svntest/verify.py	(working copy)
@@ -488,32 +488,52 @@ class DumpParser:
                                 % (self.current, self.lines[self.current]))
       else:
         return False
     self.current += 1
     return True
 
+  def parse_header(self, header):
+    regex = '([^:]*): (.*)$'
+    m = re.match(regex, self.lines[self.current])
+    if not m:
+      raise SVNDumpParseError("expected a header '%s' at line %d, but found:\n%s"
+                              % (regex, self.current,
+                                 self.lines[self.current]))
+    self.current += 1
+    return m.groups()
+
+  def parse_headers(self):
+    headers = []
+    while self.lines[self.current] != '\n':
+      key, val = self.parse_header(self)
+      headers.append((key, val))
+    return headers
+
+  def parse_boolean(self, header, required):
+    return self.parse_line(header + ': (false|true)$', required)
+
   def parse_format(self):
     return self.parse_line('SVN-fs-dump-format-version: ([0-9]+)$')
 
   def parse_uuid(self):
     return self.parse_line('UUID: ([0-9a-z-]+)$')
 
   def parse_revision(self):
     return self.parse_line('Revision-number: ([0-9]+)$')
 
+  def parse_prop_delta(self):
+    return self.parse_line('Prop-delta: (false|true)$', required=False)
+
   def parse_prop_length(self, required=True):
     return self.parse_line('Prop-content-length: ([0-9]+)$', required)
 
   def parse_content_length(self, required=True):
     return self.parse_line('Content-length: ([0-9]+)$', required)
 
   def parse_path(self):
-    path = self.parse_line('Node-path: (.+)$', required=False)
-    if not path and self.lines[self.current] == 'Node-path: \n':
-      self.current += 1
-      path = ''
+    path = self.parse_line('Node-path: (.*)$', required=False)
     return path
 
   def parse_kind(self):
     return self.parse_line('Node-kind: (.+)$', required=False)
 
   def parse_action(self):
@@ -538,12 +558,21 @@ class DumpParser:
   def parse_text_md5(self):
     return self.parse_line('Text-content-md5: ([0-9a-z]+)$', required=False)
 
   def parse_text_sha1(self):
     return self.parse_line('Text-content-sha1: ([0-9a-z]+)$', required=False)
 
+  def parse_text_delta(self):
+    return self.parse_line('Text-delta: (false|true)$', required=False)
+
+  def parse_text_delta_base_md5(self):
+    return self.parse_line('Text-delta-base-md5: ([0-9a-f]+)$', required=False)
+
+  def parse_text_delta_base_sha1(self):
+    return self.parse_line('Text-delta-base-sha1: ([0-9a-f]+)$', required=False)
+
   def parse_text_length(self):
     return self.parse_line('Text-content-length: ([0-9]+)$', required=False)
 
   def get_props(self):
     props = []
     while not re.match('PROPS-END$', self.lines[self.current]):
@@ -567,14 +596,20 @@ class DumpParser:
           key += props[curprop[0]]
           curprop[0] += 1
         key = key[:-1]
 
         return key
 
-      key = read_key_or_value(curprop)
-      value = read_key_or_value(curprop)
+      if props[curprop[0]].startswith('K'):
+        key = read_key_or_value(curprop)
+        value = read_key_or_value(curprop)
+      elif props[curprop[0]].startswith('D'):
+        key = read_key_or_value(curprop)
+        value = None
+      else:
+        raise
       prophash[key] = value
 
     return prophash
 
   def get_content(self, length):
     content = ''
@@ -587,45 +622,58 @@ class DumpParser:
       raise SVNDumpParseError("content length expected %d actual %d at line %d"
                               % (length, len(content), self.current))
     return content
 
   def parse_one_node(self):
     node = {}
-    node['kind'] = self.parse_kind()
-    action = self.parse_action()
-    node['copyfrom_rev'] = self.parse_copyfrom_rev()
-    node['copyfrom_path'] = self.parse_copyfrom_path()
-    node['copy_md5'] = self.parse_copy_md5()
-    node['copy_sha1'] = self.parse_copy_sha1()
-    node['prop_length'] = self.parse_prop_length(required=False)
-    node['text_length'] = self.parse_text_length()
-    node['text_md5'] = self.parse_text_md5()
-    node['text_sha1'] = self.parse_text_sha1()
-    node['content_length'] = self.parse_content_length(required=False)
+    headers_list = self.parse_headers()
+    headers = { k:v for (k, v) in headers_list }
+    for key, header in [
+        ('kind', 'Node-kind'),
+        ('copyfrom_rev', 'Node-copyfrom-rev'),
+        ('copyfrom_path', 'Node-copyfrom-path'),
+        ('copy_md5', 'Text-copy-source-md5'),
+        ('copy_sha1', 'Text-copy-source-sha1'),
+        ('prop_delta', 'Prop-delta'),
+        ('prop_length', 'Prop-content-length'),
+        ('text_delta', 'Text-delta'),
+        ('text_delta_base_md5', 'Text-delta-base-md5'),
+        ('text_delta_base_sha1', 'Text-delta-base-sha1'),
+        ('text_length', 'Text-content-length'),
+        ('text_md5', 'Text-content-md5'),
+        ('text_sha1', 'Text-content-sha1'),
+        ('content_length', 'Content-length'),
+        ]:
+      node[key] = headers.get(header, None)
+
+    action = headers['Node-action']
+
     self.parse_blank()
     if node['prop_length']:
       node['props'] = self.get_props()
     if node['text_length']:
       node['content'] = self.get_content(int(node['text_length']))
     # Hard to determine how may blanks is 'correct' (a delete that is
     # followed by an add that is a replace and a copy has one fewer
     # than expected but that can't be predicted until seeing the add)
     # so allow arbitrary number
     blanks = 0
     while self.current < len(self.lines) and self.parse_blank(required=False):
       blanks += 1
-    node['blanks'] = blanks
+    ### disable temporarily, as svnrdump behaves differently from svnadmin
+    ### on a replace-with-copy (bug -- should file an issue)
+    #node['blanks'] = blanks
     return action, node
 
   def parse_all_nodes(self):
     nodes = {}
     while True:
       if self.current >= len(self.lines):
         break
       path = self.parse_path()
-      if not path and not path is '':
+      if path is None:
         break
       if not nodes.get(path):
         nodes[path] = {}
       action, node = self.parse_one_node()
       if nodes[path].get(action):
         raise SVNDumpParseError("duplicate action '%s' for node '%s' at line %d"
Index: subversion/tests/libsvn_fs_fs/fs-fs-pack-test.c
===================================================================
--- subversion/tests/libsvn_fs_fs/fs-fs-pack-test.c	(revision 1650306)
+++ subversion/tests/libsvn_fs_fs/fs-fs-pack-test.c	(working copy)
@@ -24,12 +24,13 @@
 #include <string.h>
 #include <apr_pools.h>
 
 #include "../svn_test.h"
 #include "../../libsvn_fs_fs/fs.h"
 #include "../../libsvn_fs_fs/fs_fs.h"
+#include "../../libsvn_fs_fs/util.h"
 
 #include "svn_hash.h"
 #include "svn_pools.h"
 #include "svn_props.h"
 #include "svn_fs.h"
 #include "private/svn_string_private.h"
@@ -1240,12 +1241,64 @@ id_parser_test(const svn_test_opts_t *op
 
   return SVN_NO_ERROR;
 }
 
 #undef REPO_NAME
 
+/* ------------------------------------------------------------------------ */
+
+#define REPO_NAME "test-repo-compare_r0_template"
+static svn_error_t *
+compare_r0_template(const svn_test_opts_t *opts,
+                    apr_pool_t *pool)
+{
+  svn_fs_t *fs;
+  svn_stringbuf_t *r0;
+  const char *expected;
+
+  /* Create a filesystem and read its r0. */
+  SVN_ERR(svn_test__create_fs(&fs, REPO_NAME, opts, pool));
+  SVN_ERR(svn_stringbuf_from_file2(&r0,
+                                   svn_fs_fs__path_rev_absolute(fs, 0, pool),
+                                   pool));
+
+  /* Compare to our template. */
+  if (svn_fs_fs__use_log_addressing(fs))
+    {
+      /* Compare only the non-binary part of formats 7 and up. */
+      expected = "PLAIN\nEND\nENDREP\n"
+                 "id: 0.0.r0/2\n"
+                 "type: dir\n"
+                 "count: 0\n"
+                 "text: 0 3 4 4 "
+                 "2d2977d1c96f487abe4a1e202dd03b4e\n"
+                 "cpath: /\n"
+                 "\n\nL2P-INDEX\n";
+
+      svn_stringbuf_remove(r0, strlen(expected), r0->len);
+    }
+  else
+    {
+      /* Full comparison for formats 1 .. 6 */
+      expected = "PLAIN\nEND\nENDREP\n"
+                 "id: 0.0.r0/17\n"
+                 "type: dir\n"
+                 "count: 0\n"
+                 "text: 0 0 4 4 "
+                 "2d2977d1c96f487abe4a1e202dd03b4e\n"
+                 "cpath: /\n"
+                 "\n\n17 107\n";
+    }
+
+  SVN_TEST_STRING_ASSERT(r0->data, expected);
+
+  return SVN_NO_ERROR;
+}
+
+#undef REPO_NAME
+
 
 /* The test table.  */
 
 static int max_threads = 4;
 
 static struct svn_test_descriptor_t test_funcs[] =
@@ -1282,10 +1335,12 @@ static struct svn_test_descriptor_t test
     SVN_TEST_OPTS_PASS(metadata_checksumming,
                        "metadata checksums being checked"),
     SVN_TEST_OPTS_PASS(revprop_caching_on_off,
                        "change revprops with enabled and disabled caching"),
     SVN_TEST_OPTS_PASS(id_parser_test,
                        "id parser test"),
+    SVN_TEST_OPTS_PASS(compare_r0_template,
+                       "expect r0 to match the template"),
     SVN_TEST_NULL
   };
 
 SVN_TEST_MAIN

Testing equality between svnrdump and svnadmin dump

Reply via email to