I (Julian Foad) wrote: > The few differences I have found seem to be already known (but not > necessarily > adequately addressed). These include: svnrdump always sets 'Prop-delta: > true' even when unnecessary; svnrdump doesn't output some of the > checksum headers that svnadmin does; they put different numbers of blank > lines > between sections in some cases. > > In its current state, about 4 or 5 tests fail. A few of these are svnadmin > tests > that deliberately create an invalid repo, that then fails to dump.
There were 4 failures in svnadmin_tests.py, all for that reason. The attached, updated patch avoids these spurious test failures, and also has a log message. > At least one > other fail appears to be a bug in the test suite's dumpfile parser. The other failure is not a parser bug as I speculated, but an interesting real difference. svnmucc_tests.py 2 'basic svnmucc tests' generates a no-op modification to an empty file, in revision 16. 'svnadmin dump' outputs this: [[[ I: Node-path: boozle/buz/svnmucc-test.py I: Node-kind: file I: Node-action: change I: I: ]]] whereas 'svnrdump dump' outputs this: [[[ I: Node-path: boozle/buz/svnmucc-test.py I: Node-kind: file I: Node-action: change I: Text-delta: true I: Text-delta-base-md5: d41d8cd98f00b204e9800998ecf8427e I: Text-content-length: 4 I: Text-content-md5: d41d8cd98f00b204e9800998ecf8427e I: Content-length: 4 I: I: SVN I: ]]] - Julian
After every test, dump the created repository (if any) with svnadmin and with svnrdump and compare the two. * subversion/tests/cmdline/svntest/sandbox.py (Sandbox.__init__): Remember the current working directory. (Sandbox.verify): New method. * subversion/tests/cmdline/svntest/testcase.py (FunctionTestCase.run): Call the sandbox's verify method afterwards. * subversion/tests/cmdline/svntest/verify.py (DumpParser): Improve the parsing of headers, fixing bugs and making it more generic. Don't store the number of blank lines, for now, because at the moment these often differ between svnadmin and svnrdump. * subversion/tests/cmdline/svnadmin_tests.py (fsfs_recover_handle_missing_revs_or_revprops_file): Restore the repository to a non-corrupt state, to avoid failing to dump. (verify_keep_going, verify_invalid_path_changes, verify_quickly): Remove the corrupt repository, to avoid failing to dump. --This line, and those below, will be ignored-- Index: subversion/tests/cmdline/svnadmin_tests.py =================================================================== --- subversion/tests/cmdline/svnadmin_tests.py (revision 1651050) +++ subversion/tests/cmdline/svnadmin_tests.py (working copy) @@ -1235,12 +1235,16 @@ def fsfs_recover_handle_missing_revs_or_ if svntest.verify.verify_outputs( "Output of 'svnadmin recover' is unexpected.", None, errput, None, ".*Revision 3 has a non-file where its revprops file should be.*"): raise svntest.Failure + # Restore the r3 revprops file, thus repairing the repository. + os.rmdir(revprop_3) + os.rename(revprop_was_3, revprop_3) + #---------------------------------------------------------------------- @Skip(svntest.main.tests_use_prepacakaged_repository) def create_in_repo_subdir(sbox): "'svnadmin create /path/to/repo/subdir'" @@ -2148,12 +2152,15 @@ def verify_keep_going(sbox): sbox.repo_dir) if svntest.verify.verify_outputs("Output of 'svnadmin verify' is unexpected.", None, errput, None, "svnadmin: E165011:.*"): raise svntest.Failure + # Don't leave a corrupt repository + svntest.main.safe_rmtree(sbox.repo_dir, True) + @SkipUnless(svntest.main.is_fs_type_fsfs) def verify_invalid_path_changes(sbox): "detect invalid changed path list entries" sbox.build(create_wc = False) repo_url = sbox.repo_url @@ -2287,12 +2294,15 @@ def verify_invalid_path_changes(sbox): sbox.repo_dir) if svntest.verify.verify_outputs("Output of 'svnadmin verify' is unexpected.", None, errput, None, "svnadmin: E165011:.*"): raise svntest.Failure + # Don't leave a corrupt repository + svntest.main.safe_rmtree(sbox.repo_dir, True) + def verify_denormalized_names(sbox): "detect denormalized names and name collisions" sbox.build(create_wc = False) svntest.main.safe_rmtree(sbox.repo_dir, True) @@ -2666,12 +2676,15 @@ def verify_quickly(sbox): if (svntest.main.fs_has_rep_sharing()): exp_out.insert(0, ".*Verifying.*metadata.*") if svntest.verify.verify_outputs("Unexpected error while running 'svnadmin verify'.", output, errput, exp_out, exp_err): raise svntest.Failure + # Don't leave a corrupt repository + svntest.main.safe_rmtree(sbox.repo_dir, True) + @SkipUnless(svntest.main.is_fs_type_fsfs) @SkipUnless(svntest.main.fs_has_pack) def fsfs_hotcopy_progress(sbox): "hotcopy progress reporting" Index: subversion/tests/cmdline/svntest/sandbox.py =================================================================== --- subversion/tests/cmdline/svntest/sandbox.py (revision 1650306) +++ subversion/tests/cmdline/svntest/sandbox.py (working copy) @@ -23,12 +23,13 @@ import os import shutil import copy import urllib import logging +import re import svntest logger = logging.getLogger() @@ -43,12 +44,14 @@ class Sandbox: self.test_paths = [] self._set_name("%s-%d" % (module, idx)) # This flag is set to True by build() and returned by is_built() self._is_built = False + self.was_cwd = os.getcwd() + def _set_name(self, name, read_only=False): """A convenience method for renaming a sandbox, useful when working with multiple repositories in the same unit test.""" if not name is None: self.name = name self.read_only = read_only @@ -378,12 +381,48 @@ class Sandbox: _, output, _ = svntest.actions.run_and_verify_svnlook( None, svntest.verify.AnyOutput, [], 'youngest', self.repo_dir) youngest = int(output[0]) return youngest + def verify(self): + """Do additional testing that should hold for any sandbox, such as + verifying that the repository can be dumped. + """ + svnrdump_headers_missing = re.compile( + "Text-content-sha1: .*|Text-copy-source-md5: .*|" + "Text-copy-source-sha1: .*|Text-delta-base-sha1: .*" + ) + svnrdump_headers_always = re.compile( + "Prop-delta: .*" + ) + + if self.is_built() and not self.read_only: + # verify that we can in fact dump the repo + # (except for the few tests that deliberately corrupt the repo) + os.chdir(self.was_cwd) + if os.path.exists(self.repo_dir): + print("Testing dump...") + svnadmin_dumpfile = svntest.actions.run_and_verify_dump(self.repo_dir, + deltas=True) + svnrdump_dumpfile = svntest.actions.run_and_verify_svnrdump( + None, svntest.verify.AnyOutput, [], 0, + 'dump', '-q', self.repo_url) + svnadmin_dumpfile = [l for l in svnadmin_dumpfile + if not svnrdump_headers_missing.match(l)] + svnadmin_dumpfile = [l for l in svnadmin_dumpfile + if not svnrdump_headers_always.match(l)] + svnrdump_dumpfile = [l for l in svnrdump_dumpfile + if not svnrdump_headers_always.match(l)] + svntest.verify.compare_dump_files(None, None, + svnadmin_dumpfile, svnrdump_dumpfile) + + else: + print("NOT testing dump: is_built=%d, read_only=%d" + % (self.is_built(), self.read_only)) + def is_url(target): return (target.startswith('^/') or target.startswith('file://') or target.startswith('http://') or target.startswith('https://') or target.startswith('svn://') Index: subversion/tests/cmdline/svntest/testcase.py =================================================================== --- subversion/tests/cmdline/svntest/testcase.py (revision 1650306) +++ subversion/tests/cmdline/svntest/testcase.py (working copy) @@ -170,13 +170,15 @@ class FunctionTestCase(TestCase): function was defined.""" filename = self.func.func_code.co_filename return os.path.splitext(os.path.basename(filename))[0] def run(self, sandbox): - return self.func(sandbox) + result = self.func(sandbox) + sandbox.verify() + return result class _XFail(TestCase): """A test that is expected to fail, if its condition is true.""" _result_map = { Index: subversion/tests/cmdline/svntest/verify.py =================================================================== --- subversion/tests/cmdline/svntest/verify.py (revision 1650306) +++ subversion/tests/cmdline/svntest/verify.py (working copy) @@ -488,32 +488,52 @@ class DumpParser: % (self.current, self.lines[self.current])) else: return False self.current += 1 return True + def parse_header(self, header): + regex = '([^:]*): (.*)$' + m = re.match(regex, self.lines[self.current]) + if not m: + raise SVNDumpParseError("expected a header '%s' at line %d, but found:\n%s" + % (regex, self.current, + self.lines[self.current])) + self.current += 1 + return m.groups() + + def parse_headers(self): + headers = [] + while self.lines[self.current] != '\n': + key, val = self.parse_header(self) + headers.append((key, val)) + return headers + + def parse_boolean(self, header, required): + return self.parse_line(header + ': (false|true)$', required) + def parse_format(self): return self.parse_line('SVN-fs-dump-format-version: ([0-9]+)$') def parse_uuid(self): return self.parse_line('UUID: ([0-9a-z-]+)$') def parse_revision(self): return self.parse_line('Revision-number: ([0-9]+)$') + def parse_prop_delta(self): + return self.parse_line('Prop-delta: (false|true)$', required=False) + def parse_prop_length(self, required=True): return self.parse_line('Prop-content-length: ([0-9]+)$', required) def parse_content_length(self, required=True): return self.parse_line('Content-length: ([0-9]+)$', required) def parse_path(self): - path = self.parse_line('Node-path: (.+)$', required=False) - if not path and self.lines[self.current] == 'Node-path: \n': - self.current += 1 - path = '' + path = self.parse_line('Node-path: (.*)$', required=False) return path def parse_kind(self): return self.parse_line('Node-kind: (.+)$', required=False) def parse_action(self): @@ -538,12 +558,21 @@ class DumpParser: def parse_text_md5(self): return self.parse_line('Text-content-md5: ([0-9a-z]+)$', required=False) def parse_text_sha1(self): return self.parse_line('Text-content-sha1: ([0-9a-z]+)$', required=False) + def parse_text_delta(self): + return self.parse_line('Text-delta: (false|true)$', required=False) + + def parse_text_delta_base_md5(self): + return self.parse_line('Text-delta-base-md5: ([0-9a-f]+)$', required=False) + + def parse_text_delta_base_sha1(self): + return self.parse_line('Text-delta-base-sha1: ([0-9a-f]+)$', required=False) + def parse_text_length(self): return self.parse_line('Text-content-length: ([0-9]+)$', required=False) def get_props(self): props = [] while not re.match('PROPS-END$', self.lines[self.current]): @@ -567,14 +596,20 @@ class DumpParser: key += props[curprop[0]] curprop[0] += 1 key = key[:-1] return key - key = read_key_or_value(curprop) - value = read_key_or_value(curprop) + if props[curprop[0]].startswith('K'): + key = read_key_or_value(curprop) + value = read_key_or_value(curprop) + elif props[curprop[0]].startswith('D'): + key = read_key_or_value(curprop) + value = None + else: + raise prophash[key] = value return prophash def get_content(self, length): content = '' @@ -587,45 +622,58 @@ class DumpParser: raise SVNDumpParseError("content length expected %d actual %d at line %d" % (length, len(content), self.current)) return content def parse_one_node(self): node = {} - node['kind'] = self.parse_kind() - action = self.parse_action() - node['copyfrom_rev'] = self.parse_copyfrom_rev() - node['copyfrom_path'] = self.parse_copyfrom_path() - node['copy_md5'] = self.parse_copy_md5() - node['copy_sha1'] = self.parse_copy_sha1() - node['prop_length'] = self.parse_prop_length(required=False) - node['text_length'] = self.parse_text_length() - node['text_md5'] = self.parse_text_md5() - node['text_sha1'] = self.parse_text_sha1() - node['content_length'] = self.parse_content_length(required=False) + headers_list = self.parse_headers() + headers = { k:v for (k, v) in headers_list } + for key, header in [ + ('kind', 'Node-kind'), + ('copyfrom_rev', 'Node-copyfrom-rev'), + ('copyfrom_path', 'Node-copyfrom-path'), + ('copy_md5', 'Text-copy-source-md5'), + ('copy_sha1', 'Text-copy-source-sha1'), + ('prop_delta', 'Prop-delta'), + ('prop_length', 'Prop-content-length'), + ('text_delta', 'Text-delta'), + ('text_delta_base_md5', 'Text-delta-base-md5'), + ('text_delta_base_sha1', 'Text-delta-base-sha1'), + ('text_length', 'Text-content-length'), + ('text_md5', 'Text-content-md5'), + ('text_sha1', 'Text-content-sha1'), + ('content_length', 'Content-length'), + ]: + node[key] = headers.get(header, None) + + action = headers['Node-action'] + self.parse_blank() if node['prop_length']: node['props'] = self.get_props() if node['text_length']: node['content'] = self.get_content(int(node['text_length'])) # Hard to determine how may blanks is 'correct' (a delete that is # followed by an add that is a replace and a copy has one fewer # than expected but that can't be predicted until seeing the add) # so allow arbitrary number blanks = 0 while self.current < len(self.lines) and self.parse_blank(required=False): blanks += 1 - node['blanks'] = blanks + ### disable temporarily, as svnrdump behaves differently from svnadmin + ### on a replace-with-copy (bug -- should file an issue) + #node['blanks'] = blanks return action, node def parse_all_nodes(self): nodes = {} while True: if self.current >= len(self.lines): break path = self.parse_path() - if not path and not path is '': + if path is None: break if not nodes.get(path): nodes[path] = {} action, node = self.parse_one_node() if nodes[path].get(action): raise SVNDumpParseError("duplicate action '%s' for node '%s' at line %d"