I'm working towards cleaning up the pristine store automatically. Although not strictly necessary, I think it will be helpful if a pristine text checksum in the DB shall be guaranteed always to reference a pristine text that is currently in the store. In support of this goal, the current patch enables SQLite checking for that constraint (no effect in sqlite versions < 3.6.19) and adjusts the 1.6-to-1.7 upgrade code to follow that rule.
I think I only made minor edits since it last passed all tests. Any feedback before I commit it? Philip, you probably know the upgrade code best. How does this look? - Julian
Enforce in the WC DB schema that a pristine text checksum always references a pristine text that is in the store. To accord with this, change the 1.6-to-1.7 upgrade code so that it populates the pristine store before the NODES table, and so that it always writes the SHA-1 checksum (not the MD-5) into the NODES table. ### The assertions in write_entry are a bit ugly and in one case incomplete. * subversion/libsvn_wc/wc-metadata.sql (ACTUAL_NODE): Declare 'older_checksum', 'left_checksum', 'right_checksum' as referencing a pristine text. (NODES): Declare 'checksum' as referencing a pristine text. * subversion/libsvn_wc/entries.c (write_entry): Receive the text-base checksums through a new 'text_base_info' parameter and use them in the NODES table rows. (entries_write_baton, entries_write_new_cb, svn_wc__write_upgraded_entries): Pass the text base info down the call stack to write_entry(). * subversion/libsvn_wc/entries.h (svn_wc__text_base_file_info_t, svn_wc__text_base_info_t): New structs. (svn_wc__write_upgraded_entries): Take a 'text_bases_info' parameter. * subversion/libsvn_wc/upgrade.c (migrate_text_bases): Return the checksums of all the files. Remove the 'dir_relpath' parameter and the code that filled in checksum columns in existing NODES rows. (upgrade_to_wcng): Migrate the text bases before the 'entries', and pass all the text-base checksums from the former to the latter. --This line, and those below, will be ignored-- Index: subversion/libsvn_wc/entries.c =================================================================== --- subversion/libsvn_wc/entries.c (revision 1051073) +++ subversion/libsvn_wc/entries.c (working copy) @@ -1578,6 +1578,7 @@ write_entry(struct write_baton **entry_n apr_int64_t wc_id, apr_int64_t repos_id, const svn_wc_entry_t *entry, + const svn_wc__text_base_info_t *text_base_info, const char *local_relpath, const char *entry_abspath, const svn_wc_entry_t *this_dir, @@ -1851,14 +1852,42 @@ write_entry(struct write_baton **entry_n } } - /* If there is a copied WORKING node file then we don't have the - revert-base checksum for this file. It will get updated - later when we transfer the pristine texts. */ - if (entry->kind == svn_node_dir || (working_node && entry->copied)) + if (entry->kind == svn_node_dir) base_node->checksum = NULL; else - SVN_ERR(svn_checksum_parse_hex(&base_node->checksum, svn_checksum_md5, - entry->checksum, result_pool)); + { + if (text_base_info && text_base_info->revert_base.sha1_checksum) + base_node->checksum = text_base_info->revert_base.sha1_checksum; + else if (text_base_info && text_base_info->normal_base.sha1_checksum) + base_node->checksum = text_base_info->normal_base.sha1_checksum; + else + base_node->checksum = NULL; + + /* The base MD5 checksum is available in the entry, unless there + * is a copied WORKING node. If possible, verify that the entry + * checksum matches the base file that we found. */ +#ifdef SVN_DEBUG + if (! (working_node && entry->copied)) + { + svn_checksum_t *entry_md5_checksum, *found_md5_checksum; + SVN_ERR(svn_checksum_parse_hex(&entry_md5_checksum, svn_checksum_md5, + entry->checksum, scratch_pool)); + if (text_base_info && text_base_info->revert_base.md5_checksum) + found_md5_checksum = text_base_info->revert_base.md5_checksum; + else if (text_base_info && text_base_info->normal_base.md5_checksum) + found_md5_checksum = text_base_info->normal_base.md5_checksum; + else + found_md5_checksum = NULL; + if (entry_md5_checksum && found_md5_checksum) + SVN_ERR_ASSERT(svn_checksum_match(entry_md5_checksum, + found_md5_checksum)); + else + { + /* SVN_ERR_ASSERT(entry->deleted || ...); */ + } + } +#endif + } if (this_dir->repos) { @@ -1960,10 +1989,9 @@ write_entry(struct write_baton **entry_n below_working_node->moved_here = FALSE; below_working_node->moved_to = FALSE; - /* We have a problem, the revert_base information isn't - available in the entry structure. The checksum will get - updated later when we transfer the pristines. */ - below_working_node->checksum = NULL; + /* The revert_base checksum isn't available in the entry structure, + so the caller provides it. */ + below_working_node->checksum = text_base_info->revert_base.sha1_checksum; below_working_node->translated_size = 0; below_working_node->changed_rev = SVN_INVALID_REVNUM; below_working_node->changed_date = 0; @@ -1995,9 +2023,24 @@ write_entry(struct write_baton **entry_n if (entry->kind == svn_node_dir) working_node->checksum = NULL; else - SVN_ERR(svn_checksum_parse_hex(&working_node->checksum, - svn_checksum_md5, - entry->checksum, result_pool)); + { + working_node->checksum = text_base_info->normal_base.sha1_checksum; + + /* If an MD5 checksum is present in the entry, we can verify that + * it matches the MD5 of the base file we found earlier. */ +#ifdef SVN_DEBUG + if (entry->checksum) + { + svn_checksum_t *md5_checksum; + SVN_ERR(svn_checksum_parse_hex(&md5_checksum, svn_checksum_md5, + entry->checksum, result_pool)); + SVN_ERR_ASSERT( + md5_checksum && text_base_info->normal_base.md5_checksum); + SVN_ERR_ASSERT(svn_checksum_match( + md5_checksum, text_base_info->normal_base.md5_checksum)); + } +#endif + } /* All subdirs start of incomplete, and stop being incomplete when the entries file in the subdir is upgraded. */ @@ -2098,6 +2141,7 @@ struct entries_write_baton const char *dir_abspath; const char *new_root_abspath; apr_hash_t *entries; + apr_hash_t *text_bases_info; struct write_baton *parent_node; struct write_baton *dir_node; apr_pool_t *result_pool; @@ -2139,7 +2183,7 @@ entries_write_new_cb(void *baton, /* Write out "this dir" */ SVN_ERR(write_entry(&ewb->dir_node, ewb->parent_node, db, sdb, - ewb->wc_id, ewb->repos_id, this_dir, dir_relpath, + ewb->wc_id, ewb->repos_id, this_dir, NULL, dir_relpath, svn_dirent_join(new_root_abspath, dir_relpath, scratch_pool), this_dir, FALSE, ewb->result_pool, iterpool)); @@ -2150,6 +2194,8 @@ entries_write_new_cb(void *baton, const char *name = svn__apr_hash_index_key(hi); const svn_wc_entry_t *this_entry = svn__apr_hash_index_val(hi); const char *child_abspath, *child_relpath; + svn_wc__text_base_info_t *text_base_info + = apr_hash_get(ewb->text_bases_info, name, APR_HASH_KEY_STRING); svn_pool_clear(iterpool); @@ -2163,7 +2209,7 @@ entries_write_new_cb(void *baton, child_relpath = svn_dirent_skip_ancestor(old_root_abspath, child_abspath); SVN_ERR(write_entry(NULL, ewb->dir_node, db, sdb, ewb->wc_id, ewb->repos_id, - this_entry, child_relpath, + this_entry, text_base_info, child_relpath, svn_dirent_join(new_root_abspath, child_relpath, scratch_pool), this_dir, TRUE, iterpool, iterpool)); @@ -2184,6 +2230,7 @@ svn_wc__write_upgraded_entries(void **di const char *dir_abspath, const char *new_root_abspath, apr_hash_t *entries, + apr_hash_t *text_bases_info, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { @@ -2195,6 +2242,7 @@ svn_wc__write_upgraded_entries(void **di ewb.dir_abspath = dir_abspath; ewb.new_root_abspath = new_root_abspath; ewb.entries = entries; + ewb.text_bases_info = text_bases_info; ewb.parent_node = parent_baton; ewb.result_pool = result_pool; Index: subversion/libsvn_wc/entries.h =================================================================== --- subversion/libsvn_wc/entries.h (revision 1051073) +++ subversion/libsvn_wc/entries.h (working copy) @@ -87,10 +87,30 @@ svn_wc__read_entries_old(apr_hash_t **en apr_pool_t *result_pool, apr_pool_t *scratch_pool); +/* The checksums of one pre-1.7 text-base file. If the text-base file + * exists, both checksums are filled in, otherwise both fields are NULL. */ +typedef struct svn_wc__text_base_file_info_t +{ + svn_checksum_t *sha1_checksum; + svn_checksum_t *md5_checksum; +} svn_wc__text_base_file_info_t; + +/* The text-base checksums of the normal base and/or the revert-base of one + * pre-1.7 versioned text file. */ +typedef struct svn_wc__text_base_info_t +{ + svn_wc__text_base_file_info_t normal_base; + svn_wc__text_base_file_info_t revert_base; +} svn_wc__text_base_info_t; + /* For internal use by upgrade.c to write entries in the wc-ng format. Return in DIR_BATON the baton to be passed as PARENT_BATON when upgrading child directories. Pass a NULL PARENT_BATON when upgrading - the root directory. */ + the root directory. + + TEXT_BASES_INFO is a hash of information about all the text bases found + in this directory's admin area, keyed on (const char *) name of the + versioned file, with (svn_wc__text_base_info_t *) values. */ svn_error_t * svn_wc__write_upgraded_entries(void **dir_baton, void *parent_baton, @@ -101,6 +121,7 @@ svn_wc__write_upgraded_entries(void **di const char *dir_abspath, const char *new_root_abspath, apr_hash_t *entries, + apr_hash_t *text_bases_info, apr_pool_t *result_pool, apr_pool_t *scratch_pool); Index: subversion/libsvn_wc/upgrade.c =================================================================== --- subversion/libsvn_wc/upgrade.c (revision 1051073) +++ subversion/libsvn_wc/upgrade.c (working copy) @@ -958,14 +958,15 @@ remove_suffix(const char *str, const cha DIR_ABSPATH into the pristine store of SDB which is located in directory NEW_WCROOT_ABSPATH. - If DIR_RELPATH is set then any .svn-revert files will trigger an - attempt to update the checksum in a NODES row below the top WORKING - node. */ + Set *TEXT_BASES_INFO to a new hash, allocated in RESULT_POOL, that maps + (const char *) name of the versioned file to (svn_wc__text_base_info_t *) + information about the pristine text. */ static svn_error_t * -migrate_text_bases(const char *dir_abspath, +migrate_text_bases(apr_hash_t **text_bases_info, + const char *dir_abspath, const char *new_wcroot_abspath, - const char *dir_relpath, svn_sqlite__db_t *sdb, + apr_pool_t *result_pool, apr_pool_t *scratch_pool) { apr_hash_t *dirents; @@ -975,6 +976,8 @@ migrate_text_bases(const char *dir_abspa TEXT_BASE_SUBDIR, scratch_pool); + *text_bases_info = apr_hash_make(result_pool); + /* Iterate over the text-base files */ SVN_ERR(svn_io_get_dirents3(&dirents, text_base_dir, TRUE, scratch_pool, scratch_pool)); @@ -982,11 +985,33 @@ migrate_text_bases(const char *dir_abspa hi = apr_hash_next(hi)) { const char *text_base_basename = svn__apr_hash_index_key(hi); - svn_checksum_t *md5_checksum; - svn_checksum_t *sha1_checksum; + const char *versioned_file_name; + svn_boolean_t is_revert_base; + svn_wc__text_base_info_t *info; + svn_wc__text_base_file_info_t *file_info; svn_pool_clear(iterpool); + /* Determine the versioned file name and whether this is a normal base + * or a revert base. */ + versioned_file_name + = remove_suffix(text_base_basename, SVN_WC__REVERT_EXT, result_pool); + if (versioned_file_name) + { + is_revert_base = TRUE; + } + else + { + versioned_file_name + = remove_suffix(text_base_basename, SVN_WC__BASE_EXT, result_pool); + is_revert_base = FALSE; + } + info = apr_hash_get(*text_bases_info, versioned_file_name, + APR_HASH_KEY_STRING); + if (info == NULL) + info = apr_pcalloc(result_pool, sizeof(*info)); + file_info = (is_revert_base ? &info->revert_base : &info->normal_base); + /* Calculate its checksums and copy it to the pristine store */ { const char *pristine_path; @@ -1009,23 +1034,25 @@ migrate_text_bases(const char *dir_abspa complexity. :) */ /* Gather the two checksums. */ - SVN_ERR(svn_io_file_checksum2(&md5_checksum, text_base_path, - svn_checksum_md5, iterpool)); - SVN_ERR(svn_io_file_checksum2(&sha1_checksum, text_base_path, - svn_checksum_sha1, iterpool)); + SVN_ERR(svn_io_file_checksum2(&file_info->md5_checksum, text_base_path, + svn_checksum_md5, result_pool)); + SVN_ERR(svn_io_file_checksum2(&file_info->sha1_checksum, text_base_path, + svn_checksum_sha1, result_pool)); SVN_ERR(svn_io_stat(&finfo, text_base_path, APR_FINFO_SIZE, iterpool)); /* Insert a row into the pristine table. */ SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, STMT_INSERT_PRISTINE)); - SVN_ERR(svn_sqlite__bind_checksum(stmt, 1, sha1_checksum, iterpool)); - SVN_ERR(svn_sqlite__bind_checksum(stmt, 2, md5_checksum, iterpool)); + SVN_ERR(svn_sqlite__bind_checksum(stmt, 1, file_info->sha1_checksum, + iterpool)); + SVN_ERR(svn_sqlite__bind_checksum(stmt, 2, file_info->md5_checksum, + iterpool)); SVN_ERR(svn_sqlite__bind_int64(stmt, 3, finfo.size)); SVN_ERR(svn_sqlite__insert(NULL, stmt)); SVN_ERR(svn_wc__db_pristine_get_future_path(&pristine_path, new_wcroot_abspath, - sha1_checksum, + file_info->sha1_checksum, iterpool, iterpool)); /* Ensure any sharding directories exist. */ @@ -1040,51 +1067,8 @@ migrate_text_bases(const char *dir_abspa iterpool)); } - /* If DIR_RELPATH is set and this text base is a "revert base", then - attempt to update the checksum in a NODES row below the top WORKING - node. */ - if (dir_relpath) - { - const char *name - = remove_suffix(text_base_basename, SVN_WC__REVERT_EXT, iterpool); - - if (name) - { - /* Assumming this revert-base is not an orphan, the - upgrade process will have inserted a NODES row with a - null checksum below the top-level working node. - Update that checksum now. */ - apr_int64_t op_depth = -1, wc_id = 1; - const char *local_relpath = svn_relpath_join(dir_relpath, name, - iterpool); - svn_boolean_t have_row; - svn_sqlite__stmt_t *stmt; - - SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, - STMT_SELECT_NODE_INFO)); - SVN_ERR(svn_sqlite__bindf(stmt, "is", wc_id, local_relpath)); - SVN_ERR(svn_sqlite__step(&have_row, stmt)); - if (have_row) - { - SVN_ERR(svn_sqlite__step(&have_row, stmt)); - if (have_row && svn_sqlite__column_is_null(stmt, 6) - && !strcmp(svn_sqlite__column_text(stmt, 4, NULL), - "file")) - op_depth = svn_sqlite__column_int64(stmt, 0); - } - SVN_ERR(svn_sqlite__reset(stmt)); - if (op_depth != -1) - { - SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, - STMT_UPDATE_CHECKSUM)); - SVN_ERR(svn_sqlite__bindf(stmt, "isi", wc_id, local_relpath, - op_depth)); - SVN_ERR(svn_sqlite__bind_checksum(stmt, 4, sha1_checksum, - iterpool)); - SVN_ERR(svn_sqlite__update(NULL, stmt)); - } - } - } + apr_hash_set(*text_bases_info, versioned_file_name, APR_HASH_KEY_STRING, + info); } svn_pool_destroy(iterpool); @@ -1176,6 +1160,7 @@ upgrade_to_wcng(void **dir_baton, apr_hash_t *entries; svn_wc_entry_t *this_dir; const char *old_wcroot_abspath, *dir_relpath; + apr_hash_t *text_bases_info; /* Don't try to mess with the WC if there are old log files left. */ @@ -1197,18 +1182,23 @@ upgrade_to_wcng(void **dir_baton, * The semantics and storage mechanisms between the two are vastly different, * so it's going to be a bit painful. Here's a plan for the operation: * - * 1) The 'entries' file needs to be moved to the new format. We read it - * using the old-format reader, and then use our compatibility code - * for writing entries to fill out the (new) wc_db state. + * 1) Read the old 'entries' using the old-format reader. + * + * 2) Create the new DB if it hasn't already been created. * - * 2) Convert wcprop to the wc-ng format + * 3) Use our compatibility code for writing entries to fill out the (new) + * DB state. Use the remembered checksums, since an entry has only the + * MD5 not the SHA1 checksum, and in the case of a revert-base doesn't + * even have that. * - * 3) Trash old, unused files and subdirs + * 4) Convert wcprop to the wc-ng format * - * ### (fill in other bits as they are implemented) + * 5) Migrate regular properties to the WC-NG DB. + * + * 6) Trash old, unused files and subdirs. */ - /***** ENTRIES *****/ + /***** ENTRIES - READ *****/ SVN_ERR(svn_wc__read_entries_old(&entries, dir_abspath, scratch_pool, scratch_pool)); @@ -1229,6 +1219,7 @@ upgrade_to_wcng(void **dir_baton, apr_pstrdup(hash_pool, this_dir->uuid)); } + /* Create the new DB if it hasn't already been created. */ if (!data->sdb) { const char *root_adm_abspath; @@ -1262,20 +1253,25 @@ upgrade_to_wcng(void **dir_baton, SVN_ERR(svn_wc__db_wclock_obtain(db, data->root_abspath, 0, FALSE, scratch_pool)); } - + + old_wcroot_abspath = svn_dirent_get_longest_ancestor(dir_abspath, + data->root_abspath, + scratch_pool); + dir_relpath = svn_dirent_skip_ancestor(old_wcroot_abspath, dir_abspath); + + /***** TEXT BASES *****/ + SVN_ERR(migrate_text_bases(&text_bases_info, dir_abspath, data->root_abspath, + data->sdb, scratch_pool, scratch_pool)); + + /***** ENTRIES - WRITE *****/ SVN_ERR(svn_wc__write_upgraded_entries(dir_baton, parent_baton, db, data->sdb, data->repos_id, data->wc_id, dir_abspath, data->root_abspath, - entries, + entries, text_bases_info, result_pool, scratch_pool)); /***** WC PROPS *****/ - - /* Ugh. We don't know precisely where the wcprops are. Ignore them. */ - old_wcroot_abspath = svn_dirent_get_longest_ancestor(dir_abspath, - data->root_abspath, - scratch_pool); - dir_relpath = svn_dirent_skip_ancestor(old_wcroot_abspath, dir_abspath); + /* If we don't know precisely where the wcprops are, ignore them. */ if (old_format != SVN_WC__WCPROPS_LOST) { apr_hash_t *all_wcprops; @@ -1291,9 +1287,6 @@ upgrade_to_wcng(void **dir_baton, all_wcprops, scratch_pool)); } - SVN_ERR(migrate_text_bases(dir_abspath, data->root_abspath, dir_relpath, - data->sdb, scratch_pool)); - /* Upgrade all the properties (including "this dir"). Note: this must come AFTER the entries have been migrated into the Index: subversion/libsvn_wc/wc-metadata.sql =================================================================== --- subversion/libsvn_wc/wc-metadata.sql (revision 1051073) +++ subversion/libsvn_wc/wc-metadata.sql (working copy) @@ -167,9 +167,9 @@ CREATE TABLE ACTUAL_NODE ( conflicts? Why do we need these in a column to refer to the pristine store? Can't we just parse the checksums from conflict_data as well? */ - older_checksum TEXT, - left_checksum TEXT, - right_checksum TEXT, + older_checksum TEXT REFERENCES PRISTINE (checksum), + left_checksum TEXT REFERENCES PRISTINE (checksum), + right_checksum TEXT REFERENCES PRISTINE (checksum), PRIMARY KEY (wc_id, local_relpath) ); @@ -415,7 +415,7 @@ CREATE TABLE NODES ( /* The SHA-1 checksum of the pristine text, if this node is a file and was moved here or copied here, else NULL. */ - checksum TEXT, + checksum TEXT REFERENCES PRISTINE (checksum), /* for kind==symlink, this specifies the target. */ symlink_target TEXT,