adm_ops.c

Greg Stein Fri, 04 May 2012 14:17:17 -0700

On Fri, May 4, 2012 at 9:13 AM,  <cmpil...@apache.org> wrote:
> Author: cmpilato
> Date: Fri May  4 13:13:00 2012
> New Revision: 1333936
>
> URL: http://svn.apache.org/viewvc?rev=1333936&view=rev
> Log:
> Teach libsvn_ra_serf to make use of the server-provided SHA1 checksums
> I introduced in 1.7 (iff they are provided, of course) to avoid
> downloading server content that's already locally available.


Nice!

>...
> +++ subversion/trunk/subversion/include/private/svn_wc_private.h Fri May  4 
> 13:13:00 2012
> @@ -1076,6 +1076,18 @@ svn_wc__node_get_md5_from_sha1(const svn
>                                apr_pool_t *result_pool,
>                                apr_pool_t *scratch_pool);
>
> +/* Like svn_wc_get_pristine_contents2(), but keyed on the
> +   SHA1_CHECKSUM rather than on the local absolute path of the working
> +   file.  WCROOT_ABSPATH is the absolute path of the root of the
> +   working copy in whose pristine database we'll be looking for these
> +   contents.  */
> +svn_error_t *
> +svn_wc__get_pristine_contents_by_checksum(svn_stream_t **contents,
> +                                          svn_wc_context_t *wc_ctx,
> +                                          const char *wcroot_abspath,
> +                                          const svn_checksum_t 
> *sha1_checksum,
> +                                          apr_pool_t *result_pool,
> +                                          apr_pool_t *scratch_pool);

That should be a WRI_ABSPATH, aka "Working copy Root Indicator". Any
path in the working copy is just fine. It doesn't have to be the
wcroot.

(and yes, I saw your and Bert's discussion about choosing this path,
but that is unrelated to *which* path you use from a working copy)

See the bottom of this email for more concerns about this function.

>
>  /* Gets an array of const char *repos_relpaths of descendants of 
> LOCAL_ABSPATH,
>  * which must be the op root of an addition, copy or move. The descendants
>
> Modified: subversion/trunk/subversion/include/svn_ra.h
> URL: 
> http://svn.apache.org/viewvc/subversion/trunk/subversion/include/svn_ra.h?rev=1333936&r1=1333935&r2=1333936&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/include/svn_ra.h (original)
> +++ subversion/trunk/subversion/include/svn_ra.h Fri May  4 13:13:00 2012
> @@ -120,6 +120,16 @@ typedef svn_error_t *(*svn_ra_invalidate
>                                                           const char *name,
>                                                           apr_pool_t *pool);
>
> +/** This is a function type which allows the RA layer to fetch the
> + * cached pristine file contents whose SHA1 checksum is @a
> + * sha1_checksum, if any.  @a *contents will be a read stream
> + * containing those contents if they are found; NULL otherwise.
> + */
> +typedef svn_error_t *(*svn_ra_get_wc_contents_func_t)(void *baton,
> +                                                      svn_stream_t 
> **contents,
> +                                                      svn_checksum_t 
> *sha1_checksum,
> +                                                      apr_pool_t *pool);

I understand this is "pattern", but we could simply declare the
callback in the structure without using a typedef. Maybe time to
start?

And SHA1_CHECKSUM should be const.

>...
> +++ subversion/trunk/subversion/libsvn_client/ra.c Fri May  4 13:13:00 2012
>...
> +/* This implements the `svn_ra_get_wc_contents_func_t' interface. */

Tho I guess this is a reasonable point in support of a typedef.

>...
> @@ -291,6 +320,7 @@ svn_client__open_ra_session_internal(svn
>
>   if (base_dir_abspath)
>     {
> +      const char *wcroot_abspath;
>       svn_error_t *err = svn_wc__node_get_repos_info(NULL, &uuid, ctx->wc_ctx,
>                                                      base_dir_abspath,
>                                                      pool, pool);
> @@ -303,7 +333,13 @@ svn_client__open_ra_session_internal(svn
>           uuid = NULL;
>         }
>       else
> -        SVN_ERR(err);
> +        {
> +          SVN_ERR(err);
> +        }
> +
> +      SVN_ERR(svn_wc__get_wc_root(&wcroot_abspath, ctx->wc_ctx,
> +                                  base_dir_abspath, pool, pool));
> +      cb->wcroot_abspath = wcroot_abspath;

That get_wc_root() can be avoided by using a WRI_ABSPATH.

>...
> +++ subversion/trunk/subversion/libsvn_ra_serf/update.c Fri May  4 13:13:00 
> 2012
> @@ -237,6 +237,10 @@ typedef struct report_info_t
>   /* Checksum for close_file */
>   const char *final_checksum;
>
> +  /* Stream containing file contents already cached in the working
> +     copy (which may be used to avoid a GET request for the same). */
> +  svn_stream_t *cached_contents;
> +
>   /* temporary property for this file which is currently being parsed
>    * It will eventually be stored in our parent directory's property hash.
>    */
> @@ -1229,6 +1233,179 @@ handle_propchange_only(report_info_t *in
>   return SVN_NO_ERROR;
>  }
>
> +/* "Fetch" a file whose contents were made available via the
> +   get_wc_contents() callback (as opposed to requiring a GET to the
> +   server), and feed the information through the associated update
> +   editor. */
> +static svn_error_t *
> +local_fetch(report_info_t *info)
> +{
> +  const svn_delta_editor_t *update_editor = info->dir->update_editor;
> +  svn_txdelta_window_t delta_window = { 0 };
> +  svn_txdelta_op_t delta_op;
> +  svn_string_t window_data;
> +  char read_buf[SVN__STREAM_CHUNK_SIZE + 1];
> +
> +  SVN_ERR(open_dir(info->dir));
> +  info->editor_pool = svn_pool_create(info->dir->dir_baton_pool);
> +
> +  /* Expand our full name now if we haven't done so yet. */
> +  if (!info->name)
> +    {
> +      info->name = svn_relpath_join(info->dir->name, info->base_name,
> +                                    info->editor_pool);
> +    }
> +
> +  if (SVN_IS_VALID_REVNUM(info->base_rev))
> +    {
> +      SVN_ERR(update_editor->open_file(info->name,
> +                                       info->dir->dir_baton,
> +                                       info->base_rev,
> +                                       info->editor_pool,
> +                                       &info->file_baton));
> +    }
> +  else
> +    {
> +      SVN_ERR(update_editor->add_file(info->name,
> +                                      info->dir->dir_baton,
> +                                      info->copyfrom_path,
> +                                      info->copyfrom_rev,
> +                                      info->editor_pool,
> +                                      &info->file_baton));
> +    }

There is a lot of code duplication between the above and handle_fetch().

> +
> +  SVN_ERR(update_editor->apply_textdelta(info->file_baton,
> +                                         info->base_checksum,
> +                                         info->editor_pool,
> +                                         &info->textdelta,
> +                                         &info->textdelta_baton));
> +
> +  while (1)
> +    {
> +      apr_size_t read_len = SVN__STREAM_CHUNK_SIZE;
> +
> +      SVN_ERR(svn_stream_read(info->cached_contents, read_buf, &read_len));
> +      if (read_len == 0)
> +        break;
> +
> +      window_data.data = read_buf;
> +      window_data.len = read_len;
> +
> +      delta_op.action_code = svn_txdelta_new;
> +      delta_op.offset = 0;
> +      delta_op.length = read_len;
> +
> +      delta_window.tview_len = read_len;
> +      delta_window.num_ops = 1;
> +      delta_window.ops = &delta_op;
> +      delta_window.new_data = &window_data;
> +
> +      SVN_ERR(info->textdelta(&delta_window, info->textdelta_baton));
> +
> +      if (read_len < SVN__STREAM_CHUNK_SIZE)
> +        break;
> +    }
> +
> +  SVN_ERR(info->textdelta(NULL, info->textdelta_baton));

Replace the while() thru the NULL window to:

  SVN_ERR(svn_txdelta_send_stream(info->cached_contents,
info->textdelta, info->textdelta_baton, NULL, scratch_pool));

(I guess info->pool is the scratch_pool)

Even better: please take the code above and replace the contents of
svn_txdelta_send_stream(). That will benefit all callers.

> +
> +  SVN_ERR(svn_stream_close(info->cached_contents));
> +  info->cached_contents = NULL;
> +
> +  if (info->lock_token)
> +    check_lock(info);
> +
> +  /* set all of the properties we received */
> +  SVN_ERR(svn_ra_serf__walk_all_props(info->props,
> +                                      info->base_name,
> +                                      info->base_rev,
> +                                      set_file_props, info,
> +                                      info->pool));
> +
> +  SVN_ERR(svn_ra_serf__walk_all_props(info->dir->removed_props,
> +                                      info->base_name,
> +                                      info->base_rev,
> +                                      remove_file_props, info,
> +                                      info->pool));
> +  if (info->fetch_props)
> +    {
> +      SVN_ERR(svn_ra_serf__walk_all_props(info->props,
> +                                          info->url,
> +                                          info->target_rev,
> +                                          set_file_props, info,
> +                                          info->pool));
> +    }
> +
> +  SVN_ERR(info->dir->update_editor->close_file(info->file_baton,
> +                                               info->final_checksum,
> +                                               info->pool));
> +
> +  /* We're done with our pools. */
> +  svn_pool_destroy(info->editor_pool);
> +  svn_pool_destroy(info->pool);

And again duplication with the lock and property handling, with handle_fetch().

It would have been nice to see these factored out.

> +
> +  return SVN_NO_ERROR;
> +}
> +
> +/* Implements svn_ra_serf__response_handler_t */
> +static svn_error_t *
> +handle_local_fetch(serf_request_t *request,
> +                   serf_bucket_t *response,
> +                   void *handler_baton,
> +                   apr_pool_t *pool)
> +{
> +  report_fetch_t *fetch_ctx = handler_baton;
> +  apr_status_t status;
> +  serf_status_line sl;
> +  svn_error_t *err;
> +  const char *data;
> +  apr_size_t len;
> +
> +  /* If the error code wasn't 200, something went wrong. Don't use the 
> returned
> +     data as its probably an error message. Just bail out instead. */
> +  status = serf_bucket_response_status(response, &sl);
> +  if (SERF_BUCKET_READ_ERROR(status))
> +    {
> +      return svn_error_wrap_apr(status, NULL);
> +    }
> +  if (sl.code != 200)
> +    {
> +      err = svn_error_createf(SVN_ERR_RA_DAV_REQUEST_FAILED, NULL,
> +                              _("GET request failed: %d %s"),
> +                              sl.code, sl.reason);

You sent a HEAD request.

>...

Note: we need to make the callback smarter. In your implementation, it
opens the pristine contents and *holds it open* until the HEAD
completes. If ra_serf queues 3000 HEAD requests... you now require
3000 file handles. Not a good idea.

Also note: Mark already ran into the file handle problem.

The solution is a custom stream.
svn_wc__get_pristine_contents_by_checksum() should use
svn_wc__db_pristine_check() to see if a pristine is present. If it
*does*, then it returns a stream with a custom read function. On the
first read, it will use svn_wc__db_pristine_read() to open the
pristine content stream (and the underlying file handle). Then it just
delegates reads to that inner stream.

As a recommendation, I would suggest creating a "lazy open" stream
which takes a callback that opens the stream upon first read. We are
going to need this lazy-open stream during commits (the RA layer may
choose not to deliver contents to the server, so we shouldn't bother
opening them).

Cheers,
-g

Re: svn commit: r1333936 - in /subversion/trunk/subversion: include/private/svn_wc_private.h include/svn_ra.h libsvn_client/ra.c libsvn_ra_serf/update.c libsvn_wc/adm_ops.c

Reply via email to