Hi,

this patch attempts to speed up svn_repos_authz_check_access, esp. when it
is called
repeatedly during the same HTTP request (or on the same connection).
Subversion issues
many HTTP requests that result only in a single call to
svn_repos_authz_check_access
(i.e. just for the path given in the request itself). Others may call
svn_repos_authz_check_access
lots of times, e.g "svn log" calls it for every modified path in every
revision in the
requested range of revisions.

This patch reduces the cumulative time for svn_repos_authz_check_access
(when called repeteadly in the same connection) for more than 50%
(e.g. running the attached test program on a large body of paths).

The observations behind the patch are:

(1) The functions like authz_get_path_access  in libsvn_repos/authz.c that
do the actual work all use

  svn_config_enumerate2(..., path, authz_parse_line, baton, ...)

to compute allow/deny information for the given path and user (implicit in
baton).
The result is computed over and over again, even if the same path and user
are specified than in a previous call.

(2) In "real life", i.e. in the Apache server, user has always the same
value for
the same request (even the same connection).

The patch augments the svn_authz_t struct wih a field "cache" which
is a apr_hash_t, mapping paths to already known allow/deny information.
(This cache is obviously correct only for the same value of user, so we
store that in another field "cached_user"; if the value of user changes,
we simply throw away the existing cache).
We only store a "path" in the cache if it has a [path] section in the
actual access control file, so the cache cannot grow larger than
the apr_hash_t used to store that.

The patch then replaces all of the above calls to svn_config_enumerate2
with a wrapper function, that first checks for a cached value.
Otherwise it will call svn_config_enumerate2 and cache the result.

It also changes the internal functions
- authz_get_path_access
- authz_get_any_access
- authz_get_tree_access
to take a svn_authz_t* as the first parameter (instead of svn_config_t*)
so that they have access to both the svn_config_t and the cache.

The patch needs to #include libsvn_subr/config_impl.h
in order to gain access to svn_config_t.pool:
the cache (apr_hash_t itself, keys and values) must be allocated from
the same pool as svn_config_t so that they have the same lifespan.

Cheers, Roderich

Attachment: libsvn_repos_authz.patch
Description: Binary data

/*
 * Usage: test-authz-cache [-c][-n][-s] ACCESS_FILE USER
 *
 * Load the ACCESS_FILE and run test for USER reading paths from stdin
 *          -c  cache authz information (default is not to cache)
 *          -n  don't lookup authz, just read the paths (for timing)
 *          -s  show cache contents at program end
 *
 * Run svn_repos_authz_check_access on a list of paths read from stdin
 * using an access file and user given as arguments.
 *
 * Stdin should contain one path (with a leading slash) perl line,
 * optionally separating revisions by a line starting with "-"
 * (this may affect pool handling). One way to create such list is:
 *
 *   svn log --verbose --xml ... | \
 *     sed -n -e 's|^<logentry.*|-|p' -e 's|^ *action=".">\(.*\)</path>|\1|p'
 *
 * Linking: -lsvn_repos -lsvn_subr -lapr-N
 */

#include "svn_pools.h"
#include "svn_repos.h"
#include "svn_cmdline.h"

/* FIXME must keep these consistent with subversion/libsvn_repos/authz.c */
extern svn_boolean_t authz_use_cache;       /* FIXME devel only */

struct svn_authz_t
{
  svn_config_t *cfg;
  const char *cached_user;
  apr_hash_t *cache;
};

struct access_cache_t
{
  svn_repos_authz_access_t allow;
  svn_repos_authz_access_t deny;
};


static svn_boolean_t 
display_authz_cache(const char *path, void *baton, apr_pool_t *pool)
{
  struct access_cache_t *cached = apr_hash_get((apr_hash_t*)baton, path, APR_HASH_KEY_STRING);

  if (!cached)
    printf("[%s] not cached\n", path);
  else
    printf("[%s] allow=%d deny=%d\n", path, cached->allow, cached->deny);

  return TRUE;
}

static void
usage(int argc, const char **argv)
{
  printf("Usage: %s [-c][-n][-s] ACCESS_FILE USER\n\n", argv[0]);
  printf("Load the ACCESS_FILE and run test for USER reading paths from stdin\n"
         "  -c  cache authz information\n"
         "  -n  don't lookup authz, just read the paths (for timing)\n"
         "  -s  show cache contents at program end\n");
  exit(2);
}

int
main(int argc, const char **argv)
{
  apr_pool_t *global_pool, *pool;
  svn_error_t *err;
  struct svn_authz_t *authz;
  const char *access_file;
  const char *user;
  char path[1024];
  svn_boolean_t use_cache = FALSE;
  svn_boolean_t show_cache = FALSE;
  svn_boolean_t no_run = FALSE;
  int optind = 0;
  int npaths = 0;
  int nrevs = 0;
  int granted = 0;
  int denied = 0;

  while (++optind < argc)
    {
      if (strcmp(argv[optind], "-c") == 0)
        { use_cache = TRUE; continue; }
      if (strcmp(argv[optind], "-n") == 0)
        { no_run = TRUE; continue; }
      if (strcmp(argv[optind], "-s") == 0)
        { show_cache = TRUE; continue; }
      break;
    }
  if (argc - optind != 2)
    usage(argc, argv);

  access_file = argv[optind++];
  user = argv[optind++];

  /* Initialize the app.  Send all error messages to 'stderr'.  */
  if (svn_cmdline_init(argv[0], stderr) != EXIT_SUCCESS)
    exit(2);

  global_pool = svn_pool_create(NULL);

  /* Read the access file and validate it. */
  if (err = svn_repos_authz_read(&authz, access_file, TRUE, global_pool))
    {
      svn_handle_error2(err, stderr, FALSE, "test-monty authz_read: ");
      exit(1);
    }
  authz_use_cache = use_cache;

  pool = svn_pool_create(global_pool);          /* create per-rev sub pool */

  while (fgets(path, sizeof(path), stdin))
  {
      char *endp = path + strlen(path) - 1;
      svn_boolean_t access_granted;
      if (*endp == '\n') *endp-- = '\0';        /* chomp */

      if (path[0] == '-')                       /* separator line */
      {
          nrevs++;
          svn_pool_clear(pool);                 /* clear sub pool */
          continue;
      }

      npaths++;
      if (no_run) continue;

      if (err = svn_repos_authz_check_access(authz, "", path, user,
                             svn_authz_read, &access_granted, pool))
        {
          svn_handle_error2(err, stderr, FALSE, "test-monty authz_check_access: ");
          exit(1);
        }

      if (access_granted) granted++;
      else                denied++;
  }


  svn_pool_destroy(global_pool);

  printf("\nprocessed %d revs, %d paths\n", nrevs, npaths);

  if (!no_run)
    {
      printf("%s: %d granted, %d denied\n", 
             use_cache ? "using cache": "NOT using cache", granted, denied);

      /* show cached info */
      if (use_cache && show_cache)
        {
          printf("\ncached sections:\n");
          svn_config_enumerate_sections2(authz->cfg, display_authz_cache, 
                                         authz->cache, global_pool);
        }
    }

  exit(0);
}

Reply via email to