On Thu, Jan 13, 2022 at 05:05:57PM +0100, Claudio Jeker wrote:
> This diff adds a new cache subdir called "valid". This is the place where
> all verified and good files are stored after a run. It makes -n work a lot
> better since -n will now only look at what's inside "valid" and ignore
> "rsync" and "rrdp".
> 
> The trust anchors are still stored in "ta" even if valid.
> The rsync repo will only hold temporary files and be empty otherwise.
> The rrdp repo still may contain some superfluous files that people
> included in their snapshots. Not keeping them could result in extra
> snapshot downloads since delta updates could refer to these files.
> Since there is a valid repo, rrdp no longer needs an additional temp
> directory for its sync.
> 
> With this rsync will use the --compare-dest feature and use the valid
> cache as a base.
> 
> The file cleanup at the end got more complex. There is now
> repo_move_valid() and repo_cleanup_rrdp() to do two initial steps of
> cleanup before the tree traversal starts. The fts function now uses
> some fts features to find a) empty directories and b) superfluous files.
> 
> I think the code changes outside of repo.c should be straight forward.
> 
> To the rpki connoisseur, this currently only implements a simple newest
> file wins startegy. The handling of .mft and their serial number will
> follow once this is in.

I have made a first pass over this. I will need to sleep on this and
think through the repo.c changes in more detail with a fresh head.

Couple leaks and a few stylistic remarks noted inline.

> -- 
> :wq Claudio
> 
> ? obj
> Index: extern.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
> retrieving revision 1.103
> diff -u -p -r1.103 extern.h
> --- extern.h  13 Jan 2022 13:46:03 -0000      1.103
> +++ extern.h  13 Jan 2022 15:50:40 -0000
> @@ -341,7 +341,7 @@ enum publish_type {
>  struct entity {
>       TAILQ_ENTRY(entity) entries;
>       char            *path;          /* path relative to repository */
> -     char            *file;          /* filename */
> +     char            *file;          /* filename or valid repo path */
>       unsigned char   *data;          /* optional data blob */
>       size_t           datasz;        /* length of optional data blob */
>       unsigned int     repoid;        /* repository identifier */
> @@ -380,6 +380,7 @@ struct stats {
>       size_t   vrps; /* total number of vrps */
>       size_t   uniqs; /* number of unique vrps */
>       size_t   del_files; /* number of files removed in cleanup */
> +     size_t   extra_files; /* number of superfluous files */
>       size_t   del_dirs; /* number of directories removed in cleanup */
>       size_t   brks; /* number of BGPsec Router Key (BRK) certificates */
>       struct timeval  elapsed_time;
> @@ -506,7 +507,7 @@ void               rrdp_clear(unsigned int);
>  void          rrdp_save_state(unsigned int, struct rrdp_session *);
>  int           rrdp_handle_file(unsigned int, enum publish_type, char *,
>                   char *, size_t, char *, size_t);
> -char         *repo_basedir(const struct repo *);
> +char         *repo_basedir(const struct repo *, int);
>  unsigned int  repo_id(const struct repo *);
>  const char   *repo_uri(const struct repo *);
>  struct repo  *ta_lookup(int, struct tal *);
> @@ -520,7 +521,8 @@ void               rsync_finish(unsigned int, int);
>  void          http_finish(unsigned int, enum http_result, const char *);
>  void          rrdp_finish(unsigned int, int);
>  
> -void          rsync_fetch(unsigned int, const char *, const char *);
> +void          rsync_fetch(unsigned int, const char *, const char *,
> +                 const char *);
>  void          http_fetch(unsigned int, const char *, const char *, int);
>  void          rrdp_fetch(unsigned int, const char *, const char *,
>                   struct rrdp_session *);
> Index: main.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
> retrieving revision 1.175
> diff -u -p -r1.175 main.c
> --- main.c    13 Jan 2022 13:18:41 -0000      1.175
> +++ main.c    13 Jan 2022 15:50:40 -0000
> @@ -151,17 +151,18 @@ entity_write_repo(struct repo *rp)
>       struct ibuf *b;
>       enum rtype type = RTYPE_REPO;
>       unsigned int repoid;
> -     char *path;
> +     char *path, *altpath;
>       int talid = 0;
>  
>       repoid = repo_id(rp);
> -     path = repo_basedir(rp);
> +     path = repo_basedir(rp, 0);
> +     altpath = repo_basedir(rp, 1);
>       b = io_new_buffer();
>       io_simple_buffer(b, &type, sizeof(type));
>       io_simple_buffer(b, &repoid, sizeof(repoid));
>       io_simple_buffer(b, &talid, sizeof(talid));
>       io_str_buffer(b, path);
> -     io_str_buffer(b, NULL);
> +     io_str_buffer(b, altpath);
>       io_buf_buffer(b, NULL, 0);
>       io_close_buffer(&procq, b);
>       free(path);

Don't leak altpath

        free(altpath);

> @@ -254,14 +255,15 @@ rrdp_fetch(unsigned int id, const char *
>   * Request a repository sync via rsync URI to directory local.
>   */
>  void
> -rsync_fetch(unsigned int id, const char *uri, const char *local)
> +rsync_fetch(unsigned int id, const char *uri, const char *local,
> +    const char *base)
>  {
>       struct ibuf     *b;
>  
>       b = io_new_buffer();
>       io_simple_buffer(b, &id, sizeof(id));
>       io_str_buffer(b, local);
> -     io_str_buffer(b, NULL);
> +     io_str_buffer(b, base);
>       io_str_buffer(b, uri);
>       io_close_buffer(&rsyncq, b);
>  }
> @@ -1246,8 +1248,8 @@ main(int argc, char *argv[])
>       logx("Certificate revocation lists: %zu", stats.crls);
>       logx("Ghostbuster records: %zu", stats.gbrs);
>       logx("Repositories: %zu", stats.repos);
> -     logx("Cleanup: removed %zu files, %zu directories",
> -         stats.del_files, stats.del_dirs);
> +     logx("Cleanup: removed %zu files, %zu directories, %zu superfluous",
> +         stats.del_files, stats.del_dirs, stats.extra_files);
>       logx("VRP Entries: %zu (%zu unique)", stats.vrps, stats.uniqs);
>  
>       /* Memory cleanup. */
> Index: output-json.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/rpki-client/output-json.c,v
> retrieving revision 1.22
> diff -u -p -r1.22 output-json.c
> --- output-json.c     4 Nov 2021 11:32:55 -0000       1.22
> +++ output-json.c     13 Jan 2022 15:50:40 -0000
> @@ -78,6 +78,7 @@ outputheader_json(FILE *out, struct stat
>           "\t\t\"vrps\": %zu,\n"
>           "\t\t\"uniquevrps\": %zu,\n"
>           "\t\t\"cachedir_del_files\": %zu,\n"
> +         "\t\t\"cachedir_superfluous_files\": %zu,\n"
>           "\t\t\"cachedir_del_dirs\": %zu\n"
>           "\t},\n\n",
>           st->mfts, st->mfts_fail, st->mfts_stale,
> @@ -85,7 +86,7 @@ outputheader_json(FILE *out, struct stat
>           st->gbrs,
>           st->repos,
>           st->vrps, st->uniqs,
> -         st->del_files, st->del_dirs) < 0)
> +         st->del_files, st->extra_files, st->del_dirs) < 0)
>               return -1;
>       return 0;
>  }
> Index: parser.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/rpki-client/parser.c,v
> retrieving revision 1.36
> diff -u -p -r1.36 parser.c
> --- parser.c  13 Jan 2022 14:58:21 -0000      1.36
> +++ parser.c  13 Jan 2022 15:50:40 -0000
> @@ -50,6 +50,7 @@ static struct crl_tree       crlt = RB_INITIA
>  struct parse_repo {
>       RB_ENTRY(parse_repo)     entry;
>       char                    *path;
> +     char                    *validpath;
>       unsigned int             id;
>  };
>  
> @@ -72,20 +73,77 @@ repo_get(unsigned int id)
>  }
>  
>  static void
> -repo_add(unsigned int id, char *path)
> +repo_add(unsigned int id, char *path, char *validpath)
>  {
>       struct parse_repo *rp;
>  
> -     if ((rp = malloc(sizeof(*rp))) == NULL)
> +     if ((rp = calloc(1, sizeof(*rp))) == NULL)
>               err(1, NULL);
>       rp->id = id;
> -     if ((rp->path = strdup(path)) == NULL)
> -             err(1, NULL);
> +     if (path)

The rest of the function tests against NULL, so I would do the same here, too

        if (path != NULL)

> +             if ((rp->path = strdup(path)) == NULL)
> +                     err(1, NULL);
> +     if (validpath)

ditto

> +             if ((rp->validpath = strdup(validpath)) == NULL)
> +                     err(1, NULL);
>  
>       if (RB_INSERT(repo_tree, &repos, rp) != NULL)
>               errx(1, "repository already added: id %d, %s", id, path);
>  }
>  
> +/*
> + * Build access path to file based on repoid, path and file values.
> + * If wantalt == 1 the function can return NULL, if wantalt == 0 it
> + * can not fail.
> + */
> +static char *
> +parse_filepath(unsigned int repoid, const char *path, const char *file,
> +    int wantalt)
> +{
> +     struct parse_repo       *rp;
> +     char                    *fn, *repopath;
> +
> +     /* build file path based on repoid, entity path and filename */
> +     rp = repo_get(repoid);
> +     if (rp == NULL) {
> +             /* no repo so no alternative path. */
> +             if (wantalt == 1)

I would treat wantalt as a Boolean

                if (wantalt)

> +                     return NULL;
> +
> +             if (path == NULL) {
> +                     if ((fn = strdup(file)) == NULL)
> +                                     err(1, NULL);

wrong indent

> +             } else {
> +                     if (asprintf(&fn, "%s/%s", path, file) == -1)
> +                             err(1, NULL);
> +             }
> +     } else {
> +             if (wantalt == 1)

again
                if (wantalt)

> +                     repopath = rp->validpath;
> +             else if (rp->path == NULL) 

trailing whitespace

> +                     repopath = rp->validpath;
> +             else
> +                     repopath = rp->path;

Consider dropping the 'else if':

                if (wantalt || rp->path == NULL)
                        repopath = rp->validpath;
                else
                        repopath = rp->path;

> +
> +             if (repopath == NULL)
> +                     return NULL;
> +
> +             if (path == NULL) {
> +                     if (asprintf(&fn, "%s/%s", repopath, file) == -1)
> +                             err(1, NULL);
> +             } else {
> +                     if (asprintf(&fn, "%s/%s/%s", repopath, path,
> +                         file) == -1)
> +                             err(1, NULL);
> +             }
> +     }
> +     return fn;
> +}
> +
> +/*
> + * Callback for X509_verify_cert() to handle critical extensions in old
> + * LibreSSL libraries or OpenSSL libs without RFC3779 support.
> + */
>  static int
>  verify_cb(int ok, X509_STORE_CTX *store_ctx)
>  {
> @@ -229,13 +287,8 @@ int
>  mft_check(const char *fn, struct mft *p)
>  {
>       size_t  i;
> -     int     fd, rc = 1;
> -     char    *cp, *h, *path = NULL;
> -
> -     /* Check hash of file now, but first build path for it */
> -     cp = strrchr(fn, '/');
> -     assert(cp != NULL);
> -     assert(cp - fn < INT_MAX);
> +     int     fd, try, rc = 1;
> +     char    *h, *path;
>  
>       for (i = 0; i < p->filesz; i++) {
>               const struct mftfile *m = &p->files[i];
> @@ -246,15 +299,24 @@ mft_check(const char *fn, struct mft *p)
>                       free(h);
>                       continue;
>               }
> -             if (asprintf(&path, "%.*s/%s", (int)(cp - fn), fn,
> -                 m->file) == -1)
> -                     err(1, NULL);
> -             fd = open(path, O_RDONLY);
> +
> +             fd = -1;
> +             try = 0;
> +             path = NULL;
> +             do {
> +                     free(path);
> +                     if ((path = parse_filepath(p->repoid, p->path, m->file,
> +                         try++)) == NULL)
> +                             break;
> +                     fd = open(path, O_RDONLY);
> +             } while (fd == -1 && try < 2);
> +
> +             free(path);
> +
>               if (!valid_filehash(fd, m->hash, sizeof(m->hash))) {
>                       warnx("%s: bad message digest for %s", fn, m->file);
>                       rc = 0;
>               }
> -             free(path);
>       }
>  
>       return rc;
> @@ -630,37 +692,6 @@ build_crls(const struct crl *crl, STACK_
>               err(1, "sk_X509_CRL_push");
>  }
>  
> -static char *
> -parse_filepath(struct entity *entp)
> -{
> -     struct parse_repo       *rp;
> -     char                    *file;
> -
> -     /* build file path based on repoid, entity path and filename */
> -     rp = repo_get(entp->repoid);
> -     if (rp == NULL) {
> -             if (entp->path == NULL) {
> -                     if ((file = strdup(entp->file)) == NULL)
> -                                     err(1, NULL);
> -             } else {
> -                     if (asprintf(&file, "%s/%s", entp->path,
> -                         entp->file) == -1)
> -                             err(1, NULL);
> -             }
> -     } else {
> -             if (entp->path == NULL) {
> -                     if (asprintf(&file, "%s/%s", rp->path,
> -                         entp->file) == -1)
> -                             err(1, NULL);
> -             } else {
> -                     if (asprintf(&file, "%s/%s/%s", rp->path,
> -                         entp->path, entp->file) == -1)
> -                             err(1, NULL);
> -             }
> -     }
> -     return file;
> -}
> -
>  static void
>  parse_entity(struct entityq *q, struct msgbuf *msgq)
>  {
> @@ -673,27 +704,41 @@ parse_entity(struct entityq *q, struct m
>       unsigned char   *f;
>       size_t           flen;
>       char            *file;
> -     int              c;
> +     int              c, try;
>  
>       while ((entp = TAILQ_FIRST(q)) != NULL) {
>               TAILQ_REMOVE(q, entp, entries);
>  
>               /* handle RTYPE_REPO first */
>               if (entp->type == RTYPE_REPO) {
> -                     repo_add(entp->repoid, entp->path);
> +                     repo_add(entp->repoid, entp->path, entp->file);
>                       entity_free(entp);
>                       continue;
>               }
>  
>               f = NULL;
> -             file = parse_filepath(entp);
> +             file = parse_filepath(entp->repoid, entp->path, entp->file, 0);
>               if (entp->type != RTYPE_TAL) {
> -                     f = load_file(file, &flen);
> -                     if (f == NULL)
> -                             warn("%s", file);
> +                     for (try = 0; try <= 1; try++) {
> +                             f = load_file(file, &flen);
> +                             if (f != NULL)
> +                                     break;

Maybe this for loop could be factored into a helper function? I find the control
flow tricky to follow. In a function you could use early returns and perhaps
duplicate the three lines loading the file.

If you want to keep a loop, consider turning it into a do-while loop to match
what mft_check() and rrdp_handle_file() do more closely.

> +
> +                             if (errno == ENOENT && try == 0) {
> +                                     char *fn;
> +                                     fn = parse_filepath(entp->repoid,
> +                                         entp->path, entp->file, 1);
> +                                     if (fn != NULL) {
> +                                             free(file);
> +                                             file = fn;
> +                                             continue;
> +                                     }
> +                             }
> +                             warn("parse file %s", file);
> +                     }
>               }
>  
> -             /* pass back at least type and filename */
> +             /* pass back at least type, repoid and filename */
>               b = io_new_buffer();
>               io_simple_buffer(b, &entp->type, sizeof(entp->type));
>               io_str_buffer(b, file);
> Index: repo.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/rpki-client/repo.c,v
> retrieving revision 1.24
> diff -u -p -r1.24 repo.c
> --- repo.c    13 Jan 2022 14:57:02 -0000      1.24
> +++ repo.c    13 Jan 2022 15:50:40 -0000
> @@ -58,8 +58,6 @@ struct rrdprepo {
>       SLIST_ENTRY(rrdprepo)    entry;
>       char                    *notifyuri;
>       char                    *basedir;
> -     char                    *temp;
> -     struct filepath_tree     added;
>       struct filepath_tree     deleted;
>       unsigned int             id;
>       enum repo_state          state;
> @@ -92,6 +90,7 @@ struct repo {
>       SLIST_ENTRY(repo)        entry;
>       char                    *repouri;
>       char                    *notifyuri;
> +     char                    *basedir;

repo_free() leaks both notifyuri and basedir.

>       const struct rrdprepo   *rrdp;
>       const struct rsyncrepo  *rsync;
>       const struct tarepo     *ta;
> @@ -105,7 +104,7 @@ static SLIST_HEAD(, repo) repos = SLIST_
>  /* counter for unique repo id */
>  unsigned int         repoid;
>  
> -static struct rsyncrepo      *rsync_get(const char *, int);
> +static struct rsyncrepo      *rsync_get(const char *, const char *);
>  static void           remove_contents(char *);
>  
>  /*
> @@ -168,26 +167,6 @@ filepath_exists(struct filepath_tree *tr
>  }
>  
>  /*
> - * Return true if a filepath entry exists that starts with path.
> - */
> -static int
> -filepath_dir_exists(struct filepath_tree *tree, char *path)
> -{
> -     struct filepath needle;
> -     struct filepath *res;
> -
> -     needle.file = path;
> -     res = RB_NFIND(filepath_tree, tree, &needle);
> -     while (res != NULL && strstr(res->file, path) == res->file) {
> -             /* make sure that filepath actually is in that path */
> -             if (res->file[strlen(path)] == '/')
> -                     return 1;
> -             res = RB_NEXT(filepath_tree, tree, res);
> -     }
> -     return 0;
> -}
> -
> -/*
>   * Remove entry from tree and free it.
>   */
>  static void
> @@ -284,7 +263,8 @@ repo_state(struct repo *rp)
>               return rp->rsync->state;
>       if (rp->rrdp)
>               return rp->rrdp->state;
> -     errx(1, "%s: bad repo", rp->repouri);
> +     /* No backend so sync is by definition done. */
> +     return REPO_DONE;
>  }
>  
>  /*
> @@ -305,7 +285,8 @@ repo_done(const void *vp, int ok)
>                       if (!ok) {
>                               /* try to fall back to rsync */
>                               rp->rrdp = NULL;
> -                             rp->rsync = rsync_get(rp->repouri, 0);
> +                             rp->rsync = rsync_get(rp->repouri,
> +                                 rp->basedir);
>                               /* need to check if it was already loaded */
>                               if (repo_state(rp) != REPO_LOADING)
>                                       entityq_flush(&rp->queue, rp);
> @@ -362,7 +343,7 @@ ta_fetch(struct tarepo *tr)
>                * Create destination location.
>                * Build up the tree to this point.
>                */
> -             rsync_fetch(tr->id, tr->uri[tr->uriidx], tr->basedir);
> +             rsync_fetch(tr->id, tr->uri[tr->uriidx], tr->basedir, NULL);
>       } else {
>               int fd;
>  
> @@ -387,9 +368,6 @@ ta_get(struct tal *tal)
>  
>       /* no need to look for possible other repo */
>  
> -     if (tal->urisz == 0)
> -             errx(1, "TAL %s has no URI", tal->descr);
> -
>       if ((tr = calloc(1, sizeof(*tr))) == NULL)
>               err(1, NULL);
>       tr->id = ++repoid;
> @@ -405,17 +383,7 @@ ta_get(struct tal *tal)
>       tal->urisz = 0;
>       tal->uri = NULL;
>  
> -     if (noop) {
> -             tr->state = REPO_DONE;
> -             logx("ta/%s: using cache", tr->descr);
> -             repo_done(tr, 0);
> -     } else {
> -             /* try to create base directory */
> -             if (mkpath(tr->basedir) == -1)
> -                     warn("mkpath %s", tr->basedir);
> -
> -             ta_fetch(tr);
> -     }
> +     ta_fetch(tr);
>  
>       return tr;
>  }
> @@ -447,7 +415,7 @@ ta_free(void)
>  }
>  
>  static struct rsyncrepo *
> -rsync_get(const char *uri, int nofetch)
> +rsync_get(const char *uri, const char *validdir)
>  {
>       struct rsyncrepo *rr;
>       char *repo;
> @@ -470,22 +438,16 @@ rsync_get(const char *uri, int nofetch)
>       rr->repouri = repo;
>       rr->basedir = repo_dir(repo, "rsync", 0);
>  
> -     if (noop || nofetch) {
> -             rr->state = REPO_DONE;
> -             logx("%s: using cache", rr->basedir);
> -             repo_done(rr, 0);
> -     } else {
> -             /* create base directory */
> -             if (mkpath(rr->basedir) == -1) {
> -                     warn("mkpath %s", rr->basedir);
> -                     rsync_finish(rr->id, 0);
> -                     return rr;
> -             }
> -
> -             logx("%s: pulling from %s", rr->basedir, rr->repouri);
> -             rsync_fetch(rr->id, rr->repouri, rr->basedir);
> +     /* create base directory */
> +     if (mkpath(rr->basedir) == -1) {
> +             warn("mkpath %s", rr->basedir);
> +             rsync_finish(rr->id, 0);
> +             return rr;
>       }
>  
> +     logx("%s: pulling from %s", rr->basedir, rr->repouri);
> +     rsync_fetch(rr->id, rr->repouri, rr->basedir, validdir);
> +
>       return rr;
>  }
>  
> @@ -513,25 +475,19 @@ rsync_free(void)
>       }
>  }
>  
> -static int rrdprepo_fetch(struct rrdprepo *);
> -
>  /*
>   * Build local file name base on the URI and the rrdprepo info.
>   */
>  static char *
> -rrdp_filename(const struct rrdprepo *rr, const char *uri, int temp)
> +rrdp_filename(const struct rrdprepo *rr, const char *uri, int valid)
>  {
>       char *nfile;
> -     char *dir = rr->basedir;
> -
> -     if (temp)
> -             dir = rr->temp;
> -
> -     if (!valid_uri(uri, strlen(uri), "rsync://")) {
> -             warnx("%s: bad URI %s", rr->basedir, uri);
> -             return NULL;
> -     }
> +     const char *dir = rr->basedir;
>  
> +     if (valid)
> +             dir = "valid";
> +     if (!valid_uri(uri, strlen(uri), "rsync://"))
> +             errx(1, "%s: bad URI %s", rr->basedir, uri);
>       uri += strlen("rsync://");      /* skip proto */
>       if (asprintf(&nfile, "%s/%s", dir, uri) == -1)
>               err(1, NULL);
> @@ -575,28 +531,49 @@ rrdp_free(void)
>  
>               free(rr->notifyuri);
>               free(rr->basedir);
> -             free(rr->temp);
>  
> -             filepath_free(&rr->added);
>               filepath_free(&rr->deleted);
>  
>               free(rr);
>       }
>  }
>  
> -static struct rrdprepo *
> -rrdp_basedir(const char *dir)
> +/*
> + * Check if a directory is an active rrdp repository.
> + * Returns 1 if found else 0.
> + */
> +static int
> +rrdp_is_active(const char *dir)
>  {
>       struct rrdprepo *rr;
>  
>       SLIST_FOREACH(rr, &rrdprepos, entry)
>               if (strcmp(dir, rr->basedir) == 0) {
>                       if (rr->state == REPO_FAILED)
> -                             return NULL;
> -                     return rr;
> +                             return 0;
> +                     return 1;

perhaps save a few lines:

                if (strcmp(dir, rr->basedir) == 0)
                        return rr->state != REPO_FAILED;


>               }
>  
> -     return NULL;
> +     return 0;
> +}
> +
> +/*
> + * Check if the URI is actually covered by one of the repositories
> + * that depend on this RRDP repository.
> + * Returns 1 if the URI is valid, 0 if no repouri matches the URI.
> + */
> +static int
> +rrdp_uri_valid(struct rrdprepo *rr, const char *uri)
> +{
> +     struct repo *rp;
> +
> +     SLIST_FOREACH(rp, &repos, entry) {
> +             if (rp->rrdp != rr)
> +                     continue;
> +             if (strncmp(uri, rp->repouri, strlen(rp->repouri)) == 0)
> +                     return 1;
> +     }
> +     return 0;
>  }
>  
>  /*
> @@ -740,8 +717,9 @@ fail:
>  }
>  
>  static struct rrdprepo *
> -rrdp_get(const char *uri, int nofetch)
> +rrdp_get(const char *uri)
>  {
> +     struct rrdp_session state = { 0 };
>       struct rrdprepo *rr;
>  
>       SLIST_FOREACH(rr, &rrdprepos, entry)
> @@ -761,28 +739,24 @@ rrdp_get(const char *uri, int nofetch)
>               err(1, NULL);
>       rr->basedir = repo_dir(uri, "rrdp", 1);
>  
> -     RB_INIT(&rr->added);
>       RB_INIT(&rr->deleted);
>  
> -     if (noop || nofetch) {
> -             rr->state = REPO_DONE;
> -             logx("%s: using cache", rr->notifyuri);
> -             repo_done(rr, 0);
> -     } else {
> -             /* create base directory */
> -             if (mkpath(rr->basedir) == -1) {
> -                     warn("mkpath %s", rr->basedir);
> -                     rrdp_finish(rr->id, 0);
> -                     return rr;
> -             }
> -             if (rrdprepo_fetch(rr) == -1) {
> -                     rrdp_finish(rr->id, 0);
> -                     return rr;
> -             }
>  
> -             logx("%s: pulling from %s", rr->notifyuri, "network");
> +     /* create base directory */
> +     if (mkpath(rr->basedir) == -1) {
> +             warn("mkpath %s", rr->basedir);
> +             rrdp_finish(rr->id, 0);
> +             return rr;
>       }
>  
> +     /* parse state and start the sync */
> +     rrdp_parse_state(rr, &state);
> +     rrdp_fetch(rr->id, rr->notifyuri, rr->notifyuri, &state);
> +     free(state.session_id);
> +     free(state.last_mod);
> +
> +     logx("%s: pulling from %s", rr->notifyuri, "network");
> +
>       return rr;
>  }
>  
> @@ -800,7 +774,6 @@ rrdp_clear(unsigned int id)
>  
>       /* remove rrdp repository contents */
>       remove_contents(rr->basedir);
> -     remove_contents(rr->temp);
>  }
>  
>  /*
> @@ -816,8 +789,8 @@ rrdp_handle_file(unsigned int id, enum p
>       struct rrdprepo *rr;
>       struct filepath *fp;
>       ssize_t s;
> -     char *fn;
> -     int fd = -1;
> +     char *fn = NULL;
> +     int fd = -1, try = 0;
>  
>       rr = rrdp_find(id);
>       if (rr == NULL)
> @@ -825,21 +798,20 @@ rrdp_handle_file(unsigned int id, enum p
>       if (rr->state == REPO_FAILED)
>               return -1;
>  
> +     /* check hash of original file for updates and deletes */
>       if (pt == PUB_UPD || pt == PUB_DEL) {
>               if (filepath_exists(&rr->deleted, uri)) {
>                       warnx("%s: already deleted", uri);
>                       return 0;
>               }
> -             fp = filepath_find(&rr->added, uri);
> -             if (fp == NULL) {
> -                     if ((fn = rrdp_filename(rr, uri, 0)) == NULL)
> -                             return 0;
> -             } else {
> -                     filepath_put(&rr->added, fp);
> -                     if ((fn = rrdp_filename(rr, uri, 1)) == NULL)
> +             /* try to open file first in rrdp then in valid repo */
> +             do {
> +                     free(fn);
> +                     if ((fn = rrdp_filename(rr, uri, try++)) == NULL)
>                               return 0;
> -             }
> -             fd = open(fn, O_RDONLY);
> +                     fd = open(fn, O_RDONLY);
> +             } while (fd == -1 && try < 2);
> +
>               if (!valid_filehash(fd, hash, hlen)) {
>                       warnx("%s: bad file digest for %s", rr->notifyuri, fn);
>                       free(fn);
> @@ -848,6 +820,7 @@ rrdp_handle_file(unsigned int id, enum p
>               free(fn);
>       }
>  
> +     /* write new content or mark uri as deleted. */
>       if (pt == PUB_DEL) {
>               filepath_add(&rr->deleted, uri);
>       } else {
> @@ -855,8 +828,8 @@ rrdp_handle_file(unsigned int id, enum p
>               if (fp != NULL)
>                       filepath_put(&rr->deleted, fp);
>  
> -             /* add new file to temp dir */
> -             if ((fn = rrdp_filename(rr, uri, 1)) == NULL)
> +             /* add new file to rrdp dir */
> +             if ((fn = rrdp_filename(rr, uri, 0)) == NULL)
>                       return 0;
>  
>               if (repo_mkpath(fn) == -1)
> @@ -876,7 +849,6 @@ rrdp_handle_file(unsigned int id, enum p
>               if ((size_t)s != dlen)  /* impossible */
>                       errx(1, "short write %s", fn);
>               free(fn);
> -             filepath_add(&rr->added, uri);
>       }
>  
>       return 1;
> @@ -890,66 +862,6 @@ fail:
>  }
>  
>  /*
> - * Initiate a RRDP sync, create the required temporary directory and
> - * parse a possible state file before sending the request to the RRDP 
> process.
> - */
> -static int
> -rrdprepo_fetch(struct rrdprepo *rr)
> -{
> -     struct rrdp_session state = { 0 };
> -
> -     if (asprintf(&rr->temp, "%s.XXXXXXXX", rr->basedir) == -1)
> -             err(1, NULL);
> -     if (mkdtemp(rr->temp) == NULL) {
> -             warn("mkdtemp %s", rr->temp);
> -             return -1;
> -     }
> -
> -     rrdp_parse_state(rr, &state);
> -     rrdp_fetch(rr->id, rr->notifyuri, rr->notifyuri, &state);
> -
> -     free(state.session_id);
> -     free(state.last_mod);
> -
> -     return 0;
> -}
> -
> -static int
> -rrdp_merge_repo(struct rrdprepo *rr)
> -{
> -     struct filepath *fp, *nfp;
> -     char *fn, *rfn;
> -
> -     RB_FOREACH_SAFE(fp, filepath_tree, &rr->added, nfp) {
> -             fn = rrdp_filename(rr, fp->file, 1);
> -             rfn = rrdp_filename(rr, fp->file, 0);
> -
> -             if (fn == NULL || rfn == NULL)
> -                     errx(1, "bad filepath");        /* should not happen */
> -
> -             if (repo_mkpath(rfn) == -1) {
> -                     goto fail;
> -             }
> -
> -             if (rename(fn, rfn) == -1) {
> -                     warn("rename %s", rfn);
> -                     goto fail;
> -             }
> -
> -             free(rfn);
> -             free(fn);
> -             filepath_put(&rr->added, fp);
> -     }
> -
> -     return 1;
> -
> -fail:
> -     free(rfn);
> -     free(fn);
> -     return 0;
> -}
> -
> -/*
>   * RSYNC sync finished, either with or without success.
>   */
>  void
> @@ -993,6 +905,8 @@ rsync_finish(unsigned int id, int ok)
>                   rr->basedir);
>               stats.rsync_fails++;
>               rr->state = REPO_FAILED;
> +             /* clear rsync repo since it failed */
> +             remove_contents(rr->basedir);
>       }
>  
>       repo_done(rr, ok);
> @@ -1013,19 +927,20 @@ rrdp_finish(unsigned int id, int ok)
>       if (rr->state != REPO_LOADING)
>               return;
>  
> -     if (ok && rrdp_merge_repo(rr)) {
> +     if (ok) {
>               logx("%s: loaded from network", rr->notifyuri);
>               stats.rrdp_repos++;
>               rr->state = REPO_DONE;
> -             repo_done(rr, ok);
>       } else {
>               logx("%s: load from network failed, fallback to rsync",
>                   rr->notifyuri);
>               stats.rrdp_fails++;
>               rr->state = REPO_FAILED;
> -             remove_contents(rr->temp);
> -             repo_done(rr, 0);
> +             /* clear the RRDP repo since it failed */
> +             remove_contents(rr->basedir);
>       }
> +
> +     repo_done(rr, ok);
>  }
>  
>  /*
> @@ -1082,6 +997,9 @@ ta_lookup(int id, struct tal *tal)
>  {
>       struct repo     *rp;
>  
> +     if (tal->urisz == 0)
> +             errx(1, "TAL %s has no URI", tal->descr);
> +
>       /* Look up in repository table. (Lookup should actually fail here) */
>       SLIST_FOREACH(rp, &repos, entry) {
>               if (strcmp(rp->repouri, tal->descr) == 0)
> @@ -1089,11 +1007,24 @@ ta_lookup(int id, struct tal *tal)
>       }
>  
>       rp = repo_alloc(id);
> +     rp->basedir = repo_dir(tal->descr, "ta", 0);
>       if ((rp->repouri = strdup(tal->descr)) == NULL)
>               err(1, NULL);
>  
> +     /* try to create base directory */
> +     if (mkpath(rp->basedir) == -1)
> +             warn("mkpath %s", rp->basedir);
> +
> +     /* check if sync disabled ... */
> +     if (noop) {
> +             logx("ta/%s: using cache", rp->repouri);
> +             entityq_flush(&rp->queue, rp);
> +             return rp;
> +     }
> +
>       rp->ta = ta_get(tal);
>  
> +     /* need to check if it was already loaded */
>       if (repo_state(rp) != REPO_LOADING)
>               entityq_flush(&rp->queue, rp);
>  
> @@ -1130,6 +1061,7 @@ repo_lookup(int talid, const char *uri, 
>       }
>  
>       rp = repo_alloc(talid);
> +     rp->basedir = repo_dir(repouri, "valid", 0);
>       rp->repouri = repouri;
>       if (notify != NULL)
>               if ((rp->notifyuri = strdup(notify)) == NULL)
> @@ -1141,12 +1073,24 @@ repo_lookup(int talid, const char *uri, 
>               nofetch = 1;
>       }
>  
> -     /* try RRDP first if available */
> +     /* try to create base directory */
> +     if (mkpath(rp->basedir) == -1)
> +             warn("mkpath %s", rp->basedir);
> +
> +     /* check if sync disabled ... */
> +     if (noop || nofetch) {
> +             logx("%s: using cache", rp->basedir);
> +             entityq_flush(&rp->queue, rp);
> +             return rp;
> +     }
> +
> +     /* ... else try RRDP first if available then rsync */
>       if (notify != NULL)
> -             rp->rrdp = rrdp_get(notify, nofetch);
> +             rp->rrdp = rrdp_get(notify);
>       if (rp->rrdp == NULL)
> -             rp->rsync = rsync_get(uri, nofetch);
> +             rp->rsync = rsync_get(uri, rp->basedir);
>  
> +     /* need to check if it was already loaded */
>       if (repo_state(rp) != REPO_LOADING)
>               entityq_flush(&rp->queue, rp);
>  
> @@ -1169,24 +1113,29 @@ repo_byid(unsigned int id)
>  }
>  
>  /*
> - * Return the repository base directory.
> + * Return the repository base or alternate directory.
>   * Returned string must be freed by caller.
>   */
>  char *
> -repo_basedir(const struct repo *rp)
> +repo_basedir(const struct repo *rp, int wantvalid)
>  {
> -     char *path;
> +     char *path = NULL;
>  
> -     if (rp->ta) {
> -             if ((path = strdup(rp->ta->basedir)) == NULL)
> -                     err(1, NULL);
> -     } else if (rp->rsync) {
> -             if ((path = strdup(rp->rsync->basedir)) == NULL)
> +     if (wantvalid == 0) {

To match the style used elsewhere:

        if (!wantvalid) {
                if (rp->ta != NULL) {

etc


> +             if (rp->ta) {
> +                     if ((path = strdup(rp->ta->basedir)) == NULL)
> +                             err(1, NULL);
> +             } else if (rp->rsync) {
> +                     if ((path = strdup(rp->rsync->basedir)) == NULL)
> +                             err(1, NULL);
> +             } else if (rp->rrdp) {
> +                     path = rrdp_filename(rp->rrdp, rp->repouri, 0);
> +             } else
> +                     path = NULL;    /* only valid repo available */
> +     } else if (rp->basedir != NULL) {
> +             if ((path = strdup(rp->basedir)) == NULL)
>                       err(1, NULL);
> -     } else if (rp->rrdp) {
> -             path = rrdp_filename(rp->rrdp, rp->repouri, 0);
> -     } else
> -             errx(1, "%s: bad repo", rp->repouri);
> +     }
>  
>       return path;
>  }
> @@ -1289,64 +1238,173 @@ add_to_del(char **del, size_t *dsz, char
>       return del;
>  }
>  
> +/*
> + * Delayed delete of files from RRDP. Since RRDP has no security built-in
> + * this code needs to check if this RRDP repository is actually allowed to
> + * remove the file referenced by the URI.
> + */
>  static char **
> -repo_rrdp_cleanup(struct filepath_tree *tree, struct rrdprepo *rr,
> -    char **del, size_t *delsz)
> +repo_cleanup_rrdp(struct filepath_tree *tree, char **del, size_t *delsz)
>  {
> +     struct rrdprepo *rr;
>       struct filepath *fp, *nfp;
>       char *fn;
> -
> -     RB_FOREACH_SAFE(fp, filepath_tree, &rr->deleted, nfp) {
> -             fn = rrdp_filename(rr, fp->file, 0);
> -             /* temp dir will be cleaned up by repo_cleanup() */
> -
> -             if (fn == NULL)
> -                     errx(1, "bad filepath");        /* should not happen */
> -
> -             if (!filepath_exists(tree, fn))
> +     
> +     SLIST_FOREACH(rr, &rrdprepos, entry) {
> +             RB_FOREACH_SAFE(fp, filepath_tree, &rr->deleted, nfp) {
> +                     if (!rrdp_uri_valid(rr, fp->file)) {
> +                             warnx("%s: external URI %s", rr->notifyuri,
> +                                 fp->file);
> +                             filepath_put(&rr->deleted, fp);
> +                             continue;
> +                     }
> +                     /* try to remove file from rrdp repo ... */
> +                     fn = rrdp_filename(rr, fp->file, 0);
>                       del = add_to_del(del, delsz, fn);
> -             else
> -                     warnx("%s: referenced file supposed to be deleted", fn);
> +                     free(fn);
>  
> -             free(fn);
> -             filepath_put(&rr->deleted, fp);
> +                     /* ... and from the valid repository if unused. */
> +                     fn = rrdp_filename(rr, fp->file, 1);
> +                     if (!filepath_exists(tree, fn))
> +                             del = add_to_del(del, delsz, fn);
> +                     else
> +                             warnx("%s: referenced file supposed to be "
> +                                 "deleted", fn);
> +
> +                     free(fn);
> +                     filepath_put(&rr->deleted, fp);
> +             }
>       }
>  
>       return del;
>  }
>  
> +/*
> + * All files in tree are valid and should be moved to the valid repository
> + * if not already there. Rename the files to the new path and readd the
> + * filepath entry with the new path if successful.
> + */
> +static void
> +repo_move_valid(struct filepath_tree *tree)
> +{
> +     struct filepath *fp, *nfp;
> +     size_t rsyncsz = strlen("rsync/");
> +     size_t rrdpsz = strlen("rrdp/");
> +     char *fn, *base;
> +
> +     RB_FOREACH_SAFE(fp, filepath_tree, tree, nfp) {
> +             if (strncmp(fp->file, "rsync/", rsyncsz) != 0 &&
> +                 strncmp(fp->file, "rrdp/", rrdpsz) != 0)
> +                     continue; /* not a temporary file path */
> +
> +             if (strncmp(fp->file, "rsync/", rsyncsz) == 0) {
> +                     if (asprintf(&fn, "valid/%s", fp->file + rsyncsz) == -1)
> +                             err(1, NULL);
> +             } else {
> +                     base = strchr(fp->file + rrdpsz, '/');
> +                     assert(base != NULL);
> +                     if (asprintf(&fn, "valid/%s", base + 1) == -1)
> +                             err(1, NULL);
> +             }
> +
> +             if (repo_mkpath(fn) == -1) {
> +                     free(fn);
> +                     continue;
> +             }
> +
> +             if (rename(fp->file, fn) == -1) {
> +                     warn("rename %s", fn);
> +                     free(fn);
> +                     continue;
> +             }
> +
> +             /* switch filepath node to new path */
> +             RB_REMOVE(filepath_tree, tree, fp);
> +             free(fp->file);
> +             fp->file = fn;
> +             if (RB_INSERT(filepath_tree, tree, fp) != NULL)
> +                     errx(1, "both possibilities of file present");
> +     }
> +}
> +
> +#define      BASE_DIR        (void *)0x62617365
> +#define      RSYNC_DIR       (void *)0x73796e63
> +#define      RRDP_DIR        (void *)0x52524450
> +
>  void
>  repo_cleanup(struct filepath_tree *tree)
>  {
>       size_t i, cnt, delsz = 0, dirsz = 0;
>       char **del = NULL, **dir = NULL;
> -     char *argv[4] = { "ta", "rsync", "rrdp", NULL };
> -     struct rrdprepo *rr;
> +     char *argv[5] = { "ta", "rsync", "rrdp", "valid", NULL };
>       FTS *fts;
>       FTSENT *e;
>  
> +     /* first move temp files which have been used to valid dir */
> +     repo_move_valid(tree);
> +     /* then delete files requested by rrdp */ 
> +     del = repo_cleanup_rrdp(tree, del, &delsz);
> +
>       if ((fts = fts_open(argv, FTS_PHYSICAL | FTS_NOSTAT, NULL)) == NULL)
>               err(1, "fts_open");
>       errno = 0;
>       while ((e = fts_read(fts)) != NULL) {
>               switch (e->fts_info) {
>               case FTS_NSOK:
> -                     if (!filepath_exists(tree, e->fts_path))
> +                     /* handle rrdp .state files explicitly */
> +                     if (e->fts_parent->fts_pointer == RRDP_DIR &&
> +                         e->fts_level == 2 &&
> +                         strcmp(e->fts_name, ".state") == 0) {
> +                             e->fts_parent->fts_number++;
> +                     } else if (filepath_exists(tree, e->fts_path)) {
> +                             e->fts_parent->fts_number++;
> +                     } else if (e->fts_parent->fts_pointer == RRDP_DIR) {
> +                             /* can't delete these extra files */
> +                             e->fts_parent->fts_number++;
> +                             stats.extra_files++;
> +                             if (verbose > 1)
> +                                     logx("superfluous %s", e->fts_path);
> +                     } else {
> +                             if (e->fts_parent->fts_pointer == RSYNC_DIR) {
> +                                     /* no need to keep rsync files */
> +                                     stats.extra_files++;
> +                                     if (verbose > 1)
> +                                             logx("superfluous %s",
> +                                                 e->fts_path);
> +                             }
>                               del = add_to_del(del, &delsz,
>                                   e->fts_path);
> +                     }
>                       break;
>               case FTS_D:
> -                     /* special cleanup for rrdp directories */
> -                     if ((rr = rrdp_basedir(e->fts_path)) != NULL) {
> -                             del = repo_rrdp_cleanup(tree, rr, del, &delsz);
> -                             if (fts_set(fts, e, FTS_SKIP) == -1)
> -                                     err(1, "fts_set");
> +                     if (e->fts_level == FTS_ROOTLEVEL) {
> +                             if (strcmp("rsync", e->fts_path) == 0)
> +                                     e->fts_pointer = RSYNC_DIR;
> +                             else if (strcmp("rrdp", e->fts_path) == 0)
> +                                     e->fts_pointer = RRDP_DIR;
> +                             else
> +                                     e->fts_pointer = BASE_DIR;
> +                     } else
> +                             e->fts_pointer = e->fts_parent->fts_pointer;
> +
> +                     /*
> +                      * special handling for rrdp directories,
> +                      * clear them if they are not used anymore but
> +                      * only if rrdp is active.
> +                      */
> +                     if (e->fts_pointer == RRDP_DIR && !noop && rrdpon &&
> +                         e->fts_level == 1) {
> +                             if (!rrdp_is_active(e->fts_path))
> +                                     e->fts_pointer = NULL;
>                       }
>                       break;
>               case FTS_DP:
> -                     if (!filepath_dir_exists(tree, e->fts_path))
> -                             dir = add_to_del(dir, &dirsz,
> -                                 e->fts_path);
> +                     if (e->fts_level == FTS_ROOTLEVEL)
> +                             break;
> +                     if (e->fts_number == 0)
> +                             dir = add_to_del(dir, &dirsz, e->fts_path);
> +
> +                     e->fts_parent->fts_number += e->fts_number;
>                       break;
>               case FTS_SL:
>               case FTS_SLNONE:
> @@ -1356,8 +1414,7 @@ repo_cleanup(struct filepath_tree *tree)
>               case FTS_NS:
>               case FTS_ERR:
>                       if (e->fts_errno == ENOENT &&
> -                         (strcmp(e->fts_path, "rsync") == 0 ||
> -                         strcmp(e->fts_path, "rrdp") == 0))
> +                         e->fts_level == FTS_ROOTLEVEL)
>                               continue;
>                       warnx("fts_read %s: %s", e->fts_path,
>                           strerror(e->fts_errno));
> @@ -1388,7 +1445,7 @@ repo_cleanup(struct filepath_tree *tree)
>               free(del[i]);
>       }
>       free(del);
> -     stats.del_files = cnt;
> +     stats.del_files += cnt;
>  
>       cnt = 0;
>       for (i = 0; i < dirsz; i++) {
> @@ -1399,7 +1456,7 @@ repo_cleanup(struct filepath_tree *tree)
>               free(dir[i]);
>       }
>       free(dir);
> -     stats.del_dirs = cnt;
> +     stats.del_dirs += cnt;
>  }
>  
>  void
> @@ -1418,6 +1475,9 @@ repo_free(void)
>       rsync_free();
>  }
>  
> +/*
> + * Remove all files and directories under base but do not remove base itself.
> + */
>  static void
>  remove_contents(char *base)
>  {
> 

Reply via email to