On Thu, Jan 13, 2022 at 05:05:57PM +0100, Claudio Jeker wrote:
> This diff adds a new cache subdir called "valid". This is the place where
> all verified and good files are stored after a run. It makes -n work a lot
> better since -n will now only look at what's inside "valid" and ignore
> "rsync" and "rrdp".
>
> The trust anchors are still stored in "ta" even if valid.
> The rsync repo will only hold temporary files and be empty otherwise.
> The rrdp repo still may contain some superfluous files that people
> included in their snapshots. Not keeping them could result in extra
> snapshot downloads since delta updates could refer to these files.
> Since there is a valid repo, rrdp no longer needs an additional temp
> directory for its sync.
>
> With this rsync will use the --compare-dest feature and use the valid
> cache as a base.
>
> The file cleanup at the end got more complex. There is now
> repo_move_valid() and repo_cleanup_rrdp() to do two initial steps of
> cleanup before the tree traversal starts. The fts function now uses
> some fts features to find a) empty directories and b) superfluous files.
>
> I think the code changes outside of repo.c should be straight forward.
>
> To the rpki connoisseur, this currently only implements a simple newest
> file wins startegy. The handling of .mft and their serial number will
> follow once this is in.
I have made a first pass over this. I will need to sleep on this and
think through the repo.c changes in more detail with a fresh head.
Couple leaks and a few stylistic remarks noted inline.
> --
> :wq Claudio
>
> ? obj
> Index: extern.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
> retrieving revision 1.103
> diff -u -p -r1.103 extern.h
> --- extern.h 13 Jan 2022 13:46:03 -0000 1.103
> +++ extern.h 13 Jan 2022 15:50:40 -0000
> @@ -341,7 +341,7 @@ enum publish_type {
> struct entity {
> TAILQ_ENTRY(entity) entries;
> char *path; /* path relative to repository */
> - char *file; /* filename */
> + char *file; /* filename or valid repo path */
> unsigned char *data; /* optional data blob */
> size_t datasz; /* length of optional data blob */
> unsigned int repoid; /* repository identifier */
> @@ -380,6 +380,7 @@ struct stats {
> size_t vrps; /* total number of vrps */
> size_t uniqs; /* number of unique vrps */
> size_t del_files; /* number of files removed in cleanup */
> + size_t extra_files; /* number of superfluous files */
> size_t del_dirs; /* number of directories removed in cleanup */
> size_t brks; /* number of BGPsec Router Key (BRK) certificates */
> struct timeval elapsed_time;
> @@ -506,7 +507,7 @@ void rrdp_clear(unsigned int);
> void rrdp_save_state(unsigned int, struct rrdp_session *);
> int rrdp_handle_file(unsigned int, enum publish_type, char *,
> char *, size_t, char *, size_t);
> -char *repo_basedir(const struct repo *);
> +char *repo_basedir(const struct repo *, int);
> unsigned int repo_id(const struct repo *);
> const char *repo_uri(const struct repo *);
> struct repo *ta_lookup(int, struct tal *);
> @@ -520,7 +521,8 @@ void rsync_finish(unsigned int, int);
> void http_finish(unsigned int, enum http_result, const char *);
> void rrdp_finish(unsigned int, int);
>
> -void rsync_fetch(unsigned int, const char *, const char *);
> +void rsync_fetch(unsigned int, const char *, const char *,
> + const char *);
> void http_fetch(unsigned int, const char *, const char *, int);
> void rrdp_fetch(unsigned int, const char *, const char *,
> struct rrdp_session *);
> Index: main.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
> retrieving revision 1.175
> diff -u -p -r1.175 main.c
> --- main.c 13 Jan 2022 13:18:41 -0000 1.175
> +++ main.c 13 Jan 2022 15:50:40 -0000
> @@ -151,17 +151,18 @@ entity_write_repo(struct repo *rp)
> struct ibuf *b;
> enum rtype type = RTYPE_REPO;
> unsigned int repoid;
> - char *path;
> + char *path, *altpath;
> int talid = 0;
>
> repoid = repo_id(rp);
> - path = repo_basedir(rp);
> + path = repo_basedir(rp, 0);
> + altpath = repo_basedir(rp, 1);
> b = io_new_buffer();
> io_simple_buffer(b, &type, sizeof(type));
> io_simple_buffer(b, &repoid, sizeof(repoid));
> io_simple_buffer(b, &talid, sizeof(talid));
> io_str_buffer(b, path);
> - io_str_buffer(b, NULL);
> + io_str_buffer(b, altpath);
> io_buf_buffer(b, NULL, 0);
> io_close_buffer(&procq, b);
> free(path);
Don't leak altpath
free(altpath);
> @@ -254,14 +255,15 @@ rrdp_fetch(unsigned int id, const char *
> * Request a repository sync via rsync URI to directory local.
> */
> void
> -rsync_fetch(unsigned int id, const char *uri, const char *local)
> +rsync_fetch(unsigned int id, const char *uri, const char *local,
> + const char *base)
> {
> struct ibuf *b;
>
> b = io_new_buffer();
> io_simple_buffer(b, &id, sizeof(id));
> io_str_buffer(b, local);
> - io_str_buffer(b, NULL);
> + io_str_buffer(b, base);
> io_str_buffer(b, uri);
> io_close_buffer(&rsyncq, b);
> }
> @@ -1246,8 +1248,8 @@ main(int argc, char *argv[])
> logx("Certificate revocation lists: %zu", stats.crls);
> logx("Ghostbuster records: %zu", stats.gbrs);
> logx("Repositories: %zu", stats.repos);
> - logx("Cleanup: removed %zu files, %zu directories",
> - stats.del_files, stats.del_dirs);
> + logx("Cleanup: removed %zu files, %zu directories, %zu superfluous",
> + stats.del_files, stats.del_dirs, stats.extra_files);
> logx("VRP Entries: %zu (%zu unique)", stats.vrps, stats.uniqs);
>
> /* Memory cleanup. */
> Index: output-json.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/rpki-client/output-json.c,v
> retrieving revision 1.22
> diff -u -p -r1.22 output-json.c
> --- output-json.c 4 Nov 2021 11:32:55 -0000 1.22
> +++ output-json.c 13 Jan 2022 15:50:40 -0000
> @@ -78,6 +78,7 @@ outputheader_json(FILE *out, struct stat
> "\t\t\"vrps\": %zu,\n"
> "\t\t\"uniquevrps\": %zu,\n"
> "\t\t\"cachedir_del_files\": %zu,\n"
> + "\t\t\"cachedir_superfluous_files\": %zu,\n"
> "\t\t\"cachedir_del_dirs\": %zu\n"
> "\t},\n\n",
> st->mfts, st->mfts_fail, st->mfts_stale,
> @@ -85,7 +86,7 @@ outputheader_json(FILE *out, struct stat
> st->gbrs,
> st->repos,
> st->vrps, st->uniqs,
> - st->del_files, st->del_dirs) < 0)
> + st->del_files, st->extra_files, st->del_dirs) < 0)
> return -1;
> return 0;
> }
> Index: parser.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/rpki-client/parser.c,v
> retrieving revision 1.36
> diff -u -p -r1.36 parser.c
> --- parser.c 13 Jan 2022 14:58:21 -0000 1.36
> +++ parser.c 13 Jan 2022 15:50:40 -0000
> @@ -50,6 +50,7 @@ static struct crl_tree crlt = RB_INITIA
> struct parse_repo {
> RB_ENTRY(parse_repo) entry;
> char *path;
> + char *validpath;
> unsigned int id;
> };
>
> @@ -72,20 +73,77 @@ repo_get(unsigned int id)
> }
>
> static void
> -repo_add(unsigned int id, char *path)
> +repo_add(unsigned int id, char *path, char *validpath)
> {
> struct parse_repo *rp;
>
> - if ((rp = malloc(sizeof(*rp))) == NULL)
> + if ((rp = calloc(1, sizeof(*rp))) == NULL)
> err(1, NULL);
> rp->id = id;
> - if ((rp->path = strdup(path)) == NULL)
> - err(1, NULL);
> + if (path)
The rest of the function tests against NULL, so I would do the same here, too
if (path != NULL)
> + if ((rp->path = strdup(path)) == NULL)
> + err(1, NULL);
> + if (validpath)
ditto
> + if ((rp->validpath = strdup(validpath)) == NULL)
> + err(1, NULL);
>
> if (RB_INSERT(repo_tree, &repos, rp) != NULL)
> errx(1, "repository already added: id %d, %s", id, path);
> }
>
> +/*
> + * Build access path to file based on repoid, path and file values.
> + * If wantalt == 1 the function can return NULL, if wantalt == 0 it
> + * can not fail.
> + */
> +static char *
> +parse_filepath(unsigned int repoid, const char *path, const char *file,
> + int wantalt)
> +{
> + struct parse_repo *rp;
> + char *fn, *repopath;
> +
> + /* build file path based on repoid, entity path and filename */
> + rp = repo_get(repoid);
> + if (rp == NULL) {
> + /* no repo so no alternative path. */
> + if (wantalt == 1)
I would treat wantalt as a Boolean
if (wantalt)
> + return NULL;
> +
> + if (path == NULL) {
> + if ((fn = strdup(file)) == NULL)
> + err(1, NULL);
wrong indent
> + } else {
> + if (asprintf(&fn, "%s/%s", path, file) == -1)
> + err(1, NULL);
> + }
> + } else {
> + if (wantalt == 1)
again
if (wantalt)
> + repopath = rp->validpath;
> + else if (rp->path == NULL)
trailing whitespace
> + repopath = rp->validpath;
> + else
> + repopath = rp->path;
Consider dropping the 'else if':
if (wantalt || rp->path == NULL)
repopath = rp->validpath;
else
repopath = rp->path;
> +
> + if (repopath == NULL)
> + return NULL;
> +
> + if (path == NULL) {
> + if (asprintf(&fn, "%s/%s", repopath, file) == -1)
> + err(1, NULL);
> + } else {
> + if (asprintf(&fn, "%s/%s/%s", repopath, path,
> + file) == -1)
> + err(1, NULL);
> + }
> + }
> + return fn;
> +}
> +
> +/*
> + * Callback for X509_verify_cert() to handle critical extensions in old
> + * LibreSSL libraries or OpenSSL libs without RFC3779 support.
> + */
> static int
> verify_cb(int ok, X509_STORE_CTX *store_ctx)
> {
> @@ -229,13 +287,8 @@ int
> mft_check(const char *fn, struct mft *p)
> {
> size_t i;
> - int fd, rc = 1;
> - char *cp, *h, *path = NULL;
> -
> - /* Check hash of file now, but first build path for it */
> - cp = strrchr(fn, '/');
> - assert(cp != NULL);
> - assert(cp - fn < INT_MAX);
> + int fd, try, rc = 1;
> + char *h, *path;
>
> for (i = 0; i < p->filesz; i++) {
> const struct mftfile *m = &p->files[i];
> @@ -246,15 +299,24 @@ mft_check(const char *fn, struct mft *p)
> free(h);
> continue;
> }
> - if (asprintf(&path, "%.*s/%s", (int)(cp - fn), fn,
> - m->file) == -1)
> - err(1, NULL);
> - fd = open(path, O_RDONLY);
> +
> + fd = -1;
> + try = 0;
> + path = NULL;
> + do {
> + free(path);
> + if ((path = parse_filepath(p->repoid, p->path, m->file,
> + try++)) == NULL)
> + break;
> + fd = open(path, O_RDONLY);
> + } while (fd == -1 && try < 2);
> +
> + free(path);
> +
> if (!valid_filehash(fd, m->hash, sizeof(m->hash))) {
> warnx("%s: bad message digest for %s", fn, m->file);
> rc = 0;
> }
> - free(path);
> }
>
> return rc;
> @@ -630,37 +692,6 @@ build_crls(const struct crl *crl, STACK_
> err(1, "sk_X509_CRL_push");
> }
>
> -static char *
> -parse_filepath(struct entity *entp)
> -{
> - struct parse_repo *rp;
> - char *file;
> -
> - /* build file path based on repoid, entity path and filename */
> - rp = repo_get(entp->repoid);
> - if (rp == NULL) {
> - if (entp->path == NULL) {
> - if ((file = strdup(entp->file)) == NULL)
> - err(1, NULL);
> - } else {
> - if (asprintf(&file, "%s/%s", entp->path,
> - entp->file) == -1)
> - err(1, NULL);
> - }
> - } else {
> - if (entp->path == NULL) {
> - if (asprintf(&file, "%s/%s", rp->path,
> - entp->file) == -1)
> - err(1, NULL);
> - } else {
> - if (asprintf(&file, "%s/%s/%s", rp->path,
> - entp->path, entp->file) == -1)
> - err(1, NULL);
> - }
> - }
> - return file;
> -}
> -
> static void
> parse_entity(struct entityq *q, struct msgbuf *msgq)
> {
> @@ -673,27 +704,41 @@ parse_entity(struct entityq *q, struct m
> unsigned char *f;
> size_t flen;
> char *file;
> - int c;
> + int c, try;
>
> while ((entp = TAILQ_FIRST(q)) != NULL) {
> TAILQ_REMOVE(q, entp, entries);
>
> /* handle RTYPE_REPO first */
> if (entp->type == RTYPE_REPO) {
> - repo_add(entp->repoid, entp->path);
> + repo_add(entp->repoid, entp->path, entp->file);
> entity_free(entp);
> continue;
> }
>
> f = NULL;
> - file = parse_filepath(entp);
> + file = parse_filepath(entp->repoid, entp->path, entp->file, 0);
> if (entp->type != RTYPE_TAL) {
> - f = load_file(file, &flen);
> - if (f == NULL)
> - warn("%s", file);
> + for (try = 0; try <= 1; try++) {
> + f = load_file(file, &flen);
> + if (f != NULL)
> + break;
Maybe this for loop could be factored into a helper function? I find the control
flow tricky to follow. In a function you could use early returns and perhaps
duplicate the three lines loading the file.
If you want to keep a loop, consider turning it into a do-while loop to match
what mft_check() and rrdp_handle_file() do more closely.
> +
> + if (errno == ENOENT && try == 0) {
> + char *fn;
> + fn = parse_filepath(entp->repoid,
> + entp->path, entp->file, 1);
> + if (fn != NULL) {
> + free(file);
> + file = fn;
> + continue;
> + }
> + }
> + warn("parse file %s", file);
> + }
> }
>
> - /* pass back at least type and filename */
> + /* pass back at least type, repoid and filename */
> b = io_new_buffer();
> io_simple_buffer(b, &entp->type, sizeof(entp->type));
> io_str_buffer(b, file);
> Index: repo.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/rpki-client/repo.c,v
> retrieving revision 1.24
> diff -u -p -r1.24 repo.c
> --- repo.c 13 Jan 2022 14:57:02 -0000 1.24
> +++ repo.c 13 Jan 2022 15:50:40 -0000
> @@ -58,8 +58,6 @@ struct rrdprepo {
> SLIST_ENTRY(rrdprepo) entry;
> char *notifyuri;
> char *basedir;
> - char *temp;
> - struct filepath_tree added;
> struct filepath_tree deleted;
> unsigned int id;
> enum repo_state state;
> @@ -92,6 +90,7 @@ struct repo {
> SLIST_ENTRY(repo) entry;
> char *repouri;
> char *notifyuri;
> + char *basedir;
repo_free() leaks both notifyuri and basedir.
> const struct rrdprepo *rrdp;
> const struct rsyncrepo *rsync;
> const struct tarepo *ta;
> @@ -105,7 +104,7 @@ static SLIST_HEAD(, repo) repos = SLIST_
> /* counter for unique repo id */
> unsigned int repoid;
>
> -static struct rsyncrepo *rsync_get(const char *, int);
> +static struct rsyncrepo *rsync_get(const char *, const char *);
> static void remove_contents(char *);
>
> /*
> @@ -168,26 +167,6 @@ filepath_exists(struct filepath_tree *tr
> }
>
> /*
> - * Return true if a filepath entry exists that starts with path.
> - */
> -static int
> -filepath_dir_exists(struct filepath_tree *tree, char *path)
> -{
> - struct filepath needle;
> - struct filepath *res;
> -
> - needle.file = path;
> - res = RB_NFIND(filepath_tree, tree, &needle);
> - while (res != NULL && strstr(res->file, path) == res->file) {
> - /* make sure that filepath actually is in that path */
> - if (res->file[strlen(path)] == '/')
> - return 1;
> - res = RB_NEXT(filepath_tree, tree, res);
> - }
> - return 0;
> -}
> -
> -/*
> * Remove entry from tree and free it.
> */
> static void
> @@ -284,7 +263,8 @@ repo_state(struct repo *rp)
> return rp->rsync->state;
> if (rp->rrdp)
> return rp->rrdp->state;
> - errx(1, "%s: bad repo", rp->repouri);
> + /* No backend so sync is by definition done. */
> + return REPO_DONE;
> }
>
> /*
> @@ -305,7 +285,8 @@ repo_done(const void *vp, int ok)
> if (!ok) {
> /* try to fall back to rsync */
> rp->rrdp = NULL;
> - rp->rsync = rsync_get(rp->repouri, 0);
> + rp->rsync = rsync_get(rp->repouri,
> + rp->basedir);
> /* need to check if it was already loaded */
> if (repo_state(rp) != REPO_LOADING)
> entityq_flush(&rp->queue, rp);
> @@ -362,7 +343,7 @@ ta_fetch(struct tarepo *tr)
> * Create destination location.
> * Build up the tree to this point.
> */
> - rsync_fetch(tr->id, tr->uri[tr->uriidx], tr->basedir);
> + rsync_fetch(tr->id, tr->uri[tr->uriidx], tr->basedir, NULL);
> } else {
> int fd;
>
> @@ -387,9 +368,6 @@ ta_get(struct tal *tal)
>
> /* no need to look for possible other repo */
>
> - if (tal->urisz == 0)
> - errx(1, "TAL %s has no URI", tal->descr);
> -
> if ((tr = calloc(1, sizeof(*tr))) == NULL)
> err(1, NULL);
> tr->id = ++repoid;
> @@ -405,17 +383,7 @@ ta_get(struct tal *tal)
> tal->urisz = 0;
> tal->uri = NULL;
>
> - if (noop) {
> - tr->state = REPO_DONE;
> - logx("ta/%s: using cache", tr->descr);
> - repo_done(tr, 0);
> - } else {
> - /* try to create base directory */
> - if (mkpath(tr->basedir) == -1)
> - warn("mkpath %s", tr->basedir);
> -
> - ta_fetch(tr);
> - }
> + ta_fetch(tr);
>
> return tr;
> }
> @@ -447,7 +415,7 @@ ta_free(void)
> }
>
> static struct rsyncrepo *
> -rsync_get(const char *uri, int nofetch)
> +rsync_get(const char *uri, const char *validdir)
> {
> struct rsyncrepo *rr;
> char *repo;
> @@ -470,22 +438,16 @@ rsync_get(const char *uri, int nofetch)
> rr->repouri = repo;
> rr->basedir = repo_dir(repo, "rsync", 0);
>
> - if (noop || nofetch) {
> - rr->state = REPO_DONE;
> - logx("%s: using cache", rr->basedir);
> - repo_done(rr, 0);
> - } else {
> - /* create base directory */
> - if (mkpath(rr->basedir) == -1) {
> - warn("mkpath %s", rr->basedir);
> - rsync_finish(rr->id, 0);
> - return rr;
> - }
> -
> - logx("%s: pulling from %s", rr->basedir, rr->repouri);
> - rsync_fetch(rr->id, rr->repouri, rr->basedir);
> + /* create base directory */
> + if (mkpath(rr->basedir) == -1) {
> + warn("mkpath %s", rr->basedir);
> + rsync_finish(rr->id, 0);
> + return rr;
> }
>
> + logx("%s: pulling from %s", rr->basedir, rr->repouri);
> + rsync_fetch(rr->id, rr->repouri, rr->basedir, validdir);
> +
> return rr;
> }
>
> @@ -513,25 +475,19 @@ rsync_free(void)
> }
> }
>
> -static int rrdprepo_fetch(struct rrdprepo *);
> -
> /*
> * Build local file name base on the URI and the rrdprepo info.
> */
> static char *
> -rrdp_filename(const struct rrdprepo *rr, const char *uri, int temp)
> +rrdp_filename(const struct rrdprepo *rr, const char *uri, int valid)
> {
> char *nfile;
> - char *dir = rr->basedir;
> -
> - if (temp)
> - dir = rr->temp;
> -
> - if (!valid_uri(uri, strlen(uri), "rsync://")) {
> - warnx("%s: bad URI %s", rr->basedir, uri);
> - return NULL;
> - }
> + const char *dir = rr->basedir;
>
> + if (valid)
> + dir = "valid";
> + if (!valid_uri(uri, strlen(uri), "rsync://"))
> + errx(1, "%s: bad URI %s", rr->basedir, uri);
> uri += strlen("rsync://"); /* skip proto */
> if (asprintf(&nfile, "%s/%s", dir, uri) == -1)
> err(1, NULL);
> @@ -575,28 +531,49 @@ rrdp_free(void)
>
> free(rr->notifyuri);
> free(rr->basedir);
> - free(rr->temp);
>
> - filepath_free(&rr->added);
> filepath_free(&rr->deleted);
>
> free(rr);
> }
> }
>
> -static struct rrdprepo *
> -rrdp_basedir(const char *dir)
> +/*
> + * Check if a directory is an active rrdp repository.
> + * Returns 1 if found else 0.
> + */
> +static int
> +rrdp_is_active(const char *dir)
> {
> struct rrdprepo *rr;
>
> SLIST_FOREACH(rr, &rrdprepos, entry)
> if (strcmp(dir, rr->basedir) == 0) {
> if (rr->state == REPO_FAILED)
> - return NULL;
> - return rr;
> + return 0;
> + return 1;
perhaps save a few lines:
if (strcmp(dir, rr->basedir) == 0)
return rr->state != REPO_FAILED;
> }
>
> - return NULL;
> + return 0;
> +}
> +
> +/*
> + * Check if the URI is actually covered by one of the repositories
> + * that depend on this RRDP repository.
> + * Returns 1 if the URI is valid, 0 if no repouri matches the URI.
> + */
> +static int
> +rrdp_uri_valid(struct rrdprepo *rr, const char *uri)
> +{
> + struct repo *rp;
> +
> + SLIST_FOREACH(rp, &repos, entry) {
> + if (rp->rrdp != rr)
> + continue;
> + if (strncmp(uri, rp->repouri, strlen(rp->repouri)) == 0)
> + return 1;
> + }
> + return 0;
> }
>
> /*
> @@ -740,8 +717,9 @@ fail:
> }
>
> static struct rrdprepo *
> -rrdp_get(const char *uri, int nofetch)
> +rrdp_get(const char *uri)
> {
> + struct rrdp_session state = { 0 };
> struct rrdprepo *rr;
>
> SLIST_FOREACH(rr, &rrdprepos, entry)
> @@ -761,28 +739,24 @@ rrdp_get(const char *uri, int nofetch)
> err(1, NULL);
> rr->basedir = repo_dir(uri, "rrdp", 1);
>
> - RB_INIT(&rr->added);
> RB_INIT(&rr->deleted);
>
> - if (noop || nofetch) {
> - rr->state = REPO_DONE;
> - logx("%s: using cache", rr->notifyuri);
> - repo_done(rr, 0);
> - } else {
> - /* create base directory */
> - if (mkpath(rr->basedir) == -1) {
> - warn("mkpath %s", rr->basedir);
> - rrdp_finish(rr->id, 0);
> - return rr;
> - }
> - if (rrdprepo_fetch(rr) == -1) {
> - rrdp_finish(rr->id, 0);
> - return rr;
> - }
>
> - logx("%s: pulling from %s", rr->notifyuri, "network");
> + /* create base directory */
> + if (mkpath(rr->basedir) == -1) {
> + warn("mkpath %s", rr->basedir);
> + rrdp_finish(rr->id, 0);
> + return rr;
> }
>
> + /* parse state and start the sync */
> + rrdp_parse_state(rr, &state);
> + rrdp_fetch(rr->id, rr->notifyuri, rr->notifyuri, &state);
> + free(state.session_id);
> + free(state.last_mod);
> +
> + logx("%s: pulling from %s", rr->notifyuri, "network");
> +
> return rr;
> }
>
> @@ -800,7 +774,6 @@ rrdp_clear(unsigned int id)
>
> /* remove rrdp repository contents */
> remove_contents(rr->basedir);
> - remove_contents(rr->temp);
> }
>
> /*
> @@ -816,8 +789,8 @@ rrdp_handle_file(unsigned int id, enum p
> struct rrdprepo *rr;
> struct filepath *fp;
> ssize_t s;
> - char *fn;
> - int fd = -1;
> + char *fn = NULL;
> + int fd = -1, try = 0;
>
> rr = rrdp_find(id);
> if (rr == NULL)
> @@ -825,21 +798,20 @@ rrdp_handle_file(unsigned int id, enum p
> if (rr->state == REPO_FAILED)
> return -1;
>
> + /* check hash of original file for updates and deletes */
> if (pt == PUB_UPD || pt == PUB_DEL) {
> if (filepath_exists(&rr->deleted, uri)) {
> warnx("%s: already deleted", uri);
> return 0;
> }
> - fp = filepath_find(&rr->added, uri);
> - if (fp == NULL) {
> - if ((fn = rrdp_filename(rr, uri, 0)) == NULL)
> - return 0;
> - } else {
> - filepath_put(&rr->added, fp);
> - if ((fn = rrdp_filename(rr, uri, 1)) == NULL)
> + /* try to open file first in rrdp then in valid repo */
> + do {
> + free(fn);
> + if ((fn = rrdp_filename(rr, uri, try++)) == NULL)
> return 0;
> - }
> - fd = open(fn, O_RDONLY);
> + fd = open(fn, O_RDONLY);
> + } while (fd == -1 && try < 2);
> +
> if (!valid_filehash(fd, hash, hlen)) {
> warnx("%s: bad file digest for %s", rr->notifyuri, fn);
> free(fn);
> @@ -848,6 +820,7 @@ rrdp_handle_file(unsigned int id, enum p
> free(fn);
> }
>
> + /* write new content or mark uri as deleted. */
> if (pt == PUB_DEL) {
> filepath_add(&rr->deleted, uri);
> } else {
> @@ -855,8 +828,8 @@ rrdp_handle_file(unsigned int id, enum p
> if (fp != NULL)
> filepath_put(&rr->deleted, fp);
>
> - /* add new file to temp dir */
> - if ((fn = rrdp_filename(rr, uri, 1)) == NULL)
> + /* add new file to rrdp dir */
> + if ((fn = rrdp_filename(rr, uri, 0)) == NULL)
> return 0;
>
> if (repo_mkpath(fn) == -1)
> @@ -876,7 +849,6 @@ rrdp_handle_file(unsigned int id, enum p
> if ((size_t)s != dlen) /* impossible */
> errx(1, "short write %s", fn);
> free(fn);
> - filepath_add(&rr->added, uri);
> }
>
> return 1;
> @@ -890,66 +862,6 @@ fail:
> }
>
> /*
> - * Initiate a RRDP sync, create the required temporary directory and
> - * parse a possible state file before sending the request to the RRDP
> process.
> - */
> -static int
> -rrdprepo_fetch(struct rrdprepo *rr)
> -{
> - struct rrdp_session state = { 0 };
> -
> - if (asprintf(&rr->temp, "%s.XXXXXXXX", rr->basedir) == -1)
> - err(1, NULL);
> - if (mkdtemp(rr->temp) == NULL) {
> - warn("mkdtemp %s", rr->temp);
> - return -1;
> - }
> -
> - rrdp_parse_state(rr, &state);
> - rrdp_fetch(rr->id, rr->notifyuri, rr->notifyuri, &state);
> -
> - free(state.session_id);
> - free(state.last_mod);
> -
> - return 0;
> -}
> -
> -static int
> -rrdp_merge_repo(struct rrdprepo *rr)
> -{
> - struct filepath *fp, *nfp;
> - char *fn, *rfn;
> -
> - RB_FOREACH_SAFE(fp, filepath_tree, &rr->added, nfp) {
> - fn = rrdp_filename(rr, fp->file, 1);
> - rfn = rrdp_filename(rr, fp->file, 0);
> -
> - if (fn == NULL || rfn == NULL)
> - errx(1, "bad filepath"); /* should not happen */
> -
> - if (repo_mkpath(rfn) == -1) {
> - goto fail;
> - }
> -
> - if (rename(fn, rfn) == -1) {
> - warn("rename %s", rfn);
> - goto fail;
> - }
> -
> - free(rfn);
> - free(fn);
> - filepath_put(&rr->added, fp);
> - }
> -
> - return 1;
> -
> -fail:
> - free(rfn);
> - free(fn);
> - return 0;
> -}
> -
> -/*
> * RSYNC sync finished, either with or without success.
> */
> void
> @@ -993,6 +905,8 @@ rsync_finish(unsigned int id, int ok)
> rr->basedir);
> stats.rsync_fails++;
> rr->state = REPO_FAILED;
> + /* clear rsync repo since it failed */
> + remove_contents(rr->basedir);
> }
>
> repo_done(rr, ok);
> @@ -1013,19 +927,20 @@ rrdp_finish(unsigned int id, int ok)
> if (rr->state != REPO_LOADING)
> return;
>
> - if (ok && rrdp_merge_repo(rr)) {
> + if (ok) {
> logx("%s: loaded from network", rr->notifyuri);
> stats.rrdp_repos++;
> rr->state = REPO_DONE;
> - repo_done(rr, ok);
> } else {
> logx("%s: load from network failed, fallback to rsync",
> rr->notifyuri);
> stats.rrdp_fails++;
> rr->state = REPO_FAILED;
> - remove_contents(rr->temp);
> - repo_done(rr, 0);
> + /* clear the RRDP repo since it failed */
> + remove_contents(rr->basedir);
> }
> +
> + repo_done(rr, ok);
> }
>
> /*
> @@ -1082,6 +997,9 @@ ta_lookup(int id, struct tal *tal)
> {
> struct repo *rp;
>
> + if (tal->urisz == 0)
> + errx(1, "TAL %s has no URI", tal->descr);
> +
> /* Look up in repository table. (Lookup should actually fail here) */
> SLIST_FOREACH(rp, &repos, entry) {
> if (strcmp(rp->repouri, tal->descr) == 0)
> @@ -1089,11 +1007,24 @@ ta_lookup(int id, struct tal *tal)
> }
>
> rp = repo_alloc(id);
> + rp->basedir = repo_dir(tal->descr, "ta", 0);
> if ((rp->repouri = strdup(tal->descr)) == NULL)
> err(1, NULL);
>
> + /* try to create base directory */
> + if (mkpath(rp->basedir) == -1)
> + warn("mkpath %s", rp->basedir);
> +
> + /* check if sync disabled ... */
> + if (noop) {
> + logx("ta/%s: using cache", rp->repouri);
> + entityq_flush(&rp->queue, rp);
> + return rp;
> + }
> +
> rp->ta = ta_get(tal);
>
> + /* need to check if it was already loaded */
> if (repo_state(rp) != REPO_LOADING)
> entityq_flush(&rp->queue, rp);
>
> @@ -1130,6 +1061,7 @@ repo_lookup(int talid, const char *uri,
> }
>
> rp = repo_alloc(talid);
> + rp->basedir = repo_dir(repouri, "valid", 0);
> rp->repouri = repouri;
> if (notify != NULL)
> if ((rp->notifyuri = strdup(notify)) == NULL)
> @@ -1141,12 +1073,24 @@ repo_lookup(int talid, const char *uri,
> nofetch = 1;
> }
>
> - /* try RRDP first if available */
> + /* try to create base directory */
> + if (mkpath(rp->basedir) == -1)
> + warn("mkpath %s", rp->basedir);
> +
> + /* check if sync disabled ... */
> + if (noop || nofetch) {
> + logx("%s: using cache", rp->basedir);
> + entityq_flush(&rp->queue, rp);
> + return rp;
> + }
> +
> + /* ... else try RRDP first if available then rsync */
> if (notify != NULL)
> - rp->rrdp = rrdp_get(notify, nofetch);
> + rp->rrdp = rrdp_get(notify);
> if (rp->rrdp == NULL)
> - rp->rsync = rsync_get(uri, nofetch);
> + rp->rsync = rsync_get(uri, rp->basedir);
>
> + /* need to check if it was already loaded */
> if (repo_state(rp) != REPO_LOADING)
> entityq_flush(&rp->queue, rp);
>
> @@ -1169,24 +1113,29 @@ repo_byid(unsigned int id)
> }
>
> /*
> - * Return the repository base directory.
> + * Return the repository base or alternate directory.
> * Returned string must be freed by caller.
> */
> char *
> -repo_basedir(const struct repo *rp)
> +repo_basedir(const struct repo *rp, int wantvalid)
> {
> - char *path;
> + char *path = NULL;
>
> - if (rp->ta) {
> - if ((path = strdup(rp->ta->basedir)) == NULL)
> - err(1, NULL);
> - } else if (rp->rsync) {
> - if ((path = strdup(rp->rsync->basedir)) == NULL)
> + if (wantvalid == 0) {
To match the style used elsewhere:
if (!wantvalid) {
if (rp->ta != NULL) {
etc
> + if (rp->ta) {
> + if ((path = strdup(rp->ta->basedir)) == NULL)
> + err(1, NULL);
> + } else if (rp->rsync) {
> + if ((path = strdup(rp->rsync->basedir)) == NULL)
> + err(1, NULL);
> + } else if (rp->rrdp) {
> + path = rrdp_filename(rp->rrdp, rp->repouri, 0);
> + } else
> + path = NULL; /* only valid repo available */
> + } else if (rp->basedir != NULL) {
> + if ((path = strdup(rp->basedir)) == NULL)
> err(1, NULL);
> - } else if (rp->rrdp) {
> - path = rrdp_filename(rp->rrdp, rp->repouri, 0);
> - } else
> - errx(1, "%s: bad repo", rp->repouri);
> + }
>
> return path;
> }
> @@ -1289,64 +1238,173 @@ add_to_del(char **del, size_t *dsz, char
> return del;
> }
>
> +/*
> + * Delayed delete of files from RRDP. Since RRDP has no security built-in
> + * this code needs to check if this RRDP repository is actually allowed to
> + * remove the file referenced by the URI.
> + */
> static char **
> -repo_rrdp_cleanup(struct filepath_tree *tree, struct rrdprepo *rr,
> - char **del, size_t *delsz)
> +repo_cleanup_rrdp(struct filepath_tree *tree, char **del, size_t *delsz)
> {
> + struct rrdprepo *rr;
> struct filepath *fp, *nfp;
> char *fn;
> -
> - RB_FOREACH_SAFE(fp, filepath_tree, &rr->deleted, nfp) {
> - fn = rrdp_filename(rr, fp->file, 0);
> - /* temp dir will be cleaned up by repo_cleanup() */
> -
> - if (fn == NULL)
> - errx(1, "bad filepath"); /* should not happen */
> -
> - if (!filepath_exists(tree, fn))
> +
> + SLIST_FOREACH(rr, &rrdprepos, entry) {
> + RB_FOREACH_SAFE(fp, filepath_tree, &rr->deleted, nfp) {
> + if (!rrdp_uri_valid(rr, fp->file)) {
> + warnx("%s: external URI %s", rr->notifyuri,
> + fp->file);
> + filepath_put(&rr->deleted, fp);
> + continue;
> + }
> + /* try to remove file from rrdp repo ... */
> + fn = rrdp_filename(rr, fp->file, 0);
> del = add_to_del(del, delsz, fn);
> - else
> - warnx("%s: referenced file supposed to be deleted", fn);
> + free(fn);
>
> - free(fn);
> - filepath_put(&rr->deleted, fp);
> + /* ... and from the valid repository if unused. */
> + fn = rrdp_filename(rr, fp->file, 1);
> + if (!filepath_exists(tree, fn))
> + del = add_to_del(del, delsz, fn);
> + else
> + warnx("%s: referenced file supposed to be "
> + "deleted", fn);
> +
> + free(fn);
> + filepath_put(&rr->deleted, fp);
> + }
> }
>
> return del;
> }
>
> +/*
> + * All files in tree are valid and should be moved to the valid repository
> + * if not already there. Rename the files to the new path and readd the
> + * filepath entry with the new path if successful.
> + */
> +static void
> +repo_move_valid(struct filepath_tree *tree)
> +{
> + struct filepath *fp, *nfp;
> + size_t rsyncsz = strlen("rsync/");
> + size_t rrdpsz = strlen("rrdp/");
> + char *fn, *base;
> +
> + RB_FOREACH_SAFE(fp, filepath_tree, tree, nfp) {
> + if (strncmp(fp->file, "rsync/", rsyncsz) != 0 &&
> + strncmp(fp->file, "rrdp/", rrdpsz) != 0)
> + continue; /* not a temporary file path */
> +
> + if (strncmp(fp->file, "rsync/", rsyncsz) == 0) {
> + if (asprintf(&fn, "valid/%s", fp->file + rsyncsz) == -1)
> + err(1, NULL);
> + } else {
> + base = strchr(fp->file + rrdpsz, '/');
> + assert(base != NULL);
> + if (asprintf(&fn, "valid/%s", base + 1) == -1)
> + err(1, NULL);
> + }
> +
> + if (repo_mkpath(fn) == -1) {
> + free(fn);
> + continue;
> + }
> +
> + if (rename(fp->file, fn) == -1) {
> + warn("rename %s", fn);
> + free(fn);
> + continue;
> + }
> +
> + /* switch filepath node to new path */
> + RB_REMOVE(filepath_tree, tree, fp);
> + free(fp->file);
> + fp->file = fn;
> + if (RB_INSERT(filepath_tree, tree, fp) != NULL)
> + errx(1, "both possibilities of file present");
> + }
> +}
> +
> +#define BASE_DIR (void *)0x62617365
> +#define RSYNC_DIR (void *)0x73796e63
> +#define RRDP_DIR (void *)0x52524450
> +
> void
> repo_cleanup(struct filepath_tree *tree)
> {
> size_t i, cnt, delsz = 0, dirsz = 0;
> char **del = NULL, **dir = NULL;
> - char *argv[4] = { "ta", "rsync", "rrdp", NULL };
> - struct rrdprepo *rr;
> + char *argv[5] = { "ta", "rsync", "rrdp", "valid", NULL };
> FTS *fts;
> FTSENT *e;
>
> + /* first move temp files which have been used to valid dir */
> + repo_move_valid(tree);
> + /* then delete files requested by rrdp */
> + del = repo_cleanup_rrdp(tree, del, &delsz);
> +
> if ((fts = fts_open(argv, FTS_PHYSICAL | FTS_NOSTAT, NULL)) == NULL)
> err(1, "fts_open");
> errno = 0;
> while ((e = fts_read(fts)) != NULL) {
> switch (e->fts_info) {
> case FTS_NSOK:
> - if (!filepath_exists(tree, e->fts_path))
> + /* handle rrdp .state files explicitly */
> + if (e->fts_parent->fts_pointer == RRDP_DIR &&
> + e->fts_level == 2 &&
> + strcmp(e->fts_name, ".state") == 0) {
> + e->fts_parent->fts_number++;
> + } else if (filepath_exists(tree, e->fts_path)) {
> + e->fts_parent->fts_number++;
> + } else if (e->fts_parent->fts_pointer == RRDP_DIR) {
> + /* can't delete these extra files */
> + e->fts_parent->fts_number++;
> + stats.extra_files++;
> + if (verbose > 1)
> + logx("superfluous %s", e->fts_path);
> + } else {
> + if (e->fts_parent->fts_pointer == RSYNC_DIR) {
> + /* no need to keep rsync files */
> + stats.extra_files++;
> + if (verbose > 1)
> + logx("superfluous %s",
> + e->fts_path);
> + }
> del = add_to_del(del, &delsz,
> e->fts_path);
> + }
> break;
> case FTS_D:
> - /* special cleanup for rrdp directories */
> - if ((rr = rrdp_basedir(e->fts_path)) != NULL) {
> - del = repo_rrdp_cleanup(tree, rr, del, &delsz);
> - if (fts_set(fts, e, FTS_SKIP) == -1)
> - err(1, "fts_set");
> + if (e->fts_level == FTS_ROOTLEVEL) {
> + if (strcmp("rsync", e->fts_path) == 0)
> + e->fts_pointer = RSYNC_DIR;
> + else if (strcmp("rrdp", e->fts_path) == 0)
> + e->fts_pointer = RRDP_DIR;
> + else
> + e->fts_pointer = BASE_DIR;
> + } else
> + e->fts_pointer = e->fts_parent->fts_pointer;
> +
> + /*
> + * special handling for rrdp directories,
> + * clear them if they are not used anymore but
> + * only if rrdp is active.
> + */
> + if (e->fts_pointer == RRDP_DIR && !noop && rrdpon &&
> + e->fts_level == 1) {
> + if (!rrdp_is_active(e->fts_path))
> + e->fts_pointer = NULL;
> }
> break;
> case FTS_DP:
> - if (!filepath_dir_exists(tree, e->fts_path))
> - dir = add_to_del(dir, &dirsz,
> - e->fts_path);
> + if (e->fts_level == FTS_ROOTLEVEL)
> + break;
> + if (e->fts_number == 0)
> + dir = add_to_del(dir, &dirsz, e->fts_path);
> +
> + e->fts_parent->fts_number += e->fts_number;
> break;
> case FTS_SL:
> case FTS_SLNONE:
> @@ -1356,8 +1414,7 @@ repo_cleanup(struct filepath_tree *tree)
> case FTS_NS:
> case FTS_ERR:
> if (e->fts_errno == ENOENT &&
> - (strcmp(e->fts_path, "rsync") == 0 ||
> - strcmp(e->fts_path, "rrdp") == 0))
> + e->fts_level == FTS_ROOTLEVEL)
> continue;
> warnx("fts_read %s: %s", e->fts_path,
> strerror(e->fts_errno));
> @@ -1388,7 +1445,7 @@ repo_cleanup(struct filepath_tree *tree)
> free(del[i]);
> }
> free(del);
> - stats.del_files = cnt;
> + stats.del_files += cnt;
>
> cnt = 0;
> for (i = 0; i < dirsz; i++) {
> @@ -1399,7 +1456,7 @@ repo_cleanup(struct filepath_tree *tree)
> free(dir[i]);
> }
> free(dir);
> - stats.del_dirs = cnt;
> + stats.del_dirs += cnt;
> }
>
> void
> @@ -1418,6 +1475,9 @@ repo_free(void)
> rsync_free();
> }
>
> +/*
> + * Remove all files and directories under base but do not remove base itself.
> + */
> static void
> remove_contents(char *base)
> {
>