From: Liu Yuan <tailai...@taobao.com> We allow group plug, group unplug, and disk failures during (un)plugging.
Also add disk information function for collie. Signed-off-by: Liu Yuan <tailai...@taobao.com> --- collie/collie.c | 2 +- include/internal_proto.h | 16 +++ include/sheepdog_proto.h | 2 + sheep/md.c | 263 ++++++++++++++++++++++++++++++++-------------- sheep/ops.c | 45 ++++++++ sheep/sheep_priv.h | 5 +- sheep/store.c | 3 +- 7 files changed, 253 insertions(+), 83 deletions(-) diff --git a/collie/collie.c b/collie/collie.c index 08c78eb..19085b4 100644 --- a/collie/collie.c +++ b/collie/collie.c @@ -19,7 +19,7 @@ #include "util.h" static const char program_name[] = "collie"; -const char *sdhost = "localhost"; +const char *sdhost = "127.0.0.1"; int sdport = SD_LISTEN_PORT; bool highlight = true; bool raw_output; diff --git a/include/internal_proto.h b/include/internal_proto.h index 6f1fdb3..c43855b 100644 --- a/include/internal_proto.h +++ b/include/internal_proto.h @@ -69,6 +69,9 @@ #define SD_OP_FLUSH_PEER 0xAE #define SD_OP_NOTIFY_VDI_ADD 0xAF #define SD_OP_DELETE_CACHE 0xB0 +#define SD_OP_MD_INFO 0xB1 +#define SD_OP_MD_PLUG 0xB2 +#define SD_OP_MD_UNPLUG 0xB3 /* internal flags for hdr.flags, must be above 0x80 */ #define SD_FLAG_CMD_RECOVERY 0x0080 @@ -229,4 +232,17 @@ struct vdi_op_message { uint8_t data[0]; }; +struct md_info { + int idx; + uint64_t size; + uint64_t used; + char path[PATH_MAX]; +}; + +#define MD_MAX_DISK 64 /* FIXME remove roof and make it dynamic */ +struct sd_md_info { + struct md_info disk[MD_MAX_DISK]; + int nr; +}; + #endif /* __INTERNAL_PROTO_H__ */ diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h index fe3738b..94baede 100644 --- a/include/sheepdog_proto.h +++ b/include/sheepdog_proto.h @@ -13,6 +13,8 @@ #include <inttypes.h> #include <stdint.h> +#include <linux/limits.h> + #include "util.h" #define SD_PROTO_VER 0x02 diff --git a/sheep/md.c b/sheep/md.c index 821a391..124f2ba 100644 --- a/sheep/md.c +++ b/sheep/md.c @@ -21,11 +21,12 @@ #include <sys/xattr.h> #include <dirent.h> #include <pthread.h> +#include <string.h> 
#include "sheep_priv.h" +#include "util.h" #define MD_DEFAULT_VDISKS 128 -#define MD_MAX_DISK 64 /* FIXME remove roof and make it dynamic */ #define MD_MAX_VDISK (MD_MAX_DISK * MD_DEFAULT_VDISKS) struct disk { @@ -123,20 +124,33 @@ static inline struct vdisk *oid_to_vdisk(uint64_t oid) return oid_to_vdisk_from(md_vds, md_nr_vds, oid); } -int md_init_disk(char *path) +static int path_to_disk_idx(char *path) { + int i; + + for (i = 0; i < md_nr_disks; i++) + if (strcmp(md_disks[i].path, path) == 0) + return i; + + return -1; +} + +void md_add_disk(char *path) +{ + if (path_to_disk_idx(path) != -1) { + sd_eprintf("duplicate path %s", path); + return; + } + md_nr_disks++; - if (xmkdir(path, def_dmode) < 0) - panic("%s, %m", path); pstrcpy(md_disks[md_nr_disks - 1].path, PATH_MAX, path); - sd_iprintf("%s added to md, nr %d", md_disks[md_nr_disks - 1].path, + sd_iprintf("%s, nr %d", md_disks[md_nr_disks - 1].path, md_nr_disks); - return 0; } static inline void calculate_vdisks(struct disk *disks, int nr_disks, - uint64_t total) + uint64_t total) { uint64_t avg_size = total / nr_disks; float factor; @@ -154,6 +168,79 @@ static inline void calculate_vdisks(struct disk *disks, int nr_disks, #define MDNAME "user.md.size" #define MDSIZE sizeof(uint64_t) +static int get_total_object_size(uint64_t oid, char *ignore, void *total) +{ + uint64_t *t = total; + *t += get_objsize(oid); + + return SD_RES_SUCCESS; +} + +/* If cleanup is true, temporary objects will be removed */ +static int for_each_object_in_path(char *path, + int (*func)(uint64_t, char *, void *), + bool cleanup, void *arg) +{ + DIR *dir; + struct dirent *d; + uint64_t oid; + int ret = SD_RES_SUCCESS; + char p[PATH_MAX]; + + dir = opendir(path); + if (!dir) { + sd_eprintf("failed to open %s, %m", path); + return SD_RES_EIO; + } + + while ((d = readdir(dir))) { + if (!strncmp(d->d_name, ".", 1)) + continue; + + oid = strtoull(d->d_name, NULL, 16); + if (oid == 0 || oid == ULLONG_MAX) + continue; + + /* don't call 
callback against temporary objects */ + if (strlen(d->d_name) == 20 && + strcmp(d->d_name + 16, ".tmp") == 0) { + if (cleanup) { + snprintf(p, PATH_MAX, "%s/%016"PRIx64".tmp", + path, oid); + sd_dprintf("remove tmp object %s", p); + unlink(p); + } + continue; + } + + ret = func(oid, path, arg); + if (ret != SD_RES_SUCCESS) + break; + } + closedir(dir); + return ret; +} + +static uint64_t get_path_size(char *path, uint64_t *used) +{ + struct statvfs fs; + uint64_t size; + + if (statvfs(path, &fs) < 0) { + sd_eprintf("get disk %s space failed %m", path); + return 0; + } + size = (int64_t)fs.f_frsize * fs.f_bfree; + + if (!used) + goto out; + if (for_each_object_in_path(path, get_total_object_size, false, used) + != SD_RES_SUCCESS) + return 0; +out: + return size; +} + /* * If path is broken during initilization or not support xattr return 0. We can * safely use 0 to represent failure case because 0 space path can be @@ -161,9 +248,13 @@ static inline void calculate_vdisks(struct disk *disks, int nr_disks, */ static uint64_t init_path_space(char *path) { - struct statvfs fs; uint64_t size; + if (xmkdir(path, def_dmode) < 0) { + sd_eprintf("%s, %m", path); + goto broken_path; + } + if (!is_xattr_enabled(path)) { sd_iprintf("multi-disk support need xattr feature"); goto broken_path; @@ -180,11 +271,9 @@ static uint64_t init_path_space(char *path) return size; create: - if (statvfs(path, &fs) < 0) { - sd_eprintf("get disk %s space failed %m", path); + size = get_path_size(path, NULL); + if (!size) goto broken_path; - } - size = (int64_t)fs.f_frsize * fs.f_bfree; if (setxattr(path, MDNAME, &size, MDSIZE, 0) < 0) { sd_eprintf("%s, %m", path); goto broken_path; @@ -229,7 +318,8 @@ reinit: } calculate_vdisks(md_disks, md_nr_disks, total); md_nr_vds = disks_to_vdisks(md_disks, md_nr_disks, md_vds); - sys->enable_md = true; + if (!sys->enable_md) + sys->enable_md = true; return total; } @@ -259,51 +349,6 @@ static char *get_object_path_nolock(uint64_t oid) return 
md_disks[vd->idx].path; } -/* If cleanup is true, temporary objects will be removed */ -static int for_each_object_in_path(char *path, - int (*func)(uint64_t, char *, void *), - bool cleanup, void *arg) -{ - DIR *dir; - struct dirent *d; - uint64_t oid; - int ret = SD_RES_SUCCESS; - char p[PATH_MAX]; - - dir = opendir(path); - if (!dir) { - sd_eprintf("failed to open %s, %m", path); - return SD_RES_EIO; - } - - while ((d = readdir(dir))) { - if (!strncmp(d->d_name, ".", 1)) - continue; - - oid = strtoull(d->d_name, NULL, 16); - if (oid == 0 || oid == ULLONG_MAX) - continue; - - /* don't call callback against temporary objects */ - if (strlen(d->d_name) == 20 && - strcmp(d->d_name + 16, ".tmp") == 0) { - if (cleanup) { - snprintf(p, PATH_MAX, "%s/%016"PRIx64".tmp", - path, oid); - sd_dprintf("remove tmp object %s", p); - unlink(p); - } - continue; - } - - ret = func(oid, path, arg); - if (ret != SD_RES_SUCCESS) - break; - } - closedir(dir); - return ret; -} - int for_each_object_in_wd(int (*func)(uint64_t oid, char *path, void *arg), bool cleanup, void *arg) { @@ -345,17 +390,6 @@ struct md_work { char path[PATH_MAX]; }; -static int path_to_disk_idx(char *path) -{ - int i; - - for (i = 0; i < md_nr_disks; i++) - if (strcmp(md_disks[i].path, path) == 0) - return i; - - return -1; -} - static inline void kick_recover(void) { struct vnode_info *vinfo = get_vnode_info(); @@ -364,15 +398,6 @@ static inline void kick_recover(void) put_vnode_info(vinfo); } -static void unplug_disk(int idx) -{ - - remove_disk(idx); - sys->disk_space = md_init_space(); - if (md_nr_disks > 0) - kick_recover(); -} - static void md_do_recover(struct work *work) { struct md_work *mw = container_of(work, struct md_work, work); @@ -383,7 +408,10 @@ static void md_do_recover(struct work *work) if (idx < 0) /* Just ignore the duplicate EIO of the same path */ goto out; - unplug_disk(idx); + remove_disk(idx); + sys->disk_space = md_init_space(); + if (md_nr_disks > 0) + kick_recover(); out: 
pthread_rwlock_unlock(&md_lock); free(mw); @@ -500,3 +528,80 @@ int md_get_stale_path(uint64_t oid, uint32_t epoch, char *path) return SD_RES_NO_OBJ; } + +uint32_t md_get_info(struct sd_md_info *info) +{ + uint32_t ret = sizeof(*info); + int i; + + memset(info, 0, ret); + pthread_rwlock_rdlock(&md_lock); + for (i = 0; i < md_nr_disks; i++) { + info->disk[i].idx = i; + pstrcpy(info->disk[i].path, PATH_MAX, md_disks[i].path); + info->disk[i].size = get_path_size(info->disk[i].path, + &info->disk[i].used); + if (!info->disk[i].size) { + ret = 0; + break; + } + } + info->nr = md_nr_disks; + pthread_rwlock_unlock(&md_lock); + return ret; +} + +static inline void md_del_disk(char *path) +{ + int idx = path_to_disk_idx(path); + + if (idx < 0) { + sd_eprintf("invalid path %s", path); + return; + } + remove_disk(idx); +} + +static int do_plug_unplug(char *disks, bool plug) +{ + char *path; + int old_nr, ret = SD_RES_UNKNOWN; + + pthread_rwlock_wrlock(&md_lock); + old_nr = md_nr_disks; + path = strtok(disks, ","); + do { + if (plug) + md_add_disk(path); + else + md_del_disk(path); + } while ((path = strtok(NULL, ","))); + + /* If no disks change, bail out */ + if (old_nr == md_nr_disks) + goto out; + + sys->disk_space = md_init_space(); + /* + * We have to kick recover aggressively because there is possibility + * that nr of disks are removed during md_init_space() happens to equal + * nr of disks we added. 
+ */ + if (md_nr_disks > 0) + kick_recover(); + + ret = SD_RES_SUCCESS; +out: + pthread_rwlock_unlock(&md_lock); + return ret; +} + +int md_plug_disks(char *disks) +{ + return do_plug_unplug(disks, true); +} + +int md_unplug_disks(char *disks) +{ + return do_plug_unplug(disks, false); +} diff --git a/sheep/ops.c b/sheep/ops.c index 8cba70d..3839437 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -667,6 +667,33 @@ static int local_set_cache_size(const struct sd_req *req, struct sd_rsp *rsp, return SD_RES_SUCCESS; } +static int local_md_info(struct request *request) +{ + struct sd_rsp *rsp = &request->rp; + struct sd_req *req = &request->rq; + + assert(req->data_length == sizeof(struct sd_md_info)); + rsp->data_length = md_get_info((struct sd_md_info *)request->data); + + return rsp->data_length ? SD_RES_SUCCESS : SD_RES_UNKNOWN; +} + +static int local_md_plug(const struct sd_req *req, struct sd_rsp *rsp, + void *data) +{ + char *disks = (char *)data; + + return md_plug_disks(disks); +} + +static int local_md_unplug(const struct sd_req *req, struct sd_rsp *rsp, + void *data) +{ + char *disks = (char *)data; + + return md_unplug_disks(disks); +} + static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp, void *data) { @@ -1110,6 +1137,24 @@ static struct sd_op_template sd_ops[] = { .process_main = local_set_cache_size, }, + [SD_OP_MD_INFO] = { + .name = "MD_INFO", + .type = SD_OP_TYPE_LOCAL, + .process_work = local_md_info, + }, + + [SD_OP_MD_PLUG] = { + .name = "MD_PLUG_DISKS", + .type = SD_OP_TYPE_LOCAL, + .process_main = local_md_plug, + }, + + [SD_OP_MD_UNPLUG] = { + .name = "MD_UNPLUG_DISKS", + .type = SD_OP_TYPE_LOCAL, + .process_main = local_md_unplug, + }, + /* gateway I/O operations */ [SD_OP_CREATE_AND_WRITE_OBJ] = { .name = "CREATE_AND_WRITE_OBJ", diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 652fd3a..098a7bb 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -417,11 +417,14 @@ int journal_file_init(const char *path, 
size_t size, bool skip); int journal_file_write(uint64_t oid, const char *buf, size_t size, off_t, bool); /* md.c */ -int md_init_disk(char *path); +void md_add_disk(char *path); uint64_t md_init_space(void); char *get_object_path(uint64_t oid); int md_handle_eio(char *); bool md_exist(uint64_t oid); int md_get_stale_path(uint64_t oid, uint32_t epoch, char *path); +uint32_t md_get_info(struct sd_md_info *info); +int md_plug_disks(char *disks); +int md_unplug_disks(char *disks); #endif diff --git a/sheep/store.c b/sheep/store.c index 58303fa..cbf24dc 100644 --- a/sheep/store.c +++ b/sheep/store.c @@ -269,8 +269,7 @@ static int init_obj_path(const char *base_path, char *argp) /* Eat up the first component */ strtok(argp, ","); while ((p = strtok(NULL, ","))) - if (md_init_disk(p) < 0) - return -1; + md_add_disk(p); return init_path(obj_path, NULL); } -- 1.7.9.5