I noticed procd regression caused by libubox commit 75e300aeec25 ("blobmsg: fix wrong payload len passed from blobmsg_check_array").
Calling "/etc/init.d/network restart" over and over will eventually result in procd not starting netifd. Sometimes failure doesn't occur and reboot is required. This makes it sound like some random memory problem (accessing uninitialized data?). I added some some debugging to procd's instance_fill_array() - diff attached. Backtrace for this failure is: instance_fill_array() instance_config_parse() instance_init() service_instance_add() Successful restart: Fri May 22 09:21:53 2020 daemon.err procd: [instance_fill_array] blobmsg_type(cur): 2 (BLOBMSG_TYPE_TABLE) Fri May 22 09:21:53 2020 daemon.err procd: [instance_fill_array] BEFORE 75e300aeec25: Fri May 22 09:21:53 2020 daemon.err procd: [instance_fill_array] blob_len(cur): 36 Fri May 22 09:21:53 2020 daemon.err procd: [instance_fill_array] blobmsg_check_attr_len(cur, false, 36): 1 Fri May 22 09:21:53 2020 daemon.err procd: [instance_fill_array] [for_each_attr] blobmsg_name(sub): core blobmsg_type(sub): 3 (BLOBMSG_TYPE_STRING) Fri May 22 09:21:53 2020 daemon.err procd: [instance_fill_array] AFTER 75e300aeec25: Fri May 22 09:21:53 2020 daemon.err procd: [instance_fill_array] blobmsg_len(cur): 24 Fri May 22 09:21:53 2020 daemon.err procd: [instance_fill_array] blobmsg_check_attr_len(cur, false, 24): 1 Fri May 22 09:21:53 2020 daemon.err procd: [instance_fill_array] [for_each_attr] blobmsg_name(sub): core blobmsg_type(sub): 3 (BLOBMSG_TYPE_STRING) Fri May 22 09:21:53 2020 daemon.err procd: [instance_fill_array] blob_attr *cur as JSON: { "core": "unlimited" } Failed restart: Fri May 22 09:22:05 2020 daemon.err procd: [instance_fill_array] blobmsg_type(cur): 2 (BLOBMSG_TYPE_TABLE) Fri May 22 09:22:05 2020 daemon.err procd: [instance_fill_array] BEFORE 75e300aeec25: Fri May 22 09:22:05 2020 daemon.err procd: [instance_fill_array] blob_len(cur): 36 Fri May 22 09:22:05 2020 daemon.err procd: [instance_fill_array] blobmsg_check_attr_len(cur, false, 36): 1 Fri May 22 09:22:05 2020 daemon.err procd: [instance_fill_array] [for_each_attr] blobmsg_name(sub): core blobmsg_type(sub): 3 (BLOBMSG_TYPE_STRING) Fri May 22 09:22:05 2020 daemon.err procd: [instance_fill_array] [for_each_attr] blobmsg_name(sub): blobmsg_type(sub): 5 (BLOBMSG_TYPE_INT32) Fri May 22 09:22:05 2020 daemon.err procd: [instance_fill_array] AFTER 75e300aeec25: Fri May 22 09:22:05 2020 daemon.err procd: [instance_fill_array] blobmsg_len(cur): 24 Fri May 22 09:22:05 2020 daemon.err procd: [instance_fill_array] blobmsg_check_attr_len(cur, false, 24): 1 Fri May 22 09:22:05 2020 daemon.err procd: [instance_fill_array] [for_each_attr] blobmsg_name(sub): core blobmsg_type(sub): 3 (BLOBMSG_TYPE_STRING) Fri May 22 09:22:05 2020 daemon.err procd: [instance_fill_array] blob_attr *cur as JSON: { "core": "unlimited" } Fri May 22 09:22:05 2020 daemon.err procd: [instance_fill_array] blobmsg_check_attr_list() failed As you can see, libubox fix resulted in blobmsg_check_array_len() actually iterating over sub-attr-s but also failing occasionally. [0] https://git.openwrt.org/?p=project/libubox.git;a=commitdiff;h=75e300aeec25e032a9778bea34c713969960d1f0 diff --git a/service/instance.c b/service/instance.c index e5615d5..95154c2 100644 --- a/service/instance.c +++ b/service/instance.c @@ -771,16 +771,64 @@ instance_fill_any(struct blobmsg_list *l, struct blob_attr *cur) blobmsg_list_fill(l, blobmsg_data(cur), blobmsg_data_len(cur), false); } +#include <libubox/blobmsg_json.h> +#include <libubox/blobmsg.h> + +static char *blobmsg_type_name(int type) +{ + switch (type) { + case BLOBMSG_TYPE_UNSPEC: return "BLOBMSG_TYPE_UNSPEC"; + case BLOBMSG_TYPE_ARRAY: return "BLOBMSG_TYPE_ARRAY"; + case BLOBMSG_TYPE_TABLE: return "BLOBMSG_TYPE_TABLE"; + case BLOBMSG_TYPE_STRING: return "BLOBMSG_TYPE_STRING"; + case BLOBMSG_TYPE_INT64: return "BLOBMSG_TYPE_INT64"; + case BLOBMSG_TYPE_INT32: return "BLOBMSG_TYPE_INT32"; + case BLOBMSG_TYPE_INT16: return "BLOBMSG_TYPE_INT16"; + case BLOBMSG_TYPE_INT8: return "BLOBMSG_TYPE_INT8"; + case BLOBMSG_TYPE_DOUBLE: return "BLOBMSG_TYPE_DOUBLE"; + default: return "ERROR"; + } +} + static bool instance_fill_array(struct blobmsg_list *l, struct blob_attr *cur, blobmsg_update_cb cb, bool array) { struct blobmsg_list_node *node; + struct blob_attr *sub; + char *json = NULL; + size_t len; if (!cur) return true; - if (!blobmsg_check_attr_list(cur, BLOBMSG_TYPE_STRING)) + ERROR("[%s] blobmsg_type(cur): %d (%s)\n", __func__, blobmsg_type(cur), blobmsg_type_name(blobmsg_type(cur))); + + len = blob_len(cur); + ERROR("[%s] BEFORE 75e300aeec25:\n", __func__); + ERROR("[%s] blob_len(cur): %d\n", __func__, len); + ERROR("[%s] blobmsg_check_attr_len(cur, false, %d): %d\n", __func__, len, blobmsg_check_attr_len(cur, false, len)); + __blobmsg_for_each_attr(sub, cur, len) { + ERROR("[%s] [for_each_attr] blobmsg_name(sub): %s blobmsg_type(sub): %d (%s)\n", __func__, blobmsg_name(sub), blobmsg_type(sub), blobmsg_type_name(blobmsg_type(sub))); + } + + len = blobmsg_len(cur); + ERROR("[%s] AFTER 75e300aeec25:\n", __func__); + ERROR("[%s] blobmsg_len(cur): %d\n", __func__, len); + ERROR("[%s] blobmsg_check_attr_len(cur, false, %d): %d\n", __func__, len, blobmsg_check_attr_len(cur, false, len)); + __blobmsg_for_each_attr(sub, cur, len) { + ERROR("[%s] [for_each_attr] blobmsg_name(sub): %s blobmsg_type(sub): %d (%s)\n", __func__, blobmsg_name(sub), blobmsg_type(sub), blobmsg_type_name(blobmsg_type(sub))); + } + + json = blobmsg_format_json_indent(cur, true, 0); + if (json) { + ERROR("[%s] blob_attr *cur as JSON: %s\n", __func__, json); + free(json); + } + + if (!blobmsg_check_attr_list(cur, BLOBMSG_TYPE_STRING)) { + ERROR("[%s] blobmsg_check_attr_list() failed\n", __func__); return false; + } blobmsg_list_fill(l, blobmsg_data(cur), blobmsg_data_len(cur), array); if (cb) { _______________________________________________ openwrt-devel mailing list openwrt-devel@lists.openwrt.org https://lists.openwrt.org/mailman/listinfo/openwrt-devel