Quoting Dan Smith ([email protected]):
> Guilt dropped the new checkpoint_dev.c file when I switched to the
> newer branch.  Sorry about that.  Updated patch included below.

(Just a few comments on a cursory look.  Will take a closer look
later)

> +int ckpt_netdev_inet_addrs(struct in_device *indev,
> +                        struct ckpt_netdev_addr *_abuf[])
> +{
> +     struct ckpt_netdev_addr *abuf = NULL;
> +     struct in_ifaddr *addr = indev->ifa_list;
> +     int pages = 0;
> +     int addrs = 0;
> +     int max;
> +
> +     read_lock(&dev_base_lock);
> + retry:
> +     if (++pages > 4) {
> +             addrs = -ENOMEM;
> +             goto out;
> +     }
> +
> +     *_abuf = krealloc(abuf, PAGE_SIZE * pages, GFP_KERNEL);

rw_lockt is effectively a spinlock, so I don't think you can sleep
here.

> +     if (*_abuf == NULL) {
> +             addrs = -ENOMEM;
> +             goto out;
> +     }
> +     abuf = *_abuf;
> +
> +     max = (pages * PAGE_SIZE) / sizeof(*abuf);
> +     while (addr) {
> +             abuf[addrs].type = CKPT_NETDEV_ADDR_IPV4; /* Only IPv4 now */
> +             abuf[addrs].inet4_local = addr->ifa_local;
> +             abuf[addrs].inet4_address = addr->ifa_address;
> +             abuf[addrs].inet4_mask = addr->ifa_mask;
> +             abuf[addrs].inet4_broadcast = addr->ifa_broadcast;
> +
> +             addr = addr->ifa_next;
> +             if (++addrs >= max)
> +                     goto retry;
> +     }
> +
> + out:
> +     read_unlock(&dev_base_lock);
> +
> +     if (addrs < 0) {
> +             kfree(abuf);
> +             *_abuf = NULL;
> +     }
> +
> +     return addrs;
> +}
> +
> +struct ckpt_hdr_netdev *ckpt_netdev_base(struct ckpt_ctx *ctx,
> +                                      struct net_device *dev,
> +                                      struct ckpt_netdev_addr *addrs[])
> +{
> +     struct ckpt_hdr_netdev *h;
> +     int ret;
> +
> +     h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_NETDEV);
> +     if (!h)
> +             return ERR_PTR(-ENOMEM);
> +
> +     ret = ckpt_netdev_hwaddr(dev, h);
> +     if (ret < 0)
> +             goto out;
> +
> +     *addrs = NULL;
> +     ret = h->inet_addrs = ckpt_netdev_inet_addrs(dev->ip_ptr, addrs);
> +     if (ret < 0)
> +             goto out;
> +
> +     ret = h->netns_ref = checkpoint_obj(ctx, dev->nd_net, CKPT_OBJ_NET_NS);
> + out:
> +     if (ret < 0) {
> +             ckpt_hdr_put(ctx, h);
> +             h = ERR_PTR(ret);
> +             if (*addrs)
> +                     kfree(*addrs);
> +     }
> +
> +     return h;
> +}
> +
> +int checkpoint_netdev(struct ckpt_ctx *ctx, void *ptr)
> +{
> +     struct net_device *dev = (struct net_device *)ptr;
> +
> +     if (!dev->netdev_ops->ndo_checkpoint)
> +             return -EINVAL;
> +
> +     ckpt_debug("checkpointing netdev %s\n", dev->name);
> +
> +     return dev->netdev_ops->ndo_checkpoint(ctx, dev);
> +}
> +
> +int checkpoint_netns(struct ckpt_ctx *ctx, void *ptr)
> +{
> +     struct net *net = ptr;
> +     struct net_device *dev;
> +     struct ckpt_hdr_netns *h;
> +     int ret;
> +
> +     h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_NET_NS);
> +     if (!h)
> +             return -ENOMEM;
> +
> +     h->this_ref = ckpt_obj_lookup(ctx, net, CKPT_OBJ_NET_NS);
> +     BUG_ON(h->this_ref == 0);
> +
> +     ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h);
> +     if (ret < 0)
> +             goto out;
> +
> +     for_each_netdev(net, dev) {
> +             if (!dev->netdev_ops->ndo_checkpoint)
> +                     continue;

Won't the checkpoint_obj() call checkpoint_netdev(), which will return
-EINVAL if ndo_checkpoint is not defined?  But here you skip the
checkpoint_obj() call (which seems wrong to me).  Which do you want to
have happen?

> +             ret = checkpoint_obj(ctx, dev, CKPT_OBJ_NETDEV);
> +             if (ret < 0)
> +                     break;
> +     }
> + out:
> +     ckpt_hdr_put(ctx, h);
> +
> +     return ret;
> +}
> +
> +static int restore_in_addrs(struct ckpt_ctx *ctx,
> +                         __u32 naddrs,
> +                         struct net *net,
> +                         struct net_device *dev)
> +{
> +     __u32 i;
> +     int ret = 0;
> +     int len = naddrs * sizeof(struct ckpt_netdev_addr);
> +     struct ckpt_netdev_addr *addrs = NULL;
> +
> +     addrs = kmalloc(len, GFP_KERNEL);
> +     if (!addrs)
> +             return -ENOMEM;
> +
> +     ret = _ckpt_read_buffer(ctx, addrs, len);
> +     if (ret < 0)
> +             goto out;
> +
> +     for (i = 0; i < naddrs; i++) {
> +             struct ckpt_netdev_addr *addr = &addrs[i];
> +             struct ifreq req;
> +             struct sockaddr_in *inaddr;
> +
> +             if (addr->type != CKPT_NETDEV_ADDR_IPV4) {
> +                     ret = -EINVAL;
> +                     ckpt_err(ctx, ret, "Unsupported netdev addr type %i\n",
> +                              addr->type);
> +                     break;
> +             }
> +
> +             ckpt_debug("restoring %s: %x/%x/%x\n", dev->name,
> +                        addr->inet4_address,
> +                        addr->inet4_mask,
> +                        addr->inet4_broadcast);
> +
> +             memcpy(req.ifr_name, dev->name, IFNAMSIZ);
> +
> +             inaddr = (struct sockaddr_in *)&req.ifr_addr;
> +             inaddr->sin_addr.s_addr = addr->inet4_address;
> +             inaddr->sin_family = AF_INET;
> +             ret = __kern_devinet_ioctl(net, SIOCSIFADDR, &req);
> +             if (ret < 0) {
> +                     ckpt_err(ctx, ret, "Failed to set address\n");
> +                     break;
> +             }
> +
> +             inaddr = (struct sockaddr_in *)&req.ifr_addr;
> +             inaddr->sin_addr.s_addr = addr->inet4_mask;
> +             inaddr->sin_family = AF_INET;
> +             ret = __kern_devinet_ioctl(net, SIOCSIFNETMASK, &req);
> +             if (ret < 0) {
> +                     ckpt_err(ctx, ret, "Failed to set netmask\n");
> +                     break;
> +             }
> +
> +             inaddr = (struct sockaddr_in *)&req.ifr_addr;
> +             inaddr->sin_addr.s_addr = addr->inet4_broadcast;
> +             inaddr->sin_family = AF_INET;
> +             ret = __kern_devinet_ioctl(net, SIOCSIFBRDADDR, &req);
> +             if (ret < 0) {
> +                     ckpt_err(ctx, ret, "Failed to set broadcast\n");
> +                     break;
> +             }
> +     }
> +
> + out:
> +     kfree(addrs);
> +
> +     return ret;
> +}
> +
> +static int veth_peer_data(struct sk_buff *skb, char *peer_name)
> +{
> +     struct nlattr *linkdata;
> +     struct ifinfomsg ifm;
> +
> +     linkdata = nla_nest_start(skb, IFLA_INFO_DATA);
> +     if (!linkdata)
> +             return -ENOMEM;
> +
> +     nla_put(skb, VETH_INFO_PEER, sizeof(ifm), &ifm);
> +     nla_put_string(skb, IFLA_IFNAME, peer_name);
> +
> +     nla_nest_end(skb, linkdata);
> +
> +     return 0;
> +}
> +
> +static struct sk_buff *new_link_message(char *this_name, char *peer_name)
> +{
> +     int ret = -ENOMEM;
> +     int flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK;
> +     struct nlmsghdr *nlh;
> +     struct sk_buff *skb;
> +     struct ifinfomsg *ifm;
> +     struct nlattr *linkinfo;
> +
> +     skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> +     if (!skb)
> +             goto out;
> +
> +     nlh = nlmsg_put(skb, 0, 0, RTM_NEWLINK, sizeof(*ifm), flags);
> +     if (!nlh)
> +             goto out;
> +
> +     ifm = nlmsg_data(nlh);
> +     memset(ifm, 0, sizeof(*ifm));
> +
> +     ret = nla_put_string(skb, IFLA_IFNAME, this_name);
> +     if (ret)
> +             goto out;
> +
> +     ret = -ENOMEM;
> +
> +     linkinfo = nla_nest_start(skb, IFLA_LINKINFO);
> +     if (!linkinfo)
> +             goto out;
> +
> +     if (nla_put_string(skb, IFLA_INFO_KIND, "veth") < 0)
> +             goto out;
> +
> +     ret = veth_peer_data(skb, peer_name);

By hard-coding veth stuff into generic-sounding functions in
net/checkpoint_dev.c you seem to be assuming that only veth will
ever be supported for checkpoint/restart?  what about macvlan?
(Not to mention that eventually we intend to support moving
physical nics into containers)

-serge
_______________________________________________
Containers mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
[email protected]
https://openvz.org/mailman/listinfo/devel

Reply via email to