On 07/22/2015 06:55 PM, Yang Hongyang wrote: > This patch adds a net filter between network backend and NIC devices. > All packets will pass through this filter. > TODO: > multiqueue support. > plugin support. > > +--------------+ +-------------+ > +----------+ | filter | |frontend(NIC)| > | real | | | | | > | network <--+backend <-------+ | > | backend | | peer +-------> peer | > +----------+ +--------------+ +-------------+ > > Usage: > -netdev tap,id=bn0 # you can use whatever backend as needed > -netdev filter,id=f0,backend=bn0,plugin=dump > -device e1000,netdev=f0 > > Signed-off-by: Yang Hongyang <yan...@cn.fujitsu.com>
Hi: Several questions: - It looks like we can do more than filtering here, so maybe a name like traffic control or similar would be more suitable? - What are the advantages of introducing a new type of netdev? As far as I can see, just replacing the dump function in Tomas' series with a configurable function pointer would do the trick (probably with some monitor commands). And then you wouldn't even need to deal with the vnet header and offload stuff? - I'm not sure of the value of doing this, especially considering that the host (Linux) has a much more functional and powerful traffic control system. Thanks. > --- > include/net/net.h | 3 + > net/Makefile.objs | 1 + > net/clients.h | 3 + > net/filter.c | 200 > ++++++++++++++++++++++++++++++++++++++++++++++++++++++ > net/net.c | 6 +- > qapi-schema.json | 23 ++++++- > 6 files changed, 233 insertions(+), 3 deletions(-) > create mode 100644 net/filter.c > > diff --git a/include/net/net.h b/include/net/net.h > index 6a6cbef..250f365 100644 > --- a/include/net/net.h > +++ b/include/net/net.h > @@ -45,6 +45,8 @@ typedef void (NetPoll)(NetClientState *, bool enable); > typedef int (NetCanReceive)(NetClientState *); > typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); > typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); > +typedef ssize_t (NetReceiveFilter)(NetClientState *, NetClientState *, > + unsigned, const uint8_t *, size_t); > typedef void (NetCleanup) (NetClientState *); > typedef void (LinkStatusChanged)(NetClientState *); > typedef void (NetClientDestructor)(NetClientState *); > @@ -64,6 +66,7 @@ typedef struct NetClientInfo { > NetReceive *receive; > NetReceive *receive_raw; > NetReceiveIOV *receive_iov; > + NetReceiveFilter *receive_filter; > NetCanReceive *can_receive; > NetCleanup *cleanup; > LinkStatusChanged *link_status_changed; > diff --git a/net/Makefile.objs b/net/Makefile.objs > index ec19cb3..914aec0 100644 > --- a/net/Makefile.objs > +++ b/net/Makefile.objs > @@ -13,3 +13,4 @@ 
common-obj-$(CONFIG_HAIKU) += tap-haiku.o > common-obj-$(CONFIG_SLIRP) += slirp.o > common-obj-$(CONFIG_VDE) += vde.o > common-obj-$(CONFIG_NETMAP) += netmap.o > +common-obj-y += filter.o > diff --git a/net/clients.h b/net/clients.h > index d47530e..bcfb34b 100644 > --- a/net/clients.h > +++ b/net/clients.h > @@ -62,4 +62,7 @@ int net_init_netmap(const NetClientOptions *opts, const > char *name, > int net_init_vhost_user(const NetClientOptions *opts, const char *name, > NetClientState *peer, Error **errp); > > +int net_init_filter(const NetClientOptions *opts, const char *name, > + NetClientState *peer, Error **errp); > + > #endif /* QEMU_NET_CLIENTS_H */ > diff --git a/net/filter.c b/net/filter.c > new file mode 100644 > index 0000000..006c64a > --- /dev/null > +++ b/net/filter.c > @@ -0,0 +1,200 @@ > +/* > + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) > + * (a.k.a. Fault Tolerance or Continuous Replication) > + * > + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO., LTD. > + * Copyright (c) 2015 FUJITSU LIMITED > + * Copyright (c) 2015 Intel Corporation > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or > + * later. See the COPYING file in the top-level directory. 
> + */ > + > +#include "net/net.h" > +#include "clients.h" > +#include "qemu-common.h" > +#include "qemu/error-report.h" > + > +typedef struct FILTERState { > + NetClientState nc; > + NetClientState *backend; > +} FILTERState; > + > +static ssize_t filter_receive(NetClientState *nc, NetClientState *sender, > + unsigned flags, const uint8_t *data, size_t > size) > +{ > + FILTERState *s = DO_UPCAST(FILTERState, nc, nc); > + NetClientState *issued_nc = NULL; > + ssize_t ret; > + > + if (sender->info->type == NET_CLIENT_OPTIONS_KIND_NIC) { > + /* packet received from NIC */ > + printf("packet received from NIC!!!\n"); > + issued_nc = s->backend; > + } else { > + /* packet received from backend */ > + printf("packet received from backend!!!\n"); > + issued_nc = nc->peer; > + } > + > + if (flags & QEMU_NET_PACKET_FLAG_RAW && issued_nc->info->receive_raw) { > + ret = issued_nc->info->receive_raw(issued_nc, data, size); > + } else { > + ret = issued_nc->info->receive(issued_nc, data, size); > + } > + > + return ret; > +} > + > +static void filter_cleanup(NetClientState *nc) > +{ > + return; > +} > + > +static bool filter_has_ufo(NetClientState *nc) > +{ > + FILTERState *s = DO_UPCAST(FILTERState, nc, nc); > + NetClientState *backend = s->backend; > + > + if (!backend->info->has_ufo) { > + return false; > + } > + > + return backend->info->has_ufo(backend); > +} > + > +static bool filter_has_vnet_hdr(NetClientState *nc) > +{ > + FILTERState *s = DO_UPCAST(FILTERState, nc, nc); > + NetClientState *backend = s->backend; > + > + if (!backend->info->has_vnet_hdr) { > + return false; > + } > + > + return backend->info->has_vnet_hdr(backend); > +} > + > +static bool filter_has_vnet_hdr_len(NetClientState *nc, int len) > +{ > + FILTERState *s = DO_UPCAST(FILTERState, nc, nc); > + NetClientState *backend = s->backend; > + > + if (!backend->info->has_vnet_hdr_len) { > + return false; > + } > + > + return backend->info->has_vnet_hdr_len(backend, len); > +} > + > +static void 
filter_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr) > +{ > + FILTERState *s = DO_UPCAST(FILTERState, nc, nc); > + NetClientState *backend = s->backend; > + > + if (!backend->info->using_vnet_hdr) { > + return; > + } > + > + backend->info->using_vnet_hdr(backend, using_vnet_hdr); > +} > + > +static void filter_set_offload(NetClientState *nc, int csum, int tso4, > + int tso6, int ecn, int ufo) > +{ > + FILTERState *s = DO_UPCAST(FILTERState, nc, nc); > + NetClientState *backend = s->backend; > + > + if (!backend->info->set_offload) { > + return; > + } > + > + backend->info->set_offload(backend, csum, tso4, tso6, ecn, ufo); > +} > + > +static void filter_set_vnet_hdr_len(NetClientState *nc, int len) > +{ > + FILTERState *s = DO_UPCAST(FILTERState, nc, nc); > + NetClientState *backend = s->backend; > + > + if (!backend->info->set_vnet_hdr_len) { > + return; > + } > + > + backend->info->set_vnet_hdr_len(backend, len); > +} > + > +static int filter_set_vnet_le(NetClientState *nc, bool is_le) > +{ > + FILTERState *s = DO_UPCAST(FILTERState, nc, nc); > + NetClientState *backend = s->backend; > + > + if (!backend->info->set_vnet_le) { > + return -ENOSYS; > + } > + > + return backend->info->set_vnet_le(backend, is_le); > +} > + > +static int filter_set_vnet_be(NetClientState *nc, bool is_be) > +{ > + FILTERState *s = DO_UPCAST(FILTERState, nc, nc); > + NetClientState *backend = s->backend; > + > + if (!backend->info->set_vnet_be) { > + return -ENOSYS; > + } > + > + return backend->info->set_vnet_be(backend, is_be); > +} > + > +static NetClientInfo net_filter_info = { > + .type = NET_CLIENT_OPTIONS_KIND_FILTER, > + .size = sizeof(FILTERState), > + .receive_filter = filter_receive, > + .cleanup = filter_cleanup, > + .has_ufo = filter_has_ufo, > + .has_vnet_hdr = filter_has_vnet_hdr, > + .has_vnet_hdr_len = filter_has_vnet_hdr_len, > + .using_vnet_hdr = filter_using_vnet_hdr, > + .set_offload = filter_set_offload, > + .set_vnet_hdr_len = filter_set_vnet_hdr_len, > + 
.set_vnet_le = filter_set_vnet_le, > + .set_vnet_be = filter_set_vnet_be, > +}; > + > +int net_init_filter(const NetClientOptions *opts, const char *name, > + NetClientState *peer, Error **errp) > +{ > + NetClientState *nc; > + FILTERState *s; > + const NetdevFilterOptions *filter; > + char *backend_id = NULL; > + /* char *plugin = NULL; */ > + > + assert(opts->kind == NET_CLIENT_OPTIONS_KIND_FILTER); > + filter = opts->filter; > + assert(filter->has_backend); > + > + backend_id = filter->backend; > + /* plugin = filter->has_plugin ? filter->plugin : NULL; */ > + > + nc = qemu_new_net_client(&net_filter_info, peer, "filter", name); > + /* > + * TODO: Both backend and frontend packets will use this queue, we > + * double this queue's maxlen > + */ > + s = DO_UPCAST(FILTERState, nc, nc); > + s->backend = qemu_find_netdev(backend_id); > + if (!s->backend) { > + error_setg(errp, "invalid backend name specified"); > + return -1; > + } > + > + s->backend->peer = nc; > + /* > + * TODO: > + * init filter plugin > + */ > + return 0; > +} > diff --git a/net/net.c b/net/net.c > index 28a5597..466c6ff 100644 > --- a/net/net.c > +++ b/net/net.c > @@ -57,6 +57,7 @@ const char *host_net_devices[] = { > "tap", > "socket", > "dump", > + "filter", > #ifdef CONFIG_NET_BRIDGE > "bridge", > #endif > @@ -571,7 +572,9 @@ ssize_t qemu_deliver_packet(NetClientState *sender, > return 0; > } > > - if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { > + if (nc->info->receive_filter) { > + ret = nc->info->receive_filter(nc, sender, flags, data, size); > + } else if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { > ret = nc->info->receive_raw(nc, data, size); > } else { > ret = nc->info->receive(nc, data, size); > @@ -886,6 +889,7 @@ static int (* const > net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])( > const char *name, > NetClientState *peer, Error **errp) = { > [NET_CLIENT_OPTIONS_KIND_NIC] = net_init_nic, > + [NET_CLIENT_OPTIONS_KIND_FILTER] = net_init_filter, 
> #ifdef CONFIG_SLIRP > [NET_CLIENT_OPTIONS_KIND_USER] = net_init_slirp, > #endif > diff --git a/qapi-schema.json b/qapi-schema.json > index a0a45f7..3329973 100644 > --- a/qapi-schema.json > +++ b/qapi-schema.json > @@ -2063,7 +2063,7 @@ > # Add a network backend. > # > # @type: the type of network backend. Current valid values are 'user', > 'tap', > -# 'vde', 'socket', 'dump' and 'bridge' > +# 'vde', 'socket', 'dump' , 'bridge' and 'filter' > # > # @id: the name of the new network backend > # > @@ -2474,6 +2474,24 @@ > '*vhostforce': 'bool' } } > > ## > +# @NetdevFilterOptions > +# > +# A net filter between network backend and NIC device > +# > +# @plugin: #optional a plugin represent a set of filter rules, > +# by default, if no plugin is supplied, the net filter will do > +# nothing but pass all packets to network backend. > +# > +# @backend: the network backend. > +# > +# Since 2.5 > +## > +{ 'struct': 'NetdevFilterOptions', > + 'data': { > + '*plugin': 'str', > + '*backend': 'str' } } > + > +## > # @NetClientOptions > # > # A discriminated record of network device traits. > @@ -2496,7 +2514,8 @@ > 'bridge': 'NetdevBridgeOptions', > 'hubport': 'NetdevHubPortOptions', > 'netmap': 'NetdevNetmapOptions', > - 'vhost-user': 'NetdevVhostUserOptions' } } > + 'vhost-user': 'NetdevVhostUserOptions', > + 'filter': 'NetdevFilterOptions'} } > > ## > # @NetLegacy