This patch adds a net filter between the network backend and NIC devices.
All packets pass through this filter.
TODO: multiqueue support.

             +--------------+       +-------------+
+----------+ |    filter    |       |frontend(NIC)|
|      peer+-->             |       |             |
| network  <--+backend      <-------+ peer        |
| backend  | |         peer +------->             |
+----------+ +--------------+       +-------------+

Usage:
-netdev tap,id=bn0 # you can use whatever backend as needed
-netdev filter,id=f0,backend=bn0
-netdev filter-<plugin>,id=p0,filter=f0
-device e1000,netdev=f0

NOTE:
You can attach multiple plugins to the filter, and dynamically add/remove
the filter and filter-<plugin> netdevs.
A filter with no plugin attached does nothing except pass all packets
through. A plugin such as dump, for example, will dump all packets into a
file, while a netbuffer plugin will simply buffer the packets and release
them when needed.
You can also implement whatever plugin you need on top of this filter.

Signed-off-by: Yang Hongyang <yan...@cn.fujitsu.com>
---
 include/net/filter.h |  18 +++
 include/net/net.h    |   3 +
 net/Makefile.objs    |   1 +
 net/clients.h        |   3 +
 net/filter.c         | 317 +++++++++++++++++++++++++++++++++++++++++++++++++++
 net/net.c            |  20 +++-
 qapi-schema.json     |  22 +++-
 7 files changed, 377 insertions(+), 7 deletions(-)
 create mode 100644 include/net/filter.h
 create mode 100644 net/filter.c

diff --git a/include/net/filter.h b/include/net/filter.h
new file mode 100644
index 0000000..44ba10f
--- /dev/null
+++ b/include/net/filter.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2015 FUJITSU LIMITED
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */ + +#ifndef QEMU_NET_FILTER_H +#define QEMU_NET_FILTER_H + +#include "qemu-common.h" +/* Net filter interface: backend lookup and plugin attach/detach. */ + +NetClientState *filter_backend(NetClientState *nc); +int filter_add_plugin(NetClientState *nc, NetClientState *plugin); +int filter_del_plugin(NetClientState *nc, NetClientState *plugin); + +#endif /* QEMU_NET_FILTER_H */ diff --git a/include/net/net.h b/include/net/net.h index 6a6cbef..250f365 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -45,6 +45,8 @@ typedef void (NetPoll)(NetClientState *, bool enable); typedef int (NetCanReceive)(NetClientState *); typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); +typedef ssize_t (NetReceiveFilter)(NetClientState *, NetClientState *, + unsigned, const uint8_t *, size_t); typedef void (NetCleanup) (NetClientState *); typedef void (LinkStatusChanged)(NetClientState *); typedef void (NetClientDestructor)(NetClientState *); @@ -64,6 +66,7 @@ typedef struct NetClientInfo { NetReceive *receive; NetReceive *receive_raw; NetReceiveIOV *receive_iov; + NetReceiveFilter *receive_filter; NetCanReceive *can_receive; NetCleanup *cleanup; LinkStatusChanged *link_status_changed; diff --git a/net/Makefile.objs b/net/Makefile.objs index ec19cb3..914aec0 100644 --- a/net/Makefile.objs +++ b/net/Makefile.objs @@ -13,3 +13,4 @@ common-obj-$(CONFIG_HAIKU) += tap-haiku.o common-obj-$(CONFIG_SLIRP) += slirp.o common-obj-$(CONFIG_VDE) += vde.o common-obj-$(CONFIG_NETMAP) += netmap.o +common-obj-y += filter.o diff --git a/net/clients.h b/net/clients.h index d47530e..bcfb34b 100644 --- a/net/clients.h +++ b/net/clients.h @@ -62,4 +62,7 @@ int net_init_netmap(const NetClientOptions *opts, const char *name, int net_init_vhost_user(const NetClientOptions *opts, const char *name, NetClientState *peer, Error **errp); +int net_init_filter(const NetClientOptions *opts, const char *name, + NetClientState *peer, Error **errp); + 
#endif /* QEMU_NET_CLIENTS_H */
diff --git a/net/filter.c b/net/filter.c
new file mode 100644
index 0000000..89ee830
--- /dev/null
+++ b/net/filter.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang <yan...@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "net/net.h"
+#include "clients.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "net/filter.h"
+
+typedef struct FilterPlugin FilterPlugin;
+struct FilterPlugin {
+    QLIST_ENTRY(FilterPlugin) next;
+    NetClientState *plugin;
+};
+
+typedef struct FILTERState {
+    NetClientState nc;
+    NetClientState *backend;
+    QLIST_HEAD(, FilterPlugin) plugins;
+} FILTERState;
+
+/*
+ * Entry point for every packet that crosses the filter. @sender is the
+ * client that queued the packet (the NIC or the backend); it decides in
+ * which direction the packet travels once the plugins have seen it.
+ *
+ * Returns the result of the plugin or backend receive handler that took
+ * the packet, or @size once the packet has been queued towards the NIC.
+ */
+static ssize_t filter_receive(NetClientState *nc, NetClientState *sender,
+                              unsigned flags, const uint8_t *data, size_t size)
+{
+    FILTERState *s = DO_UPCAST(FILTERState, nc, nc);
+    NetClientState *backend = s->backend;
+    FilterPlugin *plug;
+    ssize_t ret;
+
+    /*
+     * Let the plugins handle the packet first. A plugin returns:
+     * 0: it is done with the packet, keep passing it on
+     * size: it has stolen the packet, stop passing it further
+     */
+    QLIST_FOREACH(plug, &s->plugins, next) {
+        NetClientState *plugin = plug->plugin;
+
+        ret = plugin->info->receive_filter(plugin, sender, flags, data, size);
+        if (ret > 0 && (size_t)ret == size) {
+            /* the plugin will take care of this packet */
+            return ret;
+        }
+    }
+
+    if (sender->info->type == NET_CLIENT_OPTIONS_KIND_NIC) {
+        /*
+         * The packet comes from the NIC. The filter stands in for the
+         * backend here, so call the backend's receive handler directly.
+         */
+        if ((flags & QEMU_NET_PACKET_FLAG_RAW) && backend->info->receive_raw) {
+            return backend->info->receive_raw(backend, data, size);
+        }
+        return backend->info->receive(backend, data, size);
+    }
+
+    /* The packet comes from the backend: pass it on to the NIC. */
+    if (flags & QEMU_NET_PACKET_FLAG_RAW) {
+        qemu_send_packet_raw(nc, data, size);
+    } else {
+        qemu_send_packet_async(nc, data, size, NULL);
+    }
+
+    /* The packet now sits in the NIC queue; report the delivery as done. */
+    return size;
+}
+
+/*
+ * Take the filter out of the data path: link the backend and the NIC back
+ * to each other (or clear the peer pointer of whichever side exists) and
+ * release the plugin list.
+ */
+static void filter_cleanup(NetClientState *nc)
+{
+    FILTERState *s = DO_UPCAST(FILTERState, nc, nc);
+    FilterPlugin *plug, *next_plug;
+
+    if (s->backend && nc->peer) {
+        qemu_flush_queued_packets(nc);
+        s->backend->peer = nc->peer;
+        nc->peer->peer = s->backend;
+    } else if (s->backend) {
+        qemu_flush_queued_packets(nc);
+        s->backend->peer = NULL;
+    } else if (nc->peer) {
+        nc->peer->peer = NULL;
+    }
+    nc->peer = NULL;
+
+    /* Only the list nodes belong to the filter, not the plugins. */
+    QLIST_FOREACH_SAFE(plug, &s->plugins, next, next_plug) {
+        QLIST_REMOVE(plug, next);
+        g_free(plug);
+    }
+}
+
+/*
+ * The callbacks below relay vnet-header and offload requests to the backend
+ * behind the filter. If the backend lacks a callback, the has_* queries
+ * answer false, the setters do nothing and set_vnet_le/set_vnet_be return
+ * -ENOSYS.
+ */
+static bool filter_has_ufo(NetClientState *nc)
+{
+    FILTERState *s = DO_UPCAST(FILTERState, nc, nc);
+    NetClientState *backend = s->backend;
+
+    if (!backend->info->has_ufo) {
+        return false;
+    }
+
+    return backend->info->has_ufo(backend);
+}
+
+static bool filter_has_vnet_hdr(NetClientState *nc)
+{
+    FILTERState *s = DO_UPCAST(FILTERState, nc, nc);
+    NetClientState *backend = s->backend;
+
+    if (!backend->info->has_vnet_hdr) {
+        return false;
+    }
+
+    return backend->info->has_vnet_hdr(backend);
+}
+
+static bool filter_has_vnet_hdr_len(NetClientState *nc, int len)
+{
+    FILTERState *s = DO_UPCAST(FILTERState, nc, nc);
+    NetClientState *backend = s->backend;
+
+    if (!backend->info->has_vnet_hdr_len) {
+        return false;
+    }
+
+    return backend->info->has_vnet_hdr_len(backend, len);
+}
+
+static void filter_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
+{
+    FILTERState *s = DO_UPCAST(FILTERState, nc, nc);
+    NetClientState *backend = s->backend;
+
+    if (!backend->info->using_vnet_hdr) {
+        return;
+    }
+
+    backend->info->using_vnet_hdr(backend, using_vnet_hdr);
+}
+
+static void filter_set_offload(NetClientState *nc, int csum, int tso4,
+                               int tso6, int ecn, int ufo)
+{
+    FILTERState *s = DO_UPCAST(FILTERState, nc, nc);
+    NetClientState *backend = s->backend;
+
+    if (!backend->info->set_offload) {
+        return;
+    }
+
+    backend->info->set_offload(backend, csum, tso4, tso6, ecn, ufo);
+}
+
+static void filter_set_vnet_hdr_len(NetClientState *nc, int len)
+{
+    FILTERState *s = DO_UPCAST(FILTERState, nc, nc);
+    NetClientState *backend = s->backend;
+
+    if (!backend->info->set_vnet_hdr_len) {
+        return;
+    }
+
+    backend->info->set_vnet_hdr_len(backend, len);
+}
+
+static int filter_set_vnet_le(NetClientState *nc, bool is_le)
+{
+    FILTERState *s = DO_UPCAST(FILTERState, nc, nc);
+    NetClientState *backend = s->backend;
+
+    if (!backend->info->set_vnet_le) {
+        return -ENOSYS;
+    }
+
+    return backend->info->set_vnet_le(backend, is_le);
+}
+
+static int filter_set_vnet_be(NetClientState *nc, bool is_be)
+{
+    FILTERState *s = DO_UPCAST(FILTERState, nc, nc);
+    NetClientState *backend = s->backend;
+
+    if (!backend->info->set_vnet_be) {
+        return -ENOSYS;
+    }
+
+    return backend->info->set_vnet_be(backend, is_be);
+}
+
+static NetClientInfo net_filter_info = {
+    .type = NET_CLIENT_OPTIONS_KIND_FILTER,
+    .size = sizeof(FILTERState),
+    .receive_filter = filter_receive,
+    .cleanup = filter_cleanup,
+    .has_ufo = filter_has_ufo,
+    .has_vnet_hdr = filter_has_vnet_hdr,
+    .has_vnet_hdr_len = filter_has_vnet_hdr_len,
+    .using_vnet_hdr = filter_using_vnet_hdr,
+    .set_offload = filter_set_offload,
+    .set_vnet_hdr_len = filter_set_vnet_hdr_len,
+    .set_vnet_le = filter_set_vnet_le,
+    .set_vnet_be = filter_set_vnet_be,
+};
+
+/*
+ * Create a filter netdev and insert it between the backend named by the
+ * "backend" option and that backend's NIC, if it already has one.
+ *
+ * Returns 0 on success; on failure sets @errp and returns -1.
+ */
+int net_init_filter(const NetClientOptions *opts, const char *name,
+                    NetClientState *peer, Error **errp)
+{
+    NetClientState *nc;
+    NetClientState *ncs[MAX_QUEUE_NUM];
+    FILTERState *s;
+    const NetdevFilterOptions *filter;
+    NetClientState *backend;
+    int queues;
+
+    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_FILTER);
+    filter = opts->filter;
+
+    /* "backend" is optional in the schema: report it, do not assert */
+    if (!filter->has_backend) {
+        error_setg(errp, "filter needs 'backend' property set");
+        return -1;
+    }
+
+    queues = qemu_find_net_clients_except(filter->backend, ncs,
+                                          NET_CLIENT_OPTIONS_KIND_NIC,
+                                          MAX_QUEUE_NUM);
+    if (queues > 1) {
+        error_setg(errp, "multiqueues is not supported by now");
+        return -1;
+    } else if (queues < 1) {
+        error_setg(errp, "invalid backend name specified");
+        return -1;
+    }
+
+    backend = ncs[0];
+    if (backend->peer) {
+        switch (backend->peer->info->type) {
+        case NET_CLIENT_OPTIONS_KIND_FILTER:
+            error_setg(errp, "a filter already attached to the backend");
+            return -1;
+        case NET_CLIENT_OPTIONS_KIND_NIC:
+            /*
+             * We only support filter between NIC and network
+             * backend.
+             */
+            break;
+        default:
+            error_setg(errp, "not supported");
+            return -1;
+        }
+    }
+
+    nc = qemu_new_net_client(&net_filter_info, peer, "filter", name);
+    s = DO_UPCAST(FILTERState, nc, nc);
+    s->backend = backend;
+    /* must be ready before packets can reach filter_receive() */
+    QLIST_INIT(&s->plugins);
+
+    /*              +--------------+       +-------------+
+     * +----------+ |    filter    |       |frontend(NIC)|
+     * |      peer+-->             |       |             |
+     * | network  <--+backend      <-------+ peer        |
+     * | backend  | |         peer +------->             |
+     * +----------+ +--------------+       +-------------+
+     */
+    if (backend->peer) {
+        nc->peer = backend->peer;
+        nc->peer->peer = nc;
+        backend->peer = nc;
+        /*
+         * we need to flush backend incoming queue, because it is
+         * no longer used when filter inserted
+         */
+        qemu_flush_queued_packets(backend);
+    } else {
+        backend->peer = nc;
+    }
+
+    return 0;
+}
+
+/* Public APIs */
+
+/* Return the network backend that filter @nc sits in front of. */
+NetClientState *filter_backend(NetClientState *nc)
+{
+    FILTERState *s = DO_UPCAST(FILTERState, nc, nc);
+
+    return s->backend;
+}
+
+/* Attach @plugin to filter @nc so that it sees every packet. Returns 0. */
+int filter_add_plugin(NetClientState *nc, NetClientState *plugin)
+{
+    FILTERState *s = DO_UPCAST(FILTERState, nc, nc);
+    FilterPlugin *plug;
+
+    plug = g_malloc(sizeof(*plug));
+    plug->plugin = plugin;
+    QLIST_INSERT_HEAD(&s->plugins, plug, next);
+    return 0;
+}
+
+/*
+ * Detach @plugin from filter @nc.
+ *
+ * Returns 0 on success, -ENOENT if @plugin is not attached to @nc.
+ */
+int filter_del_plugin(NetClientState *nc, NetClientState *plugin)
+{
+    FILTERState *s = DO_UPCAST(FILTERState, nc, nc);
+    FilterPlugin *plug;
+
+    QLIST_FOREACH(plug, &s->plugins, next) {
+        if (plug->plugin == plugin) {
+            QLIST_REMOVE(plug, next);
+            g_free(plug);
+            return 0;
+        }
+    }
+
+    return -ENOENT;
+}
diff --git a/net/net.c
b/net/net.c index 28a5597..c273981 100644 --- a/net/net.c +++ b/net/net.c @@ -57,6 +57,7 @@ const char *host_net_devices[] = { "tap", "socket", "dump", + "filter", #ifdef CONFIG_NET_BRIDGE "bridge", #endif @@ -396,7 +397,8 @@ void qemu_del_net_client(NetClientState *nc) assert(queues != 0); /* If there is a peer NIC, delete and cleanup client, but do not free. */ - if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_NIC) { + if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_NIC + && nc->info->type != NET_CLIENT_OPTIONS_KIND_FILTER) { NICState *nic = qemu_get_nic(nc->peer); if (nic->peer_deleted) { return; @@ -571,7 +573,9 @@ ssize_t qemu_deliver_packet(NetClientState *sender, return 0; } - if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { + if (nc->info->receive_filter) { + ret = nc->info->receive_filter(nc, sender, flags, data, size); + } else if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { ret = nc->info->receive_raw(nc, data, size); } else { ret = nc->info->receive(nc, data, size); @@ -659,7 +663,8 @@ ssize_t qemu_send_packet_raw(NetClientState *nc, const uint8_t *buf, int size) buf, size, NULL); } -static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov, +static ssize_t nc_sendv_compat(NetClientState *nc, NetClientState *sender, + unsigned flags, const struct iovec *iov, int iovcnt) { uint8_t buffer[NET_BUFSIZE]; @@ -667,7 +672,11 @@ static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov, offset = iov_to_buf(iov, iovcnt, 0, buffer, sizeof(buffer)); - return nc->info->receive(nc, buffer, offset); + if (nc->info->receive_filter) { + return nc->info->receive_filter(nc, sender, flags, buffer, offset); + } else { + return nc->info->receive(nc, buffer, offset); + } } ssize_t qemu_deliver_packet_iov(NetClientState *sender, @@ -690,7 +699,7 @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender, if (nc->info->receive_iov) { ret = nc->info->receive_iov(nc, iov, iovcnt); } 
else { - ret = nc_sendv_compat(nc, iov, iovcnt); + ret = nc_sendv_compat(nc, sender, flags, iov, iovcnt); } if (ret == 0) { @@ -886,6 +895,7 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])( const char *name, NetClientState *peer, Error **errp) = { [NET_CLIENT_OPTIONS_KIND_NIC] = net_init_nic, + [NET_CLIENT_OPTIONS_KIND_FILTER] = net_init_filter, #ifdef CONFIG_SLIRP [NET_CLIENT_OPTIONS_KIND_USER] = net_init_slirp, #endif diff --git a/qapi-schema.json b/qapi-schema.json index a0a45f7..9244c88 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2063,7 +2063,7 @@ # Add a network backend. # # @type: the type of network backend. Current valid values are 'user', 'tap', -# 'vde', 'socket', 'dump' and 'bridge' +# 'vde', 'socket', 'dump', 'bridge' and 'filter' # # @id: the name of the new network backend # @@ -2474,6 +2474,23 @@ '*vhostforce': 'bool' } } ## +# @NetdevFilterOptions +# +# A net filter between network backend and NIC device +# +# @backend: id of the network backend the filter attaches to. +# +# NOTE: used with filter-<plugin> netdevs which provide filter rules. +# By default, if no plugin is supplied, the net filter does nothing +# but pass all packets between the NIC and the network backend. +# +# Since 2.5 +## +{ 'struct': 'NetdevFilterOptions', + 'data': { + '*backend': 'str' } } + +## # @NetClientOptions # # A discriminated record of network device traits. @@ -2496,7 +2513,8 @@ 'bridge': 'NetdevBridgeOptions', 'hubport': 'NetdevHubPortOptions', 'netmap': 'NetdevNetmapOptions', - 'vhost-user': 'NetdevVhostUserOptions' } } + 'vhost-user': 'NetdevVhostUserOptions', + 'filter': 'NetdevFilterOptions'} } ## # @NetLegacy -- 1.9.1