Re: [ovs-dev] [PATCH 4/4] User-Space MPLS actions and matches

2012-10-10 Thread Simon Horman
On Wed, Oct 10, 2012 at 12:16:15PM +0900, Isaku Yamahata wrote:
> On Tue, Oct 09, 2012 at 04:08:35PM +0900, Simon Horman wrote:
> > @@ -938,6 +990,15 @@ mf_is_value_valid(const struct mf_field *mf, const 
> > union mf_value *value)
> >  case MFF_IPV6_LABEL:
> >  return !(value->be32 & ~htonl(IPV6_LABEL_MASK));
> >  
> > +case MFF_MPLS_LABEL:
> > +return !(value->be32 & ~htonl(MPLS_LABEL_MASK >> 
> > MPLS_LABEL_SHIFT));
> > +
> > +case MFF_MPLS_TC:
> > +return !(value->u8 & ~7);
> > +
> > +case MFF_MPLS_BOS:
> > +return !(value->u8 & ~1);
> > +
> 
> Minor nitpick. MPLS_xxx_MASK >> MPLS_xxx_SHIFT.
> 
> > @@ -1794,6 +1906,18 @@ mf_random_value(const struct mf_field *mf, union 
> > mf_value *value)
> >  value->u8 &= 0x07;
> >  break;
> >  
> > +case MFF_MPLS_LABEL:
> > +value->be32 &= htonl(MPLS_LABEL_MASK >> MPLS_LABEL_SHIFT);
> > +break;
> > +
> > +case MFF_MPLS_TC:
> > +value->u8 &= 0x07;
> > +break;
> > +
> > +case MFF_MPLS_BOS:
> > +value->u8 &= 0x01;
> > +break;
> > +
> 
> ditto.

Thanks, I will fix those.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 2/4] user-space datapath: Add basic MPLS support to kernel

2012-10-10 Thread Simon Horman
On Wed, Oct 10, 2012 at 11:59:26AM +0900, Isaku Yamahata wrote:
> On Tue, Oct 09, 2012 at 04:08:33PM +0900, Simon Horman wrote:
> > This is to match the kernel implementation in the
> > patch "datapath: Add basic MPLS support to kernel"
> > by Leo Alterman.
> > 
> > Cc: Leo Alterman 
> > Signed-off-by: Simon Horman 
> > 
> > ---
> > 
> > v2.2
> > * No change
> > 
> > v2.1
> > * Initial post
> > ---
> >  lib/dpif-netdev.c |   13 +
> >  lib/odp-util.c|   43 +++
> >  2 files changed, 56 insertions(+)
> > 
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > index c9e3210..6deb9ff 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -1212,6 +1212,7 @@ execute_set_action(struct ofpbuf *packet, const 
> > struct nlattr *a)
> >   case OVS_KEY_ATTR_ICMPV6:
> >   case OVS_KEY_ATTR_ARP:
> >   case OVS_KEY_ATTR_ND:
> > + case OVS_KEY_ATTR_MPLS:
> >   case __OVS_KEY_ATTR_MAX:
> >   default:
> >  NOT_REACHED();
> > @@ -1249,6 +1250,18 @@ dp_netdev_execute_actions(struct dp_netdev *dp,
> >  eth_pop_vlan(packet);
> >  break;
> >  
> > +case OVS_ACTION_ATTR_PUSH_MPLS:
> > +push_mpls(packet, nl_attr_get_be16(a));
> > +break;
> > +
> > +case OVS_ACTION_ATTR_POP_MPLS:
> > + pop_mpls(packet, nl_attr_get_be16(a));
> > + break;
> > +
> > +case OVS_ACTION_ATTR_SET_MPLS:
> > + set_mpls_lse(packet, nl_attr_get_be32(a));
> > + break;
> > +
> >  case OVS_ACTION_ATTR_SET:
> >  execute_set_action(packet, nl_attr_get(a));
> >  break;
> > diff --git a/lib/odp-util.c b/lib/odp-util.c
> > index 257d7a7..13b3248 100644
> > --- a/lib/odp-util.c
> > +++ b/lib/odp-util.c
> > @@ -73,6 +73,9 @@ odp_action_len(uint16_t type)
> >  case OVS_ACTION_ATTR_USERSPACE: return -2;
> >  case OVS_ACTION_ATTR_PUSH_VLAN: return sizeof(struct 
> > ovs_action_push_vlan);
> >  case OVS_ACTION_ATTR_POP_VLAN: return 0;
> > +case OVS_ACTION_ATTR_PUSH_MPLS: return sizeof(struct 
> > ovs_action_push_mpls);
> > +case OVS_ACTION_ATTR_POP_MPLS: return sizeof(ovs_be16);
> > +case OVS_ACTION_ATTR_SET_MPLS: return sizeof(ovs_be32);
> >  case OVS_ACTION_ATTR_SET: return -2;
> >  case OVS_ACTION_ATTR_SAMPLE: return -2;
> >  
> > @@ -106,6 +109,7 @@ ovs_key_attr_to_string(enum ovs_key_attr attr)
> >  case OVS_KEY_ATTR_ARP: return "arp";
> >  case OVS_KEY_ATTR_ND: return "nd";
> >  case OVS_KEY_ATTR_TUN_ID: return "tun_id";
> > +case OVS_KEY_ATTR_MPLS: return "mpls";
> >  
> >  case __OVS_KEY_ATTR_MAX:
> >  default:
> > @@ -273,6 +277,17 @@ format_vlan_tci(struct ds *ds, ovs_be16 vlan_tci)
> >  }
> >  
> >  static void
> > +format_mpls_lse(struct ds *ds, ovs_be32 mpls_lse)
> > +{
> > +ds_put_format(ds, "label=%"PRIu32",tc=%d,ttl=%d,bos=%d",
> > +  mpls_lse_to_label(mpls_lse),
> > +  mpls_lse_to_tc(mpls_lse),
> > +  mpls_lse_to_ttl(mpls_lse),
> > +  mpls_lse_to_stack(mpls_lse));
> > +}
> > +
> > +
> > +static void
> >  format_odp_action(struct ds *ds, const struct nlattr *a)
> >  {
> >  int expected_len;
> > @@ -311,6 +326,25 @@ format_odp_action(struct ds *ds, const struct nlattr 
> > *a)
> >  case OVS_ACTION_ATTR_POP_VLAN:
> >  ds_put_cstr(ds, "pop_vlan");
> >  break;
> > +case OVS_ACTION_ATTR_PUSH_MPLS: {
> > +const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
> > +ds_put_cstr(ds, "push_mpls(");
> > +format_mpls_lse(ds, mpls->mpls_label);
> > +ds_put_format(ds, "eth_type=0x%"PRIx16")", 
> > ntohs(mpls->mpls_ethertype));
> > +break;
> > +}
> > +case OVS_ACTION_ATTR_POP_MPLS: {
> > +ovs_be16 ethertype = nl_attr_get_be16(a);
> > +ds_put_format(ds, "pop_mpls(eth_type=0x%"PRIx16")", 
> > ntohs(ethertype));
> > +break;
> > +   }
> 
> Minor nitpick. the position of '}'
> 
> thanks,

Oops, I'll fix that.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 1/4] datapath: Add basic MPLS support to kernel

2012-10-10 Thread Simon Horman
On Wed, Oct 10, 2012 at 11:57:15AM +0900, Isaku Yamahata wrote:
> On Tue, Oct 09, 2012 at 04:08:32PM +0900, Simon Horman wrote:
> > +static int push_mpls(struct sk_buff *skb, const struct 
> > ovs_action_push_mpls *mpls)
> > +{
> > +   u32 l2_size;
> > +   __be32 *new_mpls_label;
> > +
> > +   if (skb_cow_head(skb, MPLS_HLEN) < 0) {
> > +   kfree_skb(skb);
> > +   return -ENOMEM;
> > +   }
> > +
> > +   l2_size = skb_cb_mpls_stack_offset(skb);
> > +   skb_push(skb, MPLS_HLEN);
> > +   memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), l2_size);
> > +   skb_reset_mac_header(skb);
> > +
> > +   new_mpls_label = (__be32 *)(skb_mac_header(skb) + l2_size);
> > +   *new_mpls_label = mpls->mpls_label;
> > +
> > +   set_ethertype(skb, mpls->mpls_ethertype);
> > +   return 0;
> > +}
> > +
> > +static int pop_mpls(struct sk_buff *skb, const __be16 *ethertype)
> > +{
> > +   __be16 current_ethertype = get_ethertype(skb);
> > +   if (current_ethertype == htons(ETH_P_MPLS_UC) ||
> > +   current_ethertype == htons(ETH_P_MPLS_MC)) {
> > +   u32 l2_size = skb_cb_mpls_stack_offset(skb);
> > +
> > +   memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), 
> > l2_size);
> > +
> > +   skb_pull(skb, MPLS_HLEN);
> > +   skb_reset_mac_header(skb);
> > +
> > +   set_ethertype(skb, *ethertype);
> > +   }
> > +   return 0;
> > +}
> > +
> > +static int set_mpls(struct sk_buff *skb, const __be32 *mpls_label)
> > +{
> > +   __be16 current_ethertype = get_ethertype(skb);
> > +   if (current_ethertype == htons(ETH_P_MPLS_UC) ||
> > +   current_ethertype == htons(ETH_P_MPLS_MC)) {
> > +   memcpy(skb_cb_mpls_stack(skb), mpls_label, sizeof(__be32));
> > +   }
> > +   return 0;
> > +}
> > +
> 
> push_mpls and set_mpls overwrite BOS bit. So it can produce corrupted packet.
> Is this by design?

My personal opinion is that it is up to the caller of set_mpls()
to provide a valid label. However, I'm happy to discuss other ideas.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] Changing proc title on FreeBSD

2012-10-10 Thread Ed Maste
On 9 October 2012 18:07, Ben Pfaff  wrote:
> On Tue, Oct 09, 2012 at 06:03:32PM -0400, Ed Maste wrote:
>>...
>>
>> Interesting - I didn't notice this code before.
>>
>> On FreeBSD we have setproctitle(const char *fmt, ...) for this
>> functionality, which has the same interface as proctitle_set.
>> Unfortunately there's no decent way to pass the varargs through.
>>
>> I could add an #elif defined(__FreeBSD__) case to command-line.c and
>> use a temporary buffer in my proctitle_set implementation, or could
>> just #define proctitle_set setproctitle in command-line.h.  Thoughts
>> on which is preferable?
>
> I'm happy enough with the latter; it's simpler, as long as it works.

Ahh, so there's one small caveat.  setproctitle internally prepends
the "program_name: " to the provided string so the resulting proctitle
ends up like:

ovs-vswitchd: ovs-vswitchd: worker process for pid 24853 (ovs-vswitchd)

The current proctitle_set callers already use this format and it seems
to be a reasonable restriction, so it would be possible to move the
program_name logic into proctitle_set for Linux as well.  If that's
undesirable I'll use temporary buffer in the FreeBSD implementation
and strip the program name back off before calling setproctitle.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] Changing proc title on FreeBSD

2012-10-10 Thread Ben Pfaff
On Wed, Oct 10, 2012 at 09:34:29AM -0400, Ed Maste wrote:
> On 9 October 2012 18:07, Ben Pfaff  wrote:
> > On Tue, Oct 09, 2012 at 06:03:32PM -0400, Ed Maste wrote:
> >>...
> >>
> >> Interesting - I didn't notice this code before.
> >>
> >> On FreeBSD we have setproctitle(const char *fmt, ...) for this
> >> functionality, which has the same interface as proctitle_set.
> >> Unfortunately there's no decent way to pass the varargs through.
> >>
> >> I could add an #elif defined(__FreeBSD__) case to command-line.c and
> >> use a temporary buffer in my proctitle_set implementation, or could
> >> just #define proctitle_set setproctitle in command-line.h.  Thoughts
> >> on which is preferable?
> >
> > I'm happy enough with the latter; it's simpler, as long as it works.
> 
> Ahh, so there's one small caveat.  setproctitle internally prepends
> the "program_name: " to the provided string so the resulting proctitle
> ends up like:
> 
> ovs-vswitchd: ovs-vswitchd: worker process for pid 24853 (ovs-vswitchd)
> 
> The current proctitle_set callers already use this format and it seems
> to be a reasonable restriction, so it would be possible to move the
> program_name logic into proctitle_set for Linux as well.  If that's
> undesirable I'll use temporary buffer in the FreeBSD implementation
> and strip the program name back off before calling setproctitle.

It's OK with me to modify proctitle_set to use the setproctitle
convention.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] build: Add support for building RPMs for Fedora Linux

2012-10-10 Thread Kyle Mestery (kmestery)
On Oct 4, 2012, at 12:56 PM, Ben Pfaff  wrote:
> Thanks, Ralf.  I will add
>Signed-off-by: Ralf Spenneberg 
> to the commit log.
> 
Hi Ben:

Are you waiting for my feedback on this patch? Just curious as to it's state
and if you see anything else you would like me to address.

Thanks,
Kyle

> On Thu, Oct 04, 2012 at 07:40:55AM +0200, Ralf Spenneberg wrote:
>> Hi,
>> 
>> sorry for the delay. Yesterday was a national holiday in Germany. Yes I sure 
>> sign off this work.
>> 
>> Kind regards,
>> 
>> Ralf
>> Am Dienstag, 2. Oktober 2012, 12:30:00 schrieb Ben Pfaff:
>>> Ralf, I understand that much of this is based on your work.  Are you
>>> willing to give us a
>>>Signed-off-by: Ralf Spenneberg 
>>> or other appropriate sign-off?  By doing this, you are agreeing to the
>>> Developer's Certificate of Origin (see below for more details).
>>> 
>>> (Maybe Kyle already asked you privately.  I'm not sure about that.)
>>> 
>>> Thanks,
>>> 
>>> Ben.
>>> 
>>> Developer's Certificate of Origin
>>> -
>>> 
>>> To help track the author of a patch as well as the submission chain,
>>> and be clear that the developer has authority to submit a patch for
>>> inclusion in openvswitch please sign off your work.  The sign off
>>> certifies the following:
>>> 
>>>Developer's Certificate of Origin 1.1
>>> 
>>>By making a contribution to this project, I certify that:
>>> 
>>>(a) The contribution was created in whole or in part by me and I
>>>have the right to submit it under the open source license
>>>indicated in the file; or
>>> 
>>>(b) The contribution is based upon previous work that, to the best
>>>of my knowledge, is covered under an appropriate open source
>>>license and I have the right under that license to submit that
>>>work with modifications, whether created in whole or in part
>>>by me, under the same open source license (unless I am
>>>permitted to submit under a different license), as indicated
>>>in the file; or
>>> 
>>>(c) The contribution was provided directly to me by some other
>>>person who certified (a), (b) or (c) and I have not modified
>>>it.
>>> 
>>>(d) I understand and agree that this project and the contribution
>>>are public and that a record of the contribution (including all
>>>personal information I submit with it, including my sign-off) is
>>>maintained indefinitely and may be redistributed consistent with
>>>this project or the open source license(s) involved.


___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] userspace datapath performance (was: Re: Threaded userspace datapath)

2012-10-10 Thread Ben Pfaff
More than anything I'd like to see an analysis, based on at least
simple measurements, probably including some conjecture, of the reason
or reasons that the threaded datapath yields performance that is so
much better.  If it comes down to something not fundamental to having
threads, then we can talk about the trade-offs between working on
those issues directly versus introducing threading.

One issue that's been brought up is the fact that Open vSwitch throws
away the information provided by "poll" regarding the file descriptors
that are ready for read or write.  If that's a significant cost that
contributes to slowness (I'm not convinced yet, but I could be
convinced by measurements), then that's something that we can work on;
I'm happy to do that.  I think that we can work on that without
completely changing how the poll loop works.

I don't remember other proposed reasons why the userspace datapath is
slow.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] build: Add support for building RPMs for Fedora Linux

2012-10-10 Thread Ben Pfaff
On Wed, Oct 10, 2012 at 03:19:47PM +, Kyle Mestery (kmestery) wrote:
> On Oct 4, 2012, at 12:56 PM, Ben Pfaff  wrote:
> > Thanks, Ralf.  I will add
> >Signed-off-by: Ralf Spenneberg 
> > to the commit log.
> > 
> Hi Ben:
> 
> Are you waiting for my feedback on this patch? Just curious as to it's state
> and if you see anything else you would like me to address.

I've just been swamped.  I applied this to master.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] build: Add support for building RPMs for Fedora Linux

2012-10-10 Thread Kyle Mestery (kmestery)
On Oct 10, 2012, at 11:47 AM, Ben Pfaff 
 wrote:
> On Wed, Oct 10, 2012 at 03:19:47PM +, Kyle Mestery (kmestery) wrote:
>> On Oct 4, 2012, at 12:56 PM, Ben Pfaff  wrote:
>>> Thanks, Ralf.  I will add
>>>   Signed-off-by: Ralf Spenneberg 
>>> to the commit log.
>>> 
>> Hi Ben:
>> 
>> Are you waiting for my feedback on this patch? Just curious as to it's state
>> and if you see anything else you would like me to address.
> 
> I've just been swamped.  I applied this to master.

No worries, just wanted to make sure I wasn't missing something on my end.

Thanks for applying it to master!

Kyle
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 02/11] ofp-msgs: Open Flow 1.1 and 1.2 Port Status Messages

2012-10-10 Thread Ben Pfaff
On Sat, Oct 06, 2012 at 07:39:49PM +0900, Simon Horman wrote:
> On Thu, Oct 04, 2012 at 10:18:26AM -0700, Ben Pfaff wrote:
> > On Thu, Sep 20, 2012 at 01:10:41PM +0900, Simon Horman wrote:
> > > This allows for encoding and decoding Open Flow 1.1 and 1.2 Port Stats
> > > Request and Reply message
> > > 
> > > Signed-off-by: Simon Horman 
> > 
> > I see a few careless errors here.  print_port_stat() should have its
> > return type on a separate line.  ofputil_parse_key_value() shouldn't
> > be changed at all.  ofputlil_dump_ports() has a typo in its name.  So
> > does ofptutil_decode_one_port_reply().  So does
> > ofptutil_decode_port_stats_request().
> > 
> > I see some incorrect uses of assert, that could fire in production if
> > an unsupported OF1.1 port number is used.  They would also drop the
> > port numbers from the output if NDEBUG was defined.
> > 
> > I'm not really fond of the use of ofp11_port_stats as the common
> > format.  I think it would be better to define a host-byte-order
> > structure.  It could have the same fields as ofp_port_stats, or it
> > could have a uint16_t port number and a "struct netdev_stats".  (The
> > latter would be convenient for ofproto.)
> 
> Hi Ben,
> 
> I believe that the revised patch below addresses the concerns that
> you raised above. I have taken the uint16_t + struct netdev_stats
> approach that you suggested.

I made some changes and applied to master as follows.

--8<--cut here-->8--

From: Simon Horman 
Date: Sat, 6 Oct 2012 19:39:49 +0900
Subject: [PATCH] ofp-msgs: Open Flow 1.1 and 1.2 Port Status Messages

This allows for encoding and decoding Open Flow 1.1 and 1.2 Port Stats
Request and Reply message

Signed-off-by: Simon Horman 
[b...@nicira.com added ofputil_count_port_stas(), simplified
 interface of ofputil_decode_port_stats(), style changes]
Signed-off-by: Ben Pfaff 
---
 lib/ofp-msgs.h|   14 ++-
 lib/ofp-print.c   |   78 +---
 lib/ofp-util.c|  242 +
 lib/ofp-util.h|   15 +++
 ofproto/ofproto.c |   35 +++-
 tests/ofp-print.at|   59 -
 utilities/ovs-ofctl.c |6 +-
 7 files changed, 384 insertions(+), 65 deletions(-)

diff --git a/lib/ofp-msgs.h b/lib/ofp-msgs.h
index 752d12c..6420c5d 100644
--- a/lib/ofp-msgs.h
+++ b/lib/ofp-msgs.h
@@ -216,10 +216,14 @@ enum ofpraw {
 OFPRAW_OFPST12_TABLE_REPLY,
 
 /* OFPST 1.0 (4): struct ofp10_port_stats_request. */
-OFPRAW_OFPST_PORT_REQUEST,
+OFPRAW_OFPST10_PORT_REQUEST,
+/* OFPST 1.1+ (4): struct ofp11_port_stats_request. */
+OFPRAW_OFPST11_PORT_REQUEST,
 
 /* OFPST 1.0 (4): struct ofp10_port_stats[]. */
-OFPRAW_OFPST_PORT_REPLY,
+OFPRAW_OFPST10_PORT_REPLY,
+/* OFPST 1.1+ (4): struct ofp11_port_stats[]. */
+OFPRAW_OFPST11_PORT_REPLY,
 
 /* OFPST 1.0 (5): struct ofp10_queue_stats_request. */
 OFPRAW_OFPST_QUEUE_REQUEST,
@@ -382,8 +386,10 @@ enum ofptype {
 OFPTYPE_TABLE_STATS_REPLY,   /* OFPRAW_OFPST10_TABLE_REPLY.
   * OFPRAW_OFPST11_TABLE_REPLY.
   * OFPRAW_OFPST12_TABLE_REPLY. */
-OFPTYPE_PORT_STATS_REQUEST,  /* OFPRAW_OFPST_PORT_REQUEST. */
-OFPTYPE_PORT_STATS_REPLY,/* OFPRAW_OFPST_PORT_REPLY. */
+OFPTYPE_PORT_STATS_REQUEST,  /* OFPRAW_OFPST10_PORT_REQUEST.
+  * OFPRAW_OFPST11_PORT_REQUEST. */
+OFPTYPE_PORT_STATS_REPLY,/* OFPRAW_OFPST10_PORT_REPLY.
+  * OFPRAW_OFPST11_PORT_REPLY. */
 OFPTYPE_QUEUE_STATS_REQUEST, /* OFPRAW_OFPST_QUEUE_REQUEST. */
 OFPTYPE_QUEUE_STATS_REPLY,   /* OFPRAW_OFPST_QUEUE_REPLY. */
 OFPTYPE_PORT_DESC_STATS_REQUEST, /* OFPRAW_OFPST_PORT_DESC_REQUEST. */
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index 707699e..8eb5de4 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -1065,11 +1065,9 @@ ofp_print_aggregate_stats_reply(struct ds *string, const 
struct ofp_header *oh)
 ds_put_format(string, " flow_count=%"PRIu32, as.flow_count);
 }
 
-static void print_port_stat(struct ds *string, const char *leader,
-const ovs_32aligned_be64 *statp, int more)
+static void
+print_port_stat(struct ds *string, const char *leader, uint64_t stat, int more)
 {
-uint64_t stat = ntohll(get_32aligned_be64(statp));
-
 ds_put_cstr(string, leader);
 if (stat != UINT64_MAX) {
 ds_put_format(string, "%"PRIu64, stat);
@@ -1086,50 +1084,59 @@ static void print_port_stat(struct ds *string, const 
char *leader,
 static void
 ofp_print_ofpst_port_request(struct ds *string, const struct ofp_header *oh)
 {
-const struct ofp10_port_stats_request *psr = ofpmsg_body(oh);
-ds_put_format(string, " port_no=%"PRIu16, ntohs(psr->port_no));
+uint16_t ofp10_port;
+enum ofperr error;
+
+error = ofputil_decode_port_stats_request(oh, &o

Re: [ovs-dev] userspace datapath performance (was: Re: Threaded userspace datapath)

2012-10-10 Thread Luigi Rizzo
On Wed, Oct 10, 2012 at 09:20:19AM -0700, Ben Pfaff wrote:
> More than anything I'd like to see an analysis, based on at least
> simple measurements, probably including some conjecture, of the reason
> or reasons that the threaded datapath yields performance that is so
> much better.  If it comes down to something not fundamental to having
> threads, then we can talk about the trade-offs between working on
> those issues directly versus introducing threading.
> 
> One issue that's been brought up is the fact that Open vSwitch throws
> away the information provided by "poll" regarding the file descriptors
> that are ready for read or write.  If that's a significant cost that
> contributes to slowness (I'm not convinced yet, but I could be
> convinced by measurements), then that's something that we can work on;

For measurements see our infocom paper at

http://ieeexplore.ieee.org/xpl/articleDetails.jsp?tp=&arnumber=6195638

(a preliminary version and other data are available on the netmap page).

We saw 50Kpps tops with the single event loop, and about 300Kpps
with split threads, in both cases processing one packet per iteration,
and using bpf (FreeBSD) or PF_PACKET (?) on linux.

Increasing the number of packets per iteration approx. doubles the
throughput for split threads (the bottleneck here is the system
call to do the packet I/O), whereas gives a negligible improvement
in the single event loop case (suggesting that there is a ton of
work done at each iteration).

> I'm happy to do that.  I think that we can work on that without
> completely changing how the poll loop works.

I am sure you can get some speedup with some relatively simple
optimizations, I am just unclear by how much (increasing the batch
size, which was another problem, did not help in the single-thread
version).

Another problem, which i suspect is harder to fix, is that the code
before select()/poll() to determine which descriptors we should
look at has a lot more work to do than simply marking the descriptors
associated to open interfaces/tunnels.

Not to mention that if the single thread is busy communicating with
the controller, logging, resolving names or whatnot, latency and
throughput become extremely hard to control.

I do buy the concerns in using a second thread as opposed to a
process. However, using a thread at least avoids having to pass
around information (updates to the forwarding table, file descriptors
for interfaces, etc.) which would require additional complications.
True, this comes at the price of adding some locking around data
structures, but updates to these data structures should be relatively
infrequent and so even a crude locking scheme should work well and
be reasonably easy to audit.

And, to be perfectly frank, we do not have neither time nor motivation
to try and accelerate a single threaded version, which we believe
is inherently limited for the reasons mentioned above.
We would rather put some work on integrate the openflow forwarding
code within our VALE switch

http://info.iet.unipi.it/~luigi/vale/

which is based on netmap and works on both FreeBSD and Linux,
and could be used to speedup the in-kernel forwarding.

Anyways, let's see if we can find some agreement on how to
proceed with a face-to-face meeting

cheers
luigi
~
~
~
~
~
~
~

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 1/2] datapath: Add support for tun_key to Open vSwitch datapath

2012-10-10 Thread Pravin Shelar
Patch looks good, I have few comments inlined.


On Tue, Oct 9, 2012 at 11:49 AM, Kyle Mestery  wrote:
> This is a first pass at providing a tun_key which can be
> used as the basis for flow-based tunnelling. The
> tun_key includes and replaces the tun_id in both struct
> ovs_skb_cb and struct sw_tun_key.
>
> This patch allows all existing tun_id behaviour to still work. Existing
> users of tun_id are redirected to tun_key->tun_id to retain compatibility.
> However, when the userspace code is updated to make use of the new tun_key,
> the old behaviour will be deprecated and removed.
>
> NOTE: With these changes, the tunneling code no longer assumes input and
> output keys are symmetric.  If they are not, PMTUD needs to be disabled
> for tunneling to work.
>
> Signed-off-by: Kyle Mestery 
> Cc: Simon Horman 
> Cc: Jesse Gross 
> ---
> V6:
> - Fix more comments addressed from Jesse.
> V5:
> - Address another round of comments from Jesse.
> V4:
> - Address 2 comments from Jesse:
>   - When processing actions, if OVS_CB(skb)->tun_key is NULL, point it at one
> on the stack temporarily. This goes away when we remove the ability to set
> tun_id outside the scope of tun_key.
>   - Move tun_key to the end of struct sw_flow_key.
> V3:
> - Fix issues found during review by Jesse.
> - Add a NEWS entry around tunnel code no longer assuming symmetric input and
>   output tunnel keys.
>
> V2:
> - Fix blank line addition/removal found by Simon.
> - Fix hex printing output found by Simon.
> ---
>  NEWS|   3 ++
>  datapath/actions.c  |  38 +
>  datapath/datapath.c |  10 -
>  datapath/datapath.h |   5 ++-
>  datapath/flow.c |  64 
>  datapath/flow.h |  12 --
>  datapath/tunnel.c   | 101 
> 
>  datapath/tunnel.h   |  15 ++-
>  datapath/vport-capwap.c |  12 +++---
>  datapath/vport-gre.c|  15 ---
>  datapath/vport.c|   2 +-
>  include/linux/openvswitch.h |  18 +++-
>  lib/dpif-netdev.c   |   1 +
>  lib/odp-util.c  |  15 ++-
>  lib/odp-util.h  |   3 +-
>  15 files changed, 235 insertions(+), 79 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 29fd9f3..ae831e3 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -1,5 +1,8 @@
>  post-v1.8.0
>  
> +- The tunneling code no longer assumes input and output keys are 
> symmetric.
> +  If they are not, PMTUD needs to be disabled for tunneling to work. Note
> +  this only applies to flow-based keys.
>  - FreeBSD is now a supported platform, thanks to code contributions from
>Gaetano Catalli, Ed Maste, and Giuseppe Lettieri.
>  - ovs-bugtool: New --ovs option to report only OVS related information.
> diff --git a/datapath/actions.c b/datapath/actions.c
> index ec9b595..fa8c10d 100644
> --- a/datapath/actions.c
> +++ b/datapath/actions.c
> @@ -37,7 +37,8 @@
>  #include "vport.h"
>
>  static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> -   const struct nlattr *attr, int len, bool keep_skb);
> +   const struct nlattr *attr, int len,
> +   struct ovs_key_ipv4_tunnel *tun_key, bool keep_skb);
>
>  static int make_writable(struct sk_buff *skb, int write_len)
>  {
> @@ -329,11 +330,14 @@ static int sample(struct datapath *dp, struct sk_buff 
> *skb,
> }
>
> return do_execute_actions(dp, skb, nla_data(acts_list),
> -nla_len(acts_list), true);
> +nla_len(acts_list),
> +OVS_CB(skb)->tun_key,
> +true);
>  }
>
>  static int execute_set_action(struct sk_buff *skb,
> -const struct nlattr *nested_attr)
> +const struct nlattr *nested_attr,
> +struct ovs_key_ipv4_tunnel *tun_key)
>  {
> int err = 0;
>
> @@ -343,7 +347,21 @@ static int execute_set_action(struct sk_buff *skb,
> break;
>
> case OVS_KEY_ATTR_TUN_ID:
> -   OVS_CB(skb)->tun_id = nla_get_be64(nested_attr);
> +   if (OVS_CB(skb)->tun_key == NULL) {
> +   /* If tun_key is NULL for this skb, assign it to
> +* a value the caller passed in for action processing
> +* and output. This can disappear once we drop support
> +* for setting tun_id outside of tun_key.
> +*/
> +   memset(tun_key, 0, sizeof(struct 
> ovs_key_ipv4_tunnel));
> +   OVS_CB(skb)->tun_key = tun_key;
> +   }
> +
> +   OVS_CB(skb)->tun_key->tun_id = nla_get_be64(nested_attr);

Re: [ovs-dev] [PATCH 1/2] datapath: Add support for tun_key to Open vSwitch datapath

2012-10-10 Thread Chris Wright
* Pravin Shelar (pshe...@nicira.com) wrote:
> On Tue, Oct 9, 2012 at 11:49 AM, Kyle Mestery  wrote:
> > @@ -458,6 +477,9 @@ int ovs_execute_actions(struct datapath *dp, struct 
> > sk_buff *skb)
> > struct sw_flow_actions *acts = 
> > rcu_dereference(OVS_CB(skb)->flow->sf_acts);
> > struct loop_counter *loop;
> > int error;
> > +   struct ovs_key_ipv4_tunnel tun_key;
> > +
> > +memset(&tun_key, 0, sizeof(tun_key));
> >
> tun key will zeroed while executing tunnel set action, Do we still
> need to zero it here?

And with quoting, now shows up as whitespace damaged (if kept, should
fix...quick check shows a handful of other lines added w/ spaces rather
than tabs, sorry for content free review, but caught my eye).

thanks,
-chris
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 02/11] ofp-msgs: Open Flow 1.1 and 1.2 Port Status Messages

2012-10-10 Thread Simon Horman
On Wed, Oct 10, 2012 at 10:33:00AM -0700, Ben Pfaff wrote:
> On Sat, Oct 06, 2012 at 07:39:49PM +0900, Simon Horman wrote:
> > On Thu, Oct 04, 2012 at 10:18:26AM -0700, Ben Pfaff wrote:
> > > On Thu, Sep 20, 2012 at 01:10:41PM +0900, Simon Horman wrote:
> > > > This allows for encoding and decoding Open Flow 1.1 and 1.2 Port Stats
> > > > Request and Reply message
> > > > 
> > > > Signed-off-by: Simon Horman 
> > > 
> > > I see a few careless errors here.  print_port_stat() should have its
> > > return type on a separate line.  ofputil_parse_key_value() shouldn't
> > > be changed at all.  ofputlil_dump_ports() has a typo in its name.  So
> > > does ofptutil_decode_one_port_reply().  So does
> > > ofptutil_decode_port_stats_request().
> > > 
> > > I see some incorrect uses of assert, that could fire in production if
> > > an unsupported OF1.1 port number is used.  They would also drop the
> > > port numbers from the output if NDEBUG was defined.
> > > 
> > > I'm not really fond of the use of ofp11_port_stats as the common
> > > format.  I think it would be better to define a host-byte-order
> > > structure.  It could have the same fields as ofp_port_stats, or it
> > > could have a uint16_t port number and a "struct netdev_stats".  (The
> > > latter would be convenient for ofproto.)
> > 
> > Hi Ben,
> > 
> > I believe that the revised patch below addresses the concerns that
> > you raised above. I have taken the uint16_t + struct netdev_stats
> > approach that you suggested.
> 
> I made some changes and applied to master as follows.

Thanks. I'll use this as the basis for a reworked
queue stats patch.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 1/4] datapath: Add basic MPLS support to kernel

2012-10-10 Thread Isaku Yamahata
On Wed, Oct 10, 2012 at 07:31:25PM +0900, Simon Horman wrote:
> On Wed, Oct 10, 2012 at 11:57:15AM +0900, Isaku Yamahata wrote:
> > On Tue, Oct 09, 2012 at 04:08:32PM +0900, Simon Horman wrote:
> > > +static int push_mpls(struct sk_buff *skb, const struct 
> > > ovs_action_push_mpls *mpls)
> > > +{
> > > + u32 l2_size;
> > > + __be32 *new_mpls_label;
> > > +
> > > + if (skb_cow_head(skb, MPLS_HLEN) < 0) {
> > > + kfree_skb(skb);
> > > + return -ENOMEM;
> > > + }
> > > +
> > > + l2_size = skb_cb_mpls_stack_offset(skb);
> > > + skb_push(skb, MPLS_HLEN);
> > > + memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), l2_size);
> > > + skb_reset_mac_header(skb);
> > > +
> > > + new_mpls_label = (__be32 *)(skb_mac_header(skb) + l2_size);
> > > + *new_mpls_label = mpls->mpls_label;
> > > +
> > > + set_ethertype(skb, mpls->mpls_ethertype);
> > > + return 0;
> > > +}
> > > +
> > > +static int pop_mpls(struct sk_buff *skb, const __be16 *ethertype)
> > > +{
> > > + __be16 current_ethertype = get_ethertype(skb);
> > > + if (current_ethertype == htons(ETH_P_MPLS_UC) ||
> > > + current_ethertype == htons(ETH_P_MPLS_MC)) {
> > > + u32 l2_size = skb_cb_mpls_stack_offset(skb);
> > > +
> > > + memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), 
> > > l2_size);
> > > +
> > > + skb_pull(skb, MPLS_HLEN);
> > > + skb_reset_mac_header(skb);
> > > +
> > > + set_ethertype(skb, *ethertype);
> > > + }
> > > + return 0;
> > > +}
> > > +
> > > +static int set_mpls(struct sk_buff *skb, const __be32 *mpls_label)
> > > +{
> > > + __be16 current_ethertype = get_ethertype(skb);
> > > + if (current_ethertype == htons(ETH_P_MPLS_UC) ||
> > > + current_ethertype == htons(ETH_P_MPLS_MC)) {
> > > + memcpy(skb_cb_mpls_stack(skb), mpls_label, sizeof(__be32));
> > > + }
> > > + return 0;
> > > +}
> > > +
> > 
> > push_mpls and set_mpls overwrite BOS bit. So it can produce corrupted 
> > packet.
> > Is this by design?
> 
> My personal opinion is that it is up to the caller of set_mpls()
> to provide a valid label. However, I'm happy to discuss other ideas.

I see. I have no objection. I just wanted to make sure that it's by design.
-- 
yamahata
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH] nicira-ext: Correct and improve nx_flow_update_abbrev comment.

2012-10-10 Thread Ben Pfaff
Notifications always precede a barrier that follows a given flow_mod,
not a barrier that precedes a flow_mod as the text here previously claimed.

Also, the text about abbreviated notifications is equally applicable
to normal, unabbreviated notifications, so say that explicitly.

Reported-by: Jean Tourrilhes 
Signed-off-by: Ben Pfaff 
---
 include/openflow/nicira-ext.h |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
index 75bf6db..7e49550 100644
--- a/include/openflow/nicira-ext.h
+++ b/include/openflow/nicira-ext.h
@@ -2173,10 +2173,10 @@ OFP_ASSERT(sizeof(struct nx_flow_update_full) == 24);
  * change in future versions of Open vSwitch.
  *
  * OVS will always send the notifications for a given flow table change before
- * the reply to a OFPT_BARRIER_REQUEST request that precedes the flow table
- * change.  Thus, if the controller does not receive an abbreviated
- * notification for a flow_mod before the next OFPT_BARRIER_REPLY, it will
- * never receive one. */
+ * the reply to a OFPT_BARRIER_REQUEST request that follows the flow table
+ * change.  Thus, if the controller does not receive an abbreviated (or
+ * unabbreviated) notification for a flow_mod before the next
+ * OFPT_BARRIER_REPLY, it will never receive one. */
 struct nx_flow_update_abbrev {
 ovs_be16 length;/* Length is 8. */
 ovs_be16 event; /* NXFME_ABBREV. */
-- 
1.7.10.4

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev