The P4 language requires that a header be marked as valid before any of its fields are written, not after the writes are done. Hence, the optimization that replaces the sequence of instructions generating a header from the table action data with a single internal DMA instruction is reworked from the pattern "mov all + validate -> dma" to "validate + mov all -> dma".
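For illustration, here is the shape of the action code affected by this change, taken from the fib.spec example updated below. Both orderings fill the Ethernet header from the table action data, but after this patch only the second one (validate first, then the field moves) is collapsed by the instruction optimizer into the single internal DMA instruction:

    //Old pattern: mov all + validate
    mov h.ethernet.dst_addr t.ethernet_dst_addr
    mov h.ethernet.src_addr t.ethernet_src_addr
    mov h.ethernet.ethertype t.ethernet_ethertype
    validate h.ethernet

    //New pattern: validate + mov all
    validate h.ethernet
    mov h.ethernet.dst_addr t.ethernet_dst_addr
    mov h.ethernet.src_addr t.ethernet_src_addr
    mov h.ethernet.ethertype t.ethernet_ethertype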
Signed-off-by: Cristian Dumitrescu <cristian.dumitre...@intel.com>
---
 examples/pipeline/examples/fib.spec      |  2 +-
 examples/pipeline/examples/vxlan.spec    |  8 +--
 lib/pipeline/rte_swx_pipeline.c          | 89 +++++++++++-------------
 lib/pipeline/rte_swx_pipeline_internal.h | 14 +++-
 4 files changed, 57 insertions(+), 56 deletions(-)

diff --git a/examples/pipeline/examples/fib.spec b/examples/pipeline/examples/fib.spec
index 3f8c76610c..0c2d19c106 100644
--- a/examples/pipeline/examples/fib.spec
+++ b/examples/pipeline/examples/fib.spec
@@ -83,10 +83,10 @@ struct nexthop_action_args_t {
 
 action nexthop_action args instanceof nexthop_action_args_t {
 	//Set Ethernet header.
+	validate h.ethernet
 	mov h.ethernet.dst_addr t.ethernet_dst_addr
 	mov h.ethernet.src_addr t.ethernet_src_addr
 	mov h.ethernet.ethertype t.ethernet_ethertype
-	validate h.ethernet
 
 	//Decrement the TTL and update the checksum within the IPv4 header.
 	cksub h.ipv4.hdr_checksum h.ipv4.ttl
diff --git a/examples/pipeline/examples/vxlan.spec b/examples/pipeline/examples/vxlan.spec
index ac62ab2bec..4ab4309f16 100644
--- a/examples/pipeline/examples/vxlan.spec
+++ b/examples/pipeline/examples/vxlan.spec
@@ -115,12 +115,13 @@ struct vxlan_encap_args_t {
 
 action vxlan_encap args instanceof vxlan_encap_args_t {
 	//Set the outer Ethernet header.
+	validate h.outer_ethernet
 	mov h.outer_ethernet.dst_addr t.ethernet_dst_addr
 	mov h.outer_ethernet.src_addr t.ethernet_src_addr
 	mov h.outer_ethernet.ethertype t.ethernet_ethertype
-	validate h.outer_ethernet
 
 	//Set the outer IPv4 header.
+	validate h.outer_ipv4
 	mov h.outer_ipv4.ver_ihl t.ipv4_ver_ihl
 	mov h.outer_ipv4.diffserv t.ipv4_diffserv
 	mov h.outer_ipv4.total_len t.ipv4_total_len
@@ -131,21 +132,20 @@ action vxlan_encap args instanceof vxlan_encap_args_t {
 	mov h.outer_ipv4.hdr_checksum t.ipv4_hdr_checksum
 	mov h.outer_ipv4.src_addr t.ipv4_src_addr
 	mov h.outer_ipv4.dst_addr t.ipv4_dst_addr
-	validate h.outer_ipv4
 
 	//Set the outer UDP header.
+	validate h.outer_udp
 	mov h.outer_udp.src_port t.udp_src_port
 	mov h.outer_udp.dst_port t.udp_dst_port
 	mov h.outer_udp.length t.udp_length
 	mov h.outer_udp.checksum t.udp_checksum
-	validate h.outer_udp
 
 	//Set the outer VXLAN header.
+	validate h.outer_vxlan
 	mov h.outer_vxlan.flags t.vxlan_flags
 	mov h.outer_vxlan.reserved t.vxlan_reserved
 	mov h.outer_vxlan.vni t.vxlan_vni
 	mov h.outer_vxlan.reserved2 t.vxlan_reserved2
-	validate h.outer_vxlan
 
 	//Set the output port.
 	mov m.port_out t.port_out
diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 066356684e..50805ba821 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2284,6 +2284,7 @@ instr_hdr_validate_translate(struct rte_swx_pipeline *p,
 
 	instr->type = INSTR_HDR_VALIDATE;
 	instr->valid.header_id = h->id;
+	instr->valid.struct_id = h->struct_id;
 
 	return 0;
 }
@@ -6754,7 +6755,7 @@ action_arg_src_mov_count(struct action *a,
 			 uint32_t n_instructions);
 
 static int
-instr_pattern_mov_all_validate_search(struct rte_swx_pipeline *p,
+instr_pattern_validate_mov_all_search(struct rte_swx_pipeline *p,
 				      struct action *a,
 				      struct instruction *instr,
 				      struct instruction_data *data,
@@ -6771,51 +6772,42 @@ instr_pattern_mov_all_validate_search(struct rte_swx_pipeline *p,
 	if (!a || !a->st)
 		return 0;
 
-	/* First instruction: MOV_HM. */
-	if (data[0].invalid || (instr[0].type != INSTR_MOV_HM))
+	/* First instruction: HDR_VALIDATE. Second instruction: MOV_HM. */
+	if (data[0].invalid ||
+	    (instr[0].type != INSTR_HDR_VALIDATE) ||
+	    (n_instr < 2) ||
+	    data[1].invalid ||
+	    (instr[1].type != INSTR_MOV_HM) ||
+	    instr[1].mov.src.struct_id)
 		return 0;
 
-	h = header_find_by_struct_id(p, instr[0].mov.dst.struct_id);
-	if (!h || h->st->var_size)
+	h = header_find_by_struct_id(p, instr[0].valid.struct_id);
+	if (!h ||
+	    h->st->var_size ||
+	    (n_instr < 1 + h->st->n_fields))
 		return 0;
 
 	for (src_field_id = 0; src_field_id < a->st->n_fields; src_field_id++)
-		if (instr[0].mov.src.offset == a->st->fields[src_field_id].offset / 8)
+		if (instr[1].mov.src.offset == a->st->fields[src_field_id].offset / 8)
 			break;
 
-	if (src_field_id == a->st->n_fields)
+	if (src_field_id + h->st->n_fields > a->st->n_fields)
 		return 0;
 
-	if (instr[0].mov.dst.offset ||
-	    (instr[0].mov.dst.n_bits != h->st->fields[0].n_bits) ||
-	    instr[0].mov.src.struct_id ||
-	    (instr[0].mov.src.n_bits != a->st->fields[src_field_id].n_bits) ||
-	    (instr[0].mov.dst.n_bits != instr[0].mov.src.n_bits))
-		return 0;
-
-	if ((n_instr < h->st->n_fields + 1) ||
-	    (a->st->n_fields < src_field_id + h->st->n_fields + 1))
-		return 0;
-
-	/* Subsequent instructions: MOV_HM. */
-	for (i = 1; i < h->st->n_fields; i++)
-		if (data[i].invalid ||
-		    data[i].n_users ||
-		    (instr[i].type != INSTR_MOV_HM) ||
-		    (instr[i].mov.dst.struct_id != h->struct_id) ||
-		    (instr[i].mov.dst.offset != h->st->fields[i].offset / 8) ||
-		    (instr[i].mov.dst.n_bits != h->st->fields[i].n_bits) ||
-		    instr[i].mov.src.struct_id ||
-		    (instr[i].mov.src.offset != a->st->fields[src_field_id + i].offset / 8) ||
-		    (instr[i].mov.src.n_bits != a->st->fields[src_field_id + i].n_bits) ||
-		    (instr[i].mov.dst.n_bits != instr[i].mov.src.n_bits))
+	/* Second and subsequent instructions: MOV_HM. */
+	for (i = 0; i < h->st->n_fields; i++)
+		if (data[1 + i].invalid ||
+		    data[1 + i].n_users ||
+		    (instr[1 + i].type != INSTR_MOV_HM) ||
+		    (instr[1 + i].mov.dst.struct_id != h->struct_id) ||
+		    (instr[1 + i].mov.dst.offset != h->st->fields[i].offset / 8) ||
+		    (instr[1 + i].mov.dst.n_bits != h->st->fields[i].n_bits) ||
+		    instr[1 + i].mov.src.struct_id ||
+		    (instr[1 + i].mov.src.offset != a->st->fields[src_field_id + i].offset / 8) ||
+		    (instr[1 + i].mov.src.n_bits != a->st->fields[src_field_id + i].n_bits) ||
+		    (instr[1 + i].mov.dst.n_bits != instr[1 + i].mov.src.n_bits))
 			return 0;
 
-	/* Last instruction: HDR_VALIDATE. */
-	if ((instr[i].type != INSTR_HDR_VALIDATE) ||
-	    (instr[i].valid.header_id != h->id))
-		return 0;
-
 	/* Check that none of the action args that are used as source for this
 	 * DMA transfer are not used as source in any other mov instruction.
 	 */
@@ -6831,12 +6823,12 @@ instr_pattern_mov_all_validate_search(struct rte_swx_pipeline *p,
 			return 0;
 	}
 
-	*n_pattern_instr = 1 + i;
+	*n_pattern_instr = 1 + h->st->n_fields;
 	return 1;
 }
 
 static void
-instr_pattern_mov_all_validate_replace(struct rte_swx_pipeline *p,
+instr_pattern_validate_mov_all_replace(struct rte_swx_pipeline *p,
 				       struct action *a,
 				       struct instruction *instr,
 				       struct instruction_data *data,
@@ -6846,19 +6838,16 @@ instr_pattern_mov_all_validate_replace(struct rte_swx_pipeline *p,
 	uint32_t src_field_id, src_offset, i;
 
 	/* Read from the instructions before they are modified. */
-	h = header_find_by_struct_id(p, instr[0].mov.dst.struct_id);
+	h = header_find_by_struct_id(p, instr[1].mov.dst.struct_id);
 	if (!h)
 		return;
 
+	src_offset = instr[1].mov.src.offset;
+
 	for (src_field_id = 0; src_field_id < a->st->n_fields; src_field_id++)
-		if (instr[0].mov.src.offset == a->st->fields[src_field_id].offset / 8)
+		if (src_offset == a->st->fields[src_field_id].offset / 8)
 			break;
 
-	if (src_field_id == a->st->n_fields)
-		return;
-
-	src_offset = instr[0].mov.src.offset;
-
 	/* Modify the instructions. */
 	instr[0].type = INSTR_DMA_HT;
 	instr[0].dma.dst.header_id[0] = h->id;
@@ -6875,7 +6864,7 @@ instr_pattern_mov_all_validate_replace(struct rte_swx_pipeline *p,
 }
 
 static uint32_t
-instr_pattern_mov_all_validate_optimize(struct rte_swx_pipeline *p,
+instr_pattern_validate_mov_all_optimize(struct rte_swx_pipeline *p,
 					struct action *a,
 					struct instruction *instructions,
 					struct instruction_data *instruction_data,
@@ -6892,8 +6881,8 @@ instr_pattern_mov_all_validate_optimize(struct rte_swx_pipeline *p,
 		uint32_t n_instr = 0;
 		int detected;
 
-		/* Mov all + validate. */
-		detected = instr_pattern_mov_all_validate_search(p,
+		/* Validate + mov all. */
+		detected = instr_pattern_validate_mov_all_search(p,
								 a,
								 instr,
								 data,
@@ -6903,7 +6892,7 @@ instr_pattern_mov_all_validate_optimize(struct rte_swx_pipeline *p,
								 n_instructions,
								 &n_instr);
 		if (detected) {
-			instr_pattern_mov_all_validate_replace(p, a, instr, data, n_instr);
+			instr_pattern_validate_mov_all_replace(p, a, instr, data, n_instr);
 			i += n_instr;
 			continue;
 		}
@@ -7020,8 +7009,8 @@ instr_optimize(struct rte_swx_pipeline *p,
							  instruction_data,
							  n_instructions);
 
-	/* Mov all + validate. */
-	n_instructions = instr_pattern_mov_all_validate_optimize(p,
+	/* Validate + mov all. */
+	n_instructions = instr_pattern_validate_mov_all_optimize(p,
								  a,
								  instructions,
								  instruction_data,
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 5feee8eff6..a35635efb7 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -632,6 +632,7 @@ struct instr_io {
 
 struct instr_hdr_validity {
 	uint8_t header_id;
+	uint8_t struct_id;
 };
 
 struct instr_table {
@@ -2228,11 +2229,22 @@ __instr_hdr_validate_exec(struct rte_swx_pipeline *p __rte_unused,
 			  const struct instruction *ip)
 {
 	uint32_t header_id = ip->valid.header_id;
+	uint32_t struct_id = ip->valid.struct_id;
+	uint64_t valid_headers = t->valid_headers;
+	struct header_runtime *h = &t->headers[header_id];
 
 	TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);
 
+	/* If this header is already valid, then its associated t->structs[] element is also valid
+	 * and therefore it should not be modified. It could point to the packet buffer (in case of
+	 * extracted header) and setting it to the default location (h->ptr0) would be incorrect.
+	 */
+	if (MASK64_BIT_GET(valid_headers, header_id))
+		return;
+
 	/* Headers. */
-	t->valid_headers = MASK64_BIT_SET(t->valid_headers, header_id);
+	t->structs[struct_id] = h->ptr0;
+	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
 }
 
 /*
-- 
2.17.1