I'm determined not to let the terrible style of pseudo-parsing we have in OVS leak into OVN. Here's the first step.
Signed-off-by: Ben Pfaff <b...@nicira.com> --- ovn/TODO | 5 - ovn/automake.mk | 3 + ovn/lex.c | 697 +++++++++++++++++++++++++++++++++++++++++++++++++++++ ovn/lex.h | 109 +++++++++ tests/automake.mk | 6 +- tests/ovn.at | 95 ++++++++ tests/test-ovn.c | 119 +++++++++ tests/testsuite.at | 1 + 8 files changed, 1028 insertions(+), 7 deletions(-) create mode 100644 ovn/lex.c create mode 100644 ovn/lex.h create mode 100644 tests/ovn.at create mode 100644 tests/test-ovn.c diff --git a/ovn/TODO b/ovn/TODO index 43a867c..d91c3cf 100644 --- a/ovn/TODO +++ b/ovn/TODO @@ -19,11 +19,6 @@ Probably should be defined so that the data structure is also useful for references to fields in action parsing. -** Lexical analysis. - - Probably should be defined so that the lexer can be reused for - parsing actions. - ** Parsing into syntax tree. ** Semantic checking against variable definitions. diff --git a/ovn/automake.mk b/ovn/automake.mk index 37b0ca6..940340c 100644 --- a/ovn/automake.mk +++ b/ovn/automake.mk @@ -74,6 +74,9 @@ SUFFIXES += .xml $(AM_V_GEN)$(run_python) $(srcdir)/build-aux/xml2nroff \ --version=$(VERSION) $< > $@.tmp && mv $@.tmp $@ +lib_LTLIBRARIES += lib/libovn.la +lib_libovn_la_SOURCES = ovn/lex.c ovn/lex.h + EXTRA_DIST += ovn/TODO # ovn IDL diff --git a/ovn/lex.c b/ovn/lex.c new file mode 100644 index 0000000..a837f7c --- /dev/null +++ b/ovn/lex.c @@ -0,0 +1,697 @@ +/* + * Copyright (c) 2015 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <config.h> +#include "lex.h" +#include <ctype.h> +#include <errno.h> +#include <stdarg.h> +#include "dynamic-string.h" +#include "json.h" +#include "util.h" + +/* Initializes 'token'. */ +void +lex_token_init(struct lex_token *token) +{ + token->type = LEX_T_END; + token->s = NULL; +} + +/* Frees memory owned by 'token'. */ +void +lex_token_destroy(struct lex_token *token) +{ + free(token->s); +} + +/* Exchanges 'a' and 'b'. */ +void +lex_token_swap(struct lex_token *a, struct lex_token *b) +{ + struct lex_token tmp = *a; + *a = *b; + *b = tmp; +} + +/* lex_token_format(). */ + +static size_t +lex_token_n_zeros(enum lex_format format) +{ + switch (format) { + case LEX_F_DECIMAL: return offsetof(union mf_subvalue, integer); + case LEX_F_HEXADECIMAL: return 0; + case LEX_F_IPV4: return offsetof(union mf_subvalue, ipv4); + case LEX_F_IPV6: return offsetof(union mf_subvalue, ipv6); + case LEX_F_ETHERNET: return offsetof(union mf_subvalue, mac); + default: OVS_NOT_REACHED(); + } +} + +/* Returns the effective format for 'token', that is, the format in which it + * should actually be printed. This is ordinarily the same as 'token->format', + * but it's always possible that someone sets up a token with a format that + * won't work for a value, e.g. 'token->value' is wider than 32 bits but the + * format is LEX_F_IPV4. (The lexer itself won't do that; this is an attempt + * to avoid confusion in the future.) */ +static enum lex_format +lex_token_get_format(const struct lex_token *token) +{ + size_t n_zeros = lex_token_n_zeros(token->format); + return (is_all_zeros(&token->value, n_zeros) + && (token->type != LEX_T_MASKED_INTEGER + || is_all_zeros(&token->mask, n_zeros)) + ? 
token->format + : LEX_F_HEXADECIMAL); +} + +static void +lex_token_format_value(const union mf_subvalue *value, + enum lex_format format, struct ds *s) +{ + switch (format) { + case LEX_F_DECIMAL: + ds_put_format(s, "%"PRIu64, ntohll(value->integer)); + break; + + case LEX_F_HEXADECIMAL: + mf_format_subvalue(value, s); + break; + + case LEX_F_IPV4: + ds_put_format(s, IP_FMT, IP_ARGS(value->ipv4)); + break; + + case LEX_F_IPV6: + print_ipv6_addr(s, &value->ipv6); + break; + + case LEX_F_ETHERNET: + ds_put_format(s, ETH_ADDR_FMT, ETH_ADDR_ARGS(value->mac)); + break; + + default: + OVS_NOT_REACHED(); + } + +} + +static void +lex_token_format_masked_integer(const struct lex_token *token, struct ds *s) +{ + enum lex_format format = lex_token_get_format(token); + + lex_token_format_value(&token->value, format, s); + ds_put_char(s, '/'); + + const union mf_subvalue *mask = &token->mask; + if (format == LEX_F_IPV4 && ip_is_cidr(mask->ipv4)) { + ds_put_format(s, "%d", ip_count_cidr_bits(mask->ipv4)); + } else if (token->format == LEX_F_IPV6 && ipv6_is_cidr(&mask->ipv6)) { + ds_put_format(s, "%d", ipv6_count_cidr_bits(&mask->ipv6)); + } else { + lex_token_format_value(&token->mask, format, s); + } +} + + +static void +lex_token_format_string(const char *s, struct ds *ds) +{ + struct json json; + json.type = JSON_STRING; + json.u.string = CONST_CAST(char *, s); + json_to_ds(&json, 0, ds); +} + +/* Appends a string representation of 'token' to 's', in a format that can be + * losslessly parsed back by the lexer. (LEX_T_END and LEX_T_ERROR can't be + * parsed back.) 
*/ +void +lex_token_format(struct lex_token *token, struct ds *s) +{ + switch (token->type) { + case LEX_T_END: + ds_put_cstr(s, "$"); + break; + + case LEX_T_ID: + ds_put_cstr(s, token->s); + break; + + case LEX_T_ERROR: + ds_put_cstr(s, "error("); + lex_token_format_string(token->s, s); + ds_put_char(s, ')'); + break; + + case LEX_T_STRING: + lex_token_format_string(token->s, s); + break; + + break; + + case LEX_T_INTEGER: + lex_token_format_value(&token->value, lex_token_get_format(token), s); + break; + + case LEX_T_MASKED_INTEGER: + lex_token_format_masked_integer(token, s); + break; + + case LEX_T_LPAREN: + ds_put_cstr(s, "("); + break; + case LEX_T_RPAREN: + ds_put_cstr(s, ")"); + break; + case LEX_T_LCURLY: + ds_put_cstr(s, "{"); + break; + case LEX_T_RCURLY: + ds_put_cstr(s, "}"); + break; + case LEX_T_LSQUARE: + ds_put_cstr(s, "["); + break; + case LEX_T_RSQUARE: + ds_put_cstr(s, "]"); + break; + case LEX_T_EQ: + ds_put_cstr(s, "=="); + break; + case LEX_T_NE: + ds_put_cstr(s, "!="); + break; + case LEX_T_LT: + ds_put_cstr(s, "<"); + break; + case LEX_T_LE: + ds_put_cstr(s, "<="); + break; + case LEX_T_GT: + ds_put_cstr(s, ">"); + break; + case LEX_T_GE: + ds_put_cstr(s, ">="); + break; + case LEX_T_LOG_NOT: + ds_put_cstr(s, "!"); + break; + case LEX_T_LOG_AND: + ds_put_cstr(s, "&&"); + break; + case LEX_T_LOG_OR: + ds_put_cstr(s, "||"); + break; + case LEX_T_ELLIPSIS: + ds_put_cstr(s, ".."); + break; + case LEX_T_COMMA: + ds_put_cstr(s, ","); + break; + case LEX_T_SEMICOLON: + ds_put_cstr(s, ";"); + break; + case LEX_T_EQUALS: + ds_put_cstr(s, "="); + break; + default: + OVS_NOT_REACHED(); + } + +} + +/* lex_token_parse(). */ + +static void OVS_PRINTF_FORMAT(2, 3) +lex_error(struct lex_token *token, const char *message, ...) 
+{ + token->type = LEX_T_ERROR; + + va_list args; + va_start(args, message); + token->s = xvasprintf(message, args); + va_end(args); +} + +static void +lex_parse_hex_integer(const char *start, size_t len, struct lex_token *token) +{ + const char *in = start + (len - 1); + uint8_t *out = token->value.u8 + (sizeof token->value.u8 - 1); + + for (int i = 0; i < len; i++) { + int hexit = hexit_value(in[-i]); + if (hexit < 0) { + lex_error(token, "Invalid syntax in hexadecimal constant."); + return; + } + if (hexit && i / 2 >= sizeof token->value.u8) { + lex_error(token, "Hexadecimal constant requires more than " + "%"PRIuSIZE" bits.", 8 * sizeof token->value.u8); + return; + } + out[-(i / 2)] |= i % 2 ? hexit << 4 : hexit; + } + token->format = LEX_F_HEXADECIMAL; +} + +static const char * +lex_parse_integer__(const char *p, struct lex_token *token) +{ + const char *start = p; + const char *end = start; + while (isalnum((unsigned char) *end) || *end == ':' + || (*end == '.' && end[1] != '.')) { + end++; + } + size_t len = end - start; + + int n; + uint8_t mac[ETH_ADDR_LEN]; + + token->type = LEX_T_INTEGER; + if (!len) { + lex_error(token, "Integer constant expected."); + } else if (len == 17 + && ovs_scan(start, ETH_ADDR_SCAN_FMT"%n", + ETH_ADDR_SCAN_ARGS(mac), &n) + && n == len) { + memcpy(token->value.mac, mac, sizeof token->value.mac); + token->format = LEX_F_ETHERNET; + } else if (start + strspn(start, "0123456789") == end) { + if (p[0] == '0' && len > 1) { + lex_error(token, "Decimal constants must not have leading zeros."); + } else { + unsigned long long int integer; + char *tail; + + errno = 0; + integer = strtoull(p, &tail, 10); + if (tail != end || errno == ERANGE) { + lex_error(token, "Decimal constants must be less than 2**64."); + } else { + token->value.integer = htonll(integer); + token->format = LEX_F_DECIMAL; + } + } + } else if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { + if (len > 2) { + lex_parse_hex_integer(start + 2, len - 2, token); + } else { 
+ lex_error(token, "Hex digits expected following 0%c.", p[1]); + } + } else if (len < INET6_ADDRSTRLEN) { + char copy[INET6_ADDRSTRLEN]; + memcpy(copy, p, len); + copy[len] = '\0'; + + struct in_addr ipv4; + struct in6_addr ipv6; + if (inet_pton(AF_INET, copy, &ipv4) == 1) { + token->value.ipv4 = ipv4.s_addr; + token->format = LEX_F_IPV4; + } else if (inet_pton(AF_INET6, copy, &ipv6) == 1) { + token->value.ipv6 = ipv6; + token->format = LEX_F_IPV6; + } else { + lex_error(token, "Invalid numeric constant."); + } + } else { + lex_error(token, "Invalid numeric constant."); + } + + ovs_assert(token->type == LEX_T_INTEGER || token->type == LEX_T_ERROR); + return end; +} + +static const char * +lex_parse_integer(const char *p, struct lex_token *token) +{ + memset(&token->value, 0, sizeof token->value); + p = lex_parse_integer__(p, token); + if (token->type == LEX_T_INTEGER && *p == '/') { + struct lex_token mask; + + lex_token_init(&mask); + memset(&mask.value, 0, sizeof mask.value); + p = lex_parse_integer__(p + 1, &mask); + if (mask.type == LEX_T_INTEGER) { + token->type = LEX_T_MASKED_INTEGER; + + uint32_t prefix_bits = ntohll(mask.value.integer); + if (token->format == mask.format) { + /* Same format value and mask is always OK. */ + token->mask = mask.value; + } else if (token->format == LEX_F_IPV4 + && mask.format == LEX_F_DECIMAL + && prefix_bits <= 32) { + /* IPv4 address with decimal mask is a CIDR prefix. */ + token->mask.integer = htonll(ntohl(be32_prefix_mask( + prefix_bits))); + } else if (token->format == LEX_F_IPV6 + && mask.format == LEX_F_DECIMAL + && prefix_bits <= 128) { + /* IPv6 address with decimal mask is a CIDR prefix. */ + token->mask.ipv6 = ipv6_create_mask(prefix_bits); + } else if (token->format == LEX_F_DECIMAL + && mask.format == LEX_F_HEXADECIMAL + && token->value.integer == 0) { + /* Special case for e.g. 0/0x1234. 
*/ + token->format = LEX_F_HEXADECIMAL; + token->mask = mask.value; + } else { + lex_error(token, "Value and mask have incompatible formats."); + return p; + } + + for (int i = 0; i < ARRAY_SIZE(token->mask.be32); i++) { + ovs_be32 v = token->value.be32[i]; + ovs_be32 m = token->mask.be32[i]; + + if (v & ~m) { + lex_error(token, "Value contains unmasked 1-bits."); + break; + } + } + + return p; + } else { + lex_token_swap(&mask, token); + } + lex_token_destroy(&mask); + } + return p; +} + +static const char * +lex_parse_string(const char *p, struct lex_token *token) +{ + const char *start = ++p; + for (;;) { + switch (*p) { + case '\0': + lex_error(token, "Input ends inside quoted string."); + return p; + + case '"': + token->type = (json_string_unescape(start, p - start, &token->s) + ? LEX_T_STRING : LEX_T_ERROR); + return p + 1; + + case '\\': + p++; + if (*p) { + p++; + } + break; + + default: + p++; + break; + } + } + +} + +static bool +lex_is_id1(unsigned char c) +{ + return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') + || c == '_' || c == '.'); +} + +static bool +lex_is_idn(unsigned char c) +{ + return lex_is_id1(c) || (c >= '0' && c <= '9'); +} + +static const char * +lex_parse_id(const char *p, struct lex_token *token) +{ + const char *start = p; + + do { + p++; + } while (lex_is_idn(*p)); + + token->type = LEX_T_ID; + token->s = xmemdup0(start, p - start); + return p; +} + +/* Initializes 'token' and parses the first token from the beginning of + * null-terminated string 'p' into 'token'. Stores a pointer to the start of + * the token (after skipping white space and comments, if any) into '*startp'. + * Returns the character position at which to begin parsing the next token. 
*/ +const char * +lex_token_parse(struct lex_token *token, const char *p, const char **startp) +{ + lex_token_init(token); + +next: + *startp = p; + switch (*p) { + case '\0': + token->type = LEX_T_END; + return p; + + case ' ': case '\t': case '\n': case '\r': + p++; + goto next; + + case '/': + p++; + if (*p == '/') { + do { + p++; + } while (*p != '\0' && *p != '\n'); + goto next; + } else if (*p == '*') { + p++; + for (;;) { + if (*p == '*' && p[1] == '/') { + p += 2; + goto next; + } else if (*p == '\0' || *p == '\n') { + lex_error(token, "`/*' without matching `*/'."); + return p; + } else { + p++; + } + } + goto next; + } else { + lex_error(token, + "`/' is only valid as part of `//' or `/*'."); + } + break; + + case '(': + token->type = LEX_T_LPAREN; + p++; + break; + + case ')': + token->type = LEX_T_RPAREN; + p++; + break; + + case '{': + token->type = LEX_T_LCURLY; + p++; + break; + + case '}': + token->type = LEX_T_RCURLY; + p++; + break; + + case '[': + token->type = LEX_T_LSQUARE; + p++; + break; + + case ']': + token->type = LEX_T_RSQUARE; + p++; + break; + + case '=': + p++; + if (*p == '=') { + token->type = LEX_T_EQ; + p++; + } else { + token->type = LEX_T_EQUALS; + } + break; + + case '!': + p++; + if (*p == '=') { + token->type = LEX_T_NE; + p++; + } else { + token->type = LEX_T_LOG_NOT; + } + break; + + case '&': + p++; + if (*p == '&') { + token->type = LEX_T_LOG_AND; + p++; + } else { + lex_error(token, "`&' is only valid as part of `&&'."); + } + break; + + case '|': + p++; + if (*p == '|') { + token->type = LEX_T_LOG_OR; + p++; + } else { + lex_error(token, "`|' is only valid as part of `||'."); + } + break; + + case '<': + p++; + if (*p == '=') { + token->type = LEX_T_LE; + p++; + } else { + token->type = LEX_T_LT; + } + break; + + case '>': + p++; + if (*p == '=') { + token->type = LEX_T_GE; + p++; + } else { + token->type = LEX_T_GT; + } + break; + + case '.': + p++; + if (*p == '.') { + token->type = LEX_T_ELLIPSIS; + p++; + } else { + 
lex_error(token, "`.' is only valid as part of `..' or a number."); + } + break; + + case ',': + p++; + token->type = LEX_T_COMMA; + break; + + case ';': + p++; + token->type = LEX_T_SEMICOLON; + break; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case ':': + p = lex_parse_integer(p, token); + break; + + case '"': + p = lex_parse_string(p, token); + break; + + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + /* We need to distinguish an Ethernet address or IPv6 address from an + * identifier. Fortunately, Ethernet addresses and IPv6 addresses that + * are ambiguous based on the first character, always start with hex + * digits followed by a colon, but identifiers never do. */ + p = (p[strspn(p, "0123456789abcdefABCDEF")] == ':' + ? lex_parse_integer(p, token) + : lex_parse_id(p, token)); + break; + + default: + if (lex_is_id1(*p)) { + p = lex_parse_id(p, token); + } else { + if (isprint((unsigned char) *p)) { + lex_error(token, "Invalid character `%c' in input.", *p); + } else { + lex_error(token, "Invalid byte 0x%d in input.", *p); + } + p++; + } + break; + } + + return p; +} + +/* Initializes 'lexer' for parsing 'input'. + * + * While the lexer is in use, 'input' must remain available, but the caller + * otherwise retains ownership of 'input'. + * + * The caller must call lexer_get() to obtain the first token. */ +void +lexer_init(struct lexer *lexer, const char *input) +{ + lexer->input = input; + lexer->start = NULL; + memset(&lexer->token, 0, sizeof lexer->token); +} + +/* Frees storage associated with 'lexer'. */ +void +lexer_destroy(struct lexer *lexer) +{ + lex_token_destroy(&lexer->token); +} + +/* Obtains the next token from 'lexer' into 'lexer->token', and returns the + * token's type. The caller may examine 'lexer->token' directly to obtain full + * information about the token. 
*/ +enum lex_type +lexer_get(struct lexer *lexer) +{ + lex_token_destroy(&lexer->token); + lexer->input = lex_token_parse(&lexer->token, lexer->input, &lexer->start); + return lexer->token.type; +} + +/* If 'lexer''s current token has the given 'type', advances 'lexer' to the + * next token and returns true. Otherwise returns false. */ +bool +lexer_match(struct lexer *lexer, enum lex_type type) +{ + if (lexer->token.type == type) { + lexer_get(lexer); + return true; + } else { + return false; + } +} diff --git a/ovn/lex.h b/ovn/lex.h new file mode 100644 index 0000000..8f0f6ed --- /dev/null +++ b/ovn/lex.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2015 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef OVN_LEX_H +#define OVN_LEX_H 1 + +/* OVN lexical analyzer + * ==================== + * + * This is a simple lexical analyzer (or tokenizer) for OVN match expressions + * and ACLs. */ + +#include "meta-flow.h" + +struct ds; + +/* Token type. */ +enum lex_type { + LEX_T_END, /* end of input */ + + /* Tokens with auxiliary data. */ + LEX_T_ID, /* foo */ + LEX_T_STRING, /* "foo" */ + LEX_T_INTEGER, /* 12345 or 1.2.3.4 or ::1 or 01:02:03:04:05:06 */ + LEX_T_MASKED_INTEGER, /* 128/384 or 1.2.0.0/16 or ::2/127 or... */ + LEX_T_ERROR, /* invalid input */ + + /* Bare tokens.
*/ + LEX_T_LPAREN, /* ( */ + LEX_T_RPAREN, /* ) */ + LEX_T_LCURLY, /* { */ + LEX_T_RCURLY, /* } */ + LEX_T_LSQUARE, /* [ */ + LEX_T_RSQUARE, /* ] */ + LEX_T_EQ, /* == */ + LEX_T_NE, /* != */ + LEX_T_LT, /* < */ + LEX_T_LE, /* <= */ + LEX_T_GT, /* > */ + LEX_T_GE, /* >= */ + LEX_T_LOG_NOT, /* ! */ + LEX_T_LOG_AND, /* && */ + LEX_T_LOG_OR, /* || */ + LEX_T_ELLIPSIS, /* .. */ + LEX_T_COMMA, /* , */ + LEX_T_SEMICOLON, /* ; */ + LEX_T_EQUALS, /* = */ +}; + +/* Subtype for LEX_T_INTEGER and LEX_T_MASKED_INTEGER tokens. + * + * These do not change the semantics of a token; instead, they determine the + * format used when a token is serialized back to a text form. That's + * important because 3232268289 is meaningless to a human whereas 192.168.128.1 + * has some actual significance. */ +enum lex_format { + LEX_F_DECIMAL, + LEX_F_HEXADECIMAL, + LEX_F_IPV4, + LEX_F_IPV6, + LEX_F_ETHERNET, +}; + +/* A token. + * + * 's' is owned by the token. */ +struct lex_token { + enum lex_type type; /* One of LEX_*. */ + char *s; /* LEX_T_ID, LEX_T_STRING, LEX_T_ERROR only. */ + enum lex_format format; /* LEX_T_INTEGER, LEX_T_MASKED_INTEGER only. */ + union mf_subvalue value; /* LEX_T_INTEGER, LEX_T_MASKED_INTEGER only. */ + union mf_subvalue mask; /* LEX_T_MASKED_INTEGER only. */ +}; + +void lex_token_init(struct lex_token *); +void lex_token_destroy(struct lex_token *); +void lex_token_swap(struct lex_token *, struct lex_token *); + +void lex_token_format(struct lex_token *, struct ds *); +const char *lex_token_parse(struct lex_token *, const char *input, + const char **startp); + +/* A lexical analyzer. */ +struct lexer { + const char *input; /* Remaining input (not owned by lexer). */ + const char *start; /* Start of current token in 'input'. */ + struct lex_token token; /* Current token (owned by lexer). 
*/ +}; + +void lexer_init(struct lexer *, const char *input); +void lexer_destroy(struct lexer *); + +enum lex_type lexer_get(struct lexer *); +bool lexer_match(struct lexer *, enum lex_type); + +#endif /* ovn/lex.h */ diff --git a/tests/automake.mk b/tests/automake.mk index abbfcb5..74f0765 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -81,7 +81,8 @@ TESTSUITE_AT = \ tests/interface-reconfigure.at \ tests/vlog.at \ tests/vtep-ctl.at \ - tests/auto-attach.at + tests/auto-attach.at \ + tests/ovn.at KMOD_TESTSUITE_AT = \ tests/kmod-testsuite.at \ @@ -272,6 +273,7 @@ tests_ovstest_SOURCES = \ tests/test-multipath.c \ tests/test-netflow.c \ tests/test-odp.c \ + tests/test-ovn.c \ tests/test-packets.c \ tests/test-random.c \ tests/test-reconnect.c \ @@ -290,7 +292,7 @@ tests_ovstest_SOURCES += \ tests/test-unix-socket.c endif -tests_ovstest_LDADD = lib/libopenvswitch.la +tests_ovstest_LDADD = lib/libopenvswitch.la lib/libovn.la dist_check_SCRIPTS = tests/flowgen.pl noinst_PROGRAMS += tests/test-strtok_r diff --git a/tests/ovn.at b/tests/ovn.at new file mode 100644 index 0000000..d28a684 --- /dev/null +++ b/tests/ovn.at @@ -0,0 +1,95 @@ +AT_BANNER([OVN]) + +AT_SETUP([ovn -- lexer]) +dnl For lines without =>, input and expected output are identical. +dnl For lines with =>, input precedes => and expected output follows =>. 
+AT_DATA([test-cases.txt], [dnl +foo bar baz quuxquuxquux _abcd_ a.b.c.d a123_.456 +"abc\u0020def" => "abc def" +" => error("Input ends inside quoted string.")dnl " + +a/*b*/c => a c +a//b c => a +a/**/b => a b +a/*/b => a error("`/*' without matching `*/'.") +a/*/**/b => a b +a/b => a error("`/' is only valid as part of `//' or `/*'.") b + +0 1 12345 18446744073709551615 +18446744073709551616 => error("Decimal constants must be less than 2**64.") +9999999999999999999999 => error("Decimal constants must be less than 2**64.") +01 => error("Decimal constants must not have leading zeros.") + +0/0 +0/1 +1/0 => error("Value contains unmasked 1-bits.") +1/1 +128/384 +1/3 +1/ => error("Integer constant expected.") + +1/0x123 => error("Value and mask have incompatible formats.") + +0x1234 +0x01234 => 0x1234 +0x0 => 0 +0x000 => 0 +0xfedcba9876543210 +0XFEDCBA9876543210 => 0xfedcba9876543210 +0xfedcba9876543210fedcba9876543210 +0xfedcba9876543210fedcba98765432100 => error("Hexadecimal constant requires more than 128 bits.") +0x0000fedcba9876543210fedcba9876543210 => 0xfedcba9876543210fedcba9876543210 +0x => error("Hex digits expected following 0x.") +0X => error("Hex digits expected following 0X.") +0x0/0x0 => 0/0 +0x0/0x1 => 0/0x1 +0x1/0x0 => error("Value contains unmasked 1-bits.") +0xffff/0x1ffff +0x. 
=> error("Invalid syntax in hexadecimal constant.") + +192.168.128.1 1.2.3.4 255.255.255.255 0.0.0.0 +256.1.2.3 => error("Invalid numeric constant.") +192.168.0.0/16 +192.168.0.0/255.255.0.0 => 192.168.0.0/16 +192.168.0.0/255.255.255.0 => 192.168.0.0/24 +192.168.0.0/255.255.0.255 +192.168.0.0/255.0.0.0 => error("Value contains unmasked 1-bits.") +192.168.0.0/32 +192.168.0.0/255.255.255.255 => 192.168.0.0/32 + +:: +::1 +ff00::1234 => ff00::1234 +2001:db8:85a3::8a2e:370:7334 +2001:db8:85a3:0:0:8a2e:370:7334 => 2001:db8:85a3::8a2e:370:7334 +2001:0db8:85a3:0000:0000:8a2e:0370:7334 => 2001:db8:85a3::8a2e:370:7334 +::ffff:192.0.2.128 +::ffff:c000:0280 => ::ffff:192.0.2.128 +::1/::1 +::1/ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff => ::1/128 +::1/128 +ff00::/8 +ff00::/ff00:: => ff00::/8 + +01:23:45:67:ab:cd +01:23:45:67:AB:CD => 01:23:45:67:ab:cd +fe:dc:ba:98:76:54 +FE:DC:ba:98:76:54 => fe:dc:ba:98:76:54 +01:00:00:00:00:00/01:00:00:00:00:00 +ff:ff:ff:ff:ff:ff/ff:ff:ff:ff:ff:ff +fe:ff:ff:ff:ff:ff/ff:ff:ff:ff:ff:ff +ff:ff:ff:ff:ff:ff/fe:ff:ff:ff:ff:ff => error("Value contains unmasked 1-bits.") +fe:x => error("Invalid numeric constant.") +00:01:02:03:04:x => error("Invalid numeric constant.") + +(){}[[]]==!=<<=>>=!&&||..,;= => ( ) { } [[ ]] == != < <= > >= ! && || .. , ; = +& => error("`&' is only valid as part of `&&'.") +| => error("`|' is only valid as part of `||'.") + +^ => error("Invalid character `^' in input.") +]) +AT_CAPTURE_FILE([input.txt]) +sed 's/ =>.*//' test-cases.txt > input.txt +sed 's/.* => //' test-cases.txt > expout +AT_CHECK([ovstest test-ovn lex < input.txt], [0], [expout]) +AT_CLEANUP diff --git a/tests/test-ovn.c b/tests/test-ovn.c new file mode 100644 index 0000000..2229d4e --- /dev/null +++ b/tests/test-ovn.c @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2015 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <config.h> +#include "command-line.h" +#include <getopt.h> +#include "dynamic-string.h" +#include "fatal-signal.h" +#include "match.h" +#include "ovn/lex.h" +#include "ovstest.h" +#include "util.h" +#include "openvswitch/vlog.h" + +static void +compare_token(const struct lex_token *a, const struct lex_token *b) +{ + if (a->type != b->type) { + fprintf(stderr, "type differs: %d -> %d\n", a->type, b->type); + return; + } + + if (!((a->s && b->s && !strcmp(a->s, b->s)) + || (!a->s && !b->s))) { + fprintf(stderr, "string differs: %s -> %s\n", + a->s ? a->s : "(null)", + b->s ? 
b->s : "(null)"); + return; + } + + if (a->type == LEX_T_INTEGER || a->type == LEX_T_MASKED_INTEGER) { + if (memcmp(&a->value, &b->value, sizeof a->value)) { + fprintf(stderr, "value differs\n"); + return; + } + + if (a->type == LEX_T_MASKED_INTEGER + && memcmp(&a->mask, &b->mask, sizeof a->mask)) { + fprintf(stderr, "mask differs\n"); + return; + } + } + + if (a->format != b->format + && !(a->format == LEX_F_HEXADECIMAL + && b->format == LEX_F_DECIMAL + && a->value.integer == 0)) { + fprintf(stderr, "format differs: %d -> %d\n", a->format, b->format); + } +} + +static void +test_lex(struct ovs_cmdl_context *ctx OVS_UNUSED) +{ + struct ds input; + struct ds output; + + ds_init(&input); + ds_init(&output); + while (!ds_get_line(&input, stdin)) { + struct lexer lexer; + + lexer_init(&lexer, ds_cstr(&input)); + ds_clear(&output); + while (lexer_get(&lexer) != LEX_T_END) { + size_t len = output.length; + lex_token_format(&lexer.token, &output); + + /* Check that the formatted version can really be parsed back + * losslessly. 
*/ + if (lexer.token.type != LEX_T_ERROR) { + const char *s = ds_cstr(&output) + len; + struct lexer l2; + + lexer_init(&l2, s); + lexer_get(&l2); + compare_token(&lexer.token, &l2.token); + lexer_destroy(&l2); + } + ds_put_char(&output, ' '); + } + lexer_destroy(&lexer); + + ds_chomp(&output, ' '); + puts(ds_cstr(&output)); + } + ds_destroy(&input); + ds_destroy(&output); +} + +static void +test_ovn_main(int argc, char *argv[]) +{ + set_program_name(argv[0]); + + static const struct ovs_cmdl_command commands[] = { + {"lex", NULL, 0, 0, test_lex}, + {NULL, NULL, 0, 0, NULL}, + }; + struct ovs_cmdl_context ctx; + ctx.argc = argc - optind; + ctx.argv = argv + optind; + ovs_cmdl_run_command(&ctx, commands); +} + +OVSTEST_REGISTER("test-ovn", test_ovn_main); diff --git a/tests/testsuite.at b/tests/testsuite.at index 152ea17..80bbcd5 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -66,3 +66,4 @@ m4_include([tests/rstp.at]) m4_include([tests/vlog.at]) m4_include([tests/vtep-ctl.at]) m4_include([tests/auto-attach.at]) +m4_include([tests/ovn.at]) -- 2.1.3 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev