Hi, Yes, I tried to use recursive descent parsing techniques but ended up with too many complications that are not justified for the simple syntax rules that I have.
In short I am trying to parse Boolean expressions like the following to verify their syntax. a) (a+!b).(!!c+d) b) ((w+!!!c).(!e+((!!!q)))) To avoid having overly complicated code for a seemingly simple task, I switched to use an old algorithm I devised some 20 years ago. In my algorithm, tokens are checked for acceptable preceding and succeeding tokens. In case the presence or absence of a nearby token changes the rules, deeper checking is employed. The resultant code is simple and seems to work as intended. I have tried to send this mail to this mailing thread several times unsuccessfully. For some reason gmail is sending it to the wrong thread. The code follows from here: #include <stdio.h> #include <ctype.h> #include <stdlib.h> #include <string.h> #define INVALID 0 #define VALID 1 typedef char* pchar; pchar* tokens; /* global variable to hold token list */ int token_count = 0; /* global variable to keep track of token count */ int error_pos = 0; /* global variable to hold error position */ #define token_delta 50 typedef struct { int start_bracket; int stop_bracket; int consecutive_brackets; } t_bracket_values; void tokenise(char* expression) { char ch; /* character at iterator's position */ char* atoken; /* token to be added to token list */ char* start = NULL; /* holds a char* to point to start of multicharacter token */ char* string_token; /* holds multicharacter token */ int char_count = 0; /* keeps track of number of characters in multicharacter token */ int while_counter = 0; /* expr_len_and_null holds the total length of input string including null terminator */ int expr_len_and_null = strlen(expression) + 1; while (while_counter++ <= expr_len_and_null) { ch = *expression++; switch (ch) { case '(': case ')': case '.': case '+': case '!': case '\0': if (start && char_count > 0) { string_token = malloc((char_count + 1)*sizeof(char)); strncpy(string_token, start, char_count); tokens[token_count++] = string_token; start = NULL; char_count = 0; } if (ch == '\0') return; atoken = malloc(2*sizeof(char)); atoken[0] = ch; atoken[1] = '\0'; tokens[token_count++] = atoken; break; case '\n': break; default: if (start == NULL) start = expression - 1; char_count++; } } } void free_memory() { int k = token_count; while (--k >= 0) free(tokens[k]); free(tokens); } /****************PARSER*****************/ static char* getToken(pchar* tokens, int i) { if (i < token_count) return tokens[i]; else return NULL; } #define alpha "abcdefghijklmnopqrstuvwxyz" /* succeeds ". + )" end */ #define alpha_cl_br ")"alpha /* succeeds "! alpha " */ #define oper_op_br "!+.(" /* at the beginning of expression */ #define alpha_excl_op_br "!("alpha static int isOperand(pchar* tokens, int i) { char* atoken = getToken(tokens, i); if (atoken == NULL) return 0; else { if (strlen(atoken) > 1) return 1; else return (strchr(alpha, atoken[0]) != NULL); } } int getErrorPosn(pchar* tokens) { int i = -1; char prev_ch = '\0', ch; int error_pos1 = -1, error_pos2 = -1; char* atoken; if (token_count == 1) { if (!isOperand(tokens, 0)) i = 0; error_pos = i; return error_pos; } while (++i < token_count) { atoken = getToken(tokens, i); ch = atoken[0]; if (i > 0 && i < token_count - 1) { if ( (ch == '+' || ch == '.' || ch == ')') && (prev_ch == ')' || isOperand(tokens, i - 1)) ) { prev_ch = ch; continue; } if ( (ch == '!' || ch == '(' || isOperand(tokens, i)) && strchr(oper_op_br, prev_ch) ) { prev_ch = ch; continue; } } prev_ch = ch; /* at the end of token list */ if ( (i == token_count - 1) && (ch == ')' || isOperand(tokens, i)) ) continue; /* at the beginning of token list */ if ( (i == 0) && (ch == '!' || ch == '(' || isOperand(tokens, i)) ) continue; error_pos1 = i; break; } int brackets = 0; for (i = 0; i < token_count; i++) { atoken = getToken(tokens, i); ch = atoken[0]; if (ch == '(') brackets++; else if (ch == ')') brackets--; if (brackets < 0) { error_pos2 = i; break; } } if (brackets > 0) error_pos2 = i; /* choose the earlier error for output */ if (error_pos1 > error_pos2) error_pos = error_pos1; else error_pos = error_pos2; return error_pos; } /***************************************/ int main() { extern int error_pos; char input[101]; tokens = malloc(token_delta*sizeof(char*)); fgets(input, 100, stdin); tokenise(input); int k = -1; /* print the tokens one per line */ printf("token count is %d\n", token_count); while(++k < token_count) printf("%s\n", tokens[k]); getErrorPosn(tokens); if (error_pos > -1) printf("Error found at position %d\n", error_pos + 1); else printf("No error found.\n"); free_memory(); return 0; } Edward _______________________________________________ Dng mailing list Dng@lists.dyne.org https://mailinglists.dyne.org/cgi-bin/mailman/listinfo/dng