(2nd try, after 5hrs the first mail still didn't go through but didn't produce any non-DSN either)
Hi everyone, I'm maintaining a collection of shell scripts in some 50 different Debian packages. Some of them have no dependencies (with respect to external commands), some of them depend on other Debian packages and some depend on each other. I have grown sick of dependency management, yet I want it to be clean and - newly - automated as far as possible. There are a handful of Perl scripts out there to parse bash scripts in a "print every 1st word on a line or whatever stands after a semicolon or a pipe" kind of way which didn't satisfy me at all. So I thought why re-invent the wheel...bash already has its own parser, why not use it to print every would-be-issued-command? I did some hacking and came up with the attached patches, which - add a new command-line option -N (implying -n) - act like a "verbose noexec" flag: walk the syntax tree and print each command with its associated command type. Currently it's in POC state and I'd like to hear your thoughts and I have a few questions of my own: - Would this feature be considered to be included upstream? - What needs adjustment to be considered (style, technical reasons)? There are some constructs which still aren't covered and I'm gonna need help with: 1 a=$(( $(foo) + $(bar) )) 2 $(foo) 3 `foo` 4 case $(foo) in 5 < <(foo) - 1 through 3 show up as simple_commands with the assignment flag set but are not further evaluated by the parser (I guess that happens at runtime...) - 4 shows up as case_cmd, however unevaluated otherwise as above - 5 doesn't show up at all in the output of ./bash-patched -nN ./testscript.sh I don't have any knowledge of the inner workings of parse.y let alone how to use it or how bash interacts with it - I just happened to have found the right place to hook into...do you have an idea how to cover these? Thanks Daniel
>From 698c8ae89e1bbea8d047d48c66067b09e0c38dc9 Mon Sep 17 00:00:00 2001 From: Daniel Reichelt <hack...@nachtgeist.net> Date: Sun, 31 Mar 2013 15:08:49 +0200 Subject: [PATCH 1/2] add flag 'N' --- bash/flags.c | 16 +++++++++++++++- bash/flags.h | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/bash/flags.c b/bash/flags.c index d3b38ad..fd78008 100644 --- a/bash/flags.c +++ b/bash/flags.c @@ -75,6 +75,11 @@ int place_keywords_in_env = 0; destructive. */ int read_but_dont_execute = 0; +/* Non-zero means print commands if read_and_print_but_dont_execute is non-zero. + This is useful for static analysis of commands to be executed in a + script */ +int read_and_print_but_dont_execute = 0; + /* Non-zero means end of file is after one command. */ int just_one_command = 0; @@ -184,6 +189,7 @@ const struct flags_alist shell_flags[] = { { 'm', &jobs_m_flag }, #endif /* JOB_CONTROL */ { 'n', &read_but_dont_execute }, + { 'N', &read_and_print_but_dont_execute }, { 'p', &privileged_mode }, #if defined (RESTRICTED_SHELL) { 'r', &restricted }, @@ -274,6 +280,14 @@ change_flag (flag, on_or_off) read_but_dont_execute = 0; break; + case 'N': /* set 'n' as well */ + if (interactive_shell) { + read_but_dont_execute = 0; + } else { + read_but_dont_execute = 1; + } + break; + case 'p': if (on_or_off == FLAG_OFF) disable_priv_mode (); @@ -317,7 +331,7 @@ void reset_shell_flags () { mark_modified_vars = exit_immediately_on_error = disallow_filename_globbing = 0; - place_keywords_in_env = read_but_dont_execute = just_one_command = 0; + place_keywords_in_env = read_but_dont_execute = read_and_print_but_dont_execute = just_one_command = 0; noclobber = unbound_vars_is_error = echo_input_at_read = 0; echo_command_at_execute = jobs_m_flag = forced_interactive = 0; no_symbolic_links = no_invisible_vars = privileged_mode = pipefail_opt = 0; diff --git a/bash/flags.h b/bash/flags.h index d8fa757..aab4291 100644 --- a/bash/flags.h +++ b/bash/flags.h @@ -42,7 +42,7 @@ extern char 
optflags[]; extern int mark_modified_vars, exit_immediately_on_error, disallow_filename_globbing, - place_keywords_in_env, read_but_dont_execute, + place_keywords_in_env, read_but_dont_execute, read_and_print_but_dont_execute, just_one_command, unbound_vars_is_error, echo_input_at_read, echo_command_at_execute, no_invisible_vars, noclobber, hashing_enabled, forced_interactive, privileged_mode, -- 1.7.2.5
>From a0ca52e731716d5339036df0a19f1ed23b598b73 Mon Sep 17 00:00:00 2001 From: Daniel Reichelt <hack...@nachtgeist.net> Date: Tue, 2 Apr 2013 10:39:11 +0200 Subject: [PATCH 2/2] print command type in eval.c and adjust Makefile --- bash/Makefile.in | 4 ++ bash/eval.c | 2 + bash/print_cmd_on_noexec.c | 126 ++++++++++++++++++++++++++++++++++++++++++++ bash/print_cmd_on_noexec.h | 30 ++++++++++ bash/testsh | 16 ++++++ 5 files changed, 178 insertions(+), 0 deletions(-) create mode 100644 bash/print_cmd_on_noexec.c create mode 100755 bash/print_cmd_on_noexec.h create mode 100755 bash/testsh diff --git a/bash/Makefile.in b/bash/Makefile.in index f01d49c..3a21b1a 100644 --- a/bash/Makefile.in +++ b/bash/Makefile.in @@ -418,6 +418,7 @@ CSOURCES = shell.c eval.c parse.y general.c make_cmd.c print_cmd.c y.tab.c \ input.c bashhist.c array.c arrayfunc.c assoc.c sig.c pathexp.c \ unwind_prot.c siglist.c bashline.c bracecomp.c error.c \ list.c stringlib.c locale.c findcmd.c redir.c \ + print_cmd_on_noexec.c \ pcomplete.c pcomplib.c syntax.c xmalloc.c HSOURCES = shell.h flags.h trap.h hashcmd.h hashlib.h jobs.h builtins.h \ @@ -427,6 +428,7 @@ HSOURCES = shell.h flags.h trap.h hashcmd.h hashlib.h jobs.h builtins.h \ subst.h externs.h siglist.h bashhist.h bashline.h bashtypes.h \ array.h arrayfunc.h sig.h mailcheck.h bashintl.h bashjmp.h \ execute_cmd.h parser.h pathexp.h pathnames.h pcomplete.h assoc.h \ + print_cmd_on_noexec.h \ $(BASHINCFILES) SOURCES = $(CSOURCES) $(HSOURCES) $(BUILTIN_DEFS) @@ -446,6 +448,7 @@ OBJECTS = shell.o eval.o y.tab.o general.o make_cmd.o print_cmd.o $(GLOBO) \ trap.o input.o unwind_prot.o pathexp.o sig.o test.o version.o \ alias.o array.o arrayfunc.o assoc.o braces.o bracecomp.o bashhist.o \ bashline.o $(SIGLIST_O) list.o stringlib.o locale.o findcmd.o redir.o \ + print_cmd_on_noexec.o \ pcomplete.o pcomplib.o syntax.o xmalloc.o $(SIGNAMES_O) # Where the source code of the shell builtins resides. 
@@ -1055,6 +1058,7 @@ variables.o: pcomplete.h ${BASHINCDIR}/chartypes.h variables.o: ${BASHINCDIR}/posixtime.h assoc.h version.o: conftypes.h patchlevel.h version.h xmalloc.o: config.h bashtypes.h ${BASHINCDIR}/ansi_stdlib.h error.h +print_cmd_on_noexec.o: bashtypes.h command.h config.h general.h print_cmd_on_noexec.h # job control diff --git a/bash/eval.c b/bash/eval.c index 9011e0b..9db35d5 100644 --- a/bash/eval.c +++ b/bash/eval.c @@ -138,6 +138,8 @@ reader_loop () { if (interactive_shell == 0 && read_but_dont_execute) { + if (read_and_print_but_dont_execute) + print_cmd_on_noexec(global_command); last_command_exit_value = EXECUTION_SUCCESS; dispose_command (global_command); global_command = (COMMAND *)NULL; diff --git a/bash/print_cmd_on_noexec.c b/bash/print_cmd_on_noexec.c new file mode 100644 index 0000000..5af0216 --- /dev/null +++ b/bash/print_cmd_on_noexec.c @@ -0,0 +1,126 @@ +#include "print_cmd_on_noexec.h" + + + + +/*TODO: instead of duplicating builtins, these should be moved from mkinternals.c + to an includable header +*/ +char *builtins[] = { + ":", ".", "alias", "alias", "bg", "break", "cd", "command", "continue", + "declare", "eval", "exec", "exit", "export", "export", "false", "fc", + "fg", "getopts", "jobs", "kill", "local", "newgrp", "pwd", "read", + "readonly", "readonly", "return", "set", "shift", "source", "times", + "trap", "true", "typeset", "umask", "unalias", "unset", "wait", + }; + +char *specials[] = { + ".", "eval", "nohup", + //TODO: how to handle parameters? + "bash", "exec", "invoke-rc.d", "ionice", "nice", "sh", + }; + +/*TODO: apparently not yet covered: + < <(foo) + case $(bar) in + a=$(( $(foo) + $(bar) )) + $(foo) +*/ +void print_cmd_on_noexec(COMMAND *cmd) { + int i; + WORD_LIST *wl = NULL; + + if (cmd != NULL) { + switch (cmd->type) { +#if defined (DPAREN_ARITHMETIC) + case cm_arith: + //TODO: implement? 
+ printf("cm_arith\n"); + break; +#endif +#if defined (ARITH_FOR_COMMAND) + case cm_arith_for: + printf("cm_arith_for\n"); + print_cmd_on_noexec(cmd->value.ArithFor->action); + //TODO: implement further members? + break; +#endif + case cm_case: + //TODO: handle this how?! + printf("cm_case: %s\n", cmd->value.Case->word->word); + //TODO: print pattern_list? + break; +#if defined (COND_COMMAND) + case cm_cond: + //TODO: implement? + printf("cm_cond\n"); + break; +#endif + case cm_connection: + printf("cm_connection\n"); + print_cmd_on_noexec(cmd->value.Connection->first); + print_cmd_on_noexec(cmd->value.Connection->second); + break; + case cm_coproc: + printf("cm_coproc\n"); + print_cmd_on_noexec(cmd->value.Coproc->command); + break; + case cm_for: + printf("cm_for\n"); + print_cmd_on_noexec(cmd->value.For->action); + break; + case cm_function_def: + printf("cm_function_def: %s\n", cmd->value.Function_def->name->word); + print_cmd_on_noexec(cmd->value.Function_def->command); + break; + case cm_group: + printf("cm_group\n"); + print_cmd_on_noexec(cmd->value.Group->command); + break; + case cm_if: + printf("cm_if\n"); + print_cmd_on_noexec(cmd->value.If->test); + print_cmd_on_noexec(cmd->value.If->true_case); + print_cmd_on_noexec(cmd->value.If->false_case); + break; +#if defined (SELECT_COMMAND) + case cm_select: + printf("cm_select\n"); + print_cmd_on_noexec(cmd->value.Select->action); + break; +#endif + case cm_simple: + wl = cmd->value.Simple->words; + if (wl->word->flags && W_ASSIGNMENT) { + printf("cm_simple assignment: %s\n", wl->word->word); + } else { + for (i=0; specials[i]; i++) { + if (strcmp(wl->word->word, specials[i]) == 0) { + printf("cm_simple %s: %s\n", specials[i], wl->word->word); + return; + } + } + if (strcmp(wl->word->word, builtins[i]) == 0) { + //FIXME: filter bash internals or at least represent them differently + printf("cm_simple internal: %s\n", wl->word->word); + } else { + printf("cm_simple exec: %s\n", wl->word->word); + } + } + break; 
+ case cm_subshell: + printf("cm_subshell\n"); + print_cmd_on_noexec(cmd->value.Subshell->command); + break; + case cm_until: + //TODO: implement? + printf("cm_until\n"); + break; + case cm_while: + printf("cm_while\n"); + print_cmd_on_noexec(cmd->value.While->test); + print_cmd_on_noexec(cmd->value.While->action); + break; + } + } +} diff --git a/bash/print_cmd_on_noexec.h b/bash/print_cmd_on_noexec.h new file mode 100755 index 0000000..cce77a3 --- /dev/null +++ b/bash/print_cmd_on_noexec.h @@ -0,0 +1,30 @@ +#ifndef _INSPECT_CMD_H +#define _INSPECT_CMD_H + + + + +#include <stdio.h> +/* order matters :-( */ +#include "config.h" +#include "bashtypes.h" +#include "command.h" +#include "general.h" + + + + +/* provide some luxury for malloc */ +#define REALLOC(ptr, size) if(((ptr) = realloc(ptr, size)) == NULL) { perror("error: realloc() "); } +#define MIN(a, b) (a < b) ? a : b + + + + +/* prototypes */ +void inspect_cmd(COMMAND *cmd); + + + + +#endif diff --git a/bash/testsh b/bash/testsh new file mode 100755 index 0000000..07f26ca --- /dev/null +++ b/bash/testsh @@ -0,0 +1,16 @@ +#!/bin/bash +a() { + b +} +while c +do + d +done < <(e) +echo $(f) +$(g) +`h` + +case i in +j) k ;; +$(echo l)) m;; +esac -- 1.7.2.5