(Second try: after 5 hours the first mail still hasn't gone through, but it
hasn't produced a non-delivery notification (DSN) either.)


Hi everyone,

I'm maintaining a collection of shell scripts in some 50 different
Debian packages. Some of them have no dependencies (with respect to
external commands), some of them depend on other Debian packages and
some depend on each other.

I have grown sick of dependency management, yet I want it to be clean
and - newly - automated as far as possible. There are a handful of
Perl scripts out there that parse bash scripts in a "print every 1st word
on a line or whatever stands after a semicolon or a pipe" kind of way,
which didn't satisfy me at all.

So I thought why re-invent the wheel...bash already has its own parser,
why not use it to print every would-be-issued command?

I did some hacking and came up with the attached patches, which
- add a new command-line option -N (implying -n)
- make it act like a "verbose noexec" flag: it walks the syntax tree and
prints each command with its associated command type.


Currently it's in POC state and I'd like to hear your thoughts and I
have a few questions of my own:
- Would this feature be considered for inclusion upstream?
- What needs adjustment for it to be considered (style, technical reasons)?


There are some constructs which still aren't covered and I'm gonna need
help with:
1   a=$(( $(foo) + $(bar) ))
2   $(foo)
3   `foo`
4   case $(foo) in
5   < <(foo)

- 1 through 3 show up as simple_commands with the assignment flag set
but are not further evaluated by the parser (I guess that happens at
runtime...)
- 4 shows up as case_cmd, but is otherwise unevaluated, as above
- 5 doesn't show up at all in the output of
./bash-patched -nN ./testscript.sh


I don't have any knowledge of the inner workings of parse.y, let alone
how to use it or how bash interacts with it - I just happened to have
found the right place to hook into...do you have an idea how to cover these?

Thanks
Daniel

>From 698c8ae89e1bbea8d047d48c66067b09e0c38dc9 Mon Sep 17 00:00:00 2001
From: Daniel Reichelt <hack...@nachtgeist.net>
Date: Sun, 31 Mar 2013 15:08:49 +0200
Subject: [PATCH 1/2] add flag 'N'

---
 bash/flags.c |   16 +++++++++++++++-
 bash/flags.h |    2 +-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/bash/flags.c b/bash/flags.c
index d3b38ad..fd78008 100644
--- a/bash/flags.c
+++ b/bash/flags.c
@@ -75,6 +75,11 @@ int place_keywords_in_env = 0;
    destructive. */
 int read_but_dont_execute = 0;
 
+/* Non-zero means print each command, with its command type, when it is read
+   but not executed (i.e. while read_but_dont_execute is in effect).  This is
+   useful for static analysis of commands to be executed in a script */
+int read_and_print_but_dont_execute = 0;
+
 /* Non-zero means end of file is after one command. */
 int just_one_command = 0;
 
@@ -184,6 +189,7 @@ const struct flags_alist shell_flags[] = {
   { 'm', &jobs_m_flag },
 #endif /* JOB_CONTROL */
   { 'n', &read_but_dont_execute },
+  { 'N', &read_and_print_but_dont_execute },
   { 'p', &privileged_mode },
 #if defined (RESTRICTED_SHELL)
   { 'r', &restricted },
@@ -274,6 +280,14 @@ change_flag (flag, on_or_off)
 	read_but_dont_execute = 0;
       break;
 
+	case 'N': /* -N implies -n, but only when the flag is being switched on */
+		if (interactive_shell) {
+			read_but_dont_execute = 0;	/* never noexec in an interactive shell */
+		} else if (on_or_off == FLAG_ON) {
+			read_but_dont_execute = 1;	/* +N must not enable noexec */
+		}
+		break;
+
     case 'p':
       if (on_or_off == FLAG_OFF)
 	disable_priv_mode ();
@@ -317,7 +331,7 @@ void
 reset_shell_flags ()
 {
   mark_modified_vars = exit_immediately_on_error = disallow_filename_globbing = 0;
-  place_keywords_in_env = read_but_dont_execute = just_one_command = 0;
+  place_keywords_in_env = read_but_dont_execute = read_and_print_but_dont_execute = just_one_command = 0;
   noclobber = unbound_vars_is_error = echo_input_at_read = 0;
   echo_command_at_execute = jobs_m_flag = forced_interactive = 0;
   no_symbolic_links = no_invisible_vars = privileged_mode = pipefail_opt = 0;
diff --git a/bash/flags.h b/bash/flags.h
index d8fa757..aab4291 100644
--- a/bash/flags.h
+++ b/bash/flags.h
@@ -42,7 +42,7 @@ extern char optflags[];
 
 extern int
   mark_modified_vars, exit_immediately_on_error, disallow_filename_globbing,
-  place_keywords_in_env, read_but_dont_execute,
+  place_keywords_in_env, read_but_dont_execute, read_and_print_but_dont_execute,
   just_one_command, unbound_vars_is_error, echo_input_at_read,
   echo_command_at_execute, no_invisible_vars, noclobber,
   hashing_enabled, forced_interactive, privileged_mode,
-- 
1.7.2.5


>From a0ca52e731716d5339036df0a19f1ed23b598b73 Mon Sep 17 00:00:00 2001
From: Daniel Reichelt <hack...@nachtgeist.net>
Date: Tue, 2 Apr 2013 10:39:11 +0200
Subject: [PATCH 2/2] print command type in eval.c and adjust Makefile

---
 bash/Makefile.in           |    4 ++
 bash/eval.c                |    2 +
 bash/print_cmd_on_noexec.c |  126 ++++++++++++++++++++++++++++++++++++++++++++
 bash/print_cmd_on_noexec.h |   30 ++++++++++
 bash/testsh                |   16 ++++++
 5 files changed, 178 insertions(+), 0 deletions(-)
 create mode 100644 bash/print_cmd_on_noexec.c
 create mode 100755 bash/print_cmd_on_noexec.h
 create mode 100755 bash/testsh

diff --git a/bash/Makefile.in b/bash/Makefile.in
index f01d49c..3a21b1a 100644
--- a/bash/Makefile.in
+++ b/bash/Makefile.in
@@ -418,6 +418,7 @@ CSOURCES = shell.c eval.c parse.y general.c make_cmd.c print_cmd.c y.tab.c \
 	   input.c bashhist.c array.c arrayfunc.c assoc.c sig.c pathexp.c \
 	   unwind_prot.c siglist.c bashline.c bracecomp.c error.c \
 	   list.c stringlib.c locale.c findcmd.c redir.c \
+	   print_cmd_on_noexec.c \
 	   pcomplete.c pcomplib.c syntax.c xmalloc.c
 
 HSOURCES = shell.h flags.h trap.h hashcmd.h hashlib.h jobs.h builtins.h \
@@ -427,6 +428,7 @@ HSOURCES = shell.h flags.h trap.h hashcmd.h hashlib.h jobs.h builtins.h \
 	   subst.h externs.h siglist.h bashhist.h bashline.h bashtypes.h \
 	   array.h arrayfunc.h sig.h mailcheck.h bashintl.h bashjmp.h \
 	   execute_cmd.h parser.h pathexp.h pathnames.h pcomplete.h assoc.h \
+	   print_cmd_on_noexec.h \
 	   $(BASHINCFILES)
 
 SOURCES	 = $(CSOURCES) $(HSOURCES) $(BUILTIN_DEFS)
@@ -446,6 +448,7 @@ OBJECTS	 = shell.o eval.o y.tab.o general.o make_cmd.o print_cmd.o $(GLOBO) \
 	   trap.o input.o unwind_prot.o pathexp.o sig.o test.o version.o \
 	   alias.o array.o arrayfunc.o assoc.o braces.o bracecomp.o bashhist.o \
 	   bashline.o $(SIGLIST_O) list.o stringlib.o locale.o findcmd.o redir.o \
+	   print_cmd_on_noexec.o \
 	   pcomplete.o pcomplib.o syntax.o xmalloc.o $(SIGNAMES_O)
 
 # Where the source code of the shell builtins resides.
@@ -1055,6 +1058,7 @@ variables.o: pcomplete.h  ${BASHINCDIR}/chartypes.h
 variables.o: ${BASHINCDIR}/posixtime.h assoc.h
 version.o:  conftypes.h patchlevel.h version.h
 xmalloc.o: config.h bashtypes.h ${BASHINCDIR}/ansi_stdlib.h error.h
+print_cmd_on_noexec.o: bashtypes.h command.h config.h general.h print_cmd_on_noexec.h
 
 # job control
 
diff --git a/bash/eval.c b/bash/eval.c
index 9011e0b..9db35d5 100644
--- a/bash/eval.c
+++ b/bash/eval.c
@@ -138,6 +138,8 @@ reader_loop ()
 	{
 	  if (interactive_shell == 0 && read_but_dont_execute)
 	    {
+	      if (read_and_print_but_dont_execute)
+		      print_cmd_on_noexec(global_command);
 	      last_command_exit_value = EXECUTION_SUCCESS;
 	      dispose_command (global_command);
 	      global_command = (COMMAND *)NULL;
diff --git a/bash/print_cmd_on_noexec.c b/bash/print_cmd_on_noexec.c
new file mode 100644
index 0000000..5af0216
--- /dev/null
+++ b/bash/print_cmd_on_noexec.c
@@ -0,0 +1,126 @@
+#include "print_cmd_on_noexec.h"
+
+/* Names reported as "internal".  TODO: instead of duplicating builtins,
+   these should be moved from mkinternals.c to an includable header */
+char *builtins[] = {
+	":", ".", "alias", "alias", "bg", "break", "cd", "command", "continue",
+	"declare", "eval", "exec", "exit", "export", "export", "false", "fc",
+	"fg", "getopts", "jobs", "kill", "local", "newgrp", "pwd", "read",
+	"readonly", "readonly", "return", "set", "shift", "source", "times",
+	"trap", "true", "typeset", "umask", "unalias", "unset", "wait",
+	};
+
+char *specials[] = {
+		".", "eval", "nohup",
+		//TODO: how to handle parameters?
+		"bash", "exec", "invoke-rc.d", "ionice", "nice", "sh",
+	};
+
+/* Return non-zero if NAME equals one of the first COUNT entries of TABLE. */
+static int name_in_table(const char *name, char **table, size_t count) {
+	while (count-- > 0) {
+		if (strcmp(name, table[count]) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/* Walk the parsed command tree CMD and print one line per node with its
+   command type; simple commands are classified as assignment, special,
+   internal or exec.  A NULL CMD prints nothing.  TODO: still not covered:
+   < <(foo), the word in case $(bar) in, a=$(( $(foo) + $(bar) )), $(foo) */
+void print_cmd_on_noexec(COMMAND *cmd) {
+	WORD_LIST *wl;
+	PATTERN_LIST *clause;
+	const char *name;
+	if (cmd == NULL)
+		return;
+	switch (cmd->type) {
+#if defined (DPAREN_ARITHMETIC)
+	case cm_arith:
+		//TODO: implement?
+		printf("cm_arith\n");
+		break;
+#endif
+#if defined (ARITH_FOR_COMMAND)
+	case cm_arith_for:
+		printf("cm_arith_for\n");
+		print_cmd_on_noexec(cmd->value.ArithFor->action);
+		//TODO: implement further members?
+		break;
+#endif
+	case cm_case:
+		//TODO: the case word itself (e.g. $(foo)) is still not analysed
+		printf("cm_case: %s\n", cmd->value.Case->word->word);
+		for (clause = cmd->value.Case->clauses; clause != NULL; clause = clause->next)
+			print_cmd_on_noexec(clause->action);	/* any clause may run */
+		break;
+#if defined (COND_COMMAND)
+	case cm_cond:
+		//TODO: implement?
+		printf("cm_cond\n");
+		break;
+#endif
+	case cm_connection:
+		printf("cm_connection\n");
+		print_cmd_on_noexec(cmd->value.Connection->first);
+		print_cmd_on_noexec(cmd->value.Connection->second);
+		break;
+	case cm_coproc:
+		printf("cm_coproc\n");
+		print_cmd_on_noexec(cmd->value.Coproc->command);
+		break;
+	case cm_for:
+		printf("cm_for\n");
+		print_cmd_on_noexec(cmd->value.For->action);
+		break;
+	case cm_function_def:
+		printf("cm_function_def: %s\n", cmd->value.Function_def->name->word);
+		print_cmd_on_noexec(cmd->value.Function_def->command);
+		break;
+	case cm_group:
+		printf("cm_group\n");
+		print_cmd_on_noexec(cmd->value.Group->command);
+		break;
+	case cm_if:
+		printf("cm_if\n");
+		print_cmd_on_noexec(cmd->value.If->test);
+		print_cmd_on_noexec(cmd->value.If->true_case);
+		print_cmd_on_noexec(cmd->value.If->false_case);
+		break;
+#if defined (SELECT_COMMAND)
+	case cm_select:
+		printf("cm_select\n");
+		print_cmd_on_noexec(cmd->value.Select->action);
+		break;
+#endif
+	case cm_simple:
+		wl = cmd->value.Simple->words;
+		if (wl == NULL || wl->word == NULL)
+			break;	/* e.g. a redirection-only command has no words */
+		name = wl->word->word;
+		if (wl->word->flags & W_ASSIGNMENT) {	/* bit test, not logical AND */
+			printf("cm_simple assignment: %s\n", name);
+		} else if (name_in_table(name, specials, sizeof specials / sizeof specials[0])) {
+			printf("cm_simple %s: %s\n", name, name);
+		} else if (name_in_table(name, builtins, sizeof builtins / sizeof builtins[0])) {
+			//FIXME: filter bash internals or at least represent them differently
+			printf("cm_simple internal: %s\n", name);
+		} else {
+			printf("cm_simple exec: %s\n", name);
+		}
+		break;
+	case cm_subshell:
+		printf("cm_subshell\n");
+		print_cmd_on_noexec(cmd->value.Subshell->command);
+		break;
+	case cm_until:	/* until loops are stored in the While member too */
+	case cm_while:
+		printf("%s\n", cmd->type == cm_until ? "cm_until" : "cm_while");
+		print_cmd_on_noexec(cmd->value.While->test);
+		print_cmd_on_noexec(cmd->value.While->action);
+		break;
+	default:
+		break;
+	}
+}
diff --git a/bash/print_cmd_on_noexec.h b/bash/print_cmd_on_noexec.h
new file mode 100755
index 0000000..cce77a3
--- /dev/null
+++ b/bash/print_cmd_on_noexec.h
@@ -0,0 +1,30 @@
+#ifndef _INSPECT_CMD_H
+#define _INSPECT_CMD_H
+
+
+
+
+#include <stdio.h>
+/* order matters :-( */
+#include "config.h"
+#include "bashtypes.h"
+#include "command.h"
+#include "general.h"
+
+
+
+
+/* provide some luxury for malloc */
+#define REALLOC(ptr, size) if(((ptr) = realloc(ptr, size)) == NULL) { perror("error: realloc() "); }
+#define MIN(a, b) (a < b) ? a : b
+
+
+
+
+/* prototypes */
+void		inspect_cmd(COMMAND *cmd);
+
+
+
+
+#endif
diff --git a/bash/testsh b/bash/testsh
new file mode 100755
index 0000000..07f26ca
--- /dev/null
+++ b/bash/testsh
@@ -0,0 +1,16 @@
+#!/bin/bash
+a() {	# function definition
+	b	# simple command inside the function body
+}
+while c	# loop test command
+do
+	d	# loop body command
+done < <(e)	# process substitution feeding the loop's stdin
+echo $(f)	# command substitution used as an argument
+$(g)	# command substitution in command position
+`h`	# backquoted command substitution in command position
+
+case i in
+j) k ;;	# clause with a literal pattern
+$(echo l)) m;;	# clause whose pattern is a command substitution
+esac
-- 
1.7.2.5


Reply via email to