From: Sukadev Bhattiprolu <[email protected]>
Date: Fri, 8 Jan 2010 11:30:23 -0800
Subject: [PATCH] pthread3: Ensure thread state is consistent across C/R

Specifically, this test ensures that concurrency, thread-specific-info and
scheduling parameters are consistent across checkpoint/restart.

Signed-off-by: Sukadev Bhattiprolu <[email protected]>
---
 process-tree/Makefile        |    2 +-
 process-tree/pthread3.c      |  376 ++++++++++++++++++++++++++++++++++++++++++
 process-tree/run-pthread3.sh |  205 +++++++++++++++++++++++
 3 files changed, 582 insertions(+), 1 deletions(-)
 create mode 100644 process-tree/pthread3.c
 create mode 100755 process-tree/run-pthread3.sh

diff --git a/process-tree/Makefile b/process-tree/Makefile
index 3d6010d..b043394 100644
--- a/process-tree/Makefile
+++ b/process-tree/Makefile
@@ -1,5 +1,5 @@
 
-targets                = ptree1 pthread1 pthread2
+targets                = ptree1 pthread1 pthread2 pthread3
 
 INCLUDE                = ../libcrtest
 LIBCRTEST      = ../libcrtest/common.o
diff --git a/process-tree/pthread3.c b/process-tree/pthread3.c
new file mode 100644
index 0000000..c025c9c
--- /dev/null
+++ b/process-tree/pthread3.c
@@ -0,0 +1,376 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <wait.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <libcrtest.h>
+#include <pthread.h>
+
+#define        ERROR_EXIT      ((void *)1)     /* not referenced elsewhere in pthread3.c */
+#define MIN_STACK_SIZE (64 *1024)      /* not referenced elsewhere in pthread3.c */
+#define LOG_PREFIX     "logs.d/pthread3"       /* main() appends ".log" to form the log path */
+
+FILE *logfp;   /* log stream; opened by main(), written by every thread */
+int num_threads = 4;   /* number of worker threads; overridden by -n */
+int *tstatus;  /* one int per worker, allocated in create_threads() */
+pthread_barrier_t barrier;     /* initialized in main() for num_threads + 1 waiters */
+pthread_mutex_t dump_lock;     /* initialized in main(); not locked anywhere in pthread3.c */
+pthread_key_t key;     /* thread-specific-data key, created via create_key() */
+
+/*
+ * Snapshot of the per-thread state that is expected to survive a
+ * checkpoint/restart cycle. One copy is filled in before the checkpoint
+ * and one after restart; the two are then compared field by field.
+ */
+struct thread_info {
+       /*
+        * Stored as pthread_t rather than int: an int truncates the id on
+        * LP64 systems, which breaks the (void *) comparison against the
+        * thread-specific value.
+        */
+       pthread_t tid;
+       int concurrency;                /* pthread_getconcurrency() */
+       void *specific;                 /* pthread_getspecific(key) */
+       sigset_t sigmask;               /* signal mask of the thread */
+       int sched_policy;               /* from pthread_getschedparam() */
+       struct sched_param sched_param; /* from pthread_getschedparam() */
+};
+
+/*
+ * Print the command-line synopsis and call do_exit(1).
+ *
+ * argv: the program's argument vector; only argv[0] is used.
+ *
+ * The advertised default must match the initializer of num_threads.
+ */
+static void usage(char *argv[])
+{
+       printf("%s [-h] [-n num-threads]\n", argv[0]);
+       printf("\t <num-threads> # of threads, default 4\n");
+       do_exit(1);
+}
+
+/*
+ * Give the calling thread a recognizable thread-specific value: its own
+ * pthread id, stored under the global 'key'.
+ *
+ * tnum: index of the thread, used only to tag log messages.
+ *
+ * On failure the error is logged to logfp and do_exit(1) is called.
+ */
+static void set_thread_info(int tnum)
+{
+       int rc;
+
+       /*
+        * pthread_*() calls report failure by returning a positive error
+        * number; they do not set errno, so decode rc itself.
+        */
+       rc = pthread_setspecific(key, (void *)pthread_self());
+       if (rc != 0) {
+               fprintf(logfp, "%d: pthread_setspecific() failed, rc %d, "
+                               "error %s\n", tnum, rc, strerror(rc));
+               do_exit(1);
+       }
+
+       /*
+        * TODO: Change other fields in tinfo to some non-default value
+        */
+}
+
+/*
+ * Capture the calling thread's current state into *tinfo: pthread id,
+ * concurrency level, thread-specific value, signal mask and scheduling
+ * policy/parameters.
+ *
+ * tnum:  index of the thread, used only to tag log messages.
+ * tinfo: destination for the snapshot.
+ *
+ * The thread-specific value is expected to equal the thread's own id
+ * (that is what set_thread_info() stores). Any mismatch or API failure
+ * is logged to logfp and ends in do_exit(1).
+ */
+static void get_thread_info(int tnum, struct thread_info *tinfo)
+{
+       int rc;
+       pthread_t self = pthread_self();
+
+       tinfo->tid = self;
+       tinfo->concurrency = pthread_getconcurrency();
+       tinfo->specific = pthread_getspecific(key);
+
+       /*
+        * Compare against the full-width id rather than tinfo->tid so the
+        * check stays correct even if the stored field is narrower.
+        */
+       if (tinfo->specific != (void *)self) {
+               fprintf(logfp, "%d: pthread_getspecific(): expected %p, actual "
+                               "%p\n", tnum, (void *)self, tinfo->specific);
+               do_exit(1);
+       }
+
+       /*
+        * With a NULL new-set the 'how' argument is ignored and the
+        * current mask is only read. pthread_*() calls return a positive
+        * error number on failure and do not set errno.
+        */
+       rc = pthread_sigmask(SIG_SETMASK, NULL, &tinfo->sigmask);
+       if (rc != 0) {
+               fprintf(logfp, "%d: pthread_sigmask() failed, rc %d, "
+                               "error %s\n", tnum, rc, strerror(rc));
+               do_exit(1);
+       }
+
+       rc = pthread_getschedparam(self, &tinfo->sched_policy,
+                               &tinfo->sched_param);
+       if (rc != 0) {
+               fprintf(logfp, "%d: pthread_getschedparam() failed, rc %d, "
+                               "error %s\n", tnum, rc, strerror(rc));
+               do_exit(1);
+       }
+}
+
+/*
+ * Compare the thread state captured before checkpoint (exp_tinfo) with
+ * the state captured after restart (act_tinfo).
+ *
+ * tnum:      index of the thread (currently not used in the messages).
+ * exp_tinfo: snapshot taken before the checkpoint.
+ * act_tinfo: snapshot taken after the restart.
+ *
+ * Every differing field is logged to logfp; all fields are checked
+ * before bailing out, so one run reports every miscompare. If anything
+ * differed, do_exit(1) is called.
+ */
+static void compare_thread_info(int tnum, struct thread_info *exp_tinfo,
+               struct thread_info *act_tinfo)
+{
+       int mismatch = 0;
+
+       (void)tnum;
+
+       if (exp_tinfo->tid != act_tinfo->tid) {
+               mismatch = 1;
+               fprintf(logfp, "thread_info.tid miscompare: expected %p, "
+                               "actual %p\n", (void *)exp_tinfo->tid,
+                               (void *)act_tinfo->tid);
+       }
+
+       if (exp_tinfo->concurrency != act_tinfo->concurrency) {
+               mismatch = 1;
+               fprintf(logfp, "thread_info.concurrency miscompare: expected "
+                               "%d, actual %d\n", exp_tinfo->concurrency,
+                               act_tinfo->concurrency);
+       }
+
+       if (exp_tinfo->specific != act_tinfo->specific) {
+               mismatch = 1;
+               fprintf(logfp, "thread_info.specific miscompare: expected "
+                               "%p, actual %p\n", exp_tinfo->specific,
+                               act_tinfo->specific);
+       }
+
+       /* Both snapshots were zeroed by the caller, so padding compares equal */
+       if (memcmp(&exp_tinfo->sigmask, &act_tinfo->sigmask,
+                               sizeof(sigset_t))) {
+               mismatch = 1;
+               fprintf(logfp, "thread_info.sigmask miscompare: \n");
+       }
+
+       if (exp_tinfo->sched_policy != act_tinfo->sched_policy) {
+               mismatch = 1;
+               fprintf(logfp, "thread_info.sched_policy miscompare: expected "
+                               "%d, actual %d\n", exp_tinfo->sched_policy,
+                               act_tinfo->sched_policy);
+       }
+
+       if (memcmp(&exp_tinfo->sched_param, &act_tinfo->sched_param,
+                               sizeof(struct sched_param))) {
+               mismatch = 1;
+               fprintf(logfp, "thread_info.sched_param miscompare: expected "
+                               "priority %d, actual %d\n",
+                               exp_tinfo->sched_param.sched_priority,
+                               act_tinfo->sched_param.sched_priority);
+       }
+
+       if (mismatch)
+               do_exit(1);
+}
+
+
+/*
+ * Thread start routine.
+ *
+ * arg: the thread's index, passed by value inside the pointer.
+ *
+ * The thread sets and records its state, meets the main thread at the
+ * barrier, polls test_done() once a second until the test is told to
+ * finish, then records its state again and compares the two snapshots.
+ *
+ * Returns (through pthread_exit) a pointer to tstatus[tnum], set to 0.
+ * Any failure is logged to logfp and ends in do_exit(1).
+ */
+void *do_work(void *arg)
+{
+       /* Go through long so the pointer-to-int narrowing is explicit */
+       int tnum = (int)(long)arg;
+       int rc;
+       struct thread_info exp_tinfo, act_tinfo;
+
+       /* Zero both snapshots so the later memcmp()s see no stale bytes */
+       memset(&exp_tinfo, 0, sizeof(struct thread_info));
+       memset(&act_tinfo, 0, sizeof(struct thread_info));
+
+       set_thread_info(tnum);
+
+       get_thread_info(tnum, &exp_tinfo);
+
+       fprintf(logfp, "%d: Thread %lu: waiting for checkpoint\n", tnum,
+                       pthread_self());
+       fflush(logfp);
+
+       /*
+        * Inform main-thread we are ready for checkpoint.
+        * pthread_barrier_wait() returns an error number on failure and
+        * does not set errno.
+        */
+       rc = pthread_barrier_wait(&barrier);
+       if (rc != PTHREAD_BARRIER_SERIAL_THREAD && rc != 0) {
+               fprintf(logfp, "%d: pthread_barrier_wait() failed, rc %d, "
+                               "error %s\n", tnum, rc, strerror(rc));
+               do_exit(1);
+       }
+
+       /*
+        * Wait for checkpoint/restart.
+        */
+       while (!test_done())
+               sleep(1);
+
+       /*
+        * Collect attributes after checkpoint/restart.
+        */
+       get_thread_info(tnum, &act_tinfo);
+
+       /*
+        * Compare attributes before and after C/R.
+        */
+       compare_thread_info(tnum, &exp_tinfo, &act_tinfo);
+
+       fprintf(logfp, "%d: Thread %lu: exiting, rc 0\n", tnum,
+                       pthread_self());
+       fflush(logfp);
+
+       tstatus[tnum] = 0;
+       pthread_exit((void *)&tstatus[tnum]);
+}
+
+/*
+ * Create a thread-specific-data key with no destructor.
+ *
+ * key: receives the new key.
+ *
+ * On failure the error is logged to logfp and do_exit(1) is called.
+ */
+static void create_key(pthread_key_t *key)
+{
+       int rc;
+
+       /*
+        * pthread_key_create() returns a positive error number on failure
+        * and does not set errno.
+        */
+       rc = pthread_key_create(key, NULL);
+       if (rc != 0) {
+               fprintf(logfp, "pthread_key_create() failed, rc %d, error %s\n",
+                               rc, strerror(rc));
+               do_exit(1);
+       }
+}
+
+/*
+ * Allocate a pthread_attr_t on the heap and initialize it with
+ * pthread_attr_init().
+ *
+ * Returns the initialized attribute object. The caller owns it and is
+ * responsible for pthread_attr_destroy() and free().
+ *
+ * On failure the error is logged to logfp and do_exit(1) is called.
+ */
+pthread_attr_t *alloc_thread_attr(void)
+{
+       int rc;
+       pthread_attr_t *attr;
+
+       attr = malloc(sizeof(*attr));
+       if (!attr) {
+               fprintf(logfp, "malloc(attr): error %s\n", strerror(errno));
+               do_exit(1);
+       }
+
+       /*
+        * pthread_attr_init() returns a positive error number on failure
+        * and does not set errno.
+        */
+       rc = pthread_attr_init(attr);
+       if (rc != 0) {
+               fprintf(logfp, "pthread_attr_init(): rc %d error %s\n", rc,
+                               strerror(rc));
+               do_exit(1);
+       }
+
+       return attr;
+}
+
+/*
+ * Start n worker threads running do_work().
+ *
+ * n: number of threads to create. Thread i receives i as its argument.
+ *
+ * Also allocates the global tstatus array (one int per thread).
+ *
+ * Returns a malloc()ed array of n thread ids, owned by the caller.
+ * On failure the error is logged to logfp and do_exit(1) is called.
+ */
+pthread_t *create_threads(int n)
+{
+       int i;
+       int rc;
+       pthread_t *tid_list;
+       pthread_attr_t *attr;
+
+       tid_list = malloc(n * sizeof(*tid_list));
+       tstatus = malloc(n * sizeof(*tstatus));
+
+       if (!tid_list || !tstatus) {
+               fprintf(logfp, "malloc() failed, n %d, error %s\n",
+                               n, strerror(errno));
+               do_exit(1);
+       }
+
+       for (i = 0; i < n; i++) {
+               attr = alloc_thread_attr();
+               if (!attr)
+                       do_exit(1);
+
+               /*
+                * pthread_create() returns a positive error number on
+                * failure and does not set errno. Checking for "< 0"
+                * would miss every failure and leave tid_list[i]
+                * uninitialized.
+                */
+               rc = pthread_create(&tid_list[i], attr, do_work,
+                               (void *)(long)i);
+               if (rc != 0) {
+                       fprintf(logfp, "pthread_create(): i %d, rc %d, "
+                                       "error %s\n", i, rc, strerror(rc));
+                       do_exit(1);
+               }
+
+               /*
+                * The attribute object is only consulted while the thread
+                * is being created, so release it here rather than leak
+                * one per thread.
+                */
+               pthread_attr_destroy(attr);
+               free(attr);
+       }
+
+       fprintf(logfp, "Created %d threads\n", n);
+       fflush(logfp);
+
+       return tid_list;
+}
+
+/*
+ * Join the n threads in tid_list and collect their exit status.
+ *
+ * tid_list: thread ids as returned by create_threads().
+ * n:        number of entries in tid_list.
+ *
+ * Each thread is expected to return a pointer to an int status word.
+ *
+ * Returns 0 if every thread reported status 0, 1 otherwise. A failing
+ * pthread_join() is logged to logfp and ends in do_exit(1).
+ */
+int wait_for_threads(pthread_t *tid_list, int n)
+{
+       int i;
+       int rc;
+       int *statusp;
+       void *retval;
+       int exit_status = 0;
+
+       for (i = 0; i < n; i++) {
+               /*
+                * Join through a real void * instead of casting &statusp
+                * to void **. pthread_join() returns a positive error
+                * number on failure and does not set errno.
+                */
+               retval = NULL;
+               rc = pthread_join(tid_list[i], &retval);
+               if (rc != 0) {
+                       fprintf(logfp, "pthread_join() failed, i %d, rc %d "
+                                       "error %s\n", i, rc, strerror(rc));
+                       do_exit(1);
+               }
+
+               /*
+                * A cancelled thread, or one that returned NULL, has no
+                * status word to read; count it as a failure instead of
+                * dereferencing an invalid pointer.
+                */
+               if (retval == NULL || retval == PTHREAD_CANCELED) {
+                       fprintf(logfp, "i %d: no exit status returned\n", i);
+                       fflush(logfp);
+                       exit_status = 1;
+                       continue;
+               }
+
+               statusp = retval;
+               fprintf(logfp, "i %d: *statusp %x\n", i,
+                               (unsigned int)*statusp);
+               fflush(logfp);
+
+               if (*statusp)
+                       exit_status = 1;
+       }
+
+       return exit_status;
+}
+
+/*
+ * Test driver.
+ *
+ * Options: -n <num-threads> sets the number of worker threads,
+ *          -h prints usage.
+ *
+ * Opens the log file, closes every other descriptor below 100, creates
+ * the barrier, mutex, key and worker threads, waits at the barrier until
+ * every worker has recorded its state, signals checkpoint-ready and then
+ * joins the workers.
+ *
+ * Leaves through do_exit() with 0 if every thread reported success and
+ * with 1 otherwise.
+ */
+int main(int argc, char *argv[])
+{
+       int c;
+       int i;
+       int rc;
+       int log_fd;
+       pthread_t *tid_list;
+       char log_file[256];
+
+       snprintf(log_file, sizeof(log_file), "%s.log", LOG_PREFIX);
+
+       if (test_done()) {
+               fprintf(stderr, "Remove %s before running test\n", TEST_DONE);
+               do_exit(1);
+       }
+
+       while ((c = getopt(argc, argv, "hn:")) != EOF) {
+               switch (c) {
+               case 'n': num_threads = atoi(optarg); break;
+               case 'h':
+               default:
+                       usage(argv);
+               }
+       }
+
+       /* atoi() yields 0 on garbage; a barrier needs at least one worker */
+       if (num_threads < 1)
+               usage(argv);
+
+       logfp = fopen(log_file, "w");
+       if (!logfp) {
+               fprintf(stderr, "fopen(%s) failed, %s\n", log_file,
+                                       strerror(errno));
+               fflush(stderr);
+               do_exit(1);
+       }
+
+       fprintf(stderr, "Redirecting output to %s\n", log_file);
+       fflush(stderr);
+
+       /*
+        * Close everything except the log file, including stdin, stdout
+        * and stderr. From here on all diagnostics go to logfp.
+        */
+       log_fd = fileno(logfp);
+       for (i = 0; i < 100; i++) {
+               if (i != log_fd)
+                       close(i);
+       }
+
+       /*
+        * Create a barrier which the main-thread can use to determine
+        * when all threads are ready for checkpoint.
+        *
+        * pthread_*() calls return a positive error number on failure and
+        * do not set errno.
+        */
+       rc = pthread_barrier_init(&barrier, NULL, num_threads + 1);
+       if (rc != 0) {
+               fprintf(logfp, "pthread_barrier_init() failed, rc %d, "
+                               "error %s\n", rc, strerror(rc));
+               do_exit(1);
+       }
+
+       rc = pthread_mutex_init(&dump_lock, NULL);
+       if (rc != 0) {
+               fprintf(logfp, "pthread_mutex_init() failed, rc %d, error %s\n",
+                               rc, strerror(rc));
+               do_exit(1);
+       }
+
+       create_key(&key);
+
+       tid_list = create_threads(num_threads);
+
+       /*
+        * Wait for everyone to be ready for checkpoint. The return value
+        * must be captured: the check below would otherwise test the
+        * stale rc left over from pthread_mutex_init().
+        */
+       rc = pthread_barrier_wait(&barrier);
+       if (rc != PTHREAD_BARRIER_SERIAL_THREAD && rc != 0) {
+               fprintf(logfp, "main: pthread_barrier_wait() failed, rc %d, "
+                               "error %s\n", rc, strerror(rc));
+               do_exit(1);
+       }
+
+       /*
+        * Now that we closed the special files and created the threads,
+        * tell any wrapper scripts, we are ready for checkpoint
+        */
+       set_checkpoint_ready();
+
+       rc = wait_for_threads(tid_list, num_threads);
+
+       fprintf(logfp, "Exiting with status %d\n", rc);
+
+       do_exit(rc);
+
+       /* Only reached if do_exit() returns */
+       return rc;
+}
diff --git a/process-tree/run-pthread3.sh b/process-tree/run-pthread3.sh
new file mode 100755
index 0000000..0cffe44
--- /dev/null
+++ b/process-tree/run-pthread3.sh
@@ -0,0 +1,205 @@
+#!/bin/bash
+
+source ../common.sh
+
+dir=`mktemp -p . -d -t cr_pthread3_XXXXXXX` || (echo "mktemp failed"; exit 1)
+mkdir $dir
+echo "Using output dir $dir"
+cd $dir
+
+# NOTE: As of ckpt-v15-dev, the --container option to 'ckpt' causes this
+#      test to fail with "container not isolated" message due to the
+#      log-file being shared between the application threads.
+#
+CHECKPOINT="`which checkpoint` --container"
+RESTART=`which restart`
+ECHO="/bin/echo -e"
+
+TEST_CMD="../pthread3"
+TEST_ARGS="-n 4"                       # -n: number of threads
+SCRIPT_LOG="log-run-pthread3"
+TEST_PID_FILE="pid.pthread3";
+
+SNAPSHOT_DIR="snap1.d"
+
+TEST_DONE="test-done"
+CHECKPOINT_FILE="checkpoint-pthread3";
+CHECKPOINT_READY="checkpoint-ready"
+CHECKPOINT_DONE="checkpoint-done"
+
+LOGS_DIR="logs.d"
+
+NS_EXEC="../../ns_exec"
+NS_EXEC_ARGS="-cgpuimP $TEST_PID_FILE"
+
+# Checkpoint process $1 with $CHECKPOINT, writing the image to
+# $CHECKPOINT_FILE. On failure, append the matching process list to
+# $SCRIPT_LOG and exit the script with status 1.
+checkpoint()
+{
+       local target=$1
+
+       $ECHO "Checkpoint: $CHECKPOINT $target \> $CHECKPOINT_FILE"
+       $CHECKPOINT $target > $CHECKPOINT_FILE
+       ret=$?
+
+       # Success: nothing more to do
+       [ $ret -eq 0 ] && return
+
+       $ECHO "***** FAIL: Checkpoint of $target failed"
+       ps -efL |grep $TEST_CMD >> $SCRIPT_LOG
+       exit 1;
+}
+
+# Block until the file $CHECKPOINT_READY exists, polling once a second
+# and printing a progress line on every poll.
+function wait_for_checkpoint_ready()
+{
+       until [ -f $CHECKPOINT_READY ]; do
+               $ECHO "\t- Waiting for $CHECKPOINT_READY"
+               sleep 1
+       done
+}
+
+# Launch the test program under $NS_EXEC in the background, wait until it
+# reports checkpoint-ready, then read the container-init pid from
+# $TEST_PID_FILE. An empty pid is fatal: the process list is logged and
+# the script exits with status 1.
+function create_container()
+{
+       local cinit_pid
+
+       cmdline="$NS_EXEC $NS_EXEC_ARGS -- $TEST_CMD $TEST_ARGS"
+       $ECHO "\t- Creating container:"
+       $ECHO "\t- $cmdline"
+
+       $cmdline &
+       wait_for_checkpoint_ready
+
+       # Find global pid of container-init
+       cinit_pid=`cat $TEST_PID_FILE`
+       if [ -z "$cinit_pid" ]; then
+               $ECHO "***** FAIL: Invalid container-init pid $cinit_pid"
+               ps -efL |grep $TEST_CMD >> $SCRIPT_LOG
+               exit 1
+       fi
+
+       $ECHO "Created container with pid $cinit_pid" >> $SCRIPT_LOG
+}
+
+# Start '$RESTART --pids --pidns --wait' in the background, reading the
+# image from $CHECKPOINT_FILE and appending its output to $SCRIPT_LOG.
+#
+# No exit-status check is done here: '$?' right after a command started
+# with '&' is always 0, so it cannot report a failed restart. The main
+# loop detects a failed restart by looking up the 'restart' process with
+# pidof.
+function restart_container
+{
+       cmdline="$RESTART --pids --pidns --wait"
+       $ECHO "\t- $cmdline"
+
+       sleep 1
+
+       $cmdline < $CHECKPOINT_FILE >> $SCRIPT_LOG 2>&1 &
+}
+
+# Copy the current log files into $SNAPSHOT_DIR. An existing snapshot
+# directory is first rotated to ${SNAPSHOT_DIR}.prev (replacing any older
+# .prev) and recreated empty.
+function create_fs_snapshot()
+{
+       local snap=$SNAPSHOT_DIR
+
+       # Rotate the previous snapshot out of the way
+       if [ -d $snap ]; then
+               rm -rf $snap.prev
+               mv $snap $snap.prev
+               mkdir $snap
+       fi
+
+       # Snapshot the log files
+       cp $LOGS_DIR/* $snap
+}
+
+# Copy the snapshotted files from $SNAPSHOT_DIR back into $LOGS_DIR.
+# Meant to run after the main process has been killed.
+function restore_fs_snapshot()
+{
+       /bin/cp $SNAPSHOT_DIR/* $LOGS_DIR
+}
+
+# Make sure no stray pthread3 from another run is still going
+killall $TEST_CMD > $SCRIPT_LOG 2>&1
+
+if [ ! -d $LOGS_DIR ]; then
+       mkdir $LOGS_DIR
+fi
+
+# NOTE(review): DATA_DIR, INPUT_DATA and FILEIO are not set anywhere in
+# this script; presumably they come from ../common.sh -- verify.
+if [ ! -d $DATA_DIR ]; then
+       mkdir $DATA_DIR
+fi
+
+if [ ! -d $SNAPSHOT_DIR ]; then
+       mkdir $SNAPSHOT_DIR
+fi
+
+if [ ! -f $INPUT_DATA ]; then
+       $FILEIO -C $INPUT_DATA
+fi
+
+# Start every run with an empty script log
+> $SCRIPT_LOG;
+cnt=1
+while [ $cnt -lt 15 ]; do
+       $ECHO "===== Iteration $cnt"
+
+       # Remove any 'state' files, start the app and let it tell us
+       # when it is ready
+       rm -f $CHECKPOINT_READY $TEST_DONE $TEST_PID_FILE
+
+       create_container
+       wait_for_checkpoint_ready
+
+       pid=`cat $TEST_PID_FILE`
+
+       $ECHO "\t- Done creating container, cinit-pid $pid"
+
+       ps -efL |grep $TEST_CMD >> $SCRIPT_LOG
+
+       # override default freezerdir
+       if [ -d $freezerdir ]; then
+               rmdir $freezerdir
+       fi
+       freezerdir=$freezermountpoint/$pid
+       freeze_pid $pid
+
+       num_pids1=`ps -efL |grep $TEST_CMD | wc -l`
+
+       create_fs_snapshot
+
+       checkpoint $pid
+
+       touch $CHECKPOINT_DONE
+
+       killall -9 `basename $TEST_CMD`
+
+       thaw
+
+       sleep 3
+
+       restore_fs_snapshot
+
+       restart_container
+
+       sleep 3;
+
+       num_pids2=`ps -efL |grep $TEST_CMD | wc -l`
+       ps -efL |grep $TEST_CMD >> $SCRIPT_LOG
+       $ECHO "\t- num_pids1 $num_pids1, num_pids2 $num_pids2";
+
+       # The 'restart' process is the parent of the restarted
+       # container-init; it is what we wait on below
+       nspid=`pidof restart`
+
+       if [ "x$nspid" == "x" ]; then
+               $ECHO "***** FAIL: Can't find pid of $RESTART"
+               exit 1;
+       fi
+
+       # End test gracefully
+       touch $TEST_DONE
+
+       $ECHO "\t- Waiting for restarted container to exit (global-pid $nspid)"
+       wait $nspid;
+       ret=$?
+
+       # NOTE(review): a non-zero status is only reported here, it does
+       # not fail the script -- confirm whether that is intended.
+       $ECHO "\t- Container exited, status $ret"
+
+       cnt=$((cnt+1))
+done
-- 
1.6.0.4

_______________________________________________
Containers mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
[email protected]
https://openvz.org/mailman/listinfo/devel

Reply via email to