[ 
https://issues.apache.org/jira/browse/FLINK-32668?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

ASF GitHub Bot updated FLINK-32668:
-----------------------------------
    Labels: pull-request-available  (was: )

> fix up watchdog timeout error msg  in common.sh(e2e test) 
> ----------------------------------------------------------
>
>                 Key: FLINK-32668
>                 URL: https://issues.apache.org/jira/browse/FLINK-32668
>             Project: Flink
>          Issue Type: Bug
>          Components: Build System / CI
>    Affects Versions: 1.16.2, 1.18.0, 1.17.1
>            Reporter: Hongshun Wang
>            Assignee: Hongshun Wang
>            Priority: Minor
>              Labels: pull-request-available
>         Attachments: image-2023-07-25-15-27-37-441.png
>
>
> When running an e2e test, an error like this occurs:
> !image-2023-07-25-15-27-37-441.png|width=733,height=115!
>  
> The corresponding code:
> {code:java}
> kill_test_watchdog() {
>     local watchdog_pid=$(cat $TEST_DATA_DIR/job_watchdog.pid)
>     echo "Stopping job timeout watchdog (with pid=$watchdog_pid)"
>     kill $watchdog_pid
> } 
> internal_run_with_timeout() {
>     local timeout_in_seconds="$1"
>     local on_failure="$2"
>     local command_label="$3"
>     local command="${@:4}"
>     on_exit kill_test_watchdog
>    (
>            command_pid=$BASHPID
>            (sleep "${timeout_in_seconds}" # set a timeout for this command
>             echo "${command_label:-"The command '${command}'"} (pid: 
> $command_pid) did not finish after $timeout_in_seconds seconds."
> eval "${on_failure}"
>            kill "$command_pid") & watchdog_pid=$!
>            echo $watchdog_pid > $TEST_DATA_DIR/job_watchdog.pid
>            # invoke
>           $command
>   )
> }{code}
>  
> When {{$command}} completes before the timeout, the watchdog process is
> killed successfully. However, when {{$command}} times out, the watchdog
> process kills {{$command}} and then exits itself, so the later
> {{kill $watchdog_pid}} in {{kill_test_watchdog}} targets a process that no
> longer exists and fails. The resulting "no such process" error message is
> hard to understand.
>  
> So I will modify it as follows, with a better error message:
>  
> {code:java}
> kill_test_watchdog() {
>       local watchdog_pid=$(cat $TEST_DATA_DIR/job_watchdog.pid)
>       if kill -0 $watchdog_pid > /dev/null 2>&1; then
>            echo "Stopping job timeout watchdog (with pid=$watchdog_pid)"
>            kill $watchdog_pid
>       else
>             echo "[ERROR] Test is timeout"
>             exit 1       
>       fi
> } {code}
>  
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to