This is an automated email from the ASF dual-hosted git repository.
hello-stephen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new ff954cc45f3 [test](pipeline) collect Cloud P0 resource usage (#65125)
ff954cc45f3 is described below
commit ff954cc45f3fd568c80dcee0ce75877ade8fba9c
Author: shuke <[email protected]>
AuthorDate: Fri Jul 3 11:02:21 2026 +0800
[test](pipeline) collect Cloud P0 resource usage (#65125)
## Proposed changes
- Sample system resource usage every minute during Cloud P0 regression
runs.
- Save snapshots to ${DORIS_HOME}/be/log/system_resource_usage.log
so they are archived with Doris logs.
- Capture uptime, free, meminfo, the top 10 processes by RSS and by CPU,
top output, and a final snapshot before archive.
## Testing
- bash -n regression-test/pipeline/common/doris-utils.sh
- bash -n regression-test/pipeline/cloud_p0/run.sh
- git diff --check
---
regression-test/pipeline/cloud_p0/run.sh | 3 ++
regression-test/pipeline/common/doris-utils.sh | 75 ++++++++++++++++++++++++++
2 files changed, 78 insertions(+)
diff --git a/regression-test/pipeline/cloud_p0/run.sh
b/regression-test/pipeline/cloud_p0/run.sh
index 7728cd94698..c659af89141 100644
--- a/regression-test/pipeline/cloud_p0/run.sh
+++ b/regression-test/pipeline/cloud_p0/run.sh
@@ -49,6 +49,8 @@ need_collect_log=false
# monitoring the log files in "${DORIS_HOME}"/regression-test/log/ for keyword
'Reach limit of connections'
_monitor_regression_log &
+start_system_resource_monitor
+trap 'stop_system_resource_monitor' EXIT
# shellcheck disable=SC2329
run() {
@@ -114,6 +116,7 @@ timeout_minutes=$((${repeat_times_from_trigger:-1} *
${BUILD_TIMEOUT_MINUTES:-18
timeout "${timeout_minutes}" bash -cx run
exit_flag="$?"
if print_running_pipeline_tasks; then :; fi
+stop_system_resource_monitor
# shellcheck source=/dev/null
source "$(cd "${teamcity_build_checkoutDir}" && bash
"${teamcity_build_checkoutDir}"/regression-test/pipeline/common/get-or-set-tmp-env.sh
'get')"
if get_jstack_and_jmap_of_fe; then echo "INFO: get_jstack_and_jmap_of_fe
done."; fi
diff --git a/regression-test/pipeline/common/doris-utils.sh
b/regression-test/pipeline/common/doris-utils.sh
index 00b6bc9f40a..aa3ee55dc39 100644
--- a/regression-test/pipeline/common/doris-utils.sh
+++ b/regression-test/pipeline/common/doris-utils.sh
@@ -662,6 +662,81 @@ _monitor_regression_log() {
}
+_append_system_resource_snapshot() {
+ local output_file="$1"
+ local top_n="${SYSTEM_RESOURCE_MONITOR_TOP_N:-10}"
+ {
+ echo "==================== $(date '+%Y-%m-%d %H:%M:%S %z')
===================="
+ echo "[uptime]"
+ uptime || true
+ echo
+ echo "[free -h]"
+ free -h || true
+ echo
+ echo "[/proc/meminfo]"
+ grep -E
'^(MemTotal|MemFree|MemAvailable|Buffers|Cached|SwapTotal|SwapFree|Slab|SReclaimable|SUnreclaim):'
/proc/meminfo || true
+ echo
+ echo "[ps sorted by rss]"
+ ps -eo pid,ppid,rss,vsz,%mem,%cpu,comm,args --sort=-rss | head -n
"$((top_n + 1))" || true
+ echo
+ echo "[ps sorted by cpu]"
+ ps -eo pid,ppid,rss,vsz,%mem,%cpu,comm,args --sort=-%cpu | head -n
"$((top_n + 1))" || true
+ echo
+ if command -v top >/dev/null; then
+ echo "[top]"
+ COLUMNS=240 top -b -n 1 -w 240 | head -n "$((top_n + 7))" || true
+ echo
+ fi
+ } >>"${output_file}" 2>&1
+}
+
+start_system_resource_monitor() {
+ if [[ ! -d "${DORIS_HOME:-}" ]]; then return 1; fi
+ if [[ -n "${SYSTEM_RESOURCE_MONITOR_PID:-}" ]] && kill -0
"${SYSTEM_RESOURCE_MONITOR_PID}" 2>/dev/null; then
+ echo "INFO: system resource monitor already started,
pid=${SYSTEM_RESOURCE_MONITOR_PID}"
+ return 0
+ fi
+
+ local interval="${SYSTEM_RESOURCE_MONITOR_INTERVAL_SECONDS:-60}"
+ local output_file="${DORIS_HOME}/be/log/system_resource_usage.log"
+ mkdir -p "$(dirname "${output_file}")"
+ echo "INFO: start system resource monitor, interval=${interval}s,
output=${output_file}"
+ (
+ set +e
+ monitor_sleep_pid=""
+ monitor_cleanup() {
+ if [[ -n "${monitor_sleep_pid}" ]]; then
+ kill "${monitor_sleep_pid}" 2>/dev/null || true
+ wait "${monitor_sleep_pid}" 2>/dev/null || true
+ fi
+ exit 0
+ }
+ trap monitor_cleanup TERM INT
+ while true; do
+ _append_system_resource_snapshot "${output_file}"
+ sleep "${interval}" &
+ monitor_sleep_pid="$!"
+ wait "${monitor_sleep_pid}"
+ monitor_sleep_pid=""
+ done
+ ) &
+ SYSTEM_RESOURCE_MONITOR_PID="$!"
+ export SYSTEM_RESOURCE_MONITOR_PID
+}
+
+stop_system_resource_monitor() {
+ if [[ -z "${SYSTEM_RESOURCE_MONITOR_PID:-}" ]]; then return 0; fi
+ if kill -0 "${SYSTEM_RESOURCE_MONITOR_PID}" 2>/dev/null; then
+ kill "${SYSTEM_RESOURCE_MONITOR_PID}" 2>/dev/null || true
+ wait "${SYSTEM_RESOURCE_MONITOR_PID}" 2>/dev/null || true
+ echo "INFO: stop system resource monitor,
pid=${SYSTEM_RESOURCE_MONITOR_PID}"
+ fi
+ if [[ -d "${DORIS_HOME:-}" ]]; then
+ _append_system_resource_snapshot
"${DORIS_HOME}/be/log/system_resource_usage.log"
+ fi
+ unset SYSTEM_RESOURCE_MONITOR_PID
+}
+
_redact_creds() {
local expr="" v escaped
for v in "${hwYunAk:-}" "${hwYunSk:-}" "${s3SourceAk:-}" "${s3SourceSk:-}"
"${txYunAk:-}" "${txYunSk:-}"; do
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]