This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new e755d64e62f [feature](be jvm monitor)append enable_jvm_monitor in be.conf to control jvm monitor. (#35608) (#35764)
e755d64e62f is described below

commit e755d64e62fa6c315c4a833a7dff62a59e2bce66
Author: Mingyu Chen <morning...@163.com>
AuthorDate: Sun Jun 2 00:18:44 2024 +0800

    [feature](be jvm monitor)append enable_jvm_monitor in be.conf to control jvm monitor. (#35608) (#35764)
    
    bp #35608
    
    Co-authored-by: daidai <2017501...@qq.com>
---
 be/src/common/config.cpp                       |   3 +
 be/src/common/config.h                         |   3 +
 be/src/util/jvm_metrics.cpp                    | 112 ++++++++++++++++++++-----
 be/src/util/jvm_metrics.h                      |  16 ++--
 regression-test/pipeline/external/conf/be.conf |   3 +
 regression-test/pipeline/p0/conf/be.conf       |   3 +
 regression-test/pipeline/p1/conf/be.conf       |   5 +-
 7 files changed, 116 insertions(+), 29 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 8d8cb8f222e..6d0c866cebe 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1222,6 +1222,9 @@ DEFINE_mInt32(thrift_client_open_num_tries, "1");
 
 DEFINE_mBool(ignore_schema_change_check, "false");
 
+//JVM monitoring enable. To prevent be from crashing due to jvm compatibility issues. The default setting is off.
+DEFINE_Bool(enable_jvm_monitor, "false");
+
 // clang-format off
 #ifdef BE_TEST
 // test s3
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 0334cb085c6..b172a3406d6 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1303,6 +1303,9 @@ DECLARE_mInt32(thrift_client_open_num_tries);
 
 DECLARE_mBool(ignore_schema_change_check);
 
+//JVM monitoring enable. To prevent be from crashing due to jvm compatibility issues.
+DECLARE_Bool(enable_jvm_monitor);
+
 #ifdef BE_TEST
 // test s3
 DECLARE_String(test_s3_resource);
diff --git a/be/src/util/jvm_metrics.cpp b/be/src/util/jvm_metrics.cpp
index e55cf8f3fbe..fc30d1073ac 100644
--- a/be/src/util/jvm_metrics.cpp
+++ b/be/src/util/jvm_metrics.cpp
@@ -17,10 +17,12 @@
 
 #include "jvm_metrics.h"
 
+#include <util/jni-util.h>
+
 #include <functional>
 
+#include "common/config.h"
 #include "util/metrics.h"
-
 namespace doris {
 
 #define DEFINE_JVM_SIZE_BYTES_METRIC(name, type)                               
      \
@@ -76,15 +78,28 @@ 
DEFINE_COUNTER_METRIC_PROTOTYPE_5ARG(jvm_gc_g1_old_generation_time_ms, MetricUni
 
 const char* JvmMetrics::_s_hook_name = "jvm_metrics";
 
-JvmMetrics::JvmMetrics(MetricRegistry* registry, JNIEnv* env) : 
_jvm_stats(env) {
+JvmMetrics::JvmMetrics(MetricRegistry* registry, JNIEnv* env) {
     DCHECK(registry != nullptr);
     _registry = registry;
 
     _server_entity = _registry->register_entity("server");
     DCHECK(_server_entity != nullptr);
-    if (_jvm_stats.init_complete()) {
+
+    do {
+        if (!doris::config::enable_jvm_monitor) {
+            break;
+        }
+        try {
+            _jvm_stats.init(env);
+        } catch (...) {
+            LOG(WARNING) << "JVM STATS INIT FAIL";
+            break;
+        }
+        if (!_jvm_stats.init_complete()) {
+            break;
+        }
         _server_entity->register_hook(_s_hook_name, 
std::bind(&JvmMetrics::update, this));
-    }
+    } while (false);
 
     INT_GAUGE_METRIC_REGISTER(_server_entity, jvm_heap_size_bytes_max);
     INT_GAUGE_METRIC_REGISTER(_server_entity, jvm_heap_size_bytes_committed);
@@ -117,11 +132,58 @@ JvmMetrics::JvmMetrics(MetricRegistry* registry, JNIEnv* 
env) : _jvm_stats(env)
 }
 
 void JvmMetrics::update() {
-    _jvm_stats.refresh(this);
+    static long fail_count = 0;
+    bool have_exception = false;
+    try {
+        _jvm_stats.refresh(this);
+    } catch (...) {
+        have_exception = true;
+        LOG(WARNING) << "JVM MONITOR UPDATE FAIL!";
+        fail_count++;
+    }
+
+    //When 30 consecutive exceptions occur, turn off jvm information collection.
+    if (!have_exception) {
+        fail_count = 0;
+    }
+    if (fail_count >= 30) {
+        LOG(WARNING) << "JVM MONITOR CLOSE!";
+        _jvm_stats.set_complete(false);
+        _server_entity->deregister_hook(_s_hook_name);
+
+        jvm_heap_size_bytes_max->set_value(0);
+        jvm_heap_size_bytes_committed->set_value(0);
+        jvm_heap_size_bytes_used->set_value(0);
+
+        jvm_non_heap_size_bytes_used->set_value(0);
+        jvm_non_heap_size_bytes_committed->set_value(0);
+
+        jvm_young_size_bytes_used->set_value(0);
+        jvm_young_size_bytes_peak_used->set_value(0);
+        jvm_young_size_bytes_max->set_value(0);
+
+        jvm_old_size_bytes_used->set_value(0);
+        jvm_old_size_bytes_peak_used->set_value(0);
+        jvm_old_size_bytes_max->set_value(0);
+
+        jvm_thread_count->set_value(0);
+        jvm_thread_peak_count->set_value(0);
+        jvm_thread_new_count->set_value(0);
+        jvm_thread_runnable_count->set_value(0);
+        jvm_thread_blocked_count->set_value(0);
+        jvm_thread_waiting_count->set_value(0);
+        jvm_thread_timed_waiting_count->set_value(0);
+        jvm_thread_terminated_count->set_value(0);
+
+        jvm_gc_g1_young_generation_count->set_value(0);
+        jvm_gc_g1_young_generation_time_ms->set_value(0);
+        jvm_gc_g1_old_generation_count->set_value(0);
+        jvm_gc_g1_old_generation_time_ms->set_value(0);
+    }
 }
-#include <util/jni-util.h>
 
-jvmStats::jvmStats(JNIEnv* ENV) : env(ENV) {
+void JvmStats::init(JNIEnv* ENV) {
+    env = ENV;
     _managementFactoryClass = 
env->FindClass("java/lang/management/ManagementFactory");
     if (_managementFactoryClass == nullptr) {
         LOG(WARNING)
@@ -244,15 +306,19 @@ jvmStats::jvmStats(JNIEnv* ENV) : env(ENV) {
     LOG(INFO) << "Start JVM monitoring.";
 
     _init_complete = true;
+    return;
 }
 
-#include "jni.h"
-
-void jvmStats::refresh(JvmMetrics* jvm_metrics) {
+void JvmStats::refresh(JvmMetrics* jvm_metrics) {
     if (!_init_complete) {
         return;
     }
-    static_cast<void>(JniUtil::GetJNIEnv(&env));
+
+    Status st = JniUtil::GetJNIEnv(&env);
+    if (!st.ok()) {
+        LOG(WARNING) << "JVM STATS GET JNI ENV FAIL";
+        return;
+    }
 
     jobject memoryMXBeanObj =
             env->CallStaticObjectMethod(_managementFactoryClass, 
_getMemoryMXBeanMethod);
@@ -302,8 +368,8 @@ void jvmStats::refresh(JvmMetrics* jvm_metrics) {
 
         jstring name =
                 (jstring)env->CallObjectMethod(memoryPoolMXBean, 
_getMemoryPollMXBeanNameMethod);
-        const char* nameStr = env->GetStringUTFChars(name, NULL);
-        if (nameStr != NULL) {
+        const char* nameStr = env->GetStringUTFChars(name, nullptr);
+        if (nameStr != nullptr) {
             auto it = _memoryPoolName.find(nameStr);
             if (it == _memoryPoolName.end()) {
                 continue;
@@ -408,16 +474,22 @@ void jvmStats::refresh(JvmMetrics* jvm_metrics) {
     env->DeleteLocalRef(threadMXBean);
     env->DeleteLocalRef(gcMXBeansList);
 }
-jvmStats::~jvmStats() {
+JvmStats::~JvmStats() {
     if (!_init_complete) {
         return;
     }
-    env->DeleteLocalRef(_newThreadStateObj);
-    env->DeleteLocalRef(_runnableThreadStateObj);
-    env->DeleteLocalRef(_blockedThreadStateObj);
-    env->DeleteLocalRef(_waitingThreadStateObj);
-    env->DeleteLocalRef(_timedWaitingThreadStateObj);
-    env->DeleteLocalRef(_terminatedThreadStateObj);
+    try {
+        env->DeleteLocalRef(_newThreadStateObj);
+        env->DeleteLocalRef(_runnableThreadStateObj);
+        env->DeleteLocalRef(_blockedThreadStateObj);
+        env->DeleteLocalRef(_waitingThreadStateObj);
+        env->DeleteLocalRef(_timedWaitingThreadStateObj);
+        env->DeleteLocalRef(_terminatedThreadStateObj);
+
+    } catch (...) {
+        // When be is killed, DeleteLocalRef may fail.
+        // In order to exit more gracefully, we catch the exception here.
+    }
 }
 
 } // namespace doris
diff --git a/be/src/util/jvm_metrics.h b/be/src/util/jvm_metrics.h
index 5f9929d8cf0..459a3cbf938 100644
--- a/be/src/util/jvm_metrics.h
+++ b/be/src/util/jvm_metrics.h
@@ -17,8 +17,6 @@
 
 #pragma once
 
-#include <jni.h>
-
 #include "jni.h"
 #include "util/jni-util.h"
 #include "util/metrics.h"
@@ -27,7 +25,7 @@ namespace doris {
 
 class JvmMetrics;
 
-class jvmStats {
+class JvmStats {
 private:
     JNIEnv* env = nullptr;
     jclass _managementFactoryClass = nullptr;
@@ -98,16 +96,18 @@ private:
     bool _init_complete = false;
 
 public:
-    jvmStats(JNIEnv* ENV);
-    bool init_complete() { return _init_complete; }
+    //    JvmStats(JNIEnv* ENV);
+    void init(JNIEnv* ENV);
+    bool init_complete() const { return _init_complete; }
+    void set_complete(bool val) { _init_complete = val; }
     void refresh(JvmMetrics* jvm_metrics);
-    ~jvmStats();
+    ~JvmStats();
 };
 
 class JvmMetrics {
 public:
     JvmMetrics(MetricRegistry* registry, JNIEnv* env);
-    ~JvmMetrics() {}
+    ~JvmMetrics() = default;
     void update();
 
     IntGauge* jvm_heap_size_bytes_max = nullptr;
@@ -140,7 +140,7 @@ public:
     IntGauge* jvm_gc_g1_old_generation_time_ms = nullptr;
 
 private:
-    jvmStats _jvm_stats;
+    JvmStats _jvm_stats;
     std::shared_ptr<MetricEntity> _server_entity;
     static const char* _s_hook_name;
     MetricRegistry* _registry = nullptr;
diff --git a/regression-test/pipeline/external/conf/be.conf b/regression-test/pipeline/external/conf/be.conf
index 9a5b3641b84..2bd810e55cc 100644
--- a/regression-test/pipeline/external/conf/be.conf
+++ b/regression-test/pipeline/external/conf/be.conf
@@ -70,3 +70,6 @@ fragment_pool_thread_num_max=5000
 enable_fuzzy_mode=true
 enable_set_in_bitmap_value=true
 enable_feature_binlog=true
+
+enable_jvm_monitor = true
+
diff --git a/regression-test/pipeline/p0/conf/be.conf b/regression-test/pipeline/p0/conf/be.conf
index 15f19ec4f42..b5d6944acae 100644
--- a/regression-test/pipeline/p0/conf/be.conf
+++ b/regression-test/pipeline/p0/conf/be.conf
@@ -82,3 +82,6 @@ user_files_secure_path=/
 enable_debug_points=true
 # debug scanner context dead loop
 enable_debug_log_timeout_secs=0
+
+enable_jvm_monitor = true
+
diff --git a/regression-test/pipeline/p1/conf/be.conf b/regression-test/pipeline/p1/conf/be.conf
index e1ae9653c78..0c450c9281e 100644
--- a/regression-test/pipeline/p1/conf/be.conf
+++ b/regression-test/pipeline/p1/conf/be.conf
@@ -71,4 +71,7 @@ fragment_pool_thread_num_max=5000
 enable_fuzzy_mode=true
 enable_set_in_bitmap_value=true
 enable_feature_binlog=true
-max_sys_mem_available_low_water_mark_bytes=69206016
\ No newline at end of file
+max_sys_mem_available_low_water_mark_bytes=69206016
+
+enable_jvm_monitor = true
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to