[doris] branch master updated: remove gc and fix print (#12682)

dataroaring Fri, 16 Sep 2022 09:16:33 -0700

This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 42b6532131 remove gc and fix print (#12682)
42b6532131 is described below

commit 42b6532131487b3fa218c388330750a2a8616809
Author: Xinyi Zou <zouxiny...@gmail.com>
AuthorDate: Sat Sep 17 00:16:15 2022 +0800

    remove gc and fix print (#12682)
---
 be/src/common/daemon.cpp                           |  1 +
 be/src/runtime/disk_io_mgr.cc                      |  6 --
 be/src/runtime/memory/mem_tracker.cpp              |  9 ++-
 be/src/runtime/memory/mem_tracker_limiter.cpp      | 89 +++++++---------------
 be/src/runtime/memory/mem_tracker_limiter.h        | 73 ++++++------------
 be/src/service/doris_main.cpp                      |  2 +
 docs/en/docs/admin-manual/data-admin/backup.md     |  2 +-
 .../maint-monitor/tablet-repair-and-balance.md     |  4 +-
 docs/en/docs/data-operate/export/export-manual.md  |  2 +-
 docs/zh-CN/docs/admin-manual/data-admin/backup.md  |  2 +-
 .../maint-monitor/tablet-repair-and-balance.md     |  2 +-
 .../docs/data-operate/export/export-manual.md      |  2 +-
 12 files changed, 67 insertions(+), 127 deletions(-)

diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp
index 4fb1abe628..64f1e509fb 100644
--- a/be/src/common/daemon.cpp
+++ b/be/src/common/daemon.cpp
@@ -68,6 +68,7 @@ namespace doris {
 bool k_doris_exit = false;
 
 void Daemon::tcmalloc_gc_thread() {
+    // TODO All cache GC wish to be supported
     while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(10))) {
         size_t used_size = 0;
         size_t free_size = 0;
diff --git a/be/src/runtime/disk_io_mgr.cc b/be/src/runtime/disk_io_mgr.cc
index e56c4bff69..1f506d82b1 100644
--- a/be/src/runtime/disk_io_mgr.cc
+++ b/be/src/runtime/disk_io_mgr.cc
@@ -348,12 +348,6 @@ DiskIoMgr::~DiskIoMgr() {
 
 Status DiskIoMgr::init(const int64_t mem_limit) {
     _mem_tracker = std::make_unique<MemTrackerLimiter>(mem_limit, "DiskIO");
-    // If we hit the process limit, see if we can reclaim some memory by removing
-    // previously allocated (but unused) io buffers.
-    // TODO(zxy) After clearing the free buffer, how much impact will it have on subsequent
-    // queries may need to be verified.
-    ExecEnv::GetInstance()->process_mem_tracker()->add_gc_function(
-            std::bind<void>(&DiskIoMgr::gc_io_buffers, this, std::placeholders::_1));
 
     for (int i = 0; i < _disk_queues.size(); ++i) {
         _disk_queues[i] = new DiskQueue(i);
diff --git a/be/src/runtime/memory/mem_tracker.cpp b/be/src/runtime/memory/mem_tracker.cpp
index b282b87758..caf2325386 100644
--- a/be/src/runtime/memory/mem_tracker.cpp
+++ b/be/src/runtime/memory/mem_tracker.cpp
@@ -104,10 +104,11 @@ void MemTracker::make_group_snapshot(std::vector<MemTracker::Snapshot>* snapshot
 }
 
 std::string MemTracker::log_usage(MemTracker::Snapshot snapshot) {
-    return fmt::format("MemTracker Label={}, Parent Label={}, Used={}, Peak={}", snapshot.label,
-                       snapshot.parent,
-                       PrettyPrinter::print(snapshot.cur_consumption, TUnit::BYTES),
-                       PrettyPrinter::print(snapshot.peak_consumption, TUnit::BYTES));
+    return fmt::format(
+            "MemTracker Label={}, Parent Label={}, Used={}({} B), Peak={}({} B)", snapshot.label,
+            snapshot.parent, PrettyPrinter::print(snapshot.cur_consumption, TUnit::BYTES),
+            snapshot.cur_consumption, PrettyPrinter::print(snapshot.peak_consumption, TUnit::BYTES),
+            snapshot.peak_consumption);
 }
 
 static std::unordered_map<std::string, std::shared_ptr<MemTracker>> global_mem_trackers;
diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp 
b/be/src/runtime/memory/mem_tracker_limiter.cpp
index 6bc7f95c5e..e09e1493c8 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.cpp
+++ b/be/src/runtime/memory/mem_tracker_limiter.cpp
@@ -50,6 +50,8 @@ MemTrackerLimiter::MemTrackerLimiter(int64_t byte_limit, 
const std::string& labe
     MemTrackerLimiter* tracker = this;
     while (tracker != nullptr) {
         _all_ancestors.push_back(tracker);
+        // Process tracker does not participate in the process memory limit, process tracker consumption is virtual memory,
+        // and there is a diff between the real physical memory value of the process. It is replaced by check_sys_mem_info.
         if (tracker->has_limit() && tracker->label() != "Process")
             _limited_ancestors.push_back(tracker);
         tracker = tracker->_parent.get();
@@ -124,42 +126,6 @@ int64_t MemTrackerLimiter::get_lowest_limit() const {
     return min_limit;
 }
 
-bool MemTrackerLimiter::gc_memory(int64_t max_consumption) {
-    if (max_consumption < 0) return true;
-    std::lock_guard<std::mutex> l(_gc_lock);
-    int64_t pre_gc_consumption = consumption();
-    // Check if someone gc'd before us
-    if (pre_gc_consumption < max_consumption) return false;
-
-    int64_t curr_consumption = pre_gc_consumption;
-    // Free some extra memory to avoid frequent GC, 4M is an empirical value, 
maybe it will be tested later.
-    const int64_t EXTRA_BYTES_TO_FREE = 4L * 1024L * 1024L * 1024L;
-    // Try to free up some memory
-    for (int i = 0; i < _gc_functions.size(); ++i) {
-        // Try to free up the amount we are over plus some extra so that we 
don't have to
-        // immediately GC again. Don't free all the memory since that can be 
unnecessarily
-        // expensive.
-        int64_t bytes_to_free = curr_consumption - max_consumption + 
EXTRA_BYTES_TO_FREE;
-        _gc_functions[i](bytes_to_free);
-        curr_consumption = consumption();
-        if (max_consumption - curr_consumption <= EXTRA_BYTES_TO_FREE) break;
-    }
-
-    return curr_consumption > max_consumption;
-}
-
-Status MemTrackerLimiter::try_gc_memory(int64_t bytes) {
-    if (UNLIKELY(gc_memory(_limit - bytes))) {
-        return Status::MemoryLimitExceeded(fmt::format(
-                "failed_alloc_size={} B, exceeded_tracker={}, limit={} B, 
peak_used={} B, "
-                "current_used={} B",
-                bytes, label(), _limit, _consumption->value(), 
_consumption->current_value()));
-    }
-    VLOG_NOTICE << "GC succeeded, TryConsume bytes=" << bytes
-                << " consumption=" << _consumption->current_value() << " 
limit=" << _limit;
-    return Status::OK();
-}
-
 // Calling this on the query tracker results in output like:
 //
 //  Query(4a4c81fedaed337d:4acadfda00000000) Limit=10.00 GB Total=508.28 MB 
Peak=508.45 MB
@@ -186,10 +152,10 @@ std::string MemTrackerLimiter::log_usage(int 
max_recursive_depth, int64_t* logge
     int64_t peak_consumption = _consumption->value();
     if (logged_consumption != nullptr) *logged_consumption = curr_consumption;
 
-    std::string detail = "MemTrackerLimiter Label={}, Limit={}, Used={}, 
Peak={}, Exceeded={}";
-    detail = fmt::format(detail, _label, PrettyPrinter::print(_limit, 
TUnit::BYTES),
-                         PrettyPrinter::print(curr_consumption, TUnit::BYTES),
-                         PrettyPrinter::print(peak_consumption, TUnit::BYTES),
+    std::string detail =
+            "MemTrackerLimiter Label={}, Limit={}({} B), Used={}({} B), 
Peak={}({} B), Exceeded={}";
+    detail = fmt::format(detail, _label, print_bytes(_limit), _limit, 
print_bytes(curr_consumption),
+                         curr_consumption, print_bytes(peak_consumption), 
peak_consumption,
                          limit_exceeded() ? "true" : "false");
 
     // This call does not need the children, so return early.
@@ -223,17 +189,15 @@ std::string MemTrackerLimiter::log_usage(int 
max_recursive_depth,
         if (!usage_string.empty()) usage_strings.push_back(usage_string);
         *logged_consumption += tracker_consumption;
     }
-    return join(usage_strings, "\n");
+    return usage_strings.size() == 0 ? "" : "\n    " + join(usage_strings, "\n    ");
 }
 
 Status MemTrackerLimiter::mem_limit_exceeded_construct(const std::string& msg) 
{
     std::string detail = fmt::format(
-            "{}, backend {} process memory used {}, process limit {}. If is query, can "
-            "change the limit "
-            "by `set exec_mem_limit=xxx`, details mem usage see be.INFO.",
-            msg, BackendOptions::get_localhost(),
-            PrettyPrinter::print(PerfCounters::get_vm_rss(), TUnit::BYTES),
-            PrettyPrinter::print(MemInfo::mem_limit(), TUnit::BYTES));
+            "{}. backend {} process memory used {}, limit {}. If query tracker exceed, `set "
+            "exec_mem_limit=8G` to change limit, details mem usage see be.INFO.",
+            msg, BackendOptions::get_localhost(), print_bytes(PerfCounters::get_vm_rss()),
+            print_bytes(MemInfo::mem_limit()));
     return Status::MemoryLimitExceeded(detail);
 }
 
@@ -258,7 +222,7 @@ void MemTrackerLimiter::print_log_usage(const std::string& 
msg) {
 Status MemTrackerLimiter::mem_limit_exceeded(const std::string& msg,
                                              int64_t failed_allocation_size) {
     STOP_CHECK_THREAD_MEM_TRACKER_LIMIT();
-    std::string detail = fmt::format("Memory limit 
exceeded:<consuming_tracker={}, ", _label);
+    std::string detail = fmt::format("Memory limit exceeded:<consuming 
tracker:<{}>, ", _label);
     MemTrackerLimiter* exceeded_tracker = nullptr;
     MemTrackerLimiter* max_consumption_tracker = nullptr;
     int64_t free_size = INT64_MAX;
@@ -281,28 +245,29 @@ Status MemTrackerLimiter::mem_limit_exceeded(const 
std::string& msg,
     MemTrackerLimiter* print_log_usage_tracker = nullptr;
     if (exceeded_tracker != nullptr) {
         detail += fmt::format(
-                "failed_alloc_size={} B, exceeded_tracker={}, limit={} B, 
peak_used={} B, "
-                "current_used={} B>, executing_msg:<{}>",
-                PrettyPrinter::print(failed_allocation_size, TUnit::BYTES),
-                exceeded_tracker->label(), exceeded_tracker->limit(),
-                exceeded_tracker->peak_consumption(), 
exceeded_tracker->consumption(), msg);
+                "failed alloc size {}, exceeded tracker:<{}>, limit {}, peak 
used {}, "
+                "current used {}>, executing msg:<{}>",
+                print_bytes(failed_allocation_size), exceeded_tracker->label(),
+                print_bytes(exceeded_tracker->limit()),
+                print_bytes(exceeded_tracker->peak_consumption()),
+                print_bytes(exceeded_tracker->consumption()), msg);
         print_log_usage_tracker = exceeded_tracker;
     } else if (!sys_exceed_st) {
-        detail += fmt::format("{}>, executing_msg:<{}>", 
sys_exceed_st.get_error_msg(), msg);
+        detail += fmt::format("{}>, executing msg:<{}>", 
sys_exceed_st.get_error_msg(), msg);
     } else if (max_consumption_tracker != nullptr) {
         // must after check_sys_mem_info false
         detail += fmt::format(
-                "failed_alloc_size={} B, max_consumption_tracker={}, limit={} 
B, peak_used={} B, "
-                "current_used={} B>, executing_msg:<{}>",
-                PrettyPrinter::print(failed_allocation_size, TUnit::BYTES),
-                max_consumption_tracker->label(), 
max_consumption_tracker->limit(),
-                max_consumption_tracker->peak_consumption(), 
max_consumption_tracker->consumption(),
-                msg);
+                "failed alloc size {}, max consumption tracker:<{}>, limit {}, 
peak used {}, "
+                "current used {}>, executing msg:<{}>",
+                print_bytes(failed_allocation_size), 
max_consumption_tracker->label(),
+                print_bytes(max_consumption_tracker->limit()),
+                print_bytes(max_consumption_tracker->peak_consumption()),
+                print_bytes(max_consumption_tracker->consumption()), msg);
         print_log_usage_tracker = max_consumption_tracker;
     } else {
         // The limit of the current tracker and parents is less than 0, the 
consume will not fail,
         // and the current process memory has no excess limit.
-        detail += fmt::format("unknown exceed reason, executing_msg:<{}>", 
msg);
+        detail += fmt::format("unknown exceed reason, executing msg:<{}>", 
msg);
         print_log_usage_tracker = 
ExecEnv::GetInstance()->process_mem_tracker_raw();
     }
     auto st = MemTrackerLimiter::mem_limit_exceeded_construct(detail);
@@ -316,7 +281,7 @@ Status MemTrackerLimiter::mem_limit_exceeded(const 
std::string& msg,
                                              Status failed_try_consume_st) {
     STOP_CHECK_THREAD_MEM_TRACKER_LIMIT();
     std::string detail =
-            fmt::format("Memory limit exceeded:<consuming_tracker={}, {}>, 
executing_msg:<{}>",
+            fmt::format("Memory limit exceeded:<consuming tracker:<{}>, {}>, 
executing msg:<{}>",
                         _label, failed_try_consume_st.get_error_msg(), msg);
     auto st = MemTrackerLimiter::mem_limit_exceeded_construct(detail);
     failed_tracker->print_log_usage(st.get_error_msg());
diff --git a/be/src/runtime/memory/mem_tracker_limiter.h 
b/be/src/runtime/memory/mem_tracker_limiter.h
index 7c79442261..5b23d4e0d6 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.h
+++ b/be/src/runtime/memory/mem_tracker_limiter.h
@@ -71,8 +71,9 @@ public:
         // for fast, expect MemInfo::initialized() to be true.
         if (PerfCounters::get_vm_rss() + bytes >= MemInfo::mem_limit()) {
             auto st = Status::MemoryLimitExceeded(
-                    "process memory used {} B, exceed limit {} B, failed_alloc_size={} B",
-                    PerfCounters::get_vm_rss(), MemInfo::mem_limit(), bytes);
+                    "process memory used {}, exceed limit {}, failed alloc size {}",
+                    print_bytes(PerfCounters::get_vm_rss()), print_bytes(MemInfo::mem_limit()),
+                    print_bytes(bytes));
             ExecEnv::GetInstance()->process_mem_tracker_raw()->print_log_usage(st.get_error_msg());
             return st;
         }
@@ -108,21 +109,6 @@ public:
     // Returns the lowest limit for this tracker limiter and its ancestors. 
Returns -1 if there is no limit.
     int64_t get_lowest_limit() const;
 
-    typedef std::function<void(int64_t bytes_to_free)> GcFunction;
-    // Add a function 'f' to be called if the limit is reached, if none of the 
other
-    // previously-added GC functions were successful at freeing up enough 
memory.
-    // 'f' does not need to be thread-safe as long as it is added to only one 
tracker limiter.
-    // Note that 'f' must be valid for the lifetime of this tracker limiter.
-    void add_gc_function(GcFunction f) { _gc_functions.push_back(f); }
-
-    // TODO Should be managed in a separate process_mem_mgr, not in MemTracker
-    // If consumption is higher than max_consumption, attempts to free memory 
by calling
-    // any added GC functions.  Returns true if max_consumption is still 
exceeded. Takes gc_lock.
-    // Note: If the cache of segment/chunk is released due to insufficient 
query memory at a certain moment,
-    // the performance of subsequent queries may be degraded, so the use of gc 
function should be careful enough.
-    bool gc_memory(int64_t max_consumption);
-    Status try_gc_memory(int64_t bytes);
-
 public:
     // up to (but not including) end_tracker.
     // This happens when we want to update tracking on a particular mem 
tracker but the consumption
@@ -147,6 +133,7 @@ public:
     // Limiting the recursive depth reduces the cost of dumping, particularly
     // for the process tracker limiter.
     std::string log_usage(int max_recursive_depth = INT_MAX, int64_t* 
logged_consumption = nullptr);
+    void print_log_usage(const std::string& msg);
 
     // Log the memory usage when memory limit is exceeded and return a status 
object with
     // msg of the allocation which caused the limit to be exceeded.
@@ -169,6 +156,10 @@ public:
         return msg.str();
     }
 
+    static std::string print_bytes(int64_t bytes) {
+        return fmt::format("{}", PrettyPrinter::print(bytes, TUnit::BYTES));
+    }
+
 private:
     // The following func, for automatic memory tracking and limiting based on 
system memory allocation.
     friend class ThreadMemTrackerMgr;
@@ -198,7 +189,6 @@ private:
                                  int64_t* logged_consumption);
 
     static Status mem_limit_exceeded_construct(const std::string& msg);
-    void print_log_usage(const std::string& msg);
 
 private:
     // Limit on memory consumption, in bytes. If limit_ == -1, there is no 
consumption limit. Used in log_usage。
@@ -230,19 +220,6 @@ private:
     std::atomic_size_t _had_child_count = 0;
 
     bool _print_log_usage = false;
-
-    // Lock to protect gc_memory(). This prevents many GCs from occurring at 
once.
-    std::mutex _gc_lock;
-    // Functions to call after the limit is reached to free memory.
-    // GcFunctions can be attached to a MemTracker in order to free up memory 
if the limit is
-    // reached. If limit_exceeded() is called and the limit is exceeded, it 
will first call
-    // the GcFunctions to try to free memory and recheck the limit. For 
example, the process
-    // tracker has a GcFunction that releases any unused memory still held by 
tcmalloc, so
-    // this will be called before the process limit is reported as exceeded. 
GcFunctions are
-    // called in the order they are added, so expensive functions should be 
added last.
-    // GcFunctions are called with a global lock held, so should be 
non-blocking and not
-    // call back into MemTrackers, except to release memory.
-    std::vector<GcFunction> _gc_functions;
 };
 
 inline void MemTrackerLimiter::consume(int64_t bytes) {
@@ -286,19 +263,17 @@ inline Status MemTrackerLimiter::try_consume(int64_t 
bytes) {
         if (tracker->limit() < 0 || tracker->label() == "Process") {
             tracker->_consumption->add(bytes); // No limit at this tracker.
         } else {
-            // If TryConsume fails, we can try to GC, but we may need to try 
several times if
-            // there are concurrent consumers because we don't take a lock 
before trying to
-            // update _consumption.
-            while (true) {
-                if (LIKELY(tracker->_consumption->try_add(bytes, 
tracker->limit()))) break;
-                Status st = tracker->try_gc_memory(bytes);
-                if (!st) {
-                    // Failed for this mem tracker. Roll back the ones that 
succeeded.
-                    for (int j = _all_ancestors.size() - 1; j > i; --j) {
-                        _all_ancestors[j]->_consumption->add(-bytes);
-                    }
-                    return st;
+            if (!tracker->_consumption->try_add(bytes, tracker->limit())) {
+                // Failed for this mem tracker. Roll back the ones that succeeded.
+                for (int j = _all_ancestors.size() - 1; j > i; --j) {
+                    _all_ancestors[j]->_consumption->add(-bytes);
                 }
+                return Status::MemoryLimitExceeded(fmt::format(
+                        "failed alloc size {}, exceeded tracker:<{}>, limit {}, peak "
+                        "used {}, current used {}",
+                        print_bytes(bytes), tracker->label(), print_bytes(tracker->limit()),
+                        print_bytes(tracker->_consumption->value()),
+                        print_bytes(tracker->_consumption->current_value())));
             }
         }
     }
@@ -314,11 +289,13 @@ inline Status MemTrackerLimiter::check_limit(int64_t 
bytes) {
     // Walk the tracker tree top-down.
     for (i = _limited_ancestors.size() - 1; i >= 0; --i) {
         MemTrackerLimiter* tracker = _limited_ancestors[i];
-        // Process tracker does not participate in the process memory limit, 
process tracker consumption is virtual memory,
-        // and there is a diff between the real physical memory value of the 
process. It is replaced by check_sys_mem_info.
-        while (true) {
-            if (LIKELY(tracker->_consumption->current_value() + bytes < 
tracker->limit())) break;
-            RETURN_IF_ERROR(tracker->try_gc_memory(bytes));
+        if (tracker->_consumption->current_value() + bytes > tracker->limit()) {
+            return Status::MemoryLimitExceeded(
+                    fmt::format("expected alloc size {}, exceeded tracker:<{}>, limit {}, peak "
+                                "used {}, current used {}",
+                                print_bytes(bytes), tracker->label(), print_bytes(tracker->limit()),
+                                print_bytes(tracker->_consumption->value()),
+                                print_bytes(tracker->_consumption->current_value())));
         }
     }
     return Status::OK();
diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp
index 9dc3f13ef6..3ef0b5b07a 100644
--- a/be/src/service/doris_main.cpp
+++ b/be/src/service/doris_main.cpp
@@ -504,6 +504,8 @@ int main(int argc, char** argv) {
         // this will cause coredump for ASAN build when running regression test,
         // disable temporarily.
         doris::ExecEnv::GetInstance()->task_pool_mem_tracker_registry()->logout_task_mem_tracker();
+        // The process tracker print log usage interval is 1s to avoid a large number of tasks being
+        // canceled when the process exceeds the mem limit, resulting in too many duplicate logs.
         doris::ExecEnv::GetInstance()->process_mem_tracker_raw()->enable_print_log_usage();
         sleep(1);
     }
diff --git a/docs/en/docs/admin-manual/data-admin/backup.md 
b/docs/en/docs/admin-manual/data-admin/backup.md
index dc11fb55c7..4edf5860c1 100644
--- a/docs/en/docs/admin-manual/data-admin/backup.md
+++ b/docs/en/docs/admin-manual/data-admin/backup.md
@@ -162,7 +162,7 @@ It is recommended to import the new and old clusters in 
parallel for a period of
 1. Operations related to backup and recovery are currently only allowed to be 
performed by users with ADMIN privileges.
 2. Within a database, only one backup or restore job is allowed to be executed.
 3. Both backup and recovery support operations at the minimum partition 
(Partition) level. When the amount of data in the table is large, it is 
recommended to perform operations by partition to reduce the cost of failed 
retry.
-4. Because of the backup and restore operations, the operations are the actual 
data files. Therefore, when a table has too many shards, or a shard has too 
many small versions, it may take a long time to backup or restore even if the 
total amount of data is small. Users can use `SHOW PARTITIONS FROM table_name;` 
and `SHOW TABLET FROM table_name;` to view the number of shards in each 
partition and the number of file versions in each shard to estimate job 
execution time. The number of files [...]
+4. Because of the backup and restore operations, the operations are the actual 
data files. Therefore, when a table has too many shards, or a shard has too 
many small versions, it may take a long time to backup or restore even if the 
total amount of data is small. Users can use `SHOW PARTITIONS FROM table_name;` 
and `SHOW TABLETS FROM table_name;` to view the number of shards in each 
partition and the number of file versions in each shard to estimate job 
execution time. The number of file [...]
 5. When checking job status via `SHOW BACKUP` or `SHOW RESTORE` command. It is 
possible to see error messages in the `TaskErrMsg` column. But as long as the 
`State` column is not `CANCELLED`, the job is still continuing. These tasks may 
retry successfully. Of course, some Task errors will also directly cause the 
job to fail.
    Common `TaskErrMsg` errors are as follows:
       Q1: Backup to HDFS, the status shows UPLOADING, TaskErrMsg error 
message: [13333: Close broker writer failed, 
broker:TNetworkAddress(hostname=10.10.0.0, port=8000) msg:errors while close 
file output stream, cause by: DataStreamer Exception : ]
diff --git 
a/docs/en/docs/admin-manual/maint-monitor/tablet-repair-and-balance.md 
b/docs/en/docs/admin-manual/maint-monitor/tablet-repair-and-balance.md
index abb88053b6..51d381def5 100644
--- a/docs/en/docs/admin-manual/maint-monitor/tablet-repair-and-balance.md
+++ b/docs/en/docs/admin-manual/maint-monitor/tablet-repair-and-balance.md
@@ -319,7 +319,7 @@ Tablet state view mainly looks at the state of the tablet, 
as well as the state
 
        ` The ADMIN SHOW REPLICA STATUS `command is mainly used to view the 
health status of copies. Users can also view additional information about 
copies of a specified table by using the following commands:
 
-       `SHOW TABLET FROM tbl1;`
+       `SHOW TABLETS FROM tbl1;`
 
     ```
        
+----------+-----------+-----------+------------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------------+--------------+----------------------+----------------------+----------------------+
@@ -385,7 +385,7 @@ Tablet state view mainly looks at the state of the tablet, 
as well as the state
        
+-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+
     ```
    
-       The figure above shows all replicas of the corresponding Tablet. The 
content shown here is the same as `SHOW TABLET FROM tbl1;`. But here you can 
clearly see the status of all copies of a specific Tablet.
+       The figure above shows all replicas of the corresponding Tablet. The 
content shown here is the same as `SHOW TABLETS FROM tbl1;`. But here you can 
clearly see the status of all copies of a specific Tablet.
 
 ### Duplicate Scheduling Task
 
diff --git a/docs/en/docs/data-operate/export/export-manual.md 
b/docs/en/docs/data-operate/export/export-manual.md
index c38208963e..529bf9e91d 100644
--- a/docs/en/docs/data-operate/export/export-manual.md
+++ b/docs/en/docs/data-operate/export/export-manual.md
@@ -204,7 +204,7 @@ FinishTime: 2019-06-25 17:08:34
 
 ### Splitting Query Plans
 
-How many query plans need to be executed for an Export job depends on the 
total number of Tablets and how many Tablets can be allocated for a query plan 
at most. Since multiple query plans are executed serially, the execution time 
of jobs can be reduced if more fragments are processed by one query plan. 
However, if the query plan fails (e.g., the RPC fails to call Broker, the 
remote storage jitters, etc.), too many tablets can lead to a higher retry cost 
of a query plan. Therefore, it is [...]
+How many query plans need to be executed for an Export job depends on the 
total number of Tablets and how many Tablets can be allocated for a query plan 
at most. Since multiple query plans are executed serially, the execution time 
of jobs can be reduced if more fragments are processed by one query plan. 
However, if the query plan fails (e.g., the RPC fails to call Broker, the 
remote storage jitters, etc.), too many tablets can lead to a higher retry cost 
of a query plan. Therefore, it is [...]
 
 ### exec\_mem\_limit
 
diff --git a/docs/zh-CN/docs/admin-manual/data-admin/backup.md 
b/docs/zh-CN/docs/admin-manual/data-admin/backup.md
index 7c3ebc26b3..fd577d5753 100644
--- a/docs/zh-CN/docs/admin-manual/data-admin/backup.md
+++ b/docs/zh-CN/docs/admin-manual/data-admin/backup.md
@@ -162,7 +162,7 @@ BACKUP的更多用法可参考 [这里](../../sql-manual/sql-reference/Data-Defi
 1. 备份恢复相关的操作目前只允许拥有 ADMIN 权限的用户执行。
 2. 一个 Database 内，只允许有一个正在执行的备份或恢复作业。
 3. 备份和恢复都支持最小分区（Partition）级别的操作，当表的数据量很大时，建议按分区分别执行，以降低失败重试的代价。
-4. 
因为备份恢复操作，操作的都是实际的数据文件。所以当一个表的分片过多，或者一个分片有过多的小版本时，可能即使总数据量很小，依然需要备份或恢复很长时间。用户可以通过
 `SHOW PARTITIONS FROM table_name;` 和 `SHOW TABLET FROM table_name;` 
来查看各个分区的分片数量，以及各个分片的文件版本数量，来预估作业执行时间。文件数量对作业执行的时间影响非常大，所以建议在建表时，合理规划分区分桶，以避免过多的分片。
+4. 
因为备份恢复操作，操作的都是实际的数据文件。所以当一个表的分片过多，或者一个分片有过多的小版本时，可能即使总数据量很小，依然需要备份或恢复很长时间。用户可以通过
 `SHOW PARTITIONS FROM table_name;` 和 `SHOW TABLETS FROM table_name;` 
来查看各个分区的分片数量，以及各个分片的文件版本数量，来预估作业执行时间。文件数量对作业执行的时间影响非常大，所以建议在建表时，合理规划分区分桶，以避免过多的分片。
 5. 当通过 `SHOW BACKUP` 或者 `SHOW RESTORE` 命令查看作业状态时。有可能会在 `TaskErrMsg` 
一列中看到错误信息。但只要 `State` 列不为 `CANCELLED`，则说明作业依然在继续。这些 Task 有可能会重试成功。当然，有些 Task 
错误，也会直接导致作业失败。
    常见的`TaskErrMsg`错误如下：
       Q1：备份到HDFS，状态显示UPLOADING，TaskErrMsg 错误信息：[13333: Close broker writer 
failed, broker:TNetworkAddress(hostname=10.10.0.0,port=8000) msg:errors while 
close file output stream, cause by: DataStreamer Exception: ]
diff --git 
a/docs/zh-CN/docs/admin-manual/maint-monitor/tablet-repair-and-balance.md 
b/docs/zh-CN/docs/admin-manual/maint-monitor/tablet-repair-and-balance.md
index c5e0a174e1..949db24aa5 100644
--- a/docs/zh-CN/docs/admin-manual/maint-monitor/tablet-repair-and-balance.md
+++ b/docs/zh-CN/docs/admin-manual/maint-monitor/tablet-repair-and-balance.md
@@ -383,7 +383,7 @@ TabletScheduler 在每轮调度时，都会通过 LoadBalancer 来选择一定
     
+-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+----------+------------------+
 
     ```
 
-    上图显示了对应 Tablet 的所有副本情况。这里显示的内容和 `SHOW TABLET FROM tbl1;` 
的内容相同。但这里可以清楚的知道，一个具体的 Tablet 的所有副本的状态。
+    上图显示了对应 Tablet 的所有副本情况。这里显示的内容和 `SHOW TABLETS FROM tbl1;` 
的内容相同。但这里可以清楚的知道，一个具体的 Tablet 的所有副本的状态。
 
 ### 副本调度任务
 
diff --git a/docs/zh-CN/docs/data-operate/export/export-manual.md 
b/docs/zh-CN/docs/data-operate/export/export-manual.md
index 5ef1bfc974..57fdbed375 100644
--- a/docs/zh-CN/docs/data-operate/export/export-manual.md
+++ b/docs/zh-CN/docs/data-operate/export/export-manual.md
@@ -194,7 +194,7 @@ FinishTime: 2019-06-25 17:08:34
 
 ### 查询计划的拆分
 
-一个 Export 作业有多少查询计划需要执行，取决于总共有多少 Tablet，以及一个查询计划最多可以分配多少个 
Tablet。因为多个查询计划是串行执行的，所以如果让一个查询计划处理更多的分片，则可以减少作业的执行时间。但如果查询计划出错（比如调用 Broker 的 
RPC 失败，远端存储出现抖动等），过多的 Tablet 
会导致一个查询计划的重试成本变高。所以需要合理安排查询计划的个数以及每个查询计划所需要扫描的分片数，在执行时间和执行成功率之间做出平衡。一般建议一个查询计划扫描的数据量在
 3-5 GB内（一个表的 Tablet 的大小以及个数可以通过 `SHOW TABLET FROM tbl_name;` 语句查看。）。
+一个 Export 作业有多少查询计划需要执行，取决于总共有多少 Tablet，以及一个查询计划最多可以分配多少个 
Tablet。因为多个查询计划是串行执行的，所以如果让一个查询计划处理更多的分片，则可以减少作业的执行时间。但如果查询计划出错（比如调用 Broker 的 
RPC 失败，远端存储出现抖动等），过多的 Tablet 
会导致一个查询计划的重试成本变高。所以需要合理安排查询计划的个数以及每个查询计划所需要扫描的分片数，在执行时间和执行成功率之间做出平衡。一般建议一个查询计划扫描的数据量在
 3-5 GB内（一个表的 Tablet 的大小以及个数可以通过 `SHOW TABLETS FROM tbl_name;` 语句查看。）。
 
 ### exec\_mem\_limit
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

[doris] branch master updated: remove gc and fix print (#12682)

Reply via email to