This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 42b6532131 remove gc and fix print (#12682) 42b6532131 is described below commit 42b6532131487b3fa218c388330750a2a8616809 Author: Xinyi Zou <zouxiny...@gmail.com> AuthorDate: Sat Sep 17 00:16:15 2022 +0800 remove gc and fix print (#12682) --- be/src/common/daemon.cpp | 1 + be/src/runtime/disk_io_mgr.cc | 6 -- be/src/runtime/memory/mem_tracker.cpp | 9 ++- be/src/runtime/memory/mem_tracker_limiter.cpp | 89 +++++++--------------- be/src/runtime/memory/mem_tracker_limiter.h | 73 ++++++------------ be/src/service/doris_main.cpp | 2 + docs/en/docs/admin-manual/data-admin/backup.md | 2 +- .../maint-monitor/tablet-repair-and-balance.md | 4 +- docs/en/docs/data-operate/export/export-manual.md | 2 +- docs/zh-CN/docs/admin-manual/data-admin/backup.md | 2 +- .../maint-monitor/tablet-repair-and-balance.md | 2 +- .../docs/data-operate/export/export-manual.md | 2 +- 12 files changed, 67 insertions(+), 127 deletions(-) diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 4fb1abe628..64f1e509fb 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -68,6 +68,7 @@ namespace doris { bool k_doris_exit = false; void Daemon::tcmalloc_gc_thread() { + // TODO All cache GC wish to be supported while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(10))) { size_t used_size = 0; size_t free_size = 0; diff --git a/be/src/runtime/disk_io_mgr.cc b/be/src/runtime/disk_io_mgr.cc index e56c4bff69..1f506d82b1 100644 --- a/be/src/runtime/disk_io_mgr.cc +++ b/be/src/runtime/disk_io_mgr.cc @@ -348,12 +348,6 @@ DiskIoMgr::~DiskIoMgr() { Status DiskIoMgr::init(const int64_t mem_limit) { _mem_tracker = std::make_unique<MemTrackerLimiter>(mem_limit, "DiskIO"); - // If we hit the process limit, see if we can reclaim some memory by removing - // previously allocated (but unused) io buffers. 
- // TODO(zxy) After clearing the free buffer, how much impact will it have on subsequent - // queries may need to be verified. - ExecEnv::GetInstance()->process_mem_tracker()->add_gc_function( - std::bind<void>(&DiskIoMgr::gc_io_buffers, this, std::placeholders::_1)); for (int i = 0; i < _disk_queues.size(); ++i) { _disk_queues[i] = new DiskQueue(i); diff --git a/be/src/runtime/memory/mem_tracker.cpp b/be/src/runtime/memory/mem_tracker.cpp index b282b87758..caf2325386 100644 --- a/be/src/runtime/memory/mem_tracker.cpp +++ b/be/src/runtime/memory/mem_tracker.cpp @@ -104,10 +104,11 @@ void MemTracker::make_group_snapshot(std::vector<MemTracker::Snapshot>* snapshot } std::string MemTracker::log_usage(MemTracker::Snapshot snapshot) { - return fmt::format("MemTracker Label={}, Parent Label={}, Used={}, Peak={}", snapshot.label, - snapshot.parent, - PrettyPrinter::print(snapshot.cur_consumption, TUnit::BYTES), - PrettyPrinter::print(snapshot.peak_consumption, TUnit::BYTES)); + return fmt::format( + "MemTracker Label={}, Parent Label={}, Used={}({} B), Peak={}({} B)", snapshot.label, + snapshot.parent, PrettyPrinter::print(snapshot.cur_consumption, TUnit::BYTES), + snapshot.cur_consumption, PrettyPrinter::print(snapshot.peak_consumption, TUnit::BYTES), + snapshot.peak_consumption); } static std::unordered_map<std::string, std::shared_ptr<MemTracker>> global_mem_trackers; diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp b/be/src/runtime/memory/mem_tracker_limiter.cpp index 6bc7f95c5e..e09e1493c8 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.cpp +++ b/be/src/runtime/memory/mem_tracker_limiter.cpp @@ -50,6 +50,8 @@ MemTrackerLimiter::MemTrackerLimiter(int64_t byte_limit, const std::string& labe MemTrackerLimiter* tracker = this; while (tracker != nullptr) { _all_ancestors.push_back(tracker); + // Process tracker does not participate in the process memory limit, process tracker consumption is virtual memory, + // and there is a diff between the real 
physical memory value of the process. It is replaced by check_sys_mem_info. if (tracker->has_limit() && tracker->label() != "Process") _limited_ancestors.push_back(tracker); tracker = tracker->_parent.get(); @@ -124,42 +126,6 @@ int64_t MemTrackerLimiter::get_lowest_limit() const { return min_limit; } -bool MemTrackerLimiter::gc_memory(int64_t max_consumption) { - if (max_consumption < 0) return true; - std::lock_guard<std::mutex> l(_gc_lock); - int64_t pre_gc_consumption = consumption(); - // Check if someone gc'd before us - if (pre_gc_consumption < max_consumption) return false; - - int64_t curr_consumption = pre_gc_consumption; - // Free some extra memory to avoid frequent GC, 4M is an empirical value, maybe it will be tested later. - const int64_t EXTRA_BYTES_TO_FREE = 4L * 1024L * 1024L * 1024L; - // Try to free up some memory - for (int i = 0; i < _gc_functions.size(); ++i) { - // Try to free up the amount we are over plus some extra so that we don't have to - // immediately GC again. Don't free all the memory since that can be unnecessarily - // expensive. 
- int64_t bytes_to_free = curr_consumption - max_consumption + EXTRA_BYTES_TO_FREE; - _gc_functions[i](bytes_to_free); - curr_consumption = consumption(); - if (max_consumption - curr_consumption <= EXTRA_BYTES_TO_FREE) break; - } - - return curr_consumption > max_consumption; -} - -Status MemTrackerLimiter::try_gc_memory(int64_t bytes) { - if (UNLIKELY(gc_memory(_limit - bytes))) { - return Status::MemoryLimitExceeded(fmt::format( - "failed_alloc_size={} B, exceeded_tracker={}, limit={} B, peak_used={} B, " - "current_used={} B", - bytes, label(), _limit, _consumption->value(), _consumption->current_value())); - } - VLOG_NOTICE << "GC succeeded, TryConsume bytes=" << bytes - << " consumption=" << _consumption->current_value() << " limit=" << _limit; - return Status::OK(); -} - // Calling this on the query tracker results in output like: // // Query(4a4c81fedaed337d:4acadfda00000000) Limit=10.00 GB Total=508.28 MB Peak=508.45 MB @@ -186,10 +152,10 @@ std::string MemTrackerLimiter::log_usage(int max_recursive_depth, int64_t* logge int64_t peak_consumption = _consumption->value(); if (logged_consumption != nullptr) *logged_consumption = curr_consumption; - std::string detail = "MemTrackerLimiter Label={}, Limit={}, Used={}, Peak={}, Exceeded={}"; - detail = fmt::format(detail, _label, PrettyPrinter::print(_limit, TUnit::BYTES), - PrettyPrinter::print(curr_consumption, TUnit::BYTES), - PrettyPrinter::print(peak_consumption, TUnit::BYTES), + std::string detail = + "MemTrackerLimiter Label={}, Limit={}({} B), Used={}({} B), Peak={}({} B), Exceeded={}"; + detail = fmt::format(detail, _label, print_bytes(_limit), _limit, print_bytes(curr_consumption), + curr_consumption, print_bytes(peak_consumption), peak_consumption, limit_exceeded() ? "true" : "false"); // This call does not need the children, so return early. 
@@ -223,17 +189,15 @@ std::string MemTrackerLimiter::log_usage(int max_recursive_depth, if (!usage_string.empty()) usage_strings.push_back(usage_string); *logged_consumption += tracker_consumption; } - return join(usage_strings, "\n"); + return usage_strings.size() == 0 ? "" : "\n " + join(usage_strings, "\n "); } Status MemTrackerLimiter::mem_limit_exceeded_construct(const std::string& msg) { std::string detail = fmt::format( - "{}, backend {} process memory used {}, process limit {}. If is query, can " - "change the limit " - "by `set exec_mem_limit=xxx`, details mem usage see be.INFO.", - msg, BackendOptions::get_localhost(), - PrettyPrinter::print(PerfCounters::get_vm_rss(), TUnit::BYTES), - PrettyPrinter::print(MemInfo::mem_limit(), TUnit::BYTES)); + "{}. backend {} process memory used {}, limit {}. If query tracker exceed, `set " + "exec_mem_limit=8G` to change limit, details mem usage see be.INFO.", + msg, BackendOptions::get_localhost(), print_bytes(PerfCounters::get_vm_rss()), + print_bytes(MemInfo::mem_limit())); return Status::MemoryLimitExceeded(detail); } @@ -258,7 +222,7 @@ void MemTrackerLimiter::print_log_usage(const std::string& msg) { Status MemTrackerLimiter::mem_limit_exceeded(const std::string& msg, int64_t failed_allocation_size) { STOP_CHECK_THREAD_MEM_TRACKER_LIMIT(); - std::string detail = fmt::format("Memory limit exceeded:<consuming_tracker={}, ", _label); + std::string detail = fmt::format("Memory limit exceeded:<consuming tracker:<{}>, ", _label); MemTrackerLimiter* exceeded_tracker = nullptr; MemTrackerLimiter* max_consumption_tracker = nullptr; int64_t free_size = INT64_MAX; @@ -281,28 +245,29 @@ Status MemTrackerLimiter::mem_limit_exceeded(const std::string& msg, MemTrackerLimiter* print_log_usage_tracker = nullptr; if (exceeded_tracker != nullptr) { detail += fmt::format( - "failed_alloc_size={} B, exceeded_tracker={}, limit={} B, peak_used={} B, " - "current_used={} B>, executing_msg:<{}>", - 
PrettyPrinter::print(failed_allocation_size, TUnit::BYTES), - exceeded_tracker->label(), exceeded_tracker->limit(), - exceeded_tracker->peak_consumption(), exceeded_tracker->consumption(), msg); + "failed alloc size {}, exceeded tracker:<{}>, limit {}, peak used {}, " + "current used {}>, executing msg:<{}>", + print_bytes(failed_allocation_size), exceeded_tracker->label(), + print_bytes(exceeded_tracker->limit()), + print_bytes(exceeded_tracker->peak_consumption()), + print_bytes(exceeded_tracker->consumption()), msg); print_log_usage_tracker = exceeded_tracker; } else if (!sys_exceed_st) { - detail += fmt::format("{}>, executing_msg:<{}>", sys_exceed_st.get_error_msg(), msg); + detail += fmt::format("{}>, executing msg:<{}>", sys_exceed_st.get_error_msg(), msg); } else if (max_consumption_tracker != nullptr) { // must after check_sys_mem_info false detail += fmt::format( - "failed_alloc_size={} B, max_consumption_tracker={}, limit={} B, peak_used={} B, " - "current_used={} B>, executing_msg:<{}>", - PrettyPrinter::print(failed_allocation_size, TUnit::BYTES), - max_consumption_tracker->label(), max_consumption_tracker->limit(), - max_consumption_tracker->peak_consumption(), max_consumption_tracker->consumption(), - msg); + "failed alloc size {}, max consumption tracker:<{}>, limit {}, peak used {}, " + "current used {}>, executing msg:<{}>", + print_bytes(failed_allocation_size), max_consumption_tracker->label(), + print_bytes(max_consumption_tracker->limit()), + print_bytes(max_consumption_tracker->peak_consumption()), + print_bytes(max_consumption_tracker->consumption()), msg); print_log_usage_tracker = max_consumption_tracker; } else { // The limit of the current tracker and parents is less than 0, the consume will not fail, // and the current process memory has no excess limit. 
- detail += fmt::format("unknown exceed reason, executing_msg:<{}>", msg); + detail += fmt::format("unknown exceed reason, executing msg:<{}>", msg); print_log_usage_tracker = ExecEnv::GetInstance()->process_mem_tracker_raw(); } auto st = MemTrackerLimiter::mem_limit_exceeded_construct(detail); @@ -316,7 +281,7 @@ Status MemTrackerLimiter::mem_limit_exceeded(const std::string& msg, Status failed_try_consume_st) { STOP_CHECK_THREAD_MEM_TRACKER_LIMIT(); std::string detail = - fmt::format("Memory limit exceeded:<consuming_tracker={}, {}>, executing_msg:<{}>", + fmt::format("Memory limit exceeded:<consuming tracker:<{}>, {}>, executing msg:<{}>", _label, failed_try_consume_st.get_error_msg(), msg); auto st = MemTrackerLimiter::mem_limit_exceeded_construct(detail); failed_tracker->print_log_usage(st.get_error_msg()); diff --git a/be/src/runtime/memory/mem_tracker_limiter.h b/be/src/runtime/memory/mem_tracker_limiter.h index 7c79442261..5b23d4e0d6 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.h +++ b/be/src/runtime/memory/mem_tracker_limiter.h @@ -71,8 +71,9 @@ public: // for fast, expect MemInfo::initialized() to be true. if (PerfCounters::get_vm_rss() + bytes >= MemInfo::mem_limit()) { auto st = Status::MemoryLimitExceeded( - "process memory used {} B, exceed limit {} B, failed_alloc_size={} B", - PerfCounters::get_vm_rss(), MemInfo::mem_limit(), bytes); + "process memory used {}, exceed limit {}, failed alloc size {}", + print_bytes(PerfCounters::get_vm_rss()), print_bytes(MemInfo::mem_limit()), + print_bytes(bytes)); ExecEnv::GetInstance()->process_mem_tracker_raw()->print_log_usage(st.get_error_msg()); return st; } @@ -108,21 +109,6 @@ public: // Returns the lowest limit for this tracker limiter and its ancestors. Returns -1 if there is no limit. 
int64_t get_lowest_limit() const; - typedef std::function<void(int64_t bytes_to_free)> GcFunction; - // Add a function 'f' to be called if the limit is reached, if none of the other - // previously-added GC functions were successful at freeing up enough memory. - // 'f' does not need to be thread-safe as long as it is added to only one tracker limiter. - // Note that 'f' must be valid for the lifetime of this tracker limiter. - void add_gc_function(GcFunction f) { _gc_functions.push_back(f); } - - // TODO Should be managed in a separate process_mem_mgr, not in MemTracker - // If consumption is higher than max_consumption, attempts to free memory by calling - // any added GC functions. Returns true if max_consumption is still exceeded. Takes gc_lock. - // Note: If the cache of segment/chunk is released due to insufficient query memory at a certain moment, - // the performance of subsequent queries may be degraded, so the use of gc function should be careful enough. - bool gc_memory(int64_t max_consumption); - Status try_gc_memory(int64_t bytes); - public: // up to (but not including) end_tracker. // This happens when we want to update tracking on a particular mem tracker but the consumption @@ -147,6 +133,7 @@ public: // Limiting the recursive depth reduces the cost of dumping, particularly // for the process tracker limiter. std::string log_usage(int max_recursive_depth = INT_MAX, int64_t* logged_consumption = nullptr); + void print_log_usage(const std::string& msg); // Log the memory usage when memory limit is exceeded and return a status object with // msg of the allocation which caused the limit to be exceeded. @@ -169,6 +156,10 @@ public: return msg.str(); } + static std::string print_bytes(int64_t bytes) { + return fmt::format("{}", PrettyPrinter::print(bytes, TUnit::BYTES)); + } + private: // The following func, for automatic memory tracking and limiting based on system memory allocation. 
friend class ThreadMemTrackerMgr; @@ -198,7 +189,6 @@ private: int64_t* logged_consumption); static Status mem_limit_exceeded_construct(const std::string& msg); - void print_log_usage(const std::string& msg); private: // Limit on memory consumption, in bytes. If limit_ == -1, there is no consumption limit. Used in log_usage。 @@ -230,19 +220,6 @@ private: std::atomic_size_t _had_child_count = 0; bool _print_log_usage = false; - - // Lock to protect gc_memory(). This prevents many GCs from occurring at once. - std::mutex _gc_lock; - // Functions to call after the limit is reached to free memory. - // GcFunctions can be attached to a MemTracker in order to free up memory if the limit is - // reached. If limit_exceeded() is called and the limit is exceeded, it will first call - // the GcFunctions to try to free memory and recheck the limit. For example, the process - // tracker has a GcFunction that releases any unused memory still held by tcmalloc, so - // this will be called before the process limit is reported as exceeded. GcFunctions are - // called in the order they are added, so expensive functions should be added last. - // GcFunctions are called with a global lock held, so should be non-blocking and not - // call back into MemTrackers, except to release memory. - std::vector<GcFunction> _gc_functions; }; inline void MemTrackerLimiter::consume(int64_t bytes) { @@ -286,19 +263,17 @@ inline Status MemTrackerLimiter::try_consume(int64_t bytes) { if (tracker->limit() < 0 || tracker->label() == "Process") { tracker->_consumption->add(bytes); // No limit at this tracker. } else { - // If TryConsume fails, we can try to GC, but we may need to try several times if - // there are concurrent consumers because we don't take a lock before trying to - // update _consumption. - while (true) { - if (LIKELY(tracker->_consumption->try_add(bytes, tracker->limit()))) break; - Status st = tracker->try_gc_memory(bytes); - if (!st) { - // Failed for this mem tracker. 
Roll back the ones that succeeded. - for (int j = _all_ancestors.size() - 1; j > i; --j) { - _all_ancestors[j]->_consumption->add(-bytes); - } - return st; + if (!tracker->_consumption->try_add(bytes, tracker->limit())) { + // Failed for this mem tracker. Roll back the ones that succeeded. + for (int j = _all_ancestors.size() - 1; j > i; --j) { + _all_ancestors[j]->_consumption->add(-bytes); } + return Status::MemoryLimitExceeded(fmt::format( + "failed alloc size {}, exceeded tracker:<{}>, limit {}, peak " + "used {}, current used {}", + print_bytes(bytes), tracker->label(), print_bytes(tracker->limit()), + print_bytes(tracker->_consumption->value()), + print_bytes(tracker->_consumption->current_value()))); } } } @@ -314,11 +289,13 @@ inline Status MemTrackerLimiter::check_limit(int64_t bytes) { // Walk the tracker tree top-down. for (i = _limited_ancestors.size() - 1; i >= 0; --i) { MemTrackerLimiter* tracker = _limited_ancestors[i]; - // Process tracker does not participate in the process memory limit, process tracker consumption is virtual memory, - // and there is a diff between the real physical memory value of the process. It is replaced by check_sys_mem_info. 
- while (true) { - if (LIKELY(tracker->_consumption->current_value() + bytes < tracker->limit())) break; - RETURN_IF_ERROR(tracker->try_gc_memory(bytes)); + if (tracker->_consumption->current_value() + bytes > tracker->limit()) { + return Status::MemoryLimitExceeded( + fmt::format("expected alloc size {}, exceeded tracker:<{}>, limit {}, peak " + "used {}, current used {}", + print_bytes(bytes), tracker->label(), print_bytes(tracker->limit()), + print_bytes(tracker->_consumption->value()), + print_bytes(tracker->_consumption->current_value()))); } } return Status::OK(); diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp index 9dc3f13ef6..3ef0b5b07a 100644 --- a/be/src/service/doris_main.cpp +++ b/be/src/service/doris_main.cpp @@ -504,6 +504,8 @@ int main(int argc, char** argv) { // this will cause coredump for ASAN build when running regression test, // disable temporarily. doris::ExecEnv::GetInstance()->task_pool_mem_tracker_registry()->logout_task_mem_tracker(); + // The process tracker print log usage interval is 1s to avoid a large number of tasks being + // canceled when the process exceeds the mem limit, resulting in too many duplicate logs. doris::ExecEnv::GetInstance()->process_mem_tracker_raw()->enable_print_log_usage(); sleep(1); } diff --git a/docs/en/docs/admin-manual/data-admin/backup.md b/docs/en/docs/admin-manual/data-admin/backup.md index dc11fb55c7..4edf5860c1 100644 --- a/docs/en/docs/admin-manual/data-admin/backup.md +++ b/docs/en/docs/admin-manual/data-admin/backup.md @@ -162,7 +162,7 @@ It is recommended to import the new and old clusters in parallel for a period of 1. Operations related to backup and recovery are currently only allowed to be performed by users with ADMIN privileges. 2. Within a database, only one backup or restore job is allowed to be executed. 3. Both backup and recovery support operations at the minimum partition (Partition) level. 
When the amount of data in the table is large, it is recommended to perform operations by partition to reduce the cost of failed retry. -4. Because of the backup and restore operations, the operations are the actual data files. Therefore, when a table has too many shards, or a shard has too many small versions, it may take a long time to backup or restore even if the total amount of data is small. Users can use `SHOW PARTITIONS FROM table_name;` and `SHOW TABLET FROM table_name;` to view the number of shards in each partition and the number of file versions in each shard to estimate job execution time. The number of files [...] +4. Because of the backup and restore operations, the operations are the actual data files. Therefore, when a table has too many shards, or a shard has too many small versions, it may take a long time to backup or restore even if the total amount of data is small. Users can use `SHOW PARTITIONS FROM table_name;` and `SHOW TABLETS FROM table_name;` to view the number of shards in each partition and the number of file versions in each shard to estimate job execution time. The number of file [...] 5. When checking job status via `SHOW BACKUP` or `SHOW RESTORE` command. It is possible to see error messages in the `TaskErrMsg` column. But as long as the `State` column is not `CANCELLED`, the job is still continuing. These tasks may retry successfully. Of course, some Task errors will also directly cause the job to fail. 
Common `TaskErrMsg` errors are as follows: Q1: Backup to HDFS, the status shows UPLOADING, TaskErrMsg error message: [13333: Close broker writer failed, broker:TNetworkAddress(hostname=10.10.0.0, port=8000) msg:errors while close file output stream, cause by: DataStreamer Exception : ] diff --git a/docs/en/docs/admin-manual/maint-monitor/tablet-repair-and-balance.md b/docs/en/docs/admin-manual/maint-monitor/tablet-repair-and-balance.md index abb88053b6..51d381def5 100644 --- a/docs/en/docs/admin-manual/maint-monitor/tablet-repair-and-balance.md +++ b/docs/en/docs/admin-manual/maint-monitor/tablet-repair-and-balance.md @@ -319,7 +319,7 @@ Tablet state view mainly looks at the state of the tablet, as well as the state ` The ADMIN SHOW REPLICA STATUS `command is mainly used to view the health status of copies. Users can also view additional information about copies of a specified table by using the following commands: - `SHOW TABLET FROM tbl1;` + `SHOW TABLETS FROM tbl1;` ``` +----------+-----------+-----------+------------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------------+--------------+----------------------+----------------------+----------------------+ @@ -385,7 +385,7 @@ Tablet state view mainly looks at the state of the tablet, as well as the state +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+ ``` - The figure above shows all replicas of the corresponding Tablet. The content shown here is the same as `SHOW TABLET FROM tbl1;`. But here you can clearly see the status of all copies of a specific Tablet. + The figure above shows all replicas of the corresponding Tablet. 
The content shown here is the same as `SHOW TABLETS FROM tbl1;`. But here you can clearly see the status of all copies of a specific Tablet. ### Duplicate Scheduling Task diff --git a/docs/en/docs/data-operate/export/export-manual.md b/docs/en/docs/data-operate/export/export-manual.md index c38208963e..529bf9e91d 100644 --- a/docs/en/docs/data-operate/export/export-manual.md +++ b/docs/en/docs/data-operate/export/export-manual.md @@ -204,7 +204,7 @@ FinishTime: 2019-06-25 17:08:34 ### Splitting Query Plans -How many query plans need to be executed for an Export job depends on the total number of Tablets and how many Tablets can be allocated for a query plan at most. Since multiple query plans are executed serially, the execution time of jobs can be reduced if more fragments are processed by one query plan. However, if the query plan fails (e.g., the RPC fails to call Broker, the remote storage jitters, etc.), too many tablets can lead to a higher retry cost of a query plan. Therefore, it is [...] +How many query plans need to be executed for an Export job depends on the total number of Tablets and how many Tablets can be allocated for a query plan at most. Since multiple query plans are executed serially, the execution time of jobs can be reduced if more fragments are processed by one query plan. However, if the query plan fails (e.g., the RPC fails to call Broker, the remote storage jitters, etc.), too many tablets can lead to a higher retry cost of a query plan. Therefore, it is [...] ### exec\_mem\_limit diff --git a/docs/zh-CN/docs/admin-manual/data-admin/backup.md b/docs/zh-CN/docs/admin-manual/data-admin/backup.md index 7c3ebc26b3..fd577d5753 100644 --- a/docs/zh-CN/docs/admin-manual/data-admin/backup.md +++ b/docs/zh-CN/docs/admin-manual/data-admin/backup.md @@ -162,7 +162,7 @@ BACKUP的更多用法可参考 [这里](../../sql-manual/sql-reference/Data-Defi 1. 备份恢复相关的操作目前只允许拥有 ADMIN 权限的用户执行。 2. 一个 Database 内,只允许有一个正在执行的备份或恢复作业。 3. 
备份和恢复都支持最小分区(Partition)级别的操作,当表的数据量很大时,建议按分区分别执行,以降低失败重试的代价。 -4. 因为备份恢复操作,操作的都是实际的数据文件。所以当一个表的分片过多,或者一个分片有过多的小版本时,可能即使总数据量很小,依然需要备份或恢复很长时间。用户可以通过 `SHOW PARTITIONS FROM table_name;` 和 `SHOW TABLET FROM table_name;` 来查看各个分区的分片数量,以及各个分片的文件版本数量,来预估作业执行时间。文件数量对作业执行的时间影响非常大,所以建议在建表时,合理规划分区分桶,以避免过多的分片。 +4. 因为备份恢复操作,操作的都是实际的数据文件。所以当一个表的分片过多,或者一个分片有过多的小版本时,可能即使总数据量很小,依然需要备份或恢复很长时间。用户可以通过 `SHOW PARTITIONS FROM table_name;` 和 `SHOW TABLETS FROM table_name;` 来查看各个分区的分片数量,以及各个分片的文件版本数量,来预估作业执行时间。文件数量对作业执行的时间影响非常大,所以建议在建表时,合理规划分区分桶,以避免过多的分片。 5. 当通过 `SHOW BACKUP` 或者 `SHOW RESTORE` 命令查看作业状态时。有可能会在 `TaskErrMsg` 一列中看到错误信息。但只要 `State` 列不为 `CANCELLED`,则说明作业依然在继续。这些 Task 有可能会重试成功。当然,有些 Task 错误,也会直接导致作业失败。 常见的`TaskErrMsg`错误如下: Q1:备份到HDFS,状态显示UPLOADING,TaskErrMsg 错误信息:[13333: Close broker writer failed, broker:TNetworkAddress(hostname=10.10.0.0,port=8000) msg:errors while close file output stream, cause by: DataStreamer Exception: ] diff --git a/docs/zh-CN/docs/admin-manual/maint-monitor/tablet-repair-and-balance.md b/docs/zh-CN/docs/admin-manual/maint-monitor/tablet-repair-and-balance.md index c5e0a174e1..949db24aa5 100644 --- a/docs/zh-CN/docs/admin-manual/maint-monitor/tablet-repair-and-balance.md +++ b/docs/zh-CN/docs/admin-manual/maint-monitor/tablet-repair-and-balance.md @@ -383,7 +383,7 @@ TabletScheduler 在每轮调度时,都会通过 LoadBalancer 来选择一定 +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+----------+------------------+ ``` - 上图显示了对应 Tablet 的所有副本情况。这里显示的内容和 `SHOW TABLET FROM tbl1;` 的内容相同。但这里可以清楚的知道,一个具体的 Tablet 的所有副本的状态。 + 上图显示了对应 Tablet 的所有副本情况。这里显示的内容和 `SHOW TABLETS FROM tbl1;` 的内容相同。但这里可以清楚的知道,一个具体的 Tablet 的所有副本的状态。 ### 副本调度任务 diff --git a/docs/zh-CN/docs/data-operate/export/export-manual.md b/docs/zh-CN/docs/data-operate/export/export-manual.md index 5ef1bfc974..57fdbed375 100644 --- 
a/docs/zh-CN/docs/data-operate/export/export-manual.md +++ b/docs/zh-CN/docs/data-operate/export/export-manual.md @@ -194,7 +194,7 @@ FinishTime: 2019-06-25 17:08:34 ### 查询计划的拆分 -一个 Export 作业有多少查询计划需要执行,取决于总共有多少 Tablet,以及一个查询计划最多可以分配多少个 Tablet。因为多个查询计划是串行执行的,所以如果让一个查询计划处理更多的分片,则可以减少作业的执行时间。但如果查询计划出错(比如调用 Broker 的 RPC 失败,远端存储出现抖动等),过多的 Tablet 会导致一个查询计划的重试成本变高。所以需要合理安排查询计划的个数以及每个查询计划所需要扫描的分片数,在执行时间和执行成功率之间做出平衡。一般建议一个查询计划扫描的数据量在 3-5 GB内(一个表的 Tablet 的大小以及个数可以通过 `SHOW TABLET FROM tbl_name;` 语句查看。)。 +一个 Export 作业有多少查询计划需要执行,取决于总共有多少 Tablet,以及一个查询计划最多可以分配多少个 Tablet。因为多个查询计划是串行执行的,所以如果让一个查询计划处理更多的分片,则可以减少作业的执行时间。但如果查询计划出错(比如调用 Broker 的 RPC 失败,远端存储出现抖动等),过多的 Tablet 会导致一个查询计划的重试成本变高。所以需要合理安排查询计划的个数以及每个查询计划所需要扫描的分片数,在执行时间和执行成功率之间做出平衡。一般建议一个查询计划扫描的数据量在 3-5 GB内(一个表的 Tablet 的大小以及个数可以通过 `SHOW TABLETS FROM tbl_name;` 语句查看。)。 ### exec\_mem\_limit --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org