This is an automated email from the ASF dual-hosted git repository.

lide pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new abefe26fc9a [refactor](log) Add some log for table and db transaction 
manager's lock (#40746)
abefe26fc9a is described below

commit abefe26fc9a3c57f65d81c68971b95f568b5456e
Author: Lijia Liu <liutang...@yeah.net>
AuthorDate: Fri Sep 20 21:58:06 2024 +0800

    [refactor](log) Add some log for table and db transaction manager's lock 
(#40746)
    
    Use the fe config like #39015, print table lock usage duration.
    In 2.0.6, I found `replayer` thread is blocked for a long time.
    ```
    2024-09-12 11:00:58,797 INFO (replayer|89) 
[DatabaseTransactionMgr.replayUpsertTransactionState():1977] replay a visible 
transaction TransactionState. transaction id: 471728922, label: 
flink_connector_20240912_103150_269f8d635ffb455bb4043eecd1a8406b, db id: 
475180, table id list: 14473765, callback id: -1, coordinator: BE: 
10.217.0.100, transaction status: VISIBLE, error replicas num: 0, replica ids: 
, prepare time: 1726108310505, commit time: 1726108310579, finish time: 
1726108310604 [...]
    2024-09-12 11:00:58,798 WARN (replayer|89) [Env.replayJournal():2575] 
entityCost:940256 loadJournalCost:940256 logId:1392353661 
replayedJournalId:1392353661 code:100 size:562
    ```
    And, there are many table lock fail log:
    ```
    2024-09-12 10:56:53,167 WARN (mysql-nio-pool-32036|412611) 
[Table.tryReadLock():155] Failed to try table atdpersonpaycode's read lock. 
timeout 1 MINUTES. Current owner: null
    ```
    But, I don't kown what thread hold the table lock.
    So, add some logs to print lock holding info.
---
 .../main/java/org/apache/doris/common/Config.java  |  4 ++
 .../main/java/org/apache/doris/catalog/Table.java  | 46 ++++++++++++++++++++++
 .../doris/transaction/DatabaseTransactionMgr.java  | 15 +++++--
 3 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java 
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 86547830282..ffc23900a0f 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -2092,6 +2092,10 @@ public class Config extends ConfigBase {
     @ConfField
     public static long lock_reporting_threshold_ms = 500L;
 
+    @ConfField(mutable = true, description = {"表示最大锁持有时间,超过该时间会打印告警日志,单位秒",
+            "Maximum lock hold time; logs a warning if exceeded"})
+    public static long max_lock_hold_threshold_seconds = 10L;
+
     /**
      * If false, when select from tables in information_schema database,
      * the result will not contain the information of the table in external 
catalog.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
index 6b5de7e581f..7cbef2a5d6c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
@@ -19,6 +19,7 @@ package org.apache.doris.catalog;
 
 import org.apache.doris.alter.AlterCancelException;
 import org.apache.doris.analysis.CreateTableStmt;
+import org.apache.doris.common.Config;
 import org.apache.doris.common.DdlException;
 import org.apache.doris.common.MetaNotFoundException;
 import org.apache.doris.common.io.Text;
@@ -73,6 +74,8 @@ public abstract class Table extends MetaObject implements 
Writable, TableIf {
     protected TableType type;
     protected long createTime;
     protected QueryableReentrantReadWriteLock rwLock;
+    private final ThreadLocal<Long> lockReadStart = new ThreadLocal<>();
+    private long lockWriteStart;
 
     /*
      *  fullSchema and nameToColumn should contains all columns, both visible 
and shadow.
@@ -146,6 +149,7 @@ public abstract class Table extends MetaObject implements 
Writable, TableIf {
 
     public void readLock() {
         this.rwLock.readLock().lock();
+        lockReadStart.set(System.currentTimeMillis());
     }
 
     public boolean tryReadLock(long timeout, TimeUnit unit) {
@@ -155,6 +159,9 @@ public abstract class Table extends MetaObject implements 
Writable, TableIf {
                 LOG.warn("Failed to try table {}'s read lock. timeout {} {}. 
Current owner: {}",
                         name, timeout, unit.name(), rwLock.getOwner());
             }
+            if (res) {
+                lockReadStart.set(System.currentTimeMillis());
+            }
             return res;
         } catch (InterruptedException e) {
             LOG.warn("failed to try read lock at table[" + name + "]", e);
@@ -163,11 +170,16 @@ public abstract class Table extends MetaObject implements 
Writable, TableIf {
     }
 
     public void readUnlock() {
+        if (lockReadStart.get() != null) {
+            checkAndLogLockDuration("read", lockReadStart.get(), 
System.currentTimeMillis());
+            lockReadStart.remove();
+        }
         this.rwLock.readLock().unlock();
     }
 
     public void writeLock() {
         this.rwLock.writeLock().lock();
+        lockWriteStart = System.currentTimeMillis();
     }
 
     public boolean writeLockIfExist() {
@@ -176,9 +188,39 @@ public abstract class Table extends MetaObject implements 
Writable, TableIf {
             this.rwLock.writeLock().unlock();
             return false;
         }
+        lockWriteStart = System.currentTimeMillis();
         return true;
     }
 
+    private void checkAndLogLockDuration(String type, long lockStart, long 
lockEnd) {
+        long duration = lockEnd - lockStart;
+        if (duration > Config.max_lock_hold_threshold_seconds * 1000) {
+            StringBuilder msgBuilder = new StringBuilder();
+            msgBuilder.append(getId())
+                    .append(" ")
+                    .append(getName())
+                    .append(" ")
+                    .append(type)
+                    .append(" lock is held at ")
+                    .append(lockStart)
+                    .append(".And release after ")
+                    .append(duration)
+                    .append(" ms.")
+                    .append("Call stack is :\n")
+                    .append(getStackTrace(Thread.currentThread()));
+            LOG.info(msgBuilder.toString());
+        }
+    }
+
+    private static String getStackTrace(Thread t) {
+        final StackTraceElement[] stackTrace = t.getStackTrace();
+        StringBuilder msgBuilder = new StringBuilder();
+        for (StackTraceElement e : stackTrace) {
+            msgBuilder.append(e.toString()).append("\n");
+        }
+        return msgBuilder.toString();
+    }
+
     public boolean tryWriteLock(long timeout, TimeUnit unit) {
         try {
             boolean res = this.rwLock.writeLock().tryLock(timeout, unit);
@@ -186,6 +228,9 @@ public abstract class Table extends MetaObject implements 
Writable, TableIf {
                 LOG.warn("Failed to try table {}'s write lock. timeout {} {}. 
Current owner: {}",
                         name, timeout, unit.name(), rwLock.getOwner());
             }
+            if (res) {
+                lockWriteStart = System.currentTimeMillis();
+            }
             return res;
         } catch (InterruptedException e) {
             LOG.warn("failed to try write lock at table[" + name + "]", e);
@@ -194,6 +239,7 @@ public abstract class Table extends MetaObject implements 
Writable, TableIf {
     }
 
     public void writeUnlock() {
+        checkAndLogLockDuration("write", lockWriteStart, 
System.currentTimeMillis());
         this.rwLock.writeLock().unlock();
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
 
b/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
index cf4bed28e5b..3bc676a1b42 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
@@ -165,14 +165,20 @@ public class DatabaseTransactionMgr {
     private volatile long usedQuotaDataBytes = -1;
 
     private long lockWriteStart;
+    private final ThreadLocal<Long> lockReadStart = new ThreadLocal<>();
 
-    private long lockReportingThresholdMs = Config.lock_reporting_threshold_ms;
+    private final long lockReportingThresholdMs = 
Config.lock_reporting_threshold_ms;
 
     protected void readLock() {
         this.transactionLock.readLock().lock();
+        lockReadStart.set(System.currentTimeMillis());
     }
 
     protected void readUnlock() {
+        if (lockReadStart.get() != null) {
+            checkAndLogLockDuration("read", lockReadStart.get(), 
System.currentTimeMillis());
+            lockReadStart.remove();
+        }
         this.transactionLock.readLock().unlock();
     }
 
@@ -182,7 +188,7 @@ public class DatabaseTransactionMgr {
     }
 
     protected void writeUnlock() {
-        checkAndLogWriteLockDuration(lockWriteStart, 
System.currentTimeMillis());
+        checkAndLogLockDuration("write", lockWriteStart, 
System.currentTimeMillis());
         this.transactionLock.writeLock().unlock();
     }
 
@@ -2211,11 +2217,12 @@ public class DatabaseTransactionMgr {
      * @param lockStart holing lock start time.
      * @param lockEnd release lock time.
      */
-    private void checkAndLogWriteLockDuration(long lockStart, long lockEnd) {
+    private void checkAndLogLockDuration(String type, long lockStart, long 
lockEnd) {
         long duration = lockEnd - lockStart;
         if (duration > lockReportingThresholdMs) {
             StringBuilder msgBuilder = new StringBuilder();
-            msgBuilder.append("lock is held at ")
+            msgBuilder.append(type)
+                    .append(" lock is held at ")
                     .append(lockStart)
                     .append(".And release after ")
                     .append(duration)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to