kangkaisen closed pull request #527: Add retry for TCP CLOSE_WAIT in AgentBatchTask
URL: https://github.com/apache/incubator-doris/pull/527
 
 
   

This is a PR merged from a forked repository.
Because GitHub does not show the original diff of a foreign (forked)
pull request once it has been merged, the diff is reproduced below for
the sake of provenance:

diff --git a/fe/src/main/java/org/apache/doris/task/AgentBatchTask.java 
b/fe/src/main/java/org/apache/doris/task/AgentBatchTask.java
index 4d429f9c..a67800db 100644
--- a/fe/src/main/java/org/apache/doris/task/AgentBatchTask.java
+++ b/fe/src/main/java/org/apache/doris/task/AgentBatchTask.java
@@ -46,6 +46,7 @@
 
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
+import org.apache.thrift.transport.TTransportException;
 
 import java.util.HashMap;
 import java.util.LinkedList;
@@ -115,13 +116,39 @@ public void run() {
                 List<AgentTask> tasks = this.backendIdToTasks.get(backendId);
                 // create AgentClient
                 address = new TNetworkAddress(backend.getHost(), 
backend.getBePort());
+
                 client = ClientPool.backendPool.borrowObject(address);
 
                 List<TAgentTaskRequest> agentTaskRequests = new 
LinkedList<TAgentTaskRequest>();
                 for (AgentTask task : tasks) {
                     agentTaskRequests.add(toAgentTaskRequest(task));
                 }
-                client.submit_tasks(agentTaskRequests);
+
+                int count = 0;
+                int retryCount = 1;
+                boolean needRetry = true;
+                while (needRetry && count++ <= retryCount) {
+                    needRetry = false;
+                    try {
+                        client.submit_tasks(agentTaskRequests);
+                    } catch (TTransportException e) {
+                        //handle the TCP connection is CLOSE_WAIT status
+                        if (e.getMessage() == null && (e.getType() == 
TTransportException.END_OF_FILE)) {
+                            LOG.warn("Maybe the BE {} : {} restarted just now, 
will retry", backendId, backend.getHost());
+                            needRetry = true;
+
+                            //close the TCP connection
+                            ClientPool.backendPool.invalidateObject(address, 
client);
+
+                            //create a new TCP connection
+                            address = new TNetworkAddress(backend.getHost(), 
backend.getBePort());
+                            client = 
ClientPool.backendPool.borrowObject(address);
+                        } else {
+                            throw e;
+                        }
+                    }
+                }
+
                 if (LOG.isDebugEnabled()) {
                     for (AgentTask task : tasks) {
                         LOG.debug("send task: type[{}], backend[{}], 
signature[{}]",


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org
For additional commands, e-mail: dev-h...@doris.apache.org

Reply via email to