[ 
https://issues.apache.org/jira/browse/HBASE-19287?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16421410#comment-16421410
 ] 

Abhishek Kulkarni commented on HBASE-19287:
-------------------------------------------

Cant paste all log here. Also not able to attach file. Is there a way to attch 
file?
Pasting few failures which are allowed.
===============

        at 
org.apache.hadoop.hbase.procedure2.ProcedureExecutor.executeProcedure(ProcedureExecutor.java:1221)
        at 
org.apache.hadoop.hbase.procedure2.ProcedureExecutor.access$800(ProcedureExecutor.java:75)
        at 
org.apache.hadoop.hbase.procedure2.ProcedureExecutor$WorkerThread.run(ProcedureExecutor.java:1740)
2018-03-31 05:00:22,646 INFO  [PEWorker-1] master.SplitLogManager: finished 
splitting (more than or equal to) 0 bytes in 0 log files in 
[hdfs://abhishekk1.pne.ven.veritas.com:54310/hbase/WALs/abhishekk2.pne.ven.veritas.com,16020,1522480363659-splitting]
 in 14ms
2018-03-31 05:00:22,654 INFO  [PEWorker-1] procedure.RecoverMetaProcedure: 
pid=11, state=RUNNABLE:RECOVER_META_ASSIGN_REGIONS; RecoverMetaProcedure 
failedMetaServer=null, splitWal=true; Retaining meta assignment to 
server=abhishekk2.pne.ven.veritas.com,16020,1522480363659
2018-03-31 05:00:22,660 INFO  [PEWorker-1] procedure2.ProcedureExecutor: 
Initialized subprocedures=[{pid=12, ppid=11, 
state=RUNNABLE:REGION_TRANSITION_QUEUE; AssignProcedure table=hbase:meta, 
region=1588230740, target=abhishekk2.pne.ven.veritas.com,16020,1522480363659}]
2018-03-31 05:00:22,677 INFO  [PEWorker-2] procedure.MasterProcedureScheduler: 
pid=12, ppid=11, state=RUNNABLE:REGION_TRANSITION_QUEUE; AssignProcedure 
table=hbase:meta, region=1588230740, 
target=abhishekk2.pne.ven.veritas.com,16020,1522480363659, 
hbase:meta,,1.1588230740
2018-03-31 05:00:22,684 INFO  [PEWorker-2] assignment.AssignProcedure: Starting 
pid=12, ppid=11, state=RUNNABLE:REGION_TRANSITION_QUEUE; AssignProcedure 
table=hbase:meta, region=1588230740, 
target=abhishekk2.pne.ven.veritas.com,16020,1522480363659; rit=OFFLINE, 
location=abhishekk2.pne.ven.veritas.com,16020,1522480363659; 
forceNewPlan=false, retain=false
2018-03-31 05:00:22,861 INFO  [master/abhishekk1:16000] 
balancer.BaseLoadBalancer: Reassigned 1 regions. 1 retained the pre-restart 
assignment. 
2018-03-31 05:00:22,863 INFO  [PEWorker-3] assignment.AssignProcedure: Early 
suspend! pid=12, ppid=11, state=RUNNABLE:REGION_TRANSITION_DISPATCH; 
AssignProcedure table=hbase:meta, region=1588230740, 
target=abhishekk2.pne.ven.veritas.com,16020,1522480363659; rit=OFFLINE, 
location=abhishekk2.pne.ven.veritas.com,16020,1522486814482
2018-03-31 05:00:23,629 INFO  [HBase-Metrics2-1] 
impl.GlobalMetricRegistriesAdapter: Registering 
Master,sub=Coprocessor.Master.CP_org.apache.hadoop.hbase.security.access.AccessController
 Metrics about HBase MasterObservers
2018-03-31 05:00:24,237 INFO  [PEWorker-4] zookeeper.MetaTableLocator: Setting 
hbase:meta (replicaId=0) location in ZooKeeper as 
abhishekk2.pne.ven.veritas.com,16020,1522486814482
2018-03-31 05:00:24,243 INFO  [PEWorker-4] 
assignment.RegionTransitionProcedure: Dispatch pid=12, ppid=11, 
state=RUNNABLE:REGION_TRANSITION_DISPATCH; AssignProcedure table=hbase:meta, 
region=1588230740, target=abhishekk2.pne.ven.veritas.com,16020,1522480363659; 
rit=OPENING, location=abhishekk2.pne.ven.veritas.com,16020,1522486814482
2018-03-31 05:00:24,482 WARN  [RSProcedureDispatcher-pool3-t1] 
procedure.RSProcedureDispatcher: Failed dispatch to 
server=abhishekk2.pne.ven.veritas.com,16020,1522486814482 try=0
java.io.IOException: Call to abhishekk2.pne.ven.veritas.com/10.210.62.29:16020 
failed on local exception: java.io.IOException: 
org.apache.hbase.thirdparty.io.netty.handler.codec.DecoderException: 
org.apache.hadoop.ipc.RemoteException(javax.security.sasl.SaslException): GSS 
initiate failed
        at org.apache.hadoop.hbase.ipc.IPCUtil.wrapException(IPCUtil.java:180)
        at 
org.apache.hadoop.hbase.ipc.AbstractRpcClient.onCallFinished(AbstractRpcClient.java:390)
        at 
org.apache.hadoop.hbase.ipc.AbstractRpcClient.access$100(AbstractRpcClient.java:95)
        at 
org.apache.hadoop.hbase.ipc.AbstractRpcClient$3.run(AbstractRpcClient.java:410)
        at 
org.apache.hadoop.hbase.ipc.AbstractRpcClient$3.run(AbstractRpcClient.java:406)
        at org.apache.hadoop.hbase.ipc.Call.callComplete(Call.java:103)
        at org.apache.hadoop.hbase.ipc.Call.setException(Call.java:118)
        at 
org.apache.hadoop.hbase.ipc.BufferCallBeforeInitHandler.userEventTriggered(BufferCallBeforeInitHandler.java:92)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeUserEventTriggered(AbstractChannelHandlerContext.java:329)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeUserEventTriggered(AbstractChannelHandlerContext.java:315)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.fireUserEventTriggered(AbstractChannelHandlerContext.java:307)
        at 
org.apache.hbase.thirdparty.io.netty.channel.ChannelInboundHandlerAdapter.userEventTriggered(ChannelInboundHandlerAdapter.java:108)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeUserEventTriggered(AbstractChannelHandlerContext.java:329)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeUserEventTriggered(AbstractChannelHandlerContext.java:315)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.fireUserEventTriggered(AbstractChannelHandlerContext.java:307)
        at 
org.apache.hbase.thirdparty.io.netty.channel.ChannelInboundHandlerAdapter.userEventTriggered(ChannelInboundHandlerAdapter.java:108)
        at 
org.apache.hbase.thirdparty.io.netty.handler.codec.ByteToMessageDecoder.userEventTriggered(ByteToMessageDecoder.java:353)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeUserEventTriggered(AbstractChannelHandlerContext.java:329)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeUserEventTriggered(AbstractChannelHandlerContext.java:315)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.fireUserEventTriggered(AbstractChannelHandlerContext.java:307)
        at 
org.apache.hbase.thirdparty.io.netty.channel.DefaultChannelPipeline$HeadContext.userEventTriggered(DefaultChannelPipeline.java:1377)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeUserEventTriggered(AbstractChannelHandlerContext.java:329)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeUserEventTriggered(AbstractChannelHandlerContext.java:315)
        at 
org.apache.hbase.thirdparty.io.netty.channel.DefaultChannelPipeline.fireUserEventTriggered(DefaultChannelPipeline.java:929)
        at 
org.apache.hadoop.hbase.ipc.NettyRpcConnection.failInit(NettyRpcConnection.java:179)
        at 
org.apache.hadoop.hbase.ipc.NettyRpcConnection.access$500(NettyRpcConnection.java:71)
        at 
org.apache.hadoop.hbase.ipc.NettyRpcConnection$2.operationComplete(NettyRpcConnection.java:247)
        at 
org.apache.hbase.thirdparty.io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:507)
        at 
org.apache.hbase.thirdparty.io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:481)
        at 
org.apache.hbase.thirdparty.io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:420)
        at 
org.apache.hbase.thirdparty.io.netty.util.concurrent.DefaultPromise.tryFailure(DefaultPromise.java:122)
        at 
org.apache.hadoop.hbase.security.NettyHBaseSaslRpcClientHandler.exceptionCaught(NettyHBaseSaslRpcClientHandler.java:164)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeExceptionCaught(AbstractChannelHandlerContext.java:285)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeExceptionCaught(AbstractChannelHandlerContext.java:264)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.fireExceptionCaught(AbstractChannelHandlerContext.java:256)
        at 
org.apache.hbase.thirdparty.io.netty.channel.ChannelInboundHandlerAdapter.exceptionCaught(ChannelInboundHandlerAdapter.java:131)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeExceptionCaught(AbstractChannelHandlerContext.java:285)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.notifyHandlerException(AbstractChannelHandlerContext.java:850)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:364)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)
        at 
org.apache.hbase.thirdparty.io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1359)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
        at 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
        at 
org.apache.hbase.thirdparty.io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:935)
        at 
org.apache.hbase.thirdparty.io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:801)
        at 
org.apache.hbase.thirdparty.io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:404)
        at 
org.apache.hbase.thirdparty.io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:304)
        at 
org.apache.hbase.thirdparty.io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858)


> master hangs forever if RecoverMeta send assign meta region request to target 
> server fail
> -----------------------------------------------------------------------------------------
>
>                 Key: HBASE-19287
>                 URL: https://issues.apache.org/jira/browse/HBASE-19287
>             Project: HBase
>          Issue Type: Bug
>          Components: proc-v2
>    Affects Versions: 2.0.0
>            Reporter: Yi Liang
>            Assignee: Yi Liang
>            Priority: Major
>             Fix For: 2.0.0-beta-1, 2.0.0
>
>         Attachments: HBASE-19287-master-v3.patch, 
> HBASE-19287-master-v3.patch, HBASE-19287-master-v4.patch, 
> hbase-19287-master-v2.patch, master.patch
>
>
> 2017-11-10 19:26:56,019 INFO  [ProcExecWrkr-1] 
> procedure.RecoverMetaProcedure: pid=138, 
> state=RUNNABLE:RECOVER_META_ASSIGN_REGIONS; RecoverMetaProcedure 
> failedMetaServer=null, splitWal=true; Retaining meta assignment to 
> server=hadoop-slave1.hadoop,16020,1510341981454
> 2017-11-10 19:26:56,029 INFO  [ProcExecWrkr-1] procedure2.ProcedureExecutor: 
> Initialized subprocedures=[{pid=139, ppid=138, 
> state=RUNNABLE:REGION_TRANSITION_QUEUE; AssignProcedure table=hbase:meta, 
> region=1588230740, target=hadoop-slave1.hadoop,16020,1510341981454}]
> 2017-11-10 19:26:56,067 INFO  [ProcExecWrkr-2] 
> procedure.MasterProcedureScheduler: pid=139, ppid=138, 
> state=RUNNABLE:REGION_TRANSITION_QUEUE; AssignProcedure table=hbase:meta, 
> region=1588230740, target=hadoop-slave1.hadoop,16020,1510341981454 hbase:meta 
> hbase:meta,,1.1588230740
> 2017-11-10 19:26:56,071 INFO  [ProcExecWrkr-2] assignment.AssignProcedure: 
> Start pid=139, ppid=138, state=RUNNABLE:REGION_TRANSITION_QUEUE; 
> AssignProcedure table=hbase:meta, region=1588230740, 
> target=hadoop-slave1.hadoop,16020,1510341981454; rit=OFFLINE, 
> location=hadoop-slave1.hadoop,16020,1510341981454; forceNewPlan=false, 
> retain=false
> 2017-11-10 19:26:56,224 INFO  [ProcExecWrkr-4] zookeeper.MetaTableLocator: 
> Setting hbase:meta (replicaId=0) location in ZooKeeper as 
> hadoop-slave2.hadoop,16020,1510341988652
> 2017-11-10 19:26:56,230 INFO  [ProcExecWrkr-4] 
> assignment.RegionTransitionProcedure: Dispatch pid=139, ppid=138, 
> state=RUNNABLE:REGION_TRANSITION_DISPATCH; AssignProcedure table=hbase:meta, 
> region=1588230740, target=hadoop-slave1.hadoop,16020,1510341981454; 
> rit=OPENING, location=hadoop-slave2.hadoop,16020,1510341988652
> 2017-11-10 19:26:56,382 INFO  [ProcedureDispatcherTimeoutThread] 
> procedure.RSProcedureDispatcher: Using procedure batch rpc execution for 
> serverName=hadoop-slave2.hadoop,16020,1510341988652 version=2097152
> 2017-11-10 19:26:57,542 INFO  [main-EventThread] 
> zookeeper.RegionServerTracker: RegionServer ephemeral node deleted, 
> processing expiration [hadoop-slave2.hadoop,16020,1510341988652]
> 2017-11-10 19:26:57,543 INFO  [main-EventThread] master.ServerManager: Master 
> doesn't enable ServerShutdownHandler during initialization, delay expiring 
> server hadoop-slave2.hadoop,16020,1510341988652
> 2017-11-10 19:26:58,875 INFO  
> [RpcServer.default.FPBQ.Fifo.handler=29,queue=2,port=16000] 
> master.ServerManager: Registering 
> server=hadoop-slave1.hadoop,16020,1510342016106
> 2017-11-10 19:27:05,832 INFO  
> [RpcServer.default.FPBQ.Fifo.handler=29,queue=2,port=16000] 
> master.ServerManager: Registering 
> server=hadoop-slave2.hadoop,16020,1510342023184
> 2017-11-10 19:27:05,832 INFO  
> [RpcServer.default.FPBQ.Fifo.handler=29,queue=2,port=16000] 
> master.ServerManager: Triggering server recovery; existingServer 
> hadoop-slave2.hadoop,16020,1510341988652 looks stale, new 
> server:hadoop-slave2.hadoop,16020,1510342023184
> 2017-11-10 19:27:05,832 INFO  
> [RpcServer.default.FPBQ.Fifo.handler=29,queue=2,port=16000] 
> master.ServerManager: Master doesn't enable ServerShutdownHandler during 
> initialization, delay expiring server hadoop-slave2.hadoop,16020,1510341988652
> 2017-11-10 19:27:49,815 INFO  
> [RpcServer.default.FPBQ.Fifo.handler=29,queue=2,port=16000] 
> client.RpcRetryingCallerImpl: tarted=38594 ms ago, cancelled=false, 
> msg=org.apache.hadoop.hbase.NotServingRegionException: hbase:meta,,1 is not 
> online on hadoop-slave2.hadoop,16020,1510342023184
>         at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.getRegionByEncodedName(HRegionServer.java:3290)
>         at 
> org.apache.hadoop.hbase.regionserver.RSRpcServices.getRegion(RSRpcServices.java:1370)
>         at 
> org.apache.hadoop.hbase.regionserver.RSRpcServices.get(RSRpcServices.java:2401)
>         at 
> org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:41544)
>         at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:406)
>         at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:133)
>         at 
> org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:278)
>         at 
> org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:258)
>  row 'hbase:namespace' on table 'hbase:meta' at 
> region=hbase:meta,,1.1588230740, 
> hostname=hadoop-slave2.hadoop,16020,1510341988652, seqNum=0



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to