Hello Jun. Unfortunately, I do not have logs from broker 6 to find out
why it was unresponsive, but indeed it was not healthy. I found it
to be unresponsive as well.
How can I recover from all these failures with minimum data loss?




On Tue, Aug 27, 2013 at 8:51 PM, Jun Rao <jun...@gmail.com> wrote:

> It seems the replica fetch thread died because of socket timeout (defaults
> to 30 secs). Was broker 6 healthy at that point?
>
> Thanks,
>
> Jun
>
>
> On Tue, Aug 27, 2013 at 11:36 AM, Vadim Keylis <vkeylis2...@gmail.com
> >wrote:
>
> > We do not use controlled shutdown through JMX; it's configured in the
> > property file. I do not see a controlled shutdown message at the time I
> > initiated the shutdown. However, searching for the string produced the
> > following error messages, which happened hours before I started shutting
> > down the service.
> >
> > [2013-08-26 12:38:18,850] WARN [ReplicaFetcherThread--1-6], Error in
> fetch
> > Name: FetchRequest; Version: 0; CorrelationId: 1541; ClientId:
> > ReplicaFetcherThread--1-6; ReplicaId: 5; MaxWait: 500 ms; MinBytes: 1
> > bytes; RequestInfo: [pets_pageview,25] ->
> > PartitionFetchInfo(0,1048576),[mm_msg,26] ->
> > PartitionFetchInfo(0,1048576),[pets_cashruns_spin,22] ->
> > PartitionFetchInfo(0,1048576),[page_timings,5] ->
> > PartitionFetchInfo(0,1048576),[cafe_purchases,13] ->
> > PartitionFetchInfo(0,1048576),[meetme_spotlight_action,33] ->
> > PartitionFetchInfo(0,1048576),[mob_gift,5] ->
> > PartitionFetchInfo(0,1048576),[cafe_coin_purchases,18] ->
> > PartitionFetchInfo(0,1048576),[security_trigger,11] ->
> > PartitionFetchInfo(0,1048576),[pysm_click,30] ->
> > PartitionFetchInfo(0,1048576),[gold_blacklist,21] ->
> > PartitionFetchInfo(0,1048576),[meetme_oops,9] ->
> > PartitionFetchInfo(0,1048576),[m3_session_info,15] ->
> > PartitionFetchInfo(0,1048576),[link_review,10] ->
> > PartitionFetchInfo(0,1048576),[cafe_debug,28] ->
> > PartitionFetchInfo(0,1048576),[m3_login_button,0] ->
> > PartitionFetchInfo(0,1048576),[pets_level,24] ->
> > PartitionFetchInfo(0,1048576),[login_detail,9] ->
> > PartitionFetchInfo(0,1048576),[click_mail,31] ->
> > PartitionFetchInfo(0,1048576),[pets_wish,15] ->
> > PartitionFetchInfo(0,1048576),[page_view_admin,6] ->
> > PartitionFetchInfo(0,1048576),[hi5_image_cleanup,9] ->
> > PartitionFetchInfo(0,1048576),[pets_wish,24] ->
> > PartitionFetchInfo(0,1048576),[cafe_food_spoiled,23] ->
> > PartitionFetchInfo(0,1048576),[pets_wish,33] ->
> > PartitionFetchInfo(0,1048576),[account_notifications,11] ->
> > PartitionFetchInfo(0,1048576),[google_transactions,7] ->
> > PartitionFetchInfo(0,1048576),[hi5_image_cleanup,3] ->
> > PartitionFetchInfo(0,1048576),[pets_economy_change,15] ->
> > PartitionFetchInfo(0,1048576),[payment,13] ->
> > PartitionFetchInfo(0,1048576),[validation,0] ->
> > PartitionFetchInfo(0,1048576),[meetme_new_contact_count,5] ->
> > PartitionFetchInfo(0,1048576),[mail_send,18] ->
> > PartitionFetchInfo(0,1048576),[lightbox_click,28] ->
> > PartitionFetchInfo(0,1048576),[rso_scanner_append,5] ->
> > PartitionFetchInfo(0,1048576),[mob_gift,2] ->
> > PartitionFetchInfo(0,1048576),[cafe_waiter_tips,24] ->
> > PartitionFetchInfo(0,1048576),[groups_user_actions,22] ->
> > PartitionFetchInfo(0,1048576),[jstiming,23] ->
> > PartitionFetchInfo(0,1048576),[viral_contact_inviters,26] ->
> > PartitionFetchInfo(0,1048576),[meetme_matches_tab,24] ->
> > PartitionFetchInfo(0,1048576),[promotions_targeted_actions,19] ->
> > PartitionFetchInfo(0,1048576),[reg_check,15] ->
> > PartitionFetchInfo(0,1048576),[cafe_waiters_quit,9] ->
> > PartitionFetchInfo(0,1048576),[groups_search,28] ->
> > PartitionFetchInfo(0,1048576),[meetme_auto_message_send,15] ->
> > PartitionFetchInfo(0,1048576),[virtual_gift,21] ->
> > PartitionFetchInfo(0,1048576),[groupchat_room,21] ->
> > PartitionFetchInfo(0,1048576),[rapleaf_inter,10] ->
> > PartitionFetchInfo(0,1048576),[reg_check,18] ->
> > PartitionFetchInfo(0,1048576),[viral_contact_inviters,29] ->
> > PartitionFetchInfo(0,1048576),[mail_send,21] ->
> > PartitionFetchInfo(0,1048576),[pets_economy_change,0] ->
> > PartitionFetchInfo(0,1048576),[pets_classifier_test,19] ->
> > PartitionFetchInfo(0,1048576),[cafe_goals,10] ->
> > PartitionFetchInfo(0,1048576),[pets_browse,9] ->
> > PartitionFetchInfo(0,1048576),[pets_achievements,24] ->
> > PartitionFetchInfo(0,1048576),[content_review_preprocess,31] ->
> > PartitionFetchInfo(0,1048576),[hi5_image_cleanup,12] ->
> > PartitionFetchInfo(0,1048576),[link_render,16] ->
> > PartitionFetchInfo(0,1048576),[ent_wt_winners,2] ->
> > PartitionFetchInfo(0,1048576),[meetme,23] ->
> > PartitionFetchInfo(1178,1048576),[gwallet_notification,30] ->
> > PartitionFetchInfo(0,1048576),[m3_login_button,18] ->
> > PartitionFetchInfo(0,1048576),[mobile_logout,3] ->
> > PartitionFetchInfo(0,1048576),[trialpay_notification,30] ->
> > PartitionFetchInfo(0,1048576),[sponsorpay_notification,35] ->
> > PartitionFetchInfo(0,1048576),[groupchat_room,6] ->
> > PartitionFetchInfo(0,1048576),[pets_reload,30] ->
> > PartitionFetchInfo(0,1048576),[report_source,1] ->
> > PartitionFetchInfo(0,1048576),[pets_nec_buygold,21] ->
> > PartitionFetchInfo(0,1048576),[set_primary_photo,13] ->
> > PartitionFetchInfo(0,1048576),[ent_wt_lb_sort,2] ->
> > PartitionFetchInfo(0,1048576),[video_shares,24] ->
> > PartitionFetchInfo(0,1048576),[viral_contact_importers,13] ->
> > PartitionFetchInfo(0,1048576),[click_mail,13] ->
> > PartitionFetchInfo(0,1048576),[pets_achievements,6] ->
> > PartitionFetchInfo(0,1048576),[newsfeed_likes,10] ->
> > PartitionFetchInfo(0,1048576),[pets_achievements,12] ->
> > PartitionFetchInfo(0,1048576),[registration_email_domain_scan,31] ->
> > PartitionFetchInfo(0,1048576),[photoviewer_message_click,19] ->
> > PartitionFetchInfo(0,1048576),[registration_email_domain_scan,1] ->
> > PartitionFetchInfo(0,1048576),[pets_cashruns_conf,6] ->
> > PartitionFetchInfo(0,1048576),[meetme_headliners_purchase,28] ->
> > PartitionFetchInfo(0,1048576),[pets_cashflow,33] ->
> > PartitionFetchInfo(0,1048576),[top8_feat_users,6] ->
> > PartitionFetchInfo(0,1048576),[http_headers,29] ->
> > PartitionFetchInfo(0,1048576),[pets_cashruns_purchase,24] ->
> > PartitionFetchInfo(0,1048576),[payment,4] ->
> > PartitionFetchInfo(0,1048576),[talk_recommend_click,13] ->
> > PartitionFetchInfo(0,1048576),[mob_help,3] ->
> > PartitionFetchInfo(0,1048576),[auto_cancel,6] ->
> > PartitionFetchInfo(0,1048576),[bouncemail,32] ->
> > PartitionFetchInfo(0,1048576),[user_support,23] ->
> > PartitionFetchInfo(0,1048576),[groups_search,34] ->
> > PartitionFetchInfo(0,1048576),[pets_cashruns_spin,16] ->
> > PartitionFetchInfo(0,1048576),[trialpay_notification,21] ->
> > PartitionFetchInfo(0,1048576),[lightbox_view,6] ->
> > PartitionFetchInfo(0,1048576),[gwallet_notification,21] ->
> > PartitionFetchInfo(0,1048576),[cafe_unspoils,20] ->
> > PartitionFetchInfo(0,1048576),[pets_cashflow,9] ->
> > PartitionFetchInfo(0,1048576),[promotions_targeted_actions,34] ->
> > PartitionFetchInfo(0,1048576),[chat_login,4] ->
> > PartitionFetchInfo(0,1048576),[google_transactions,28] ->
> > PartitionFetchInfo(0,1048576),[meetme_new_contact_count,11] ->
> > PartitionFetchInfo(0,1048576),[meetme_spotlight_purchase,28] ->
> > PartitionFetchInfo(0,1048576),[validation_email_change,24] ->
> > PartitionFetchInfo(0,1048576),[meetme_oops,30] ->
> > PartitionFetchInfo(0,1048576),[pets_reset_achievement,18] ->
> > PartitionFetchInfo(0,1048576),[hi5_blast_photocount,32] ->
> > PartitionFetchInfo(0,1048576),[security_trigger,5] ->
> > PartitionFetchInfo(0,1048576),[pets_cashruns_winners,32] ->
> > PartitionFetchInfo(0,1048576),[validation,24] ->
> > PartitionFetchInfo(0,1048576),[lightbox_click,31] ->
> > PartitionFetchInfo(0,1048576),[viral_landing_page,7] ->
> > PartitionFetchInfo(0,1048576),[mobile_performance,26] ->
> > PartitionFetchInfo(0,1048576),[viral_reg_pv,28] ->
> > PartitionFetchInfo(0,1048576),[statictiming,8] ->
> > PartitionFetchInfo(0,1048576),[hi5_blast_photocount,20] ->
> > PartitionFetchInfo(0,1048576),[mobile_logout,12] ->
> > PartitionFetchInfo(0,1048576),[validation_lightbox_action,5] ->
> > PartitionFetchInfo(0,1048576),[cafe_food_served,10] ->
> > PartitionFetchInfo(0,1048576),[content_checkboxes,4] ->
> > PartitionFetchInfo(0,1048576),[meetme_matches_tab,27] ->
> > PartitionFetchInfo(0,1048576),[pets_cashruns_purchase,27] ->
> > PartitionFetchInfo(0,1048576),[auto_cancel,27] ->
> > PartitionFetchInfo(0,1048576),[pets_reset_achievement,9] ->
> > PartitionFetchInfo(0,1048576),[m3_login_button,9] ->
> > PartitionFetchInfo(0,1048576),[chat_login,16] ->
> > PartitionFetchInfo(0,1048576),[rapleaf_inter,13] ->
> > PartitionFetchInfo(0,1048576),[hi5_image_cleanup,6] ->
> > PartitionFetchInfo(0,1048576),[link_review,1] ->
> > PartitionFetchInfo(0,1048576),[itunes_notification,6] ->
> > PartitionFetchInfo(0,1048576),[m3_session_info,24] ->
> > PartitionFetchInfo(0,1048576),[account_notifications,23] ->
> > PartitionFetchInfo(0,1048576),[photoviewer_message_click,16] ->
> > PartitionFetchInfo(0,1048576),[pets_economy_change,18] ->
> > PartitionFetchInfo(0,1048576),[cafe_coin_purchases,9] ->
> > PartitionFetchInfo(0,1048576),[virtual_gift,3] ->
> > PartitionFetchInfo(0,1048576),[complains,8] ->
> > PartitionFetchInfo(0,1048576),[pets_purchased,20] ->
> > PartitionFetchInfo(0,1048576),[cafe_waiter_tips,18] ->
> > PartitionFetchInfo(0,1048576),[rso_scanner_append,23] ->
> > PartitionFetchInfo(0,1048576),[viral_contact_importers,4] ->
> > PartitionFetchInfo(0,1048576),[viral_contact_inviters,17] ->
> > PartitionFetchInfo(0,1048576),[meetme_matches_tab,21] ->
> > PartitionFetchInfo(0,1048576),[viral_invite,31] ->
> > PartitionFetchInfo(0,1048576),[page_view_api,13] ->
> > PartitionFetchInfo(1143,1048576),[mm_msg,17] ->
> > PartitionFetchInfo(0,1048576),[cancel,22] ->
> > PartitionFetchInfo(0,1048576),[mobile_performance,20] ->
> > PartitionFetchInfo(0,1048576),[set_primary_photo,22] ->
> > PartitionFetchInfo(0,1048576),[registration_email_domain_scan,10] ->
> > PartitionFetchInfo(0,1048576),[mob_properties,28] ->
> > PartitionFetchInfo(0,1048576),[viral_registration,22] ->
> > PartitionFetchInfo(0,1048576),[user_contact,9] ->
> > PartitionFetchInfo(0,1048576),[rapleaf_image,2] ->
> > PartitionFetchInfo(0,1048576),[pets_cashruns_spin,25] ->
> > PartitionFetchInfo(0,1048576),[cafe_debug,7] ->
> > PartitionFetchInfo(0,1048576),[mobile_app_activities,8] ->
> > PartitionFetchInfo(0,1048576),[cafe_food_spoiled,20] ->
> > PartitionFetchInfo(0,1048576),[validation,9] ->
> > PartitionFetchInfo(0,1048576),[pets_achievements,15] ->
> > PartitionFetchInfo(0,1048576),[gwallet_notification,3] ->
> > PartitionFetchInfo(0,1048576),[index_pageview,33] ->
> > PartitionFetchInfo(0,1048576),[security_trigger,23] ->
> > PartitionFetchInfo(0,1048576),[link_render,22] ->
> > PartitionFetchInfo(0,1048576),[lightbox_click,19] ->
> > PartitionFetchInfo(0,1048576),[cafe_waiter_tips,30] ->
> > PartitionFetchInfo(0,1048576),[questions,13] ->
> > PartitionFetchInfo(0,1048576),[hi5_image_cleanup,15] ->
> > PartitionFetchInfo(0,1048576),[phoneflow,31] ->
> > PartitionFetchInfo(0,1048576),[pets_buyback_lightbox,18] ->
> > PartitionFetchInfo(0,1048576),[mob_premium,6] ->
> > PartitionFetchInfo(0,1048576),[cafe_unspoils,5] ->
> > PartitionFetchInfo(0,1048576),[pets_cashruns_spin,34] ->
> > PartitionFetchInfo(0,1048576),[viral_reg_pv,16] ->
> > PartitionFetchInfo(0,1048576),[http_headers,23] ->
> > PartitionFetchInfo(0,1048576),[viral_landing_page,10] ->
> > PartitionFetchInfo(0,1048576),[mob_properties,13] ->
> > PartitionFetchInfo(0,1048576),[mobile_api,13] ->
> > PartitionFetchInfo(0,1048576),[pets_purchased,32] ->
> > PartitionFetchInfo(0,1048576),[pets_cashruns_bid,22] ->
> > PartitionFetchInfo(0,1048576),[photo_view,22] ->
> > PartitionFetchInfo(0,1048576),[viral_import_conf_pv,18] ->
> > PartitionFetchInfo(0,1048576),[mob_viral_job,28] ->
> > PartitionFetchInfo(0,1048576),[newsfeed_filter_change,22] ->
> > PartitionFetchInfo(0,1048576),[password_change,12] ->
> > PartitionFetchInfo(0,1048576),[ent_wt_winners,23] ->
> > PartitionFetchInfo(0,1048576),[m3_account_cancel,15] ->
> > PartitionFetchInfo(0,1048576),[meetme_auto_message_send,6] ->
> > PartitionFetchInfo(0,1048576),[pets_cashflow,27] ->
> > PartitionFetchInfo(0,1048576),[rapleaf_inter,31] ->
> > PartitionFetchInfo(0,1048576),[user_contact,33] ->
> > PartitionFetchInfo(0,1048576),[validation_email_change,0] ->
> > PartitionFetchInfo(0,1048576),[pets_classifier_pysb,18] ->
> > PartitionFetchInfo(0,1048576),[http_headers,14] ->
> > PartitionFetchInfo(0,1048576),[meetme_matches_tab,30] ->
> > PartitionFetchInfo(0,1048576),[m3_user_block,23] ->
> > PartitionFetchInfo(0,1048576),[mob_premium,9] ->
> > PartitionFetchInfo(0,1048576),[page_view_api,19] ->
> > PartitionFetchInfo(1125,1048576),[photoviewer_like,15] ->
> > PartitionFetchInfo(0,1048576),[meetme_oops,27] ->
> > PartitionFetchInfo(0,1048576),[photoviewer_message_click,1] ->
> > PartitionFetchInfo(0,1048576),[page_view_admin,27] ->
> > PartitionFetchInfo(0,1048576),[cafe_waiters_quit,6] ->
> > PartitionFetchInfo(0,1048576),[mob_premium,15] ->
> > PartitionFetchInfo(0,1048576),[itunes_notification,15] ->
> > PartitionFetchInfo(0,1048576),[phoneflow,22] ->
> > PartitionFetchInfo(0,1048576),[auto_cancel,30] ->
> > PartitionFetchInfo(0,1048576),[skins_library,33] ->
> > PartitionFetchInfo(0,1048576),[hi5_blast_photocount,2] ->
> > PartitionFetchInfo(0,1048576),[security_trigger,8] ->
> > PartitionFetchInfo(0,1048576),[test_assign,0] ->
> > PartitionFetchInfo(0,1048576),[test_assign,9] ->
> > PartitionFetchInfo(0,1048576),[cafe_badge,3] ->
> > PartitionFetchInfo(0,1048576),[mob_properties,31] ->
> > PartitionFetchInfo(0,1048576),[pets_classifier_bp,7] ->
> > PartitionFetchInfo(0,1048576),[chat_login,13] ->
> > PartitionFetchInfo(0,1048576),[cafe_waiters_hire,18] ->
> > PartitionFetchInfo(0,1048576),[click_mail,7] ->
> > PartitionFetchInfo(0,1048576),[mob_gift,14] ->
> > PartitionFetchInfo(0,1048576),[password_change,3] ->
> > PartitionFetchInfo(0,1048576),[mm_msg,8] ->
> > PartitionFetchInfo(0,1048576),[tagline,1] ->
> > PartitionFetchInfo(0,1048576),[meetme_oops,21] ->
> > PartitionFetchInfo(0,1048576),[pets_cashflow,18] ->
> > PartitionFetchInfo(0,1048576),[newsfeed_likes,22] ->
> > PartitionFetchInfo(0,1048576),[video_shares,33] ->
> > PartitionFetchInfo(0,1048576),[pets_classifier_test,28] ->
> > PartitionFetchInfo(0,1048576),[itunes_notification,33] ->
> > PartitionFetchInfo(0,1048576),[trialpay_notification,24] ->
> > PartitionFetchInfo(0,1048576),[viral_import,28] ->
> > PartitionFetchInfo(0,1048576),[chat_guest,4] ->
> > PartitionFetchInfo(0,1048576),[registration_mobile,35] ->
> > PartitionFetchInfo(0,1048576),[sponsorpay_notification,20] ->
> > PartitionFetchInfo(0,1048576),[tagline,16] ->
> > PartitionFetchInfo(0,1048576),[auto_cancel,15] ->
> > PartitionFetchInfo(0,1048576),[pets_level,0] ->
> > PartitionFetchInfo(0,1048576),[security_trigger,20] ->
> > PartitionFetchInfo(0,1048576),[pets_cashruns_bid,7] ->
> > PartitionFetchInfo(0,1048576),[statictiming,29] ->
> > PartitionFetchInfo(0,1048576),[meetme_auto_message_send,21] ->
> > PartitionFetchInfo(0,1048576),[pets_cashruns_pageview,29] ->
> > PartitionFetchInfo(0,1048576),[newsfeed_likes,13] ->
> > PartitionFetchInfo(0,1048576),[page_view_admin,24] ->
> > PartitionFetchInfo(0,1048576),[cafe_waiters_hire,30] ->
> > PartitionFetchInfo(0,1048576),[matches_bar_view,35] ->
> > PartitionFetchInfo(0,1048576),[pets_pageview,31] ->
> > PartitionFetchInfo(0,1048576),[external_url,15] ->
> > PartitionFetchInfo(0,1048576),[newsfeed_posts,34] ->
> > PartitionFetchInfo(0,1048576),[viral_import,13] ->
> > PartitionFetchInfo(0,1048576),[pets_classifier_bp,22] ->
> > PartitionFetchInfo(0,1048576),[virtual_gift,6] ->
> > PartitionFetchInfo(0,1048576),[rapleaf_image,17] ->
> > PartitionFetchInfo(0,1048576),[cafe_goals,28] ->
> > PartitionFetchInfo(0,1048576),[language_reg,35] ->
> > PartitionFetchInfo(0,1048576),[pets_nec_view,12] ->
> > PartitionFetchInfo(0,1048576),[pets_economy_change,3] ->
> > PartitionFetchInfo(0,1048576),[pets_purchased,26] ->
> > PartitionFetchInfo(0,1048576),[mob_properties,10] ->
> > PartitionFetchInfo(0,1048576),[mob_gift,26] ->
> > PartitionFetchInfo(0,1048576),[registration_email_domain_scan,13] ->
> > PartitionFetchInfo(0,1048576),[viral_import_pv,22] ->
> > PartitionFetchInfo(0,1048576),[photo_view,28] ->
> > PartitionFetchInfo(0,1048576),[videos,10] ->
> > PartitionFetchInfo(0,1048576),[cafe_coin_purchases,15] ->
> > PartitionFetchInfo(0,1048576),[phoneflow,19] ->
> > PartitionFetchInfo(0,1048576),[cafe_debug,19] ->
> > PartitionFetchInfo(0,1048576),[pets_level,3] ->
> > PartitionFetchInfo(0,1048576),[mob_gift,23] ->
> > PartitionFetchInfo(0,1048576),[pets_reset_achievement,27] ->
> > PartitionFetchInfo(0,1048576),[m3_profile_views,2] ->
> > PartitionFetchInfo(0,1048576),[bouncemail,5] ->
> > PartitionFetchInfo(0,1048576),[mob_properties,4] ->
> > PartitionFetchInfo(0,1048576),[content_review_preprocess,28] ->
> > PartitionFetchInfo(0,1048576),[pysm_click,3] ->
> > PartitionFetchInfo(0,1048576),[m3_auth,23] ->
> > PartitionFetchInfo(0,1048576),[friend_request,2] ->
> > PartitionFetchInfo(0,1048576),[photoviewer_message_click,4] ->
> > PartitionFetchInfo(0,1048576),[push_notifications,19] ->
> > PartitionFetchInfo(0,1048576),[mob_spend_ap,8] ->
> > PartitionFetchInfo(0,1048576),[purchase_actions,3] ->
> > PartitionFetchInfo(0,1048576),[promotions_targeted_actions,13] ->
> > PartitionFetchInfo(0,1048576),[sponsorpay_notification,23] ->
> > PartitionFetchInfo(0,1048576),[cafe_unspoils,8] ->
> > PartitionFetchInfo(0,1048576),[meetme_spotlight_action,3] ->
> > PartitionFetchInfo(0,1048576),[viral_registration,7] ->
> > PartitionFetchInfo(0,1048576),[complains,20] ->
> > PartitionFetchInfo(0,1048576),[hi5_image_cleanup,18] ->
> > PartitionFetchInfo(0,1048576),[sponsorpay_notification,5] ->
> > PartitionFetchInfo(0,1048576),[pets_nec_view,0] ->
> > PartitionFetchInfo(0,1048576),[chat_guest,16] ->
> > PartitionFetchInfo(0,1048576),[promo_toast_view,1] ->
> > PartitionFetchInfo(0,1048576),[cancel,25] ->
> > PartitionFetchInfo(0,1048576),[jstiming,8] ->
> > PartitionFetchInfo(0,1048576),[pets_wish,12] ->
> > PartitionFetchInfo(0,1048576),[rapleaf_image,5] ->
> > PartitionFetchInfo(0,1048576),[language_reg,29] ->
> > PartitionFetchInfo(0,1048576),[long_query,28] ->
> > PartitionFetchInfo(0,1048576),[pets_cashflow,6] ->
> > PartitionFetchInfo(0,1048576),[mobile_click,28] ->
> > PartitionFetchInfo(0,1048576),[mob_help,18] ->
> > PartitionFetchInfo(0,1048576),[talk_recommend_click,22] ->
> > PartitionFetchInfo(0,1048576),[set_primary_photo,16] ->
> > PartitionFetchInfo(0,1048576),[pets_browse,3] -> Part
> > java.net.SocketTimeoutException
> >         at
> > sun.nio.ch.SocketAdaptor$SocketInputStream.read(SocketAdaptor.java:201)
> >         at sun.nio.ch.ChannelInputStream.read(ChannelInputStream.java:86)
> >         at
> >
> java.nio.channels.Channels$ReadableByteChannelImpl.read(Channels.java:221)
> >         at kafka.utils.Utils$.read(Utils.scala:394)
> >         at
> >
> >
> kafka.network.BoundedByteBufferReceive.readFrom(BoundedByteBufferReceive.scala:54)
> >         at
> > kafka.network.Receive$class.readCompletely(Transmission.scala:56)
> >         at
> >
> >
> kafka.network.BoundedByteBufferReceive.readCompletely(BoundedByteBufferReceive.scala:29)
> >         at
> kafka.network.BlockingChannel.receive(BlockingChannel.scala:100)
> >         at
> > kafka.consumer.SimpleConsumer.liftedTree1$1(SimpleConsumer.scala:81)
> >         at
> >
> >
> kafka.consumer.SimpleConsumer.kafka$consumer$SimpleConsumer$$sendRequest(SimpleConsumer.scala:71)
> >         at
> >
> >
> kafka.consumer.SimpleConsumer$$anonfun$fetch$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(SimpleConsumer.scala:110)
> >         at
> >
> >
> kafka.consumer.SimpleConsumer$$anonfun$fetch$1$$anonfun$apply$mcV$sp$1.apply(SimpleConsumer.scala:110)
> >         at
> >
> >
> kafka.consumer.SimpleConsumer$$anonfun$fetch$1$$anonfun$apply$mcV$sp$1.apply(SimpleConsumer.scala:110)
> >         at kafka.metrics.KafkaTimer.time(KafkaTimer.scala:33)
> >         at
> >
> >
> kafka.consumer.SimpleConsumer$$anonfun$fetch$1.apply$mcV$sp(SimpleConsumer.scala:109)
> >         at
> >
> >
> kafka.consumer.SimpleConsumer$$anonfun$fetch$1.apply(SimpleConsumer.scala:109)
> >         at
> >
> >
> kafka.consumer.SimpleConsumer$$anonfun$fetch$1.apply(SimpleConsumer.scala:109)
> >         at kafka.metrics.KafkaTimer.time(KafkaTimer.scala:33)
> >         at kafka.consumer.SimpleConsumer.fetch(SimpleConsumer.scala:108)
> >         at
> >
> >
> kafka.server.AbstractFetcherThread.processFetchRequest(AbstractFetcherThread.scala:96)
> >         at
> > kafka.server.AbstractFetcherThread.doWork(AbstractFetcherThread.scala:88)
> >         at
> kafka.utils.ShutdownableThread.run(ShutdownableThread.scala:51)
> >
> >
> > [2013-08-26 12:38:18,854] INFO [ReplicaFetcherThread--1-6], Stopped
> >  (kafka.server.ReplicaFetcherThread)
> > [2013-08-26 12:38:18,854] INFO [ReplicaFetcherThread--1-6], Shutdown
> > completed (kafka.server.ReplicaFetcherThread)
> > [2013-08-26 12:38:18,858] INFO [ReplicaFetcherThread-0-6], Shutting down
> > (kafka.server.ReplicaFetcherThread)
> > [2013-08-26 12:38:23,821] INFO Closing socket connection to /10.15.11.24
> .
> > (kafka.network.Processor)
> > [2013-08-26 12:38:23,899] INFO [ReplicaFetcherThread-0-6], Stopped
> >  (kafka.server.ReplicaFetcherThread)
> > [2013-08-26 12:38:23,899] INFO [ReplicaFetcherThread-0-6], Shutdown
> > completed (kafka.server.ReplicaFetcherThread)
> > [2013-08-26 12:38:23,899] INFO [ReplicaFetcherThread-2-6], Shutting down
> > (kafka.server.ReplicaFetcherThread)
> > [2013-08-26 12:38:24,064] INFO [ReplicaFetcherThread-2-6], Stopped
> >  (kafka.server.ReplicaFetcherThread)
> > [2013-08-26 12:38:24,064] INFO [ReplicaFetcherThread-2-6], Shutdown
> > completed (kafka.server.ReplicaFetcherThread)
> >
> >
> >
> > On Tue, Aug 27, 2013 at 10:05 AM, Neha Narkhede <neha.narkh...@gmail.com
> > >wrote:
> >
> > > When you said you tried to shutdown the broker, did you try controlled
> > > shutdown? Do you see "Shutting down" in the logs of the broker that
> > seemed
> > > to hang?
> > >
> > > Thanks,
> > > Neha
> > >
> > >
> > > On Tue, Aug 27, 2013 at 9:12 AM, Vadim Keylis <vkeylis2...@gmail.com>
> > > wrote:
> > >
> > > > No. They actually were stuck, not responding to the shutdown request. I
> > > > had to kill them with the kill -9 command. I tried to take a heap dump,
> > > > which hung as well.
> > > >
> > > > Sent from my iPhone
> > > >
> > > > On Aug 27, 2013, at 8:14 AM, Jun Rao <jun...@gmail.com> wrote:
> > > >
> > > > > The errors you listed may not be serious, as long as they are
> > > transient.
> > > > > When you say 2 of the brokers are not responsive, are they issuing
> > > fetch
> > > > > requests to the 3rd broker (look at the request log)? During a
> > restart
> > > of
> > > > > the whole cluster, brokers that are started later may not have any
> > > leader
> > > > > and thus won't take any request from the client. You will need to
> run
> > > the
> > > > > leader balance tool.
> > > > >
> > > > > Thanks,
> > > > >
> > > > > Jun
> > > > >
> > > > >
> > > > > On Mon, Aug 26, 2013 at 10:12 PM, Vadim Keylis <
> > vkeylis2...@gmail.com
> > > > >wrote:
> > > > >
> > > > > >> Somehow I am getting my instances of Kafka to crash. I started the
> > > > > >> Kafka instances one by one and they started successfully. Later,
> > > > > >> somehow, two of the 3 instances became completely unresponsive. The
> > > > > >> process is running, but connecting over JMX or taking a heap dump is
> > > > > >> not possible. The last one is somewhat responsive.
> > > > > >> I am not sure how the servers got into this state. Is there anything I
> > > > > >> can monitor to predict that instances are about to crash? What are the
> > > > > >> ways to recover without data loss? What am I doing wrong to get into
> > > > > >> this state? Please advise.
> > > > > >> I poked around the error logs on the hosts that are not responsive, and
> > > > > >> here are the errors I found. One that I have not listed is
> > > > > >> LeaderNotFoundException.
> > > > >>
> > > > > >> The most puzzling one is about ZooKeeper, as it was not redeployed or
> > > > > >> updated.
> > > > >> [2013-08-26 12:14:35,357] ERROR [KafkaApi-5] Error while fetching
> > > > metadata
> > > > >> for partition [self_reactivation,0] (kafka.server.KafkaApis)
> > > > >> kafka.common.ReplicaNotAvailableException
> > > > >>        at
> > > > >>
> > > >
> > kafka.server.KafkaApis$$anonfun$17$$anonfun$20.apply(KafkaApis.scala:471)
> > > > >>        at
> > > > >>
> > > >
> > kafka.server.KafkaApis$$anonfun$17$$anonfun$20.apply(KafkaApis.scala:456)
> > > > >>        at
> > > > >>
> > > > >>
> > > >
> > >
> >
> scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:233)
> > > > >>        at
> > > > >>
> > > > >>
> > > >
> > >
> >
> scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:233)
> > > > >>        at
> > > > >>
> > > > >>
> > > >
> > >
> >
> scala.collection.LinearSeqOptimized$class.foreach(LinearSeqOptimized.scala:59)
> > > > >>        at scala.collection.immutable.List.foreach(List.scala:76)
> > > > >>        at
> > > > >>
> > scala.collection.TraversableLike$class.map(TraversableLike.scala:233)
> > > > >>
> > > > >>
> > > > >> in server.log
> > > > >> [2013-08-26 21:00:51,942] ERROR Conditional update of path
> > > > >> /brokers/topics/meetme/partitions/12/state with data {
> > > > >> "controller_epoch":6, "isr":[ 5 ], "leader":5, "leader_epoch":1,
> > > > >> "version":1 } and expected version 2 failed due to
> > > > >> org.apache.zookeeper.KeeperException$BadVersionException:
> > > > KeeperErrorCode =
> > > > >> BadVersion for /brokers/topics/meetme/partitions/12/state
> > > > >> (kafka.utils.ZkUtils$)
> > > > >> [2013-08-26 21:00:51,943] INFO Partition [meetme,12] on broker 5:
> > > Cached
> > > > >> zkVersion [2] not equal to that in zookeeper, skip updating ISR
> > > > >> (kafka.cluster.Partition)
> > > > >> [2013-08-26 21:00:51,990] INFO Partition [meetme,4] on broker 5:
> > > > Shrinking
> > > > >> ISR for partition [meetme,4] from 5,4 to 5
> (kafka.cluster.Partition)
> > > > >> [2013-08-26 21:00:51,993] ERROR Conditional update of path
> > > > >> /brokers/topics/meetme/partitions/4/state with data {
> > > > "controller_epoch":6,
> > > > >> "isr":[ 5 ], "leader":5, "leader_epoch":1, "version":1 } and
> > expected
> > > > >> version 2 failed due to
> > > > >> org.apache.zookeeper.KeeperException$BadVersionException:
> > > > KeeperErrorCode =
> > > > >> BadVersion for /brokers/topics/meetme/partitions/4/state
> > > > >> (kafka.utils.ZkUtils$)
> > > > >> [2013-08-26 21:00:51,993] INFO Partition [meetme,4] on broker 5:
> > > Cached
> > > > >> zkVersion [2] not equal to that in zookeeper, skip updating ISR
> > > > >> (kafka.cluster.Partition)
> > > > >> [2013-08-26 21:00:52,103] INFO Partition [meetme,6] on broker 5:
> > > > Shrinking
> > > > >> ISR for partition [meetme,6] from 5,4 to 5
> (kafka.cluster.Partition)
> > > > >> [2013-08-26 21:00:52,107] ERROR Conditional update of path
> > > > >> /brokers/topics/meetme/partitions/6/state with data {
> > > > "controller_epoch":6,
> > > > >> "isr":[ 5 ], "leader":5, "leader_epoch":2, "version":1 } and
> > expected
> > > > >> version 3 failed due to
> > > > >> org.apache.zookeeper.KeeperException$BadVersionException:
> > > > KeeperErrorCode =
> > > > >> BadVersion for /brokers/topics/meetme/partitions/6/state
> > > > >> (kafka.utils.ZkUtils$)
> > > > >> [2013-08-26 21:00:52,107] INFO Partition [meetme,6] on broker 5:
> > > Cached
> > > > >> zkVersion [3] not equal to that in zookeeper, skip updating ISR
> > > > >> (kafka.cluster.Partition)
> > > > >>
> > > >
> > >
> >
>

Reply via email to