On Thu, Apr 27, 2023 at 11:36 AM Tarrago, Eli (RIS-BCT)
<eli.tarr...@lexisnexisrisk.com> wrote:
>
> After working on this issue for a bit, the active plan is to fail over the
> master to the “west” DC: perform a realm pull from the west so that it
> forces the failover to occur, then have the “east” DC pull the realm data
> back. Hopefully that will get both sides back in sync.
>
> My concern with this approach is that both sides are “active”, meaning the
> client has been writing data to both endpoints. Will this cause an issue
> where “west” will have data that the metadata has no record of, and then
> delete the data?
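For reference, the usual command sequence for that kind of failover
(promoting the secondary zone to master) is sketched below. This is only an
outline of the standard multisite failover steps, not necessarily what was
run here; the endpoint URL is the one from the zonegroup config later in this
thread, and the credential values are placeholders for the redacted system
keys:

    # On a west-zone host: pull the realm/period from the current master
    # (east), authenticating with the shared multisite system user.
    west01:~# radosgw-admin realm pull --url=http://east01.example.net:8080 \
                  --access-key=<system-access-key> --secret=<system-secret-key>

    # Promote rgw-west to master (and default zone) going forward.
    west01:~# radosgw-admin zone modify --rgw-zone=rgw-west --master --default

    # Commit a new period so the rest of the realm learns about the new master.
    west01:~# radosgw-admin period update --commit

    # Then restart the radosgw daemons in the west zone.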
no object data would be deleted as a result of metadata failover issues, no

>
> Thanks
>
> From: Tarrago, Eli (RIS-BCT) <eli.tarr...@lexisnexisrisk.com>
> Date: Thursday, April 20, 2023 at 3:13 PM
> To: Ceph Users <ceph-users@ceph.io>
> Subject: Radosgw multisite replication issues
>
> Good Afternoon,
>
> I am experiencing an issue where east-1 is no longer able to replicate from
> west-1; however, after a realm pull, west-1 is now able to replicate from
> east-1.
>
> In other words:
> West <- Can Replicate <- East
> West -> Cannot Replicate -> East
>
> After confirming the access and secret keys are identical on both sides, I
> restarted all radosgw services.
>
> Here is the current status of the cluster below.
>
> Thank you for your help,
>
> Eli Tarrago
>
>
> root@east01:~# radosgw-admin zone get
> {
>     "id": "ddd66ab8-0417-46ee-a53b-043352a63f93",
>     "name": "rgw-east",
>     "domain_root": "rgw-east.rgw.meta:root",
>     "control_pool": "rgw-east.rgw.control",
>     "gc_pool": "rgw-east.rgw.log:gc",
>     "lc_pool": "rgw-east.rgw.log:lc",
>     "log_pool": "rgw-east.rgw.log",
>     "intent_log_pool": "rgw-east.rgw.log:intent",
>     "usage_log_pool": "rgw-east.rgw.log:usage",
>     "roles_pool": "rgw-east.rgw.meta:roles",
>     "reshard_pool": "rgw-east.rgw.log:reshard",
>     "user_keys_pool": "rgw-east.rgw.meta:users.keys",
>     "user_email_pool": "rgw-east.rgw.meta:users.email",
>     "user_swift_pool": "rgw-east.rgw.meta:users.swift",
>     "user_uid_pool": "rgw-east.rgw.meta:users.uid",
>     "otp_pool": "rgw-east.rgw.otp",
>     "system_key": {
>         "access_key": "PxxxxxxxxxxxxxxxxW",
>         "secret_key": "Hxxxxxxxxxxxxxxxx6"
>     },
>     "placement_pools": [
>         {
>             "key": "default-placement",
>             "val": {
>                 "index_pool": "rgw-east.rgw.buckets.index",
>                 "storage_classes": {
>                     "STANDARD": {
>                         "data_pool": "rgw-east.rgw.buckets.data"
>                     }
>                 },
>                 "data_extra_pool": "rgw-east.rgw.buckets.non-ec",
>                 "index_type": 0
>             }
>         }
>     ],
>     "realm_id": "98e0e391-16fb-48da-80a5-08437fd81789",
>     "notif_pool": "rgw-east.rgw.log:notif"
> }
>
> root@west01:~# radosgw-admin zone get
> {
>     "id": "b2a4a31c-1505-4fdc-b2e0-ea07d9463da1",
>     "name": "rgw-west",
>     "domain_root": "rgw-west.rgw.meta:root",
>     "control_pool": "rgw-west.rgw.control",
>     "gc_pool": "rgw-west.rgw.log:gc",
>     "lc_pool": "rgw-west.rgw.log:lc",
>     "log_pool": "rgw-west.rgw.log",
>     "intent_log_pool": "rgw-west.rgw.log:intent",
>     "usage_log_pool": "rgw-west.rgw.log:usage",
>     "roles_pool": "rgw-west.rgw.meta:roles",
>     "reshard_pool": "rgw-west.rgw.log:reshard",
>     "user_keys_pool": "rgw-west.rgw.meta:users.keys",
>     "user_email_pool": "rgw-west.rgw.meta:users.email",
>     "user_swift_pool": "rgw-west.rgw.meta:users.swift",
>     "user_uid_pool": "rgw-west.rgw.meta:users.uid",
>     "otp_pool": "rgw-west.rgw.otp",
>     "system_key": {
>         "access_key": "PxxxxxxxxxxxxxxW",
>         "secret_key": "Hxxxxxxxxxxxxxx6"
>     },
>     "placement_pools": [
>         {
>             "key": "default-placement",
>             "val": {
>                 "index_pool": "rgw-west.rgw.buckets.index",
>                 "storage_classes": {
>                     "STANDARD": {
>                         "data_pool": "rgw-west.rgw.buckets.data"
>                     }
>                 },
>                 "data_extra_pool": "rgw-west.rgw.buckets.non-ec",
>                 "index_type": 0
>             }
>         }
>     ],
>     "realm_id": "98e0e391-16fb-48da-80a5-08437fd81789",
>     "notif_pool": "rgw-west.rgw.log:notif"
> }
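As a side note on the "keys are identical on both sides" check above: since
both zone configs carry a system_key, one quick way to compare them (just a
sketch; it assumes jq is installed, but any JSON-aware tool works) is:

    # Dump only the system_key section of each zone's config and compare:
    east01:~# radosgw-admin zone get --rgw-zone=rgw-east | jq .system_key
    west01:~# radosgw-admin zone get --rgw-zone=rgw-west | jq .system_key

    # The same credentials must also exist as an actual (system) user,
    # which can be checked with:
    east01:~# radosgw-admin user info --access-key <system-access-key>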
> east01:~# radosgw-admin metadata sync status
> {
>     "sync_status": {
>         "info": {
>             "status": "init",
>             "num_shards": 0,
>             "period": "",
>             "realm_epoch": 0
>         },
>         "markers": []
>     },
>     "full_sync": {
>         "total": 0,
>         "complete": 0
>     }
> }
>
> west01:~# radosgw-admin metadata sync status
> {
>     "sync_status": {
>         "info": {
>             "status": "sync",
>             "num_shards": 64,
>             "period": "44b6b308-e2d8-4835-8518-c90447e7b55c",
>             "realm_epoch": 3
>         },
>         "markers": [
>             {
>                 "key": 0,
>                 "val": {
>                     "state": 1,
>                     "marker": "",
>                     "next_step_marker": "",
>                     "total_entries": 46,
>                     "pos": 0,
>                     "timestamp": "0.000000",
>                     "realm_epoch": 3
>                 }
>             },
>             #### goes on for a long time…
>             {
>                 "key": 63,
>                 "val": {
>                     "state": 1,
>                     "marker": "",
>                     "next_step_marker": "",
>                     "total_entries": 0,
>                     "pos": 0,
>                     "timestamp": "0.000000",
>                     "realm_epoch": 3
>                 }
>             }
>         ]
>     },
>     "full_sync": {
>         "total": 46,
>         "complete": 46
>     }
> }
>
> east01:~# radosgw-admin sync status
>           realm 98e0e391-16fb-48da-80a5-08437fd81789 (rgw-blobs)
>       zonegroup 0e0faf4e-39f5-402e-9dbb-4a1cdc249ddd (EastWestceph)
>            zone ddd66ab8-0417-46ee-a53b-043352a63f93 (rgw-east)
>   metadata sync no sync (zone is master)
> 2023-04-20T19:03:13.388+0000 7f25fa036c80 0 ERROR: failed to fetch datalog info
>       data sync source: b2a4a31c-1505-4fdc-b2e0-ea07d9463da1 (rgw-west)
>                         failed to retrieve sync info: (13) Permission denied

does the multisite system user exist on the rgw-west zone? you can check
there with `radosgw-admin user info --access-key PxxxxxxxxxxxxxxW`

the sync status on rgw-west shows that metadata sync is caught up, so i would
expect it to have that user metadata, but maybe not?

>
> west01:~# radosgw-admin sync status
>           realm 98e0e391-16fb-48da-80a5-08437fd81789 (rgw-blobs)
>       zonegroup 0e0faf4e-39f5-402e-9dbb-4a1cdc249ddd (EastWestceph)
>            zone b2a4a31c-1505-4fdc-b2e0-ea07d9463da1 (rgw-west)
>   metadata sync syncing
>                 full sync: 0/64 shards
>                 incremental sync: 64/64 shards
>                 metadata is caught up with master
>       data sync source: ddd66ab8-0417-46ee-a53b-043352a63f93 (rgw-east)
>                         syncing
>                         full sync: 0/128 shards
>                         incremental sync: 128/128 shards
>                         data is behind on 16 shards
>                         behind shards: [5,56,62,65,66,70,76,86,87,94,104,107,111,113,120,126]
>                         oldest incremental change not applied: 2023-04-20T19:02:48.783283+0000 [5]
>
> east01:~# radosgw-admin zonegroup get
> {
>     "id": "0e0faf4e-39f5-402e-9dbb-4a1cdc249ddd",
>     "name": "EastWestceph",
>     "api_name": "EastWestceph",
>     "is_master": "true",
>     "endpoints": [
>         "http://east01.example.net:8080",
>         "http://east02.example.net:8080",
>         "http://east03.example.net:8080",
>         "http://west01.example.net:8080",
>         "http://west02.example.net:8080",
>         "http://west03.example.net:8080"
>     ],
>     "hostnames": [
>         "eastvip.example.net",
>         "westvip.example.net"
>     ],
>     "hostnames_s3website": [],
>     "master_zone": "ddd66ab8-0417-46ee-a53b-043352a63f93",
>     "zones": [
>         {
>             "id": "b2a4a31c-1505-4fdc-b2e0-ea07d9463da1",
>             "name": "rgw-west",
>             "endpoints": [
>                 "http://west01.example.net:8080",
>                 "http://west02.example.net:8080",
>                 "http://west03.example.net:8080"
>             ],
>             "log_meta": "false",
>             "log_data": "true",
>             "bucket_index_max_shards": 0,
>             "read_only": "false",
>             "tier_type": "",
>             "sync_from_all": "true",
>             "sync_from": [],
>             "redirect_zone": ""
>         },
>         {
>             "id": "ddd66ab8-0417-46ee-a53b-043352a63f93",
>             "name": "rgw-east",
>             "endpoints": [
>                 "http://east01.example.net:8080",
>                 "http://east02.example.net:8080",
>                 "http://east03.example.net:8080"
>             ],
>             "log_meta": "false",
>             "log_data": "true",
>             "bucket_index_max_shards": 0,
>             "read_only": "false",
>             "tier_type": "",
>             "sync_from_all": "true",
>             "sync_from": [],
>             "redirect_zone": ""
>         }
>     ],
>     "placement_targets": [
>         {
>             "name": "default-placement",
>             "tags": [],
>             "storage_classes": [
>                 "STANDARD"
>             ]
>         }
>     ],
>     "default_placement": "default-placement",
>     "realm_id": "98e0e391-16fb-48da-80a5-08437fd81789",
>     "sync_policy": {
>         "groups": []
>     }
> }
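Following up on the question above about the multisite system user: a rough
sketch of how that check, and one possible recovery, might look on the west
side. This assumes the access key is the redacted one from the zone configs
and that re-running metadata sync is acceptable in this cluster; it is an
outline, not a prescription:

    # Does rgw-west know about the system user that rgw-east authenticates with?
    west01:~# radosgw-admin user info --access-key PxxxxxxxxxxxxxxW

    # If the user is missing there even though metadata sync claims to be
    # caught up, re-initializing and re-running metadata sync on the west
    # zone is one way to force the user metadata to be fetched again from
    # the master:
    west01:~# radosgw-admin metadata sync init
    west01:~# radosgw-admin metadata sync run

    # Afterwards, restart the radosgw daemons in the west zone and re-check
    # `radosgw-admin sync status` on east01.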