Hi, we have a small (3x10GB) ceph cluster for testing, running Ceph 0.72.2 with RadosGW inside an Apache2. All the three nodes are identical: each has one osd, one mon, one radosgw.
The application (self-written) uses Perl's Amazon::S3 for a basic "connect, create bucket, put a pdf into the bucket" from another host. Now we are facing an interesting problem, I do not really understand: after a radosgw restart, the very first create bucket command will fail. Looking at the log, it seems to be the check_version function, inside the OSD, but I am not totaly sure and would love if someone with more clue could have a look at the log: > 12:35:33.340072 7ff634416700 1 ====== starting new request req=0xd15c20 ===== > 12:35:33.340932 7ff634416700 1 -- 192.0.2.11:0/1015476 --> > 192.0.2.11:6800/2354 -- osd_op(client.293418.0:225 azd_dev.buckets > [omap-get-vals 0~16] 17.35ac3502 e170) v4 -- ?+0 0xd3fcb0 con 0xc73b20 > 12:35:33.344357 7ff652358700 1 -- 192.0.2.11:0/1015476 <== osd.2 > 192.0.2.11:6800/2354 105 ==== osd_op_reply(225 azd_dev.buckets [omap-get-vals > 0~16] v0'0 uv38777 ondisk = 0) v6 ==== 174+0+1194 (890368809 0 4124636948) > 0xd3fd70 con 0xc73b20 > 12:35:33.344765 7ff634416700 1 -- 192.0.2.11:0/1015476 --> > 192.0.2.9:6800/2222 -- osd_op(client.293418.0:226 dev_foo_bucket [call > version.read,getxattrs,stat] 15.2f6b990 e170) v4 -- ?+0 0xd40a00 con 0xc5eb60 > 12:35:33.347150 7ff652358700 1 -- 192.0.2.11:0/1015476 <== osd.0 > 192.0.2.9:6800/2222 88 ==== osd_op_reply(226 dev_foo_bucket > [call,getxattrs,stat] v0'0 uv2 ondisk = 0) v6 ==== 263+0+139 (3834169773 0 > 732307286) 0xd40bb0 con 0xc5eb60 > 12:35:33.347285 7ff634416700 1 -- 192.0.2.11:0/1015476 --> > 192.0.2.9:6800/2222 -- osd_op(client.293418.0:227 dev_foo_bucket [call > version.check_conds,call version.read,read 0~524288] 15.2f6b990 e170) v4 -- > ?+0 0xd420e0 con 0xc5eb60 > 12:35:33.348252 7ff652358700 1 -- 192.0.2.11:0/1015476 <== osd.0 > 192.0.2.9:6800/2222 89 ==== osd_op_reply(227 dev_foo_bucket [call,call,read > 0~524288] v0'0 uv2 ondisk = 0) v6 ==== 263+0+182 (659711759 0 978969982) > 0xd427f0 con 0xc5eb60 > 12:35:33.348375 7ff634416700 1 -- 192.0.2.11:0/1015476 --> > 192.0.2.9:6800/2222 -- osd_op(client.293418.0:228 > .bucket.meta.dev_foo_bucket:default.4878.161 [call > version.read,getxattrs,stat] 15.8914af77 e170) v4 -- ?+0 0xd420e0 con 0xc5eb60 > 12:35:33.349764 7ff652358700 1 -- 192.0.2.11:0/1015476 <== osd.0 > 192.0.2.9:6800/2222 90 ==== osd_op_reply(228 > .bucket.meta.dev_foo_bucket:default.4878.161 [call,getxattrs,stat] v0'0 uv12 > ondisk = 0) v6 ==== 293+0+308 (916756706 0 993046662) 0xd420e0 con 0xc5eb60 > 12:35:33.349874 7ff634416700 1 -- 192.0.2.11:0/1015476 --> > 192.0.2.9:6800/2222 -- osd_op(client.293418.0:229 > .bucket.meta.dev_foo_bucket:default.4878.161 [call version.check_conds,call > version.read,read 0~524288] 15.8914af77 e170) v4 -- ?+0 0xd420e0 con 0xc5eb60 > 12:35:33.350863 7ff652358700 1 -- 192.0.2.11:0/1015476 <== osd.0 > 192.0.2.9:6800/2222 91 ==== osd_op_reply(229 > .bucket.meta.dev_foo_bucket:default.4878.161 [call,call,read 0~524288] v0'0 > uv12 ondisk = 0) v6 ==== 293+0+241 (2673515210 0 1349304681) 0xd4d860 con > 0xc5eb60 > 12:35:33.350956 7ff634416700 0 WARNING: couldn't find acl header for object, > generating default > 12:35:33.351059 7ff634416700 1 -- 192.0.2.11:0/1015476 --> > 192.0.2.9:6800/2222 -- osd_op(client.293418.0:230 .dir.default.293418.6 > [create 0~0,call rgw.bucket_init_index] 20.317f5c28 e170) v4 -- ?+0 0xd40a00 > con 0xc5eb60 > 12:35:33.365686 7ff652358700 1 -- 192.0.2.11:0/1015476 <== osd.0 > 192.0.2.9:6800/2222 92 ==== osd_op_reply(230 .dir.default.293418.6 [create > 0~0,call] v170'1219 uv1219 ondisk = 0) v6 ==== 222+0+0 (928032174 0 0) > 0xd40a00 con 0xc5eb60 > 12:35:33.365970 7ff634416700 1 -- 192.0.2.11:0/1015476 --> > 192.0.2.9:6800/2222 -- osd_op(client.293418.0:231 > .bucket.meta.dev_foo_bucket:default.293418.6 [create 0~0,call > version.set,writefull 0~193,setxattr user.rgw.acl (149)] 15.23f161c1 e170) v4 > -- ?+0 0xd3f960 con 0xc5eb60 > 12:35:33.371216 7ff652358700 1 -- 192.0.2.11:0/1015476 <== osd.0 > 192.0.2.9:6800/2222 93 ==== osd_op_reply(231 > .bucket.meta.dev_foo_bucket:default.293418.6 [create 0~0,call,writefull > 0~193,setxattr (149)] v170'326 uv326 ondisk = 0) v6 ==== 335+0+0 (2829361797 > 0 0) 0xd4d860 con 0xc5eb60 > 12:35:33.371353 7ff634416700 1 -- 192.0.2.11:0/1015476 --> > 192.0.2.10:6800/2306 -- osd_op(client.293418.0:232 notify.4 [notify 18~0] > 14.a204812d e170) v4 -- ?+0 0xd4e720 con 0xc71050 > 12:35:33.373145 7ff652358700 1 -- 192.0.2.11:0/1015476 <== osd.1 > 192.0.2.10:6800/2306 52 ==== watch-notify(c=5 v=1 i=730144440905 opcode=1) v1 > ==== 676+0+0 (596630801 0 0) 0xd521c0 con 0xc71050 > 12:35:33.373170 7ff652358700 1 -- 192.0.2.11:0/1015476 <== osd.1 > 192.0.2.10:6800/2306 53 ==== osd_op_reply(232 notify.4 [notify 18~0] v0'0 uv1 > ondisk = 0) v6 ==== 167+0+0 (3561411623 0 0) 0xd52630 con 0xc71050 > 12:35:33.373241 7ff64fa52700 1 -- 192.0.2.11:0/1015476 --> > 192.0.2.10:6800/2306 -- osd_op(client.293418.0:233 notify.4 [notify-ack > 730144440905~1] 14.a204812d e170) v4 -- ?+0 0xd52da0 con 0xc71050 > 12:35:33.374404 7ff652358700 1 -- 192.0.2.11:0/1015476 <== osd.1 > 192.0.2.10:6800/2306 54 ==== watch-notify(c=18 v=1 i=730144440905 opcode=1) > v1 ==== 676+0+0 (2005030999 0 0) 0xd52610 con 0xc71050 > 12:35:33.374427 7ff652358700 1 -- 192.0.2.11:0/1015476 <== osd.1 > 192.0.2.10:6800/2306 55 ==== osd_op_reply(233 notify.4 [notify-ack > 730144440905~1] v0'0 uv1 ondisk = 0) v6 ==== 167+0+0 (4115851956 0 0) > 0xd5a090 con 0xc71050 > 12:35:33.374474 7ff64fa52700 1 -- 192.0.2.11:0/1015476 --> > 192.0.2.10:6800/2306 -- osd_op(client.293418.0:234 notify.4 [notify-ack > 730144440905~1] 14.a204812d e170) v4 -- ?+0 0xd52da0 con 0xc71050 > 12:35:33.374679 7ff634416700 1 -- 192.0.2.11:0/1015476 --> > 192.0.2.9:6800/2222 -- osd_op(client.293418.0:235 dev_foo_bucket [create > 0~0,call version.set,writefull 0~134] 15.2f6b990 e170) v4 -- ?+0 0xd3f960 con > 0xc5eb60 > 12:35:33.375313 7ff652358700 1 -- 192.0.2.11:0/1015476 <== osd.1 > 192.0.2.10:6800/2306 56 ==== osd_op_reply(234 notify.4 [notify-ack > 730144440905~1] v0'0 uv1 ondisk = 0) v6 ==== 167+0+0 (4115851956 0 0) > 0xd52360 con 0xc71050 > 12:35:33.375461 7ff652358700 1 -- 192.0.2.11:0/1015476 <== osd.0 > 192.0.2.9:6800/2222 94 ==== osd_op_reply(235 dev_foo_bucket [create > 0~0,call,writefull 0~134] v0'0 uv0 ondisk = -17 (File exists)) v6 ==== > 263+0+0 (3681567401 0 0) 0xd52360 con 0xc5eb60 > 12:35:33.375569 7ff634416700 1 -- 192.0.2.11:0/1015476 --> > 192.0.2.9:6800/2222 -- osd_op(client.293418.0:236 dev_foo_bucket [call > version.read,getxattrs,stat] 15.2f6b990 e170) v4 -- ?+0 0xd463a0 con 0xc5eb60 > 12:35:33.376507 7ff652358700 1 -- 192.0.2.11:0/1015476 <== osd.0 > 192.0.2.9:6800/2222 95 ==== osd_op_reply(236 dev_foo_bucket > [call,getxattrs,stat] v0'0 uv2 ondisk = 0) v6 ==== 263+0+139 (3834169773 0 > 732307286) 0xd463a0 con 0xc5eb60 > 12:35:33.376650 7ff634416700 1 -- 192.0.2.11:0/1015476 --> > 192.0.2.9:6800/2222 -- osd_op(client.293418.0:237 dev_foo_bucket [call > version.check_conds,call version.read,read 0~524288] 15.2f6b990 e170) v4 -- > ?+0 0xd52360 con 0xc5eb60 > 12:35:33.377474 7ff652358700 1 -- 192.0.2.11:0/1015476 <== osd.0 > 192.0.2.9:6800/2222 96 ==== osd_op_reply(237 dev_foo_bucket [call,call,read > 0~524288] v0'0 uv2 ondisk = 0) v6 ==== 263+0+182 (659711759 0 978969982) > 0xd463a0 con 0xc5eb60 > 12:35:33.377617 7ff634416700 1 -- 192.0.2.11:0/1015476 --> > 192.0.2.9:6800/2222 -- osd_op(client.293418.0:238 > .bucket.meta.dev_foo_bucket:default.4878.161 [call version.check_conds,call > version.read,getxattrs,stat] 15.8914af77 e170) v4 -- ?+0 0xd46a60 con 0xc5eb60 > 12:35:33.378510 7ff652358700 1 -- 192.0.2.11:0/1015476 <== osd.0 > 192.0.2.9:6800/2222 97 ==== osd_op_reply(238 > .bucket.meta.dev_foo_bucket:default.4878.161 [call,call,getxattrs,stat] v0'0 > uv0 ondisk = -125 (Operation canceled)) v6 ==== 335+0+0 (1193355596 0 0) > 0xd46a60 con 0xc5eb60 > 12:35:33.378687 7ff634416700 0 get_bucket_info returned -125 > 12:35:33.378710 7ff634416700 0 WARNING: set_req_state_err err_no=125 > resorting to 500 > 12:35:33.378986 7ff634416700 1 ====== req done req=0xd15c20 http_status=500 > ====== The three OSDs are 192.0.2.9, 192.0.2.10 and 192.0.2.11, the radosgw I poked in this iteration is 192.0.2.11. The only relevant thing I could find in the archives was [1], but we have cache enabled (and disabling it did not change a thing). Greets Evgeni [1] http://thread.gmane.org/gmane.comp.file-systems.ceph.devel/17951 _______________________________________________ ceph-users mailing list ceph-users@lists.ceph.com http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com