Thanks Max, I have done a few tests with what you suggest and I have listed the 
output below. I wait a few minutes before deciding it's failed, and there is 
never any console output about anything failing, and nothing in any log files 
I've looked in: /var/adm/messages or /var/log/syslog. Maybe if I left it 2 
hours I might see a message somewhere, but who knows?

This is a nasty problem, as:
1. it appears to be failing on different files, although I think I'm seeing a 
common pattern where it fails on the third file often.
2. it copied all the files successfullly once, see log below for run #2, but 
then I do run #3 immediately afterwards and it fails again, so I list debug 
output for run #3
3. I cannot kill the hanging cp command and then my whole zfs filesystem is 
locked up meaning I have to reboot. Even doing an 'ls' command hangs often due 
to the hanging 'cp' command.
4. I cannot use 'shutdown -y -g 0 -i 5' to shutdown the machine, as it seems to 
be blocked by the hanging cp command
5. The way to shutdown the machine is to hit the reset button, and I don't like 
doing this when there are, theoretically, write operations occurring, or at 
least pending.

Anyway here is the long output. Perhaps if people reply they can avoid keeping 
this text as part of their reply or we'll be lost in a sea of dump output :)


========================================

output: run #1 (after reboot)

bash-3.2$ truss -topen cp -r testdir z
open("/var/ld/ld.config", O_RDONLY)             Err#2 ENOENT
open("/lib/libc.so.1", O_RDONLY)                = 3
open("/usr/lib/locale/en_GB.UTF-8/en_GB.UTF-8.so.3", O_RDONLY) = 3
open("/usr/lib/locale/common/methods_unicode.so.3", O_RDONLY) = 3
open("/lib/libsec.so.1", O_RDONLY)              = 3
open("/lib/libcmdutils.so.1", O_RDONLY)         = 3
open("/lib/libavl.so.1", O_RDONLY)              = 3
open64("testdir/f06", O_RDONLY)                 = 4
open64("testdir/f15", O_RDONLY)                 = 4
open64("testdir/f12", O_RDONLY)                 = 4



# mdb -k
Loading modules: [ unix genunix specfs dtrace cpu.generic 
cpu_ms.AuthenticAMD.15 uppc pcplusmp scsi_vhci ufs ip hook neti sctp arp usba 
s1394 nca lofs zfs random md sppp smbsrv nfs ptm crypto ipc ]
> ::pgrep cp
S    PID   PPID   PGID    SID    UID      FLAGS             ADDR NAME
R    910    909    909    869    501 0x4a004000 ffffff01d96e6e30 cp
> ffffff01d96e6e30::walk thread | ::threadlist -v
            ADDR             PROC              LWP CLS PRI            WCHAN
ffffff01d4371b60 ffffff01d96e6e30 ffffff01d9b28930   2  60 ffffff01f4aa52c0
  PC: _resume_from_idle+0xf1    CMD: cp -r testdir z
  stack pointer for thread ffffff01d4371b60: ffffff000949f260
  [ ffffff000949f260 _resume_from_idle+0xf1() ]
    swtch+0x17f()
    cv_wait+0x61()
    zio_wait+0x5f()
    dmu_buf_hold_array_by_dnode+0x214()
    dmu_read+0xd4()
    zfs_fillpage+0x15e()
    zfs_getpage+0x187()
    fop_getpage+0x9f()
    segvn_fault+0x9ef()
    as_fault+0x5ae()
    pagefault+0x95()
    trap+0x1286()
    0xfffffffffb8001d9()
    fuword8+0x21()
    zfs_write+0x147()
    fop_write+0x69()
    write+0x2af()
    write32+0x1e()
    sys_syscall32+0x101()             
                                      
> 


bash-3.2$ iostat -xce 1
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0    33.5    5.7  431.0   32.6  0.4  0.2   14.5   6  12   0   0   0   0   4 
 2  0 94
sd0       1.3    4.3  111.1   45.0  0.1  0.0   17.3   0   1   0   0   0   0 
sd1       2.0    4.4  210.0   45.1  0.1  0.0   14.5   0   1   0   0   0   0 
sd2       1.3    4.3  111.1   45.2  0.1  0.0   17.4   0   1   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   0   0   0   1 
48  0 51
sd0     325.6   56.3 36340.5  146.7 26.2  0.9   71.0  92  92   0   0   0   0 
sd1     518.6   49.2 65734.5  117.1 25.9  0.9   47.0  86  85   0   0   0   0 
sd2     327.6   57.3 36983.7  144.7 27.3  0.9   73.4  94  93   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   0   0   0   0 
43  0 57
sd0     301.1    1.0 33550.0    0.0 23.6  0.8   80.8  84  84   0   0   0   0 
sd1     556.2    1.0 69661.1    0.0 26.1  0.8   48.3  92  83   0   0   0   0 
sd2     300.1    5.0 33229.9    4.0 23.8  0.8   80.7  84  84   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   0   0   0   0 
54  0 46
sd0     343.1    0.0 39052.6    0.0 32.2  1.0   96.9 100  99   0   0   0   0 
sd1     633.2    0.0 80986.1    0.0 33.1  1.0   53.8 100  99   0   0   0   0 
sd2     366.1    0.0 41677.4    0.0 31.7  1.0   89.4 100  99   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   0   0   0   1 
48  0 50
sd0     275.0    0.0 30590.9    0.0 14.7  0.7   56.2  73  73   0   0   0   0 
sd1     459.0    0.0 57149.9    0.0 18.4  0.7   41.6  67  66   0   0   0   0 
sd2     262.0    0.0 28031.0    0.0 10.1  0.6   40.9  63  63   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   0   0   0   1 
87  0 12
sd0     151.0  388.0 18557.6 49593.7 24.4  1.0   47.1  96  96   0   0   0   0 
sd1     105.0  373.0 11390.5 47737.9 24.1  1.0   52.5  95  96   0   0   0   0 
sd2     101.0  391.0 11262.6 49913.6 25.1  0.9   52.9  96  94   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   0   0   0   1 
18  0 81
sd0       0.0  556.1    0.0 71183.9 33.9  1.0   62.8 100  99   0   0   0   0 
sd1       0.0  565.1    0.0 72336.2 33.9  1.0   61.8 100  99   0   0   0   0 
sd2       0.0  534.1    0.0 68367.3 33.9  1.0   65.3 100  99   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   0   0   0   0 
15  0 84
sd0       0.0  547.0    0.0 70016.1 33.9  1.0   63.8 100  99   0   0   0   0 
sd1       0.0  563.0    0.0 72064.1 33.9  1.0   62.0 100  99   0   0   0   0 
sd2       0.0  543.0    0.0 69504.0 33.9  1.0   64.3 100  99   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   0   0   0   1 
15  0 85
sd0       0.0  564.0    0.0 72191.7 33.9  1.0   61.9 100  99   0   0   0   0 
sd1       0.0  547.0    0.0 70015.7 33.9  1.0   63.8 100  99   0   0   0   0 
sd2       0.0  560.0    0.0 71679.7 33.9  1.0   62.3 100  99   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   0   0   0   1 
15  0 84
sd0       0.0  554.0    0.0 70912.1 33.9  1.0   63.0 100  99   0   0   0   0 
sd1       0.0  591.0    0.0 75648.1 33.9  1.0   59.1 100  99   0   0   0   0 
sd2       0.0  574.0    0.0 73472.1 33.9  1.0   60.8 100  99   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   0   0   0   1 
40  0 59
sd0     415.0  153.0 53116.4 18320.2 33.9  1.0   61.5 100  99   0   0   0   0 
sd1     258.0  123.0 29054.0 14480.0 33.9  1.0   91.6 100  99   0   0   0   0 
sd2     247.0  161.0 28094.1 19343.2 34.0  1.0   85.7 100  99   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   0   0   0   1 
46  0 52
sd0     529.1   44.0 67654.7  177.5 30.6  0.9   54.9  93  92   0   0   0   0 
sd1     258.0   42.0 29186.9  110.5 25.7  0.8   88.3  86  82   0   0   0   0 
sd2     266.0   44.0 30275.0  113.5 28.5  0.9   94.9  95  89   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   0   0   0   1 
48  0 50
sd0     253.8   77.9 28645.9 8216.5 26.1  0.9   81.4  96  90   0   0   0   0 
sd1     478.6   77.9 60041.3 8210.5 27.7  0.9   51.4  93  90   0   0   0   0 
sd2     274.7   78.9 30692.1 8207.5 26.2  0.9   76.5  93  90   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   0   0   0   1 
52  0 48
sd0     310.2   29.0 35036.3  100.6 25.4  0.9   77.6  96  90   0   0   0   0 
sd1     533.4   38.0 67507.7  104.1 23.9  0.9   43.3  91  87   0   0   0   0 
sd2     295.2   34.0 33175.9  110.1 22.8  0.9   72.0  90  86   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   0   0   0   0 
40  0 60
sd0     336.0    4.0 38020.1    4.0 32.1  1.0   97.3 100  99   0   0   0   0 
sd1     574.1    0.0 72391.8    0.0 32.9  1.0   59.0 100  99   0   0   0   0 
sd2     423.0    4.0 49925.4    4.0 29.8  1.0   72.1 100  99   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    1.0    0.0    1.5  0.0  0.0    0.2   0   0   0   0   0   0   1 
40  0 60
sd0     318.0    0.0 36225.4    0.0 33.9  1.0  109.9 100  99   0   0   0   0 
sd1     330.0    0.0 37569.5    0.0 33.9  1.0  105.8 100  99   0   0   0   0 
sd2     584.0    0.0 74755.0    0.0 33.9  1.0   59.7 100  99   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    1.0    0.0    8.0  0.0  0.0    2.0   0   0   0   0   0   0   1 
40  0 59
sd0      92.0   76.0 10115.5 9600.5 22.1  0.4  134.2  81  41   0   0   0   0 
sd1      87.0   92.0 9664.5 11140.5 20.0  0.4  114.1  76  42   0   0   0   0 
sd2     142.0   88.0 17731.9 11136.5 20.6  0.4   91.3  74  37   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     1.0    3.0    4.0  124.0  0.0  0.0    7.1   0   1   0   0   0   0   1 
55  0 44
sd0       0.0    0.0    0.0    0.0 35.0  0.0    0.0 100   0   0   0   0   0 
sd1       0.0    0.0    0.0    0.0 35.0  0.0    0.0 100   0   0   0   0   0 
sd2       0.0    0.0    0.0    0.0 35.0  0.0    0.0 100   0   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   0   0   0   1 
 1  0 99
sd0       0.0    0.0    0.0    0.0 35.0  0.0    0.0 100   0   0   0   0   0 
sd1       0.0    0.0    0.0    0.0 35.0  0.0    0.0 100   0   0   0   0   0 
sd2       0.0    0.0    0.0    0.0 35.0  0.0    0.0 100   0   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 


==================================================================

run #2 & #3:  (after reboot, leaving machine idle for 1 hour, then repeating 
this test)

bash-3.2$ mkdir z2
bash-3.2$ truss -topen cp -r testdir z2
open("/var/ld/ld.config", O_RDONLY)             Err#2 ENOENT
open("/lib/libc.so.1", O_RDONLY)                = 3
open("/usr/lib/locale/en_GB.UTF-8/en_GB.UTF-8.so.3", O_RDONLY) = 3
open("/usr/lib/locale/common/methods_unicode.so.3", O_RDONLY) = 3
open("/lib/libsec.so.1", O_RDONLY)              = 3
open("/lib/libcmdutils.so.1", O_RDONLY)         = 3
open("/lib/libavl.so.1", O_RDONLY)              = 3
open64("testdir/f06", O_RDONLY)                 = 4
open64("testdir/f15", O_RDONLY)                 = 4
open64("testdir/f12", O_RDONLY)                 = 4
open64("testdir/f01", O_RDONLY)                 = 4
open64("testdir/f05", O_RDONLY)                 = 4
open64("testdir/f16", O_RDONLY)                 = 4
open64("testdir/f11", O_RDONLY)                 = 4
open64("testdir/f02", O_RDONLY)                 = 4
open64("testdir/f08", O_RDONLY)                 = 4
open64("testdir/f13", O_RDONLY)                 = 4
open64("testdir/f14", O_RDONLY)                 = 4
open64("testdir/f07", O_RDONLY)                 = 4
open64("testdir/f09", O_RDONLY)                 = 4
open64("testdir/f03", O_RDONLY)                 = 4
open64("testdir/f10", O_RDONLY)                 = 4
open64("testdir/f04", O_RDONLY)                 = 4

bash-3.2$ mkdir z3
bash-3.2$ truss -topen cp -r testdir z3
open("/var/ld/ld.config", O_RDONLY)             Err#2 ENOENT
open("/lib/libc.so.1", O_RDONLY)                = 3
open("/usr/lib/locale/en_GB.UTF-8/en_GB.UTF-8.so.3", O_RDONLY) = 3
open("/usr/lib/locale/common/methods_unicode.so.3", O_RDONLY) = 3
open("/lib/libsec.so.1", O_RDONLY)              = 3
open("/lib/libcmdutils.so.1", O_RDONLY)         = 3
open("/lib/libavl.so.1", O_RDONLY)              = 3
open64("testdir/f06", O_RDONLY)                 = 4
open64("testdir/f15", O_RDONLY)                 = 4
open64("testdir/f12", O_RDONLY)                 = 4
open64("testdir/f01", O_RDONLY)                 = 4


# mdb -k
Loading modules: [ unix genunix specfs dtrace cpu.generic 
cpu_ms.AuthenticAMD.15 uppc pcplusmp scsi_vhci ufs ip hook neti sctp arp usba 
s1394 fctl nca lofs zfs random md sppp smbsrv nfs ptm ipc crypto cpc fcp fcip 
logindmux nsctl sdbc sv ii nsmb rdc ]
> ::pgrep cp
S    PID   PPID   PGID    SID    UID      FLAGS             ADDR NAME
R    927    926    926    918    501 0x4a004000 ffffff01dea2ecb0 cp
> ffffff01dea2ecb0::walk thread | ::threadlist -v
            ADDR             PROC              LWP CLS PRI            WCHAN
ffffff01dee52ac0 ffffff01dea2ecb0 ffffff01df063300   2  59 ffffff01d413ed16
  PC: _resume_from_idle+0xf1    CMD: cp -r testdir z3
  stack pointer for thread ffffff01dee52ac0: ffffff000874bb60
  [ ffffff000874bb60 _resume_from_idle+0xf1() ]
    swtch+0x17f()
    cv_wait+0x61()
    txg_wait_open+0x7f()
    dmu_tx_wait+0xb6()
    zfs_write+0x360()
    fop_write+0x69()
    write+0x2af()
    write32+0x1e()
    sys_syscall32+0x101()

> 

# iostat -xce 1
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0    14.5    1.2  132.7   10.0  0.3  0.1   19.8   2   4   0   0   0   0   1 
 2  0 97
sd0       8.1   12.5  938.4 1514.3  3.5  0.0  169.5  10   4   0   0   0   0 
sd1      11.6   12.5 1444.1 1514.7  3.4  0.0  144.6  10   4   0   0   0   0 
sd2       7.9   12.5  895.4 1514.4  3.4  0.0  170.0  10   4   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     1.0    0.0    4.2    0.0  0.0  0.0    6.2   0   1   0   0   0   0   1 
 1  0 98
sd0       0.0    0.0    0.0    0.0 35.0  0.0    0.0 100   0   0   0   0   0 
sd1       0.0    0.0    0.0    0.0 35.0  0.0    0.0 100   0   0   0   0   0 
sd2       0.0    0.0    0.0    0.0 35.0  0.0    0.0 100   0   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
                         extended device statistics         ---- errors ---     
 cpu
device    r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b s/w h/w trn tot  us 
sy wt id
cmdk0     0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   0   0   0   0 
 1  0 99
sd0       0.0    0.0    0.0    0.0 35.0  0.0    0.0 100   0   0   0   0   0 
sd1       0.0    0.0    0.0    0.0 35.0  0.0    0.0 100   0   0   0   0   0 
sd2       0.0    0.0    0.0    0.0 35.0  0.0    0.0 100   0   0   0   0   0 
sd3       0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0   0   5   0   5 
^C# 

=============================================
 
 
This message posted from opensolaris.org
_______________________________________________
zfs-discuss mailing list
zfs-discuss@opensolaris.org
http://mail.opensolaris.org/mailman/listinfo/zfs-discuss

Reply via email to