Dear Mark, Our Linux server uses adaptec ASR-2010S and we are using dpt_i2o driver. To raise the reliability of the server, we reviewd the dpt_i2o driver. # The version of driver is 2.4 Build 5 in linux-2.4.28.
We found some problems and questions.
Please see the attached the dpt_i2o_problem_document.txt about the problems.
And also I made a patch agains the driver(dpt_i2o.diff).
I'm not participated in the linux-scsi mailing list.
Please reply to the following addresses.
E-Mail : [EMAIL PROTECTED]
Best Regards
Hiroomi Sakurai
Please see the dpt_i2o.diff about the correction of each problem.
Problem
1. Problem in adpt_detect() function
When adpt_install_hba() returns error status, adpt_detect() returns
hba_count-1.
But it's necessary to call adpt_i2o_sys_shutdown() and return 0.
adpt_detect() : 181 - 191
----------------------------------------------------------------------------
/* search for all Adatpec I2O RAID cards */
while ((pDev = pci_find_device( PCI_DPT_VENDOR_ID, PCI_ANY_ID, pDev))) {
if(pDev->device == PCI_DPT_DEVICE_ID ||
pDev->device == PCI_DPT_RAPTOR_DEVICE_ID){
if(adpt_install_hba(sht, pDev) ){
PERROR("Could not Init an I2O RAID device\n");
PERROR("Will not try to detect others.\n");
return hba_count-1;
}
}
}
----------------------------------------------------------------------------
2. Problem in adpt_detect() and adpt_inquiry() function
adpt_inquiry() doesn't return any value even if it detects any errors.
Please add error status as a return value.
When caller receives error from adpt_inquiry(), caller should check the
return value.
For example, when adpt_detect() receives error status, it should check
the value.
adpt_detect() : 238 - 242
----------------------------------------------------------------------------
if (adpt_i2o_parse_lct(pHba) < 0){
adpt_i2o_delete_hba(pHba);
continue;
}
adpt_inquiry(pHba);
----------------------------------------------------------------------------
adpt_inquiry() : 277 - 355
----------------------------------------------------------------------------
static void adpt_inquiry(adpt_hba* pHba)
{
(omission)
}
----------------------------------------------------------------------------
3. Problem in adpt_proc_info() function
adpt_proc_info() doesn't check 'd->pScsi_dev' is null or not.
It cause panic when 'd->pScsi_dev' is null.
It is necessary to check it.
adpt_proc_info() : 577 - 583
----------------------------------------------------------------------------
for(chan = 0; chan < MAX_CHANNEL; chan++) {
for(id = 0; id < MAX_ID; id++) {
d = pHba->channel[chan].device[id];
while(d){
len += sprintf(buffer+len,"\t%-24.24s",
d->pScsi_dev->vendor);
len += sprintf(buffer+len," Rev: %-8.8s\n",
d->pScsi_dev->rev);
pos = begin + len;
----------------------------------------------------------------------------
4. Problem in adpt_install_hba() function
adpt_install_hba() find the emplty hbas[i] by 'for (i=0;i<DPTI_MAX_HBA;i++)'.
But even if there are no emply hbas[], it continues the operaiton.
This causes device name 'dpti16' for all devices exceeds 17 cards.
You should remove the limitation of card number(DPTI_MAX_HBA=16).
(At least you should add the error sequence to fail exceeds 17.)
adpt_install_hba() : 929 - 934
----------------------------------------------------------------------------
for(i=0;i<DPTI_MAX_HBA;i++) {
if(hbas[i]==NULL) {
hbas[i]=pHba;
break;
}
}
----------------------------------------------------------------------------
5. Problem in adpt_find_device() function
In adpt_find_device(), you should check 'id' is smaller than 'MAX_ID'.
adpt_find_device() : 1087 - 1088
----------------------------------------------------------------------------
if(chan < 0 || chan >= MAX_CHANNEL)
return NULL;
----------------------------------------------------------------------------
6. Problem in adpt_i2o_reparse_lct() function
In adpt_i2o_reparse_lct(), you check 'bus_no' is smaller than 'MAX_CHANNEL',
but you use 'bus_no' before checking.
You should move the sequence of checking before using.
Also you should check the check 'scsi_id' is smaller than 'MAX_ID'.
adpt_i2o_reparse_lct() : 2410 - 2442
----------------------------------------------------------------------------
bus_no = buf[0]>>16;
scsi_id = buf[1];
scsi_lun = (buf[2]>>8 )&0xff;
pDev = pHba->channel[bus_no].device[scsi_id];
(omission)
if(bus_no >= MAX_CHANNEL) { // Something wrong skip it
printk(KERN_WARNING"%s: Channel number %d out of range \n",
pHba->name, bus_no);
continue;
}
pDev = pHba->channel[bus_no].device[scsi_id];
----------------------------------------------------------------------------
7. Problem in adpt_i2o_reparse_lct() function
In adpt_i2o_reparse_lct(), you check 'pDev->pScsi_dev' is null or not,
but you use 'pDev->pScsi_dev' before checking.
You shoud move the sequence ot checking before using.
adpt_i2o_reparse_lct() : 2480 - 2486
----------------------------------------------------------------------------
if(pDev->pScsi_dev->online == FALSE) {
printk(KERN_WARNING"%s: Setting device (%d,%d,%d) back online\n",
pHba->name,bus_no,scsi_id,scsi_lun);
if (pDev->pScsi_dev) {
pDev->pScsi_dev->online = TRUE;
}
}
----------------------------------------------------------------------------
8. Problem in adpt_i2o_lct_get() function
adpt_i2o_lct_get() continues the operation even of adpt_i2o_query_scalar()
returns with error status.
It's necessary return -1 when error detects.
adpt_i2o_lct_get() : 2924 - 2936
----------------------------------------------------------------------------
// I2O_DPT_EXEC_IOP_BUFFERS_GROUP_NO;
if(adpt_i2o_query_scalar(pHba, 0 , 0x8000, -1, buf, sizeof(buf))>=0) {
pHba->FwDebugBufferSize = buf[1];
pHba->FwDebugBuffer_P = pHba->base_addr_virt + buf[0];
pHba->FwDebugFlags_P = pHba->FwDebugBuffer_P +
FW_DEBUG_FLAGS_OFFSET;
pHba->FwDebugBLEDvalue_P = pHba->FwDebugBuffer_P +
FW_DEBUG_BLED_OFFSET;
pHba->FwDebugBLEDflag_P = pHba->FwDebugBLEDvalue_P + 1;
pHba->FwDebugStrLength_P = pHba->FwDebugBuffer_P +
FW_DEBUG_STR_LENGTH_OFFSET;
pHba->FwDebugBuffer_P += buf[2];
pHba->FwDebugFlags = 0;
}
return 0;
----------------------------------------------------------------------------
Question
1. Question in adpt_queue() function
In adpt_queue(), you check 'cmd->device->hostdata' is null or not every
time. But this sequence seems redundant.
I think this operation can move in adpt_detect().
'pDev->pScsi_dev=cmd->device' aslo seems redundant.
adpt_queue() : 422 - 439
----------------------------------------------------------------------------
// TODO if the cmd->device if offline then I may need to issue a bus rescan
// followed by a get_lct to see if the device is there anymore
if((pDev = (struct adpt_device*) (cmd->device->hostdata)) == NULL) {
/*
* First command request for this device. Set up a pointer
* to the device structure. This should be a TEST_UNIT_READY
* command from scan_scsis_single.
*/
if ((pDev = adpt_find_device(pHba, (u32)cmd->channel,
(u32)cmd->target, (u32)cmd-> lun)) == NULL) {
// TODO: if any luns are at this bus, scsi id then fake a
TEST_UNIT_READY and INQUIRY response
// with type 7F (for all luns less than the max for this
bus,id) so the lun scan will continue.
cmd->result = (DID_NO_CONNECT << 16);
cmd->scsi_done(cmd);
return 0;
}
(struct adpt_device*)(cmd->device->hostdata) = pDev;
}
pDev->pScsi_dev = cmd->device;
----------------------------------------------------------------------------
2. Question in adpt_i2o_to_scsi() function
Why adpt_i2o_to_scsi() returns DID_TIMEOUT when sense code is
DATA_PROTECT and 'class 7'?
adpt_i2o_to_scsi() : 2306 - 2322
----------------------------------------------------------------------------
// copy over the request sense data if it was a check
// condition status
if(dev_status == 0x02 /*CHECK_CONDITION*/) {
u32 len = sizeof(cmd->sense_buffer);
len = (len > 40) ? 40 : len;
// Copy over the sense data
memcpy(cmd->sense_buffer, (void*)(reply+28) , len);
if(cmd->sense_buffer[0] == 0x70 /* class 7 */ &&
cmd->sense_buffer[2] == DATA_PROTECT ){
/* This is to handle an array failed */
cmd->result = (DID_TIME_OUT << 16);
printk(KERN_WARNING"%s: SCSI Data Protect-Device (%d,%d,%d)
hba_status=0x%x, dev_status=0x%x, cmd=0x%x\n",
pHba->name, (u32)cmd->channel, (u32)cmd->target,
(u32)cmd->lun,
hba_status, dev_status, cmd->cmnd[0]);
}
}
----------------------------------------------------------------------------
3. Question in adpt_i2o_build_sys_table() function
adpt_i2o_build_systable() continues the operation even if
adpt_i2o_status_get()
returns the error. This cause the diferrence between the number of actual
HBAs(hba_count) and
and the number of sys_tbl entry(sys_tbl->num_entries).
Does this cause some kind of problem?
adpt_i2o_build_sys_table() : 2961 - 2981
----------------------------------------------------------------------------
for(pHba = hba_chain; pHba; pHba = pHba->next) {
// Get updated Status Block so we have the latest information
if (adpt_i2o_status_get(pHba)) {
sys_tbl->num_entries--;
continue; // try next one
}
sys_tbl->iops[count].org_id = pHba->status_block->org_id;
sys_tbl->iops[count].iop_id = pHba->unit + 2;
sys_tbl->iops[count].seg_num = 0;
sys_tbl->iops[count].i2o_version = pHba->status_block->i2o_version;
sys_tbl->iops[count].iop_state = pHba->status_block->iop_state;
sys_tbl->iops[count].msg_type = pHba->status_block->msg_type;
sys_tbl->iops[count].frame_size =
pHba->status_block->inbound_frame_size;
sys_tbl->iops[count].last_changed = sys_tbl_ind - 1; // ??
sys_tbl->iops[count].iop_capabilities =
pHba->status_block->iop_capabilities;
sys_tbl->iops[count].inbound_low =
(u32)virt_to_bus((void*)pHba->post_port);
sys_tbl->iops[count].inbound_high =
(u32)((u64)virt_to_bus((void*)pHba->post_port)>>32);
count++;
}
----------------------------------------------------------------------------
dpt_i2o.diff
Description: Binary data

