Hello,

 

I am trying to run a  coxph model but get an error

 
Error in model.frame.default(formula = Surv(time, status) ~
selectedVarnames,  : 
  variable lengths differ (found for 'selectedVarnames')

 

Of note the dataset is generated as part of using the glmnet for Lasso
regularization.

Glmnet takes in as input a matrix where all categorical variables have been
converted to binary factors with dummies.

As a result I have had to do some data manipulations that are probably the
origin of the error.

Original dataframe -> transform categorical to binary -> transform to matrix
-> run glment -> get best features -> selected features + status + time ->
transform to dataframe -> run coxph

 

The coxph code (entire code below):

glmnet.cox <- coxph(Surv(time,status) ~ selectedVarnames,
data=reformat_dataSet, init=selectedBeta,iter=200)

 

 

Here is a description of the data (I can't attach the real thing as it is
protected health data):

 

selectedVar: 394X81 Double Matrix

reformat_dataSet: 394 obs. 83 varaibles (dataframe including 59 predicting
variables, time and status)

selectedBeta: numeric [81]

selectedVarnames:
"FABM0+FABM1+FABM2+FABM4+FABM4EOS+SEXF+SEXM+Age_at_Dx+Performance_Status0+Pe
rformance_Status2+Performance_Status3+Performance_Status4+AHD_typeabnormal
counts_MDS+AHD_typeALL_MDS+AHD_typecytopenia+AHD_typelow PLT+AHD_typelow
white
count+AHD_typeMPD+AHD_typepancytopenia+PRIOR_MALNO+PRIOR_MALYES+PRIOR_CHEMON
O+PRIOR_XRTNO+PRIOR_XRTYES+cyto_grpUNFAVORABLE+Cyto5+Cyto7+Cyto8+Cyto10+Cyto
11+Cyto12+ITDPOS+D835POS+ABS_BLST+BM_PROM+PB_MONO+PB_PROM+HGB+PLT+ALBUMIN+CR
EATININE+CD13+CD34+CD19+Fresh_or__CryoCryo+AKT1_2_3_pT308+ARC+ATF3+BECN1+BIR
C2+CAV1+CDKN2A+CTNNB1+FOXO3_S318_321+GAB2+GRP78+IGFBP2+INPP5D+JMJD6+JUN_pS73
+KIT+LYN+MAP2K1_2_pS217_221+NRP1+PIK3CA+PLAC1+PRKCA+PRKCD_pT507+PTEN_pS380T3
82T383+RAC1_2_3+RPS6KB1+SMAD1+SMAD2_pS245+SMAD5+SMAD5_pS463+STAT3+STMN1+TP53
+TRIM62+VASP+XPO1"

dataset: 394 obs. 270 variables

predict_matrix: 394X392 double matrix

 

 

THANK YOU!!!

 

 

 

The entire code (without the data though):

library("survival")

library("pec")

library ("glmnet")

library ("peperr")

library ("Hmisc")

 

cIndexCoxglmnet <- list()

for (i in 1:50){

  train <- sample(1:nrow(dataset), nrow(dataset), replace = TRUE) ## random
sampling with replacement

  trainSet <-dataset [train,] 

  testSet<-dataset [-train,]

  cat ("\n","ITERATION:",i,"\n")

  

  #creat Y (survival matrix) for glmnet

  surv_obj <- Surv(trainSet$time,trainSet$status) 

  

  

  ## tranform categorical variables into binary variables with dummy for
trainSet

  predict_matrix <- model.matrix(~ ., data=trainSet, 

                                 contrasts.arg = lapply
(trainSet[,sapply(trainSet, is.factor)], contrasts, contrasts=FALSE))

  

  ## remove the statu/time variables from the predictor matrix (x) for
glmnet

  predict_matrix <- subset (predict_matrix, select=c(-time,-status))

  

  ## create a glmnet cox object using lasso regularization and cross
validation

  glmnet.cv <- cv.glmnet (predict_matrix, surv_obj, family="cox")

  

  ## get the glmnet model on the full dataset

  glmnet.obj <- glmnet.cv$glmnet.fit

  

  # find lambda index for the models with least partial likelihood deviance
(by cv.glmnet) 

  optimal.lambda <- glmnet.cv$lambda.min    # For a more parsimoneous model
use lambda.1se     

  lambda.index <- which(glmnet.obj$lambda==optimal.lambda) 

  

  

  # take beta for optimal lambda 

  optimal.beta  <- glmnet.obj$beta[,lambda.index] 

  

  # find non zero beta coef 

  nonzero.coef <- abs(optimal.beta)>0 

  selectedBeta <- optimal.beta[nonzero.coef] 

  

  # take only covariates for which beta is not zero 

  selectedVar   <- predict_matrix[,nonzero.coef] 

  

  # create a dataframe for trainSet with time, status and selected variables
in binary representation for evaluation in pec

  reformat_dataSet <- as.data.frame(cbind(surv_obj,selectedVar))

  

  # names of selectedVars

  selectedVarnames<-paste(colnames(selectedVar),collapse="+")

  

  # create coxph object with pre-defined coefficients 

  glmnet.cox <- coxph(Surv(time,status) ~ selectedVarnames,
data=reformat_dataSet, init=selectedBeta,iter=200)

  

  

  ## create datasets based on the testSet fit for glmnet models for testing
in pec function

  ## !!!encountered tech problems in variables selection for unknown reason
so the code is somewhat redundant !!!

  

  #creat Y (survival matrix) for glmnet

  surv_obj_test <- Surv(testSet$time,testSet$status)

  

  ## tranform categorical variables into binary variables with dummy for
testSet

  predict_matrix_test <- model.matrix(~ ., data=testSet, 

                                      contrasts.arg = lapply
(testSet[,sapply(testSet, is.factor)], contrasts, contrasts=FALSE))

  

  ## remove the statu/time variables from the predictor matrix (x) for
glmnet

  predict_matrix_test <- subset (predict_matrix_test,
select=c(-time,-status))

  

  ## remove the statu/time variables from the predictor matrix (x) for
glmnet

  selectedVar_test <- predict_matrix_test [,nonzero.coef]

  

  # create a dataframe for trainSet with time, status and selected variables
in binary representation for evaluation in pec

  reformat_testSet <- as.data.frame(cbind(surv_obj_test,selectedVar_test))

  refrmat_matrix <- cbind(surv_obj_test,selectedVar_test)

  

  ## compute c-index (Harrell's) for cox-glmnet models

  if (is.null(glmnet.cox)){

    cIndexCoxglmnet <- c(cIndexCoxglmnet,NULL)

  }else{

    cIndexCoxglmnet <- c(cIndexCoxglmnet, rcorr.cens(predict(glmnet.cox,
reformat_testSet),Surv(reformat_testSet$time,reformat_testSet$status))[1])

  }

  

}  

  


        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to