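Hi, One problem you have is with the command: regr<-randomForest(y~x1+x2, data=X_train, proximity=TRUE) X_train contains only the predictors x1 and x2, so the response y in the formula is not found in the data argument; R then falls back to the full-length y in your workspace, whose length does not match the number of rows in X_train.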
What you need is something like this: X2 <- cbind(X,y) regr<-randomForest(y~x1+x2, data=X2, proximity=TRUE) HTH, Eric On Mon, May 8, 2023 at 11:11 PM varin sacha via R-help <r-help@r-project.org> wrote: > > Dear R-experts, > > Here below a toy example with some error messages, especially at the end of > the code (Tuning the parameters). Your help to correct my R code would be > highly appreciated. > > > ####################################### > #libraries > library(lattice) > library(ggplot2) > library(caret) > library(randomForest) > > #Data > y=c(23,34,32,12,24,35,45,56,76,87,54,34,23,45,41,13,16,98,35,65,56,67,78,89,87,64,53,31,14,34,45,46,57,69,90,80,70,65,50,45,60,56,87,79,64,34,25,47,61,24,10,13,12,15,46,58,76,89,90,98) > x1=c(4,5,6,7,1,10,19,20,21,14,23,6,5,32,15,12,16,14,2,3,4,5,3,2,1,2,6,7,5,4,3,2,1,3,4,6,7,9,5,4,3,7,10,11,12,13,10,3,2,5,6,9,8,7,4,12,15,16,2,3) > x2=c(0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1) > > y=as.numeric(y) > x1=as.numeric(x1) > x2=as.factor(x2) > > X=data.frame(x1,x2) > y=y > > #Split data into training and test sets > index=createDataPartition(y, p=0.75, list=FALSE) > X_train = X[index, ] > X_test = X[-index, ] > y_train= y[index ] > y_test = y[-index ] > > #Train de model > regr=randomForest (x=X_train, y=y_train, maxnodes=10, ntree=10) > > regr<-randomForest(y~x1+x2, data=X_train, proximity=TRUE) > regr > > #Make prediction > predictions= predict(regr, X_test) > > result= X_test > result['y'] = y_test > result['prediction'] = predictions > result > > # Import library for Metrics > library(Metrics) > > print(paste0('MAE: ' , mae(y_test,predictions) )) > print(paste0('MSE: ' ,caret::postResample(predictions , y_test)['RMSE']^2 )) > print(paste0('R2: ' ,caret::postResample(predictions , y_test)['Rsquared'] )) > > > #Tuning the parameters > N=500 #length(X_train) > X_train_ = X_train[1:N , ] > y_train_ = y_train[1:N] > > seed <-7 > metric<-'RMSE' > > 
customRF <- list(type = "Regression", library = "randomForest", loop = NULL) > > customRF$parameters <- data.frame(parameter = c("maxnodes", "ntree"), class = > rep("numeric", 2), label = c("maxnodes", "ntree")) > > customRF$grid <- function(x, y, len = NULL, search = "grid") {} > > customRF$fit <- function(x, y, wts, param, lev, last, weights, classProbs, > ...) { > > randomForest(x, y, maxnodes = param$maxnodes, ntree=param$ntree, ...) > > } > > customRF$predict <- function(modelFit, newdata, preProc = NULL, submodels = > NULL) > > predict(modelFit, newdata) > > customRF$prob <- function(modelFit, newdata, preProc = NULL, submodels = NULL) > > predict(modelFit, newdata, type = "prob") > > customRF$sort <- function(x) x[order(x[,1]),] > > customRF$levels <- function(x) x$classes > > > # Set grid search parameters > control <- trainControl(method="repeatedcv", number=10, repeats=3, > search='grid') > > # Outline the grid of parameters > tunegrid <- expand.grid(.maxnodes=c(10,20,30,50), .ntree=c(100, 200, 300)) > set.seed(seed) > > # Train the model > rf_gridsearch <- train(x=X_train_, y=y_train_, method=customRF, > metric=metric, tuneGrid=tunegrid, trControl=control) > > plot(rf_gridsearch) > > rf_gridsearch$bestTune > > ################################################# > > ______________________________________________ > R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. ______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.