Hi,
I am trying to run a random forest analysis through Rstudio and am getting the following error. Any idea what would be causing this error?
Thanks
Error
randfor <- randomForest(as.factor(response) ~., data=trainvals, importance=TRUE)
Error in model.frame.default(formula = as.factor(response) ~ ., data = trainvals, :
variable lengths differ (found for 'Class_mnr')
### Run Random Forest
#Load libraries, you may need to install these first
library("maptools")
library("sp")
library("randomForest")
library("raster")
library("rgdal")
#
cat("Set variables and start processing\n")
#
############################# SET VARIABLES HERE ###################################
# Read shapefile into R
# Set Parameters:
vec <- readOGR(shapefile) #<===============
# Create a table from attName" field of vec Shapefile
tabAtt <- table(vec[[attName]])
# View number of unique classes in tabAtt object
uniqueAtt <- names(tabAtt) # View uniqueAtt object
uniqueAtt # Will be used later in script
# Create imput data for model from vec shapefile
for (x in 1:length(uniqueAtt)) {
class_data <- vec[vec[[attName]]==uniqueAtt[x], ]
areas <- sapply(slot(class_data, "polygons"), slot, "area")
nsamps <- ceiling(numsamps*(areas/sum(areas)))
for (i in 1:dim(class_data)[1]) {
xy_class <- spsample(class_data[i, ], type="random", n=nsamps[i])
if (i == 1) cpts <- xy_class
else cpts <- rbind(cpts, xy_class)
}
classpts <- cpts
if (x == 1) {
xy_allClasses <- classpts
} else {
xy_allClasses <- rbind(xy_allClasses, classpts)
}
}
# Get data from polygon shapefile for each random sample point
temp <- over(xy_allClasses,vec)
# Create the response variable for randomForest model
response <- temp[attName]
write.csv(uniqueAtt, file = "G:\\fw\\Programs\\Elc\\Projects_Ongoing\\EcosystemMapping\\BeaverRiver_PEM\\R\\Ecosite_UniqueAtt.csv") #save your list of predicted ecosite types for later
##########################################
#sets directory
setwd(zz)
getwd()
list.files(pattern='\\.tif$')
#raster Information
##GDALinfo("BB3.tif")
##GDALinfo("BB44.tif")
#Creates Variables
VDCH <- raster("VDCH.tif")
TWI <- raster("TWI.tif")
B11 <- raster("B11.tif")
EVI <- raster("ENhance_VI.tif")
DEM <- raster("GEN_DEM.tif")
NDVI <- raster("NDVI.tif")
PFST <- raster("PFST.tif")
SLP <- raster("slope.tif")
# Plots Rasters
plot(VDCH, main="VDCH")
plot(B4, main="B4")
#Creates Raster Stack and renames## could update to rename from list
stk <- stack(VDCH,TWI,B11,EVI,DEM,NDVI,PFST,SLP)
names(stk) <- c('VDCH','TWI','B11','EVI','DEM','NDVI','PFST','SLOPE')
names(stk)
#Saves stack as S
s <- stackSave(stk, "mystack")
#shows content of stack
s
names(s)
##############################################
##### Load Previous Saved Raster Stack Into R ######
setwd("G:\\fw\\Programs\\Elc\\Projects_Ongoing\\EcosystemMapping\\BeaverRiver_PEM\\R\\raster")
getwd()
stk.save <- stackOpen("mystack") #<=================
names(stk.save)
# Note! need to add names to raster stack to preserve layer information
# Import List of Raster Names (Saved when stack was previously SubSet)
# Import list
saved.names <- read.table("InputRasterNames.txt") #<================
#Change Data Frame to a List and Unlist for vector of Correct Length
stack.names <- unlist(rapply(saved.names, as.character, classess="factor", how="replace"))
# Add names to saved raster sack
names(stk.save) <- c(stack.names)
# Change name of re-loaded raster stack to x.var for analysis below
########x.var <- stk.save
satImage <- stk.save
###### Create RF Training Data #######
# Create training data for randomForest model
# Gets the pixel value for sample plots for each raster in x.var object
startTime <- Sys.time()
trainvals <- cbind(response, extract(satImage, xy_allClasses))
timeDiff <- Sys.time() - startTime # Calculate time
timeDiff # Print time difference
head(trainvals, 15) #check results
class(trainvals) #check results
# Run Random Forest
cat("Calculating random forest object\n")
randfor <- randomForest(as.factor(response) ~., data=trainvals, importance=TRUE)