This is the code I created in R using the packages "caret" and "e1071".
The script was intended to work in the general case.
# Naive Bayes on simulated spam / not_spam data.
#
# Simulates a binary outcome and two unrelated numeric predictors, splits the
# data 80/20, centers and scales the predictors using training-set statistics,
# fits a naive Bayes classifier and evaluates it on the held-out rows.

# Packages ----
# caret supplies createDataPartition(), trainControl(), preProcess() and
# confusionMatrix(); e1071 supplies naiveBayes().
library(caret)
library(e1071)

# Simulated data ----
# Categorical vector
spam <- c("spam", "not_spam")
spam_vec <- sample(spam, 60, replace = TRUE)
# Supposing two independent variables so that the kappa will be close to 0.
x1 <- rnorm(60)
x2 <- rnorm(60)
# Creating the dataset
# data.frame() is used instead of cbind(): cbind() on a character vector and
# numeric vectors returns a character matrix, which loses the numeric type of
# the predictors and cannot be indexed with `$`. The outcome is stored as a
# factor with both levels fixed so that predictions and reference labels share
# the same levels in confusionMatrix().
data1 <- data.frame(
  spamvec = factor(spam_vec, levels = spam),
  x = x1,
  y = x2
)

# Creating the partition ----
# Stratified 80/20 split on the outcome.
index <- createDataPartition(data1$spamvec, p = 0.8, list = FALSE)
training_data <- data1[index, ]
testing_data <- data1[-index, ]

# Resampling settings and tuning grid ----
# These objects are arguments for caret::train(); e1071::naiveBayes() performs
# no resampling or grid search, so they are not passed to the model fit below.
# They are kept so that any later code relying on them still finds them.
fitControl <- trainControl(
  method = "cv",
  number = 5,
  savePredictions = TRUE, # full argument names; no partial matching
  classProbs = TRUE
)
# NOTE(review): `C` is not a naive Bayes tuning parameter; it looks carried
# over from an SVM setup -- confirm before using this grid with train().
tune.grid <- expand.grid(C = seq(0, 10, .1))

# Scaling the predictors ----
# Centering/scaling parameters are estimated on the training predictors only
# (column 1 is the outcome) and then applied to both sets.
preProcess_cs <- preProcess(
  training_data[, -1],
  method = c("center", "scale")
)
spam_training_cs <- predict(preProcess_cs, training_data)
spam_testing_cs <- predict(preProcess_cs, testing_data)

# Training a Naive Bayes to predict binary outcome ----
# The formula interface needs the data frame that holds the variables.
Naive_Bayes_Model <- naiveBayes(spamvec ~ ., data = spam_training_cs)

# Confusion matrix ----
prediction <- predict(Naive_Bayes_Model, spam_testing_cs)
# Computed once, stored, then printed (instead of calling it twice).
confM <- confusionMatrix(
  prediction,
  spam_testing_cs$spamvec,
  positive = "spam"
)
print(confM)
# Extract by name rather than by position.
accuracy <- confM$overall["Accuracy"]