library(kknn)
library(caret)
library(tidyverse)

# Read in the credit card data (no header row, so columns get default V1, V2, ... names)
credit_data <- read.table("C:/Users/mjpearl/Downloads/data 2.2/credit_card_data.txt", stringsAsFactors = FALSE, header = FALSE)
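# Quick sanity check on the load (654 rows are expected per the sampling note below,
# and column V11 is used later as the response)
dim(credit_data)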
# Number of rows in the credit card data
rows = nrow(credit_data)

# Randomly select 1/5 of the row indexes from the 654 rows
sample = sample(1:rows, size = round(rows/5), replace = FALSE)
# Training dataset: exclude the sampled 1/5 of the rows
training = credit_data[-sample,]

# Testing dataset: the remaining 1/5 that was excluded from training
testing = credit_data[sample,]
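# The random split above is not seeded, so the exact rows held out (and the reported
# accuracy) will vary between runs. As an illustrative alternative (not used below),
# caret is already loaded and its createDataPartition() can draw a split that is
# stratified on the response V11; the seed value and the *_alt names are hypothetical.
set.seed(42)
alt_idx      <- createDataPartition(credit_data$V11, p = 0.8, list = FALSE)
training_alt <- credit_data[alt_idx, ]   # ~4/5 of rows for training
testing_alt  <- credit_data[-alt_idx, ]  # remaining ~1/5 for testing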
# Train the kknn model via leave-one-out cross-validation to find the optimal value of k.
# train.kknn searches over k (up to kmax) and the listed kernels, and reports the
# hyperparameter values that give the best model performance.
kknn_fit = train.kknn(formula = V11 ~ .,
                      data = training,
                      kmax = 100,
                      kernel = c("optimal", "rectangular", "inv", "gaussian", "triangular"),
                      scale = TRUE)
kknn_fit
# Call:
# train.kknn(formula = V11 ~ ., data = training, kmax = 100, kernel = c("optimal",
#     "rectangular", "inv", "gaussian", "triangular"), scale = TRUE)
#
# Type of response variable: continuous
# Minimal mean absolute error: 0.2006881
# Minimal mean squared error: 0.1062738
# Best kernel: inv
# Best k: 31
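# The selected hyperparameters can also be read programmatically from the fitted
# object, and plot() on a train.kknn fit shows the error curve across k for each
# kernel (component and method names as documented in the kknn package).
kknn_fit$best.parameters$kernel  # "inv" for the run reported above
kknn_fit$best.parameters$k       # 31 for the run reported above
plot(kknn_fit)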
# Evaluate the model on the held-out test data
pred <- predict(kknn_fit, testing)

# Round the continuous predictions to get a binary response for classification
pred_bin <- round(pred)

# Confusion table of predicted vs. actual classes
pred_accuracy <- table(pred_bin, testing$V11)
pred_accuracy
# Overall KKNN prediction accuracy on the test set
sum(pred_bin == testing$V11)/length(testing$V11)

# [1] 0.8549618
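# For a fuller breakdown than raw accuracy (sensitivity, specificity, etc.),
# caret's confusionMatrix() could be applied to the same predictions; a sketch,
# assuming the 0/1 labels are converted to factors with matching levels.
confusionMatrix(factor(pred_bin, levels = c(0, 1)),
                factor(testing$V11, levels = c(0, 1)))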
# Of the train/test splits tried (1/2, 1/3, 1/4, and 1/5 of the rows held out for
# testing), holding out 1/5 was the most effective.
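# A sketch of how those split sizes could be compared in a single loop (not part
# of the original analysis; accuracy will vary run to run without a fixed seed).
for (denom in c(2, 3, 4, 5)) {
  test_idx <- sample(1:rows, size = round(rows / denom), replace = FALSE)
  fit <- train.kknn(V11 ~ ., data = credit_data[-test_idx, ], kmax = 100,
                    kernel = c("optimal", "rectangular", "inv", "gaussian", "triangular"),
                    scale = TRUE)
  acc <- mean(round(predict(fit, credit_data[test_idx, ])) == credit_data$V11[test_idx])
  cat("1/", denom, " held out: accuracy = ", round(acc, 4), "\n", sep = "")
}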