Files
tunmnlu/task_2/others-answer/omsa-main/ISYE-6501-OAN/hw2/homework2-4.1.R
louiscklaw 9035c1312b update,
2025-02-01 02:09:32 +08:00

48 lines
1.8 KiB
R

library(kknn)
library(caret)
library(tidyverse)
# Load the credit card data. The file has no header row, so read.table()
# auto-names the columns V1, V2, ...; V11 is the response modeled later on.
# NOTE(review): absolute, machine-specific path -- adjust before running
# on another machine.
data_path <- "C:/Users/mjpearl/Downloads/data 2.2/credit_card_data.txt"
credit_data <- read.table(data_path, stringsAsFactors = FALSE, header = FALSE)

# Fix the RNG seed so the random train/test split (and therefore every
# downstream result) is reproducible from run to run.
set.seed(42)

# Number of rows in the credit card data (654 according to the original
# author's notes).
rows <- nrow(credit_data)

# Randomly pick one fifth of the row indices, without replacement, as the
# hold-out test set. seq_len() stays correct for an empty table, and the
# index vector has its own name so it does not mask base::sample().
test_idx <- sample(seq_len(rows), size = round(rows / 5), replace = FALSE)

# Training set: every row that was NOT drawn above (about 4/5 of the data).
# setdiff() is used instead of negative indexing because x[-integer(0), ]
# would select zero rows if the hold-out sample happened to be empty.
training <- credit_data[setdiff(seq_len(rows), test_idx), ]

# Testing set: exactly the held-out rows (about 1/5 of the data).
testing <- credit_data[test_idx, ]
# Tune a weighted k-nearest-neighbours model with kknn::train.kknn().
# It runs leave-one-out cross-validation on the training rows and searches
# over every k up to kmax and every listed kernel to find the best
# hyperparameter combination. Predictors are scaled before distances are
# computed (scale = TRUE).
kknn_fit <- train.kknn(
  formula = V11 ~ .,
  data = training,
  kmax = 100,
  kernel = c("optimal", "rectangular", "inv", "gaussian", "triangular"),
  scale = TRUE
)

# Show the tuning summary (best kernel, best k and the associated errors).
kknn_fit
# Output recorded from an earlier run (there the training data frame was
# called d.train); exact figures depend on the random train/test split.
# Note that V11 is treated as a continuous response, i.e. this is a
# regression fit.
#
# Call:
# train.kknn(formula = V11 ~ ., data = d.train, kmax = 100, kernel = c("optimal", "rectangular", "inv", "gaussian", "triangular"), scale = TRUE)
#
# Type of response variable: continuous
# minimal mean absolute error: 0.2006881
# Minimal mean squared error: 0.1062738
# Best kernel: inv
# Best k: 31
# Score the held-out test rows with the tuned model.
pred <- predict(kknn_fit, testing)

# The model was fit with V11 as a numeric response, so the predictions are
# continuous values; round them to the nearest integer to obtain a binary
# class label for classification.
pred_bin <- round(pred)

# Confusion matrix of predicted vs. actual labels. The dimensions are named
# so the printed table makes clear which axis is which.
pred_accuracy <- table(predicted = pred_bin, actual = testing$V11)
pred_accuracy

# KKNN accuracy on the test set: the share of predictions that match the
# actual label (the mean of a logical vector is the proportion of TRUE).
mean(pred_bin == testing$V11)
# [1] 0.8549618   (recorded from an earlier run; depends on the random split)

# The most effective train/test split was holding out 1/5 of the rows.
# Other hold-out fractions were tried as well, including 1/3, 1/4 and 1/2.