# -------------------- Code for Question 2.2 part 1 -----------------------------
# Clear environment: remove every object that ls() lists in the current
# environment, so the script does not pick up leftovers from earlier work.
# NOTE(review): if this file is source()d from an existing session, this also
# deletes that session's objects -- consider running in a fresh R session instead.

rm(list = ls())

# Load the kernlab library (which contains the ksvm function) and read in the data
#

library(kernlab)

# ---------------------------- Data manipulation -------------------------------------

# Location of the credit card data file.
# Set the CREDIT_CARD_DATA environment variable to point at your own copy of
# the file; when it is unset or empty, the original hard-coded path is used.
data_path <- Sys.getenv("CREDIT_CARD_DATA", unset = "")
if (!nzchar(data_path)) {
  data_path <- "/Users/Chewy/Downloads/credit_card_data.txt"
}

# The file is read without a header row (header = FALSE), so the columns get
# the default names V1, V2, ... that the rest of this script refers to.
data <- read.table(data_path, stringsAsFactors = FALSE, header = FALSE)

#
# optional check to make sure the data is read correctly
#

head(data)

# Console output for head(data)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11
## 1 1 30.83 0.000 1.25 1 0 1 1 202 0 1
## 2 0 58.67 4.460 3.04 1 0 6 1 43 560 1
## 3 0 24.50 0.500 1.50 1 1 0 1 280 824 1
## 4 1 27.83 1.540 3.75 1 0 5 0 100 3 1
## 5 1 20.17 5.625 1.71 1 1 0 1 120 0 1
## 6 1 32.08 4.000 2.50 1 1 0 0 360 0 1
# NOTE: ALL ROWS OF THIS FILE STARTING WITH "##" DENOTE R OUTPUT
#

# Fix the random number generator seed so that our results are reproducible
# (Your solution doesn't need this, but it's usually good practice to do)

set.seed(1)

# -------------------------- Creating the models ------------------------------------

# -------------------------- Scaled=TRUE model ------------------------------------

# Fit the model using scaled=TRUE.
# V11 (column 11) is the response, V1 to V10 (columns 1 to 10) are the predictors.

# Matrix interface: predictors as a matrix, response as a factor
predictors <- as.matrix(data[, 1:10])
response <- as.factor(data[, 11])

model_scaled <- ksvm(
  predictors,
  response,
  type = "C-svc",        # Use C-classification method
  kernel = "vanilladot", # Use simple linear kernel
  C = 100,
  scaled = TRUE          # have ksvm scale the data for you
)

# or you could use the formula interface; it does the same thing.
# This second fit replaces the model_scaled object created just above.

model_scaled <- ksvm(
  V11 ~ .,
  data = data,
  type = "C-svc",        # Use C-classification method
  kernel = "vanilladot", # Use simple linear kernel
  C = 100,
  scaled = TRUE          # have ksvm scale the data for you
)

# attributes() shows which parts of the model data structure can be referenced.
# For example, below we use model_scaled@b to get the intercept and
# model_scaled@coef to get the coefficients; both are listed in the console
# output of attributes(model_scaled).

attributes(model_scaled)

# Console output for attributes(model_scaled) is left out since it is a long output

# Printing the model lists some high level information about the model data structure

model_scaled

# Console output for model_scaled
##
## Support Vector Machine object of class "ksvm"
## SV type: C-svc (classification)
## parameter : cost C = 100
## Linear (vanilla) kernel function.
## Number of Support Vectors : 189 
## Objective Function Value : -17887.92 
## Training error : 0.136086

# -------------------------- Calculating the a coefficients ------------------------------------
#
# Classification is done using the linear kernel: a*scaled(x) + a0.
# The model does not output a directly, but a can be rebuilt from the model:
# a1,...,am are the linear combination of the data points stored in the model,
# weighted by their corresponding coefficients.
# We take the data points from the xmatrix slot since the model stores them scaled.

# Stored data points (one row per point) and their coefficients
sv_points_scaled <- model_scaled@xmatrix[[1]]
sv_coefs_scaled <- model_scaled@coef[[1]]

# Multiply each stored point by its coefficient, then add up column by column
a_scaled <- colSums(sv_points_scaled * sv_coefs_scaled)

# a0 is just the negative of model_scaled@b
a0_scaled <- -model_scaled@b

# Show the results

a_scaled
a0_scaled

#Console output for a_scaled 
## V1            V2            V3            V4            V5           
## -0.0010065348 -0.0011729048 -0.0016261967  0.0030064203  1.0049405641 
## V6            V7            V8            V9           V10 
## -0.0028259432 0.0002600295 -0.0005349551 -0.0012283758  0.1063633995 

#Console output for a0_scaled
## [1] 0.08158492

# -------------------------- Calculating the predicted values ------------------------------------
#
#The ksvm package provides a predict() function that implements this for us, but we also
#show how to get the predicted values using the a coefficients

# Calculate the predicted values using the a's we got above and our data set.
# The coefficients for this model are based on the SCALED data points, so we need to 
# scale our data points in the same way to get the correct predictions. We do this by using
# the means and standard deviations for V1 to V10 that are stored in the model data structure as:
# model@scaling$x.scale$`scaled:center` (means for V1 to V10)
# model@scaling$x.scale$`scaled:scale` (standard deviation for V1 to V10)
# Then we transform the data points into their scaled equivalent by using the function:
# scaled data point[i,1:10] = (data point[i,1:10] - model@scaling$x.scale$`scaled:center`)/model@scaling$x.scale$`scaled:scale`
#
#Create predicted vector (our own calculated predicted values)
#
# For all data points at once: perform the scaling transformation, calculate
# a*scaled(data point) + a0, and predict the value of each data point from the
# sign of the result.
#
# The data is transposed so that each COLUMN holds one data point. The
# per-variable vectors (a_scaled, centers, scales) are then recycled down every
# column, i.e. applied variable by variable. This is the same arithmetic, in
# the same order, as evaluating the expression separately for each row, but the
# decision value is computed only once per data point and without a loop.

points_by_col <- t(as.matrix(data[, 1:10]))
centers <- model_scaled@scaling$x.scale$`scaled:center`
scales <- model_scaled@scaling$x.scale$`scaled:scale`

decision_scaled <- colSums(a_scaled * (points_by_col - centers) / scales) +
  a0_scaled

# If the data point is on or above the classifier (value >= 0), predicted value = 1
# If the data point is below the classifier (value < 0), predicted value = 0

predicted_scaled <- as.numeric(decision_scaled >= 0)
predicted_scaled

# Output from predicted_scaled
##  [1] 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [42] 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1
##  [83] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [124] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [165] 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [206] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
##  [247] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [288] 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [329] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [370] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [411] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [452] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [493] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [534] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1
##  [575] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [616] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


# Let the ksvm model we created (model_scaled) classify the data points itself.
# Note that the predicted values of the model are also available as model_scaled@fitted
#

predictor_cols <- data[, 1:10]
pred_scaled <- predict(model_scaled, predictor_cols)
pred_scaled

#Output from pred_scaled
## [1] 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [42] 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1
## [83] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [124] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [165] 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [206] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## [247] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [288] 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [329] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [370] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [411] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [452] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [493] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [534] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1
## [575] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [616] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

# typing "pred_scaled" will give the sequence of 1s and 0s showing the model's classification
# As you can see in the outputs, pred_scaled and predicted_scaled have the same predicted values
# so we know that our a coefficients are correct for the SCALED data version of the model

# -------------------------- Calculating the model's accuracy ------------------------------------
#
# Simple accuracy measure: the fraction of observations whose predicted class
# equals the actual response V11. The model is scored here on the same data
# set it was fit to.

actual <- data$V11
n_obs <- nrow(data)

sum(pred_scaled == actual) / n_obs
sum(predicted_scaled == actual) / n_obs

#Output from sum(pred_scaled == data$V11) / nrow(data)
## [1] 0.8639144
#
#Output from sum(predicted_scaled == data$V11) / nrow(data)
## [1] 0.8639144
# Note that this result is found by a wide range of values of C.


# -------------------------- Scaled=FALSE model ------------------------------------

# Fit the model using scaled=FALSE.
# V11 (column 11) is the response, V1 to V10 (columns 1 to 10) are the predictors.

# Matrix interface: predictors as a matrix, response as a factor
predictors <- as.matrix(data[, 1:10])
response <- as.factor(data[, 11])

model_unscaled <- ksvm(
  predictors,
  response,
  type = "C-svc",        # Use C-classification method
  kernel = "vanilladot", # Use simple linear kernel
  C = 100,
  scaled = FALSE         # ksvm will not scale the data for you
)

# or you could use the formula interface; it does the same thing.
# This second fit replaces the model_unscaled object created just above.

model_unscaled <- ksvm(
  V11 ~ .,
  data = data,
  type = "C-svc",        # Use C-classification method
  kernel = "vanilladot", # Use simple linear kernel
  C = 100,
  scaled = FALSE         # ksvm will not scale the data for you
)

# attributes() shows which parts of the model data structure can be referenced.
# For example, below we use model_unscaled@b to get the intercept and
# model_unscaled@coef to get the coefficients; both are listed in the console
# output of attributes(model_unscaled).

attributes(model_unscaled)

# Console output for attributes(model_unscaled) is left out since it is a long output

# Printing the model lists some high level information about the model data structure

model_unscaled

# Console output for model_unscaled
##
## Support Vector Machine object of class "ksvm" 
## SV type: C-svc  (classification) 
## parameter : cost C = 100 
## Linear (vanilla) kernel function. 
## Number of Support Vectors : 186 
## Objective Function Value : -2213.731 
## Training error : 0.278287 

# -------------------------- Calculating the a coefficients ------------------------------------
#
# Classification is done using the linear kernel: a*unscaled(x) + a0 = a*x + a0.
# The model does not output a directly, but a can be rebuilt from the model:
# a1,...,am are the linear combination of the data points stored in the model,
# weighted by their corresponding coefficients.
# We take the data points from the xmatrix slot; for this model they are stored unscaled.

# Stored data points (one row per point) and their coefficients
sv_points_unscaled <- model_unscaled@xmatrix[[1]]
sv_coefs_unscaled <- model_unscaled@coef[[1]]

# Multiply each stored point by its coefficient, then add up column by column
a_unscaled <- colSums(sv_points_unscaled * sv_coefs_unscaled)

# a0 is just the negative of model_unscaled@b
a0_unscaled <- -model_unscaled@b

# Show the results

a_unscaled
a0_unscaled

#Console output for a_unscaled
## V1            V2            V3            V4            V5            
## -0.0483050561 -0.0083148473 -0.0836550114  0.1751121271  1.8254844547   
## V6           V7            V8            V9           V10 
## 0.2763673361 0.0654782414 -0.1108211169 -0.0047229653 -0.0007764962 

#Console output for a0_unscaled
## 0.5255393

# -------------------------- Calculating the predicted values ------------------------------------
#
#The ksvm package provides a predict() function that implements this for us, but we also
#show how to get the predicted values using the a coefficients

# Calculate the predicted values using the a's we got above and our data set
# The coefficients for this model are based on the UNSCALED data points, so we do not need to 
# scale our data points to get the correct predictions.

#Create predicted vector (our own calculated predicted values)
#
# For all data points at once: calculate a*(data point) + a0 and predict the
# value of each data point from the sign of the result.
#
# The data is transposed so that each COLUMN holds one data point. a_unscaled
# is then recycled down every column, i.e. applied variable by variable. This
# is the same arithmetic as evaluating the expression separately for each row,
# but the decision value is computed only once per data point and without a loop.

points_by_col <- t(as.matrix(data[, 1:10]))

decision_unscaled <- colSums(a_unscaled * points_by_col) + a0_unscaled

# If the data point is on or above the classifier (value >= 0), predicted value = 1
# If the data point is below the classifier (value < 0), predicted value = 0

predicted_unscaled <- as.numeric(decision_unscaled >= 0)
predicted_unscaled

# Output from predicted_unscaled
## [1] 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0 1 1 1
## [42] 1 1 1 1 1 1 1 0 0 1 1 0 1 1 1 1 1 0 1 1 0 1 1 1 0 1 0 0 0 0 0 1 0 1 0 1 1 1 0 1 1
## [83] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1
## [124] 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 0 1 1 0 1
## [165] 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
## [206] 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 0 0 0 1 0 1 0 1 0 1 1 0 1 1 1 1 1 0 1 0
## [247] 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [288] 1 0 0 0 0 0 1 0 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## [329] 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## [370] 1 0 1 0 0 0 1 1 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0
## [411] 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## [452] 1 1 0 1 0 0 0 1 0 1 0 0 0 0 1 0 0 1 1 0 0 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1
## [493] 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 0
## [534] 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 0 1 0 0 1 1 0 1 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 1
## [575] 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1
## [616] 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1

# Let the ksvm model we created (model_unscaled) classify the data points itself
#

predictor_cols <- data[, 1:10]
pred_unscaled <- predict(model_unscaled, predictor_cols)
pred_unscaled

#Output from pred_unscaled
## [1] 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0 1 1 1
## [42] 1 1 1 1 1 1 1 0 0 1 1 0 1 1 1 1 1 0 1 1 0 1 1 1 0 1 0 0 0 0 0 1 0 1 0 1 1 1 0 1 1
## [83] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1
## [124] 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 0 1 1 0 1
## [165] 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
## [206] 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 0 0 0 1 0 1 0 1 0 1 1 0 1 1 1 1 1 0 1 0
## [247] 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [288] 1 0 0 0 0 0 1 0 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## [329] 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## [370] 1 0 1 0 0 0 1 1 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0
## [411] 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## [452] 1 1 0 1 0 0 0 1 0 1 0 0 0 0 1 0 0 1 1 0 0 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1
## [493] 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 0
## [534] 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 0 1 0 0 1 1 0 1 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 1
## [575] 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1
## [616] 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1

# typing "pred_unscaled" will give the sequence of 1s and 0s showing the model's classification
# As you can see in the outputs, pred_unscaled and predicted_unscaled have the same predicted values
# so we know that our a coefficients are correct for the UNSCALED data version of the model

# -------------------------- Calculating the model's accuracy ------------------------------------
#
# Simple accuracy measure: the fraction of observations whose predicted class
# equals the actual response V11. The model is scored here on the same data
# set it was fit to.

actual <- data$V11
n_obs <- nrow(data)

sum(pred_unscaled == actual) / n_obs
sum(predicted_unscaled == actual) / n_obs

#Output from sum(pred_unscaled == data$V11) / nrow(data)
## [1] 0.7217125
#
#Output from sum(predicted_unscaled == data$V11) / nrow(data)
## [1] 0.7217125