---
title: "exam2"
author: "Mark Pearl"
date: "7/28/2021"
output: html_document
---

```{r import libraries}
library(glmnet)
library(tidyverse)
library(ggplot2)
library(caret)
library(tidyr)
library(dplyr)
library(ggrepel)
```

```{r read data and split into train/test}
beans <- read_csv('./Question1.csv')

#Split data into train and test
train <- beans[1:2800,]
test <- beans[2801:3871,]

#Map the numeric class codes back to bean names and treat Class as a factor
train <-
  train %>% mutate(ClassFactor = case_when(
    Class == 1 ~ "Barbunya",
    Class == 2 ~ "Bombay",
    TRUE ~ "Seker"))

train$Class <- as.factor(train$Class)

test <-
  test %>% mutate(ClassFactor = case_when(
    Class == 1 ~ "Barbunya",
    Class == 2 ~ "Bombay",
    TRUE ~ "Seker"))

#The first 16 columns are the predictors; column 17 holds the numeric class label
test_x <- test[,1:16]
y_actual <- test[,17]
```
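
As a quick sanity check on the 2800/1071 split (a small addition, assuming the classes are coded 1–3 as above), the class counts in each partition can be tabulated:

```{r check class balance of the split}
#Class counts per partition (sanity check; not part of the marked answer)
table(train$ClassFactor)
table(test$ClassFactor)
```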

```{r part 1 multinomial logistic regression}
#Fit an (effectively unpenalized) multinomial logistic regression: alpha = 0, lambda = 0
fit <- glmnet(x = as.matrix(train[,1:16]), y = train$Class, family = "multinomial", alpha = 0, lambda = 0)
#Suppress scientific notation when printing the coefficients
options(scipen=999)
fit$beta
```
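
For readability, the per-class coefficient vectors in `fit$beta` can be collected into a single matrix (a small convenience sketch, not part of the original answer):

```{r view multinomial coefficients side by side}
#Bind the three per-class coefficient vectors into one p x 3 matrix
coef_mat <- do.call(cbind, lapply(fit$beta, as.matrix))
colnames(coef_mat) <- names(fit$beta)
round(coef_mat, 4)
```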

```{r coefficient path plotting helper}
#Helper to plot the coefficient paths for one class of a multinomial cv.glmnet fit.
#Note: it reads the cv.glmnet object from the global variable `mod`, so `mod` must
#exist before this function is called.
create_ggplot <- function(beta_index, lambda_var) {
  #Sparse p x n_lambda coefficient matrix for the class given by beta_index
  betas = mod$glmnet.fit$beta[[beta_index]]
  lambdas = mod$lambda
  names(lambdas) = betas@Dimnames[[2]]
  as.data.frame(as.matrix(betas)) %>%
    tibble::rownames_to_column("variable") %>%
    pivot_longer(-variable) %>%
    mutate(lambda = lambdas[name]) %>%
    ggplot(aes(x = lambda, y = value, col = variable)) +
    geom_line() +
    geom_vline(xintercept = lambda_var, color = 'red') +
    geom_label_repel(data = ~subset(.x, lambda == min(lambda)),
                     aes(label = variable), nudge_x = -0.5) +
    scale_x_log10()
}
```

```{r part 1 confusion matrix}
#Confusion matrix for the unpenalized multinomial fit on the test set
cnf <- confusion.glmnet(fit, newx = as.matrix(test_x), newy = y_actual[[1]])
cnf
```
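
The overall test accuracy can be read off the confusion table (a small sketch, assuming the table is square, i.e. every class appears among the predictions):

```{r part 1 test accuracy}
#Overall proportion of correctly classified test observations
sum(diag(cnf)) / sum(cnf)
```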

```{r part 2 ridge multinomial logistic regression}
#Create cv.glmnet fit for ridge regression (alpha = 0), selecting lambda by 20-fold CV
#p=9
mod <- cv.glmnet(x = as.matrix(train[,1:16]), y = train$Class, family = "multinomial", alpha = 0, nfolds = 20, grouped = FALSE, type.measure = 'class')
lambda_ridge = mod$lambda.min
#Refit at the selected lambda
ridge_final = glmnet(as.matrix(train[,1:16]), y = train$Class, family = "multinomial", alpha = 0, lambda = lambda_ridge)
ridge_final$beta
```
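
The cross-validation curve (misclassification error against log(lambda), with lambda.min marked) can be inspected directly with glmnet's built-in plot method:

```{r ridge cv curve}
#CV misclassification error vs log(lambda) for the ridge fit
plot(mod)
```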

```{r get output ridge}
create_ggplot(1, lambda_ridge)
create_ggplot(2, lambda_ridge)
create_ggplot(3, lambda_ridge)
```

```{r part 2 ridge confusion matrix}
cnf_ridge <- confusion.glmnet(ridge_final, newx = as.matrix(test_x), newy = y_actual[[1]])
cnf_ridge
```

```{r part 3 lasso multinomial logistic regression}
#Create cv.glmnet fit for the lasso (alpha = 1), selecting lambda by 20-fold CV
#p=9
mod <- cv.glmnet(x = as.matrix(train[,1:16]), y = train$Class, family = "multinomial", alpha = 1, nfolds = 20)
lambda_lasso = mod$lambda.min
#Refit at the selected lambda
lasso_final = glmnet(as.matrix(train[,1:16]), y = train$Class, family = "multinomial", alpha = 1, lambda = lambda_lasso)
lasso_final$beta
```
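
Since the lasso zeroes out coefficients, it is worth checking how many predictors remain per class at lambda.min (a small convenience sketch):

```{r lasso sparsity check}
#Number of non-zero coefficients per class at the selected lambda
sapply(lasso_final$beta, function(b) sum(as.matrix(b) != 0))
```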

```{r get output lasso}
create_ggplot(1, lambda_lasso)
create_ggplot(2, lambda_lasso)
create_ggplot(3, lambda_lasso)
```

```{r part 3 lasso confusion matrix}
cnf_lasso <- confusion.glmnet(lasso_final, newx = as.matrix(test_x), newy = y_actual[[1]])
cnf_lasso
```

```{r adaptive lasso}
#Adaptive lasso: weight each predictor by the inverse of its ridge coefficient magnitude
gamma = 0.5
#Per-class ridge coefficients (drop the intercept row)
b.ridge = do.call(cbind, coef(ridge_final))
w1 <- 1 / abs(as.matrix(b.ridge)[-1,])^gamma
#penalty.factor needs one weight per predictor, so collapse the per-class weights
#with a Euclidean norm across the three classes
w_alasso <- apply(w1, 1, function(r) sqrt(sum(r^2)))
mod <- cv.glmnet(x = as.matrix(train[,1:16]), y = train$Class, nfolds = 20, family = "multinomial", alpha = 1, penalty.factor = w_alasso)
lambda_alasso = mod$lambda.min
#Refit at the selected lambda, keeping the same adaptive weights
alasso_final = glmnet(as.matrix(train[,1:16]), y = train$Class, family = "multinomial", alpha = 1, lambda = lambda_alasso, penalty.factor = w_alasso)
alasso_final$beta
```
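
To see what the adaptive weighting changes, the sets of predictors retained (non-zero in at least one class) by the plain lasso and the adaptive lasso can be compared (a small convenience sketch):

```{r compare lasso and adaptive lasso selections}
#Predictors with a non-zero coefficient in at least one class
selected_vars <- function(fit) {
  m <- do.call(cbind, lapply(fit$beta, as.matrix))
  rownames(m)[rowSums(m != 0) > 0]
}
selected_vars(lasso_final)
selected_vars(alasso_final)
```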

```{r get output adaptive lasso}
create_ggplot(1, lambda_alasso)
create_ggplot(2, lambda_alasso)
create_ggplot(3, lambda_alasso)
```

```{r part 4 adaptive lasso confusion matrix}
cnf_alasso <- confusion.glmnet(alasso_final, newx = as.matrix(test_x), newy = y_actual[[1]])
cnf_alasso
```
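
Finally, the four models can be compared on overall test accuracy computed from their confusion tables (a small summary sketch, again assuming each table is square):

```{r compare test accuracy}
#Overall test accuracy per model, taken from the confusion tables above
acc <- function(cm) sum(diag(cm)) / sum(cm)
data.frame(model = c("multinomial", "ridge", "lasso", "adaptive lasso"),
           accuracy = c(acc(cnf), acc(cnf_ridge), acc(cnf_lasso), acc(cnf_alasso)))
```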