106 lines
2.8 KiB
Plaintext
106 lines
2.8 KiB
Plaintext
---
title: "hw1"
author: "Mark Pearl"
date: "5/30/2021"
output: html_document
---
|
|
|
|
|
|
```{r question4a}
library(randomForest)

# Training split: column V1 holds the class label, every other column is one
# sample point of the curve. Negative labels are recoded to 0.
train_raw <- read.table(file = "./ECG200TRAIN", sep = ",")
ecg_train_y <- train_raw$V1
ecg_train_y[ecg_train_y < 0] <- 0
ecg_train <- as.matrix(train_raw[, colnames(train_raw) != "V1"])

# Test split, processed the same way as the training split.
test_raw <- read.table(file = "./ECG200TEST", sep = ",")
ecg_test_y <- test_raw$V1
ecg_test_y[ecg_test_y < 0] <- 0
ecg_test <- as.matrix(test_raw[, colnames(test_raw) != "V1"])

# Common evaluation grid on [0, 1], one point per curve column; used by the
# feature-extraction chunks that follow.
x <- seq(0, 1, length.out = length(colnames(ecg_train)))
```
|
|
|
|
```{r question4a-bsplines}
# Option 1: B-splines
# Represent every curve by its least-squares coefficients on a cubic B-spline
# basis, train a random forest on those coefficients, and plot the test curves
# coloured by predicted class.
library(splines)

# Cubic B-spline design matrix evaluated on the grid `x`; only the first 10
# basis columns are kept as features.
# NOTE(review): `knots` contains the endpoints 0 and 1, which are also the
# default boundary knots of bs() for this grid -- confirm this is intended.
knots <- seq(0, 1, length.out = 8)
B <- bs(x, knots = knots, degree = 3)[, 1:10]

# Row positions of the training and test curves in the stacked (train, test)
# ordering. `train` is reused by the FPCA chunk later in the document.
n_train <- nrow(ecg_train)
n_test <- nrow(ecg_test)
train <- seq_len(n_train)
test <- n_train + seq_len(n_test)

# Least-squares coefficients (B'B)^{-1} B' y for every curve y. The projection
# is the same for all curves, so it is solved once and applied to the whole
# matrices; this also removes the hard-coded training size (100) that the
# per-row loop used to index the test set. unname() strips the basis column
# names so the feature columns are named V1..V10 in the data frames below.
ls_projection <- solve(crossprod(B), t(B))
Bcoef_train <- unname(ecg_train %*% t(ls_projection))
Bcoef_test <- unname(ecg_test %*% t(ls_projection))

# Random forest on the spline coefficients; the response is kept in its own
# factor column so the formula's `.` expands to the coefficient columns only.
train_features <- as.data.frame(Bcoef_train)
train_features$label <- factor(ecg_train_y)
fit <- randomForest(label ~ ., data = train_features)

pred4a <- predict(fit, as.data.frame(Bcoef_test))
cf_matrix <- table(ecg_test_y, pred4a)

# Test curves predicted as class 0 in blue, class 1 in red.
# drop = FALSE keeps a matrix even when a single curve falls in a class, and
# the guard skips the overlay when no curve is predicted as class 1.
is_class0 <- pred4a == 0
is_class1 <- pred4a == 1
matplot(
  x, t(ecg_test[is_class0, , drop = FALSE]),
  type = "l", col = "blue", ylab = "y", ylim = c(-4, 4),
  main = "Classification using B-spline coefficients"
)
if (any(is_class1)) {
  matlines(x, t(ecg_test[is_class1, , drop = FALSE]), lty = 1, col = "red")
}
```
|
|
```{r question4a-accuracy}
# Accuracy of the B-spline classifier on the test set.
library(caret)
library(e1071)

# Build the reference factor with the same levels as the predictions so that
# confusionMatrix() compares matching level sets even if a class is absent
# from the test labels.
confusionMatrix(
  data = pred4a,
  reference = factor(ecg_test_y, levels = levels(pred4a))
)
```
|
|
|
|
```{r question4b-fpca}
# Option 2: FPCA
# Smooth all curves onto a B-spline basis, extract functional principal
# component scores, train a random forest on the training scores, and plot the
# test curves coloured by predicted class.
library(fda)

splinebasis <- create.bspline.basis(c(0, 1), 10)

# Training curves are stacked first, then test curves; smooth.basis() takes
# one curve per column, hence the transpose.
# NOTE(review): the components are estimated from training and test curves
# jointly (labels are not used) -- confirm this is acceptable for the task.
all_curves <- rbind(ecg_train, ecg_test)
smooth <- smooth.basis(x, t(all_curves), splinebasis)
Xfun <- smooth$fd

# Smallest number of harmonics whose cumulative variance proportion reaches
# 95%, capped at the number of harmonics actually computed.
pca <- pca.fd(Xfun, 10)
var.pca <- cumsum(pca$varprop)
nharm <- min(sum(var.pca < 0.95) + 1, length(var.pca))
pc <- pca.fd(Xfun, nharm)

# Scores are ordered like `all_curves`: training rows first, test rows after.
# drop = FALSE keeps matrices even when only one harmonic is retained.
FPCcoef <- pc$scores
train_rows <- seq_len(nrow(ecg_train))
train_scores <- FPCcoef[train_rows, , drop = FALSE]
test_scores <- FPCcoef[-train_rows, , drop = FALSE]

# First two FPC scores of the training curves, coloured by true label. The
# label mask is applied to the training scores only; applied to the full score
# matrix it would be recycled over the test rows.
plot(
  train_scores[ecg_train_y == 0, , drop = FALSE],
  xlab = "FPC-score 1", ylab = "FPC-score 2", col = "blue", ylim = c(-1, 1)
)
points(train_scores[ecg_train_y == 1, , drop = FALSE], col = "red")

# Random forest on the training scores; the response is kept in its own factor
# column so the formula's `.` expands to the score columns only.
train_features <- as.data.frame(train_scores)
train_features$label <- factor(ecg_train_y)
fit <- randomForest(label ~ ., data = train_features)
pred4b <- predict(fit, as.data.frame(test_scores))

# Test curves predicted as class 0 in blue, class 1 in red; the guard skips
# the overlay when no curve is predicted as class 1.
is_class0 <- pred4b == 0
is_class1 <- pred4b == 1
matplot(
  x, t(ecg_test[is_class0, , drop = FALSE]),
  type = "l", col = "blue", ylab = "y", ylim = c(-4, 4),
  main = "Classification using FPCA scores"
)
if (any(is_class1)) {
  matlines(x, t(ecg_test[is_class1, , drop = FALSE]), lty = 1, col = "red")
}
```
|
|
```{r question4b-accuracy}
# Accuracy of the FPCA classifier on the test set. The reference factor is
# built with the same levels as the predictions so that confusionMatrix()
# compares matching level sets even if a class is absent from the test labels.
confusionMatrix(
  data = pred4b,
  reference = factor(ecg_test_y, levels = levels(pred4b))
)
```
|
|
|
|
|