220 lines
5.9 KiB
Plaintext
220 lines
5.9 KiB
Plaintext
---
|
|
title: "hw1"
|
|
author: "Mark Pearl"
|
|
date: "5/30/2021"
|
|
output: html_document
|
|
---
|
|
|
|
```{r setup, include=FALSE}
|
|
#Import the required libraries
|
|
library(splines)
|
|
library(tidyverse)
|
|
|
|
#Question 3
|
|
#read_csv("1,2,3\n4,5,6", col_names = c("x", "y", "z"))
|
|
coal_df <- read_csv('./P04.csv',col_names=c("year","energy"))
|
|
y = coal_df$energy
|
|
|
|
```
|
|
|
|
```{r question3}
|
|
|
|
#Question 3
|
|
#read_csv("1,2,3\n4,5,6", col_names = c("x", "y", "z"))
|
|
coal_df <- read_csv('./P04.csv',col_names=c("year","energy"))
|
|
|
|
#Initialize variables used throughout the question
|
|
n = length(coal_df$year)
|
|
y = coal_df$energy
|
|
```
|
|
```{r question3a}
|
|
#3a
|
|
knots_vector <- c(6:15)
|
|
error_a <- rep(0,length(y))
|
|
mse_a <- rep(0,length(knots_vector))
|
|
#Create lower order basis functions for cublic spline
|
|
x = seq(0,1,length.out=length(coal_df$year))
|
|
h1 = rep(1,length(x))
|
|
h2 = x
|
|
h3 = x^2
|
|
h4 = x^3
|
|
#Generate b-spline basis
|
|
for (n_knots in c(1:length(knots_vector))) {
|
|
H = cbind(h1, h2, h3, h4)
|
|
knots = seq(0,1,length.out = knots_vector[n_knots]+2)
|
|
k = knots[2:(length(knots)-1)]
|
|
for (n in c(1:length(k))) {
|
|
h = (x-k[n])^3
|
|
h[h <= 0] = 0
|
|
H = cbind(H,h)
|
|
colnames(H)[n+4] <- paste("h", n+4, sep="")
|
|
}
|
|
for(i in c(1:length(y))){
|
|
H1 = H[-i,];
|
|
Y1 = y[-i];
|
|
H_removed = H[i,];
|
|
y_removed = y[i];
|
|
HS = solve(t(H1)%*%H1)%*%t(H1)%*%Y1
|
|
error_a[i] = y_removed -H_removed%*%HS
|
|
}
|
|
mse_a[n_knots]=sum(error_a^2)
|
|
flush.console()
|
|
}
|
|
|
|
layout(matrix(c(1,1,2,2), ncol = 1, byrow = TRUE))
|
|
plot(knots_vector,mse_a,type = "l",ylab = 'MSE',xlab='# of Knots',main = 'MSE Chart for Cubic Splines')
|
|
min_knot = knots_vector[which.min(mse_a)]
|
|
points(min_knot,mse_a[which.min(mse_a)],col = "red", lwd=5)
|
|
text(mse_a[which.min(mse_a)]~min_knot, labels=sprintf("%s Knots MSE: %s",min_knot,round(min(mse_a),2)),cex=0.9, font=2,pos=3)
|
|
|
|
|
|
H = cbind(h1, h2, h3, h4)
|
|
knots = seq(0,1,length.out = min_knot)
|
|
k = knots[2:(length(knots)-1)]
|
|
for (n in c(1:length(k))) {
|
|
h = (x-k[n])^3
|
|
h[h <= 0] = 0
|
|
H = cbind(H,h)
|
|
colnames(H)[n+4] <- paste("h", n+4, sep="")
|
|
}
|
|
|
|
HS = solve(t(H)%*%H)%*%t(H)%*%y
|
|
plot(x,y,type = "l",ylab = 'Y Response for Energy',xlab='Year',main = 'Optimal Cubic Spline Function')
|
|
lines(x,H%*%HS,col = "blue")
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r question3b}
|
|
#3b
|
|
|
|
x = seq(1,length(coal_df$year))
|
|
knots_vector <- c(6:15)
|
|
error <- rep(0,length(y))
|
|
mse_b <- rep(0,length(knots_vector))
|
|
#Generate b-spline basis
|
|
for (k in c(1:length(knots_vector))) {
|
|
knots = seq(1,length(coal_df$year),length.out = knots_vector[k])
|
|
order = 3
|
|
nbasis = knots_vector[k] + order - 2
|
|
print(nbasis)
|
|
B = bs(x, knots = knots, degree = 2,intercept = FALSE)[,1:nbasis]
|
|
for(i in c(1:length(y))){
|
|
|
|
B_new = B[-i,];
|
|
y_new = y[-i];
|
|
B_removed = B[i,];
|
|
y_removed = y[i];
|
|
BS = solve(t(B_new)%*%B_new)%*%t(B_new)%*%y_new
|
|
error[i] = y_removed -B_removed%*%BS
|
|
|
|
}
|
|
mse_b[k]=sum(error^2)
|
|
flush.console()
|
|
}
|
|
|
|
layout(matrix(c(1,1,2,2), ncol = 1, byrow = TRUE))
|
|
plot(knots_vector,mse_b,type = "l",ylab = 'MSE',xlab='# of Knots',main = 'MSE Chart for B-Splines with Degree 2')
|
|
min_knot = knots_vector[which.min(mse_b)]
|
|
points(min_knot,mse_b[which.min(mse_b)],col = "red", lwd=5)
|
|
text(mse_b[which.min(mse_b)]~min_knot, labels=sprintf("%s Knots MSE: %s",min_knot,round(min(mse_b),2)),cex=0.9, font=2,pos=3)
|
|
|
|
|
|
knots = seq(1,length(coal_df$year),length.out = min_knot)
|
|
order = 3
|
|
nbasis = min_knot + order - 2
|
|
B = bs(x, knots = knots, degree = 2,intercept = FALSE)[,1:nbasis]
|
|
BS = B%*%solve(t(B)%*%B)%*%t(B)%*%y
|
|
plot(x,y,type = "l",col = "red",ylab = 'y response for Energy',xlab='x for Year (1-69)',main = 'Optimal B-Splines Function Output')
|
|
lines(x,BS,col = "blue")
|
|
```
|
|
|
|
|
|
|
|
```{r question3c}
|
|
library(tuple)
|
|
#3c
|
|
#k = length(coal_df$year)
|
|
#k = 40
|
|
x = c(1:length(coal_df$year))
|
|
allspar = seq(0,1,length.out = 1000)
|
|
p = length(allspar)
|
|
error_c <- rep(0,length(y))
|
|
mse_c <- rep(0,length(allspar))
|
|
error_test <- matrix(0, nrow = length(y), ncol = 2)
|
|
|
|
for(lambda in 1:p) {
|
|
for(i in c(1:length(y))){
|
|
y_new = y[-i];
|
|
y_removed = y[i];
|
|
sm = smooth.spline(y_new, spar = allspar[lambda], df=18)
|
|
ypred = predict(sm,x[i])
|
|
yhat = ypred$y
|
|
error_c[i] = y_removed - yhat
|
|
#error_test <- rbind(error_test, c(y_removed,yhat))
|
|
}
|
|
mse_c[lambda]=sum(error_c^2)
|
|
}
|
|
#plot(x,y,type = "l",ylab = 'Energy',xlab='Year Index',main = 'Plot for Smoothing Splines')
|
|
#lines(x_new,sm$y,col = "black",lwd=3)
|
|
layout(matrix(c(1,1,2,2), ncol = 1, byrow = TRUE))
|
|
plot(allspar,mse_c,type = "l",ylab = 'MSE',xlab='Lambda Value',main = 'MSE Chart for Smoothing Splines')
|
|
min_lambda = allspar[which.min(mse_c)]
|
|
points(min_lambda,mse_c[which.min(mse_c)],col = "red", lwd=5)
|
|
text(mse_c[which.min(mse_c)]~min_lambda, labels=sprintf("Lambda with min MSE: %s",min(mse_c)),cex=0.9, font=2,pos=3)
|
|
|
|
sm_optimal = smooth.spline(y, spar = min_lambda, df=18)
|
|
plot(x,y,type = "l",col = "red",ylab = 'y response for Energy',xlab='x for Year (1-69)',main = 'Optimal Smoothing Splines Function Output')
|
|
lines(x,sm_optimal$y,col = "blue")
|
|
|
|
```
|
|
|
|
|
|
```{r question3d}
|
|
library(tuple)
|
|
#3d
|
|
# Data Genereation
|
|
x = c(1:length(coal_df$year))
|
|
kerf = function(z){exp(-z*z/2)/sqrt(2*pi)}
|
|
# leave-one-out CV
|
|
h1=seq(0,1,length.out=1000)
|
|
error_d = rep(0, length(y))
|
|
mse_d = rep(0, length(h1))
|
|
for(j in 1:length(h1))
|
|
{
|
|
h=h1[j]
|
|
for(i in 1:length(y))
|
|
{
|
|
X1=x;
|
|
Y1=y;
|
|
X1=x[-i];
|
|
Y1=y[-i];
|
|
z=kerf((x[i]-X1)/h)
|
|
yke=sum(z*Y1)/sum(z)
|
|
error_d[i]=y[i]-yke
|
|
}
|
|
mse_d[j]=sum(error_d^2)
|
|
}
|
|
|
|
yke_final <- rep(0, length(y))
|
|
for(i in 1:length(y))
|
|
{
|
|
X1=x;
|
|
Y1=y;
|
|
X1=x[-i];
|
|
Y1=y[-i];
|
|
z=kerf((x[i]-X1)/h)
|
|
yke_final[i]=sum(z*Y1)/sum(z)
|
|
}
|
|
|
|
layout(matrix(c(1,1,2,2), ncol = 1, byrow = TRUE))
|
|
plot(h1,mse_d,type = "l")
|
|
h = h1[which.min(mse_d)]
|
|
points(h,mse_d[which.min(mse_d)],col = "red", lwd=5)
|
|
text(mse_d[which.min(mse_d)]~h, labels=sprintf("%s lambda with min MSE: %s",h,mse_d[which.min(mse_d)]),cex=1, font=2,pos=4)
|
|
plot(x,y,type = "l",col = "red",ylab = 'y response for Energy',xlab='x for Year (1-69)',main = 'Optimal Gaussian Kernel')
|
|
lines(x,yke_final,col = "blue")
|
|
``` |