# functions from last lab
def four_in_one(dataframe, model):
    """Draw a 2x2 panel of residual diagnostics for a fitted regression model.

    The four panels all use the internally studentized residuals:
    a normal Q-Q plot, a histogram, the residuals in observation order, and
    the residuals against the fitted values with a LOWESS trend line.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data the model was fitted on. Retained so existing callers keep
        working; the observation-order axis is sized from the residuals
        themselves (see the note in the body).
    model : fitted results object
        Must expose ``fittedvalues`` and
        ``get_influence().resid_studentized_internal`` (e.g. the result of
        fitting a statsmodels OLS model).

    Returns
    -------
    None
        The figure is drawn on a new 10x10 inch matplotlib figure.
    """
    fitted_y = model.fittedvalues
    studentized_residuals = model.get_influence().resid_studentized_internal

    plt.figure(figsize=(10, 10))

    # Panel 1: normal Q-Q plot. The axes are passed explicitly instead of
    # relying on pyplot's "current axes" state.
    ax1 = plt.subplot(221)
    stats.probplot(studentized_residuals, dist="norm", plot=ax1)
    ax1.set_title('Normal Q-Q')
    ax1.set_xlabel('Normal Quantiles')
    ax1.set_ylabel('Studentized Residuals')

    # Panel 2: histogram of the residuals.
    ax2 = plt.subplot(222)
    ax2.hist(studentized_residuals)
    ax2.set_xlabel('Studentized Residuals')
    ax2.set_ylabel('Count')
    ax2.set_title('Histogram')

    # Panel 3: residuals in observation order.
    # The x-axis is sized from the residual vector rather than from
    # dataframe.shape[0]: if the model dropped rows while fitting, the two
    # lengths differ and scatter() would raise on mismatched x/y sizes.
    ax3 = plt.subplot(223)
    observation_order = range(len(studentized_residuals))
    ax3.scatter(observation_order, studentized_residuals)
    ax3.set_xlabel('Observation order')
    ax3.set_ylabel('Residuals')
    ax3.set_title('Time series plot of studentized residuals')

    # Panel 4: residuals vs fitted values with a LOWESS smoother.
    # x and y are passed as vectors, so no intermediate DataFrame is needed.
    ax4 = plt.subplot(224)
    sns.residplot(x=fitted_y, y=studentized_residuals,
                  lowess=True,
                  scatter_kws={'alpha': 0.5},
                  line_kws={'color': 'red', 'lw': 1, 'alpha': 0.8},
                  ax=ax4)
    ax4.set_title('Internally Studentized Residuals vs Fitted values')
    ax4.set_xlabel('Fitted values')
    ax4.set_ylabel('Studentized Residuals')


def getvif(X):
    """Return the variance inflation factor (VIF) of every predictor in ``X``.

    An intercept column is added with ``sm.add_constant`` before the VIFs are
    computed, and the intercept's own row is left out of the result.

    Parameters
    ----------
    X : pandas.DataFrame
        Predictor columns (the code reads ``.values`` and ``.columns``).

    Returns
    -------
    pandas.DataFrame
        Columns ``"VIF"`` (rounded to 2 decimals) and ``"Predictors"``, one
        row per non-intercept column.
    """
    design = sm.add_constant(X)

    vif = pd.DataFrame()
    vif["VIF"] = [variance_inflation_factor(design.values, i)
                  for i in range(design.shape[1])]
    vif["Predictors"] = design.columns

    # Identify the intercept by value (constant, non-zero column) instead of
    # assuming it is row 0: add_constant() leaves X untouched when it already
    # holds a constant, in which case row 0 would be a real predictor.
    is_intercept = ((design.nunique() == 1) & (design.iloc[0] != 0)).to_numpy()
    return vif.loc[~is_intercept].round(2)
\n", " | rBH | \n", "rSP | \n", "SmB | \n", "HmL | \n", "
---|---|---|---|---|
Date | \n", "\n", " | \n", " | \n", " | \n", " |
1/2/2009 | \n", "-0.121807 | \n", "-0.109931 | \n", "0.0005 | \n", "-0.0695 | \n", "
1/3/2009 | \n", "0.103053 | \n", "0.085404 | \n", "0.0004 | \n", "0.0348 | \n", "
1/4/2009 | \n", "0.084198 | \n", "0.093925 | \n", "0.0539 | \n", "0.0536 | \n", "
1/5/2009 | \n", "-0.025532 | \n", "0.053081 | \n", "-0.0252 | \n", "0.0027 | \n", "
1/6/2009 | \n", "-0.017467 | \n", "0.000196 | \n", "0.0263 | \n", "-0.0273 | \n", "