Files
004_comission/raymondyaaa/quotation1/lecture_notes/lab10/Lab10_solutions.ipynb
louiscklaw 63361c7658 update,
2025-01-31 21:17:06 +08:00

764 lines
191 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "xWLouqcZ4i8i"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "DepNBAuA4i8l"
},
"source": [
"# 1. Linearly separable case"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WIohIKsw4i8n"
},
"outputs": [],
"source": [
"# --- First cluster (vary these to explore their effect) ---\n",
"nbpts1 = 50                    # number of points in the first cluster\n",
"mu1 = [-2, 4]                  # mean of the first cluster (a vector, as we are in 2D) -- move it around\n",
"sigma1 = [[1.5, 0], [0, 1.9]]  # covariance matrix of the first cluster -- zero off-diagonal terms, so not correlated\n",
"\n",
"# --- Second cluster ---\n",
"nbpts2 = 50                        # number of points in the second cluster\n",
"mu2 = [1, -3]                      # mean of the second cluster -- move it around\n",
"sigma2 = [[1.2, 0.2], [0.2, 2.3]]  # covariance matrix of the second cluster -- non-zero off-diagonal terms, so correlated\n",
"\n",
"# Draw the Gaussian samples, first cluster first\n",
"data1 = np.random.multivariate_normal(mean=mu1, cov=sigma1, size=nbpts1)\n",
"data2 = np.random.multivariate_normal(mean=mu2, cov=sigma2, size=nbpts2)\n",
"\n",
"# Stack the two clusters by row: the first cluster's rows come first\n",
"data = np.vstack((data1, data2))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "-rgFEyss4i8o"
},
"outputs": [],
"source": [
"# Normalise: shift every column to zero mean and scale it to unit standard deviation.\n",
"# Pre-processing is dealt with straight away here; in general you do it after you've received the data!\n",
"col_mean = data.mean(axis=0)\n",
"col_std = data.std(axis=0)\n",
"data = (data - col_mean) / col_std\n",
"\n",
"# Integer class labels: 0 for the nbpts1 rows of the first cluster, 1 for the nbpts2 rows of the second.\n",
"classes = np.repeat([0, 1], [nbpts1, nbpts2])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 430
},
"id": "qAAuvPqL4i8p",
"outputId": "fdc37e32-e5ea-40a7-fc9a-d16bb20b253d"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {}
}
],
"source": [
"# Scatter plot of the data, one colour per class\n",
"plt.figure(1)\n",
"\n",
"for label, colour in ((0, 'red'), (1, 'blue')):\n",
"    members = data[classes == label]\n",
"    plt.scatter(members[:, 0], members[:, 1], c=colour, s=12)\n",
"\n",
"# Keep the axis limits: the decision-region plots build their prediction grid from them\n",
"axes = plt.gca()\n",
"x_min, x_max = axes.get_xlim()\n",
"y_min, y_max = axes.get_ylim()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "fXex93FK4i8q"
},
"source": [
"Perceptron"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 75
},
"id": "_Kt3iZ0Z4i8q",
"outputId": "db40ff9a-4d50-44b0-a0aa-f112386adde8"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Perceptron()"
],
"text/html": [
"<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Perceptron()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Perceptron</label><div class=\"sk-toggleable__content\"><pre>Perceptron()</pre></div></div></div></div></div>"
]
},
"metadata": {},
"execution_count": 5
}
],
"source": [
"from sklearn.linear_model import Perceptron\n",
"\n",
"# Default Perceptron, no options given. Depending on the scikit-learn version this may emit a warning\n",
"# about max_iter and tol -- adjust them to your liking.\n",
"# You should check what the defaults are. For example, alpha=0.0001, but alpha is regularisation, which is\n",
"# not something we discussed in the context of the Perceptron. The learning rate is eta0 = 1.\n",
"clf = Perceptron()\n",
"\n",
"# Learning.\n",
"clf.fit(X=data, y=classes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 447
},
"id": "QFV90BJ44i8r",
"outputId": "9b294767-cfa1-426c-a505-2f02fdb432a8"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x7b1ea322fcd0>"
]
},
"metadata": {},
"execution_count": 6
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {}
}
],
"source": [
"# Plotting decision regions\n",
"\n",
"# Generate a meshgrid over which to make predictions (axis limits come from the data scatter plot)\n",
"xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))\n",
"\n",
"# Predict a class for every grid node, then fold the predictions back into the grid's shape\n",
"Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)\n",
"\n",
"plt.figure(2)\n",
"plt.contourf(xx, yy, Z, alpha=0.4)  # Will reveal decision boundary\n",
"plt.scatter(data[classes == 0, 0], data[classes == 0, 1], c='red', s=12)\n",
"plt.scatter(data[classes == 1, 0], data[classes == 1, 1], c='blue', s=12)\n",
"plt.show()  # draw the figure here so the cell does not echo the last scatter's PathCollection repr"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "DGW5ajIx4i8r"
},
"source": [
"SVM"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "-1oO-ZHw4i8t"
},
"outputs": [],
"source": [
"from sklearn.svm import SVC"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "C116dfNg4i8t"
},
"outputs": [],
"source": [
"# Intended setting (infinite C):\n",
"#     clf = SVC(kernel='linear', C=np.inf)\n",
"#\n",
"# That line gives an error message in some environments (it depends on the installed library versions),\n",
"# so a very large finite C is used instead:\n",
"clf = SVC(C=10**6, kernel='linear')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 75
},
"id": "OaZfTg854i8u",
"outputId": "0fe621f4-c6fd-409b-b39d-55164d25dfa2"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"SVC(C=1000000, kernel='linear')"
],
"text/html": [
"<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>SVC(C=1000000, kernel=&#x27;linear&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SVC</label><div class=\"sk-toggleable__content\"><pre>SVC(C=1000000, kernel=&#x27;linear&#x27;)</pre></div></div></div></div></div>"
]
},
"metadata": {},
"execution_count": 11
}
],
"source": [
"# Learning: train the SVM on the points and their class labels\n",
"clf.fit(X=data, y=classes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 447
},
"id": "h8TjMkbT4i8u",
"outputId": "cbfc7498-4282-4a17-ceb7-86c2b39aa31c"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x7b1ea2e5eaa0>"
]
},
"metadata": {},
"execution_count": 12
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {}
}
],
"source": [
"# Plotting decision regions\n",
"\n",
"# Generate a meshgrid over which to make predictions (axis limits come from the data scatter plot)\n",
"xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))\n",
"\n",
"# Predict a class for every grid node, then fold the predictions back into the grid's shape\n",
"Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)\n",
"\n",
"plt.figure(2)\n",
"plt.contourf(xx, yy, Z, alpha=0.4)  # Will reveal decision boundary\n",
"plt.scatter(data[classes == 0, 0], data[classes == 0, 1], c='red', s=12)\n",
"plt.scatter(data[classes == 1, 0], data[classes == 1, 1], c='blue', s=12)\n",
"plt.show()  # draw the figure here so the cell does not echo the last scatter's PathCollection repr"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "zsmD5_7K4i8v"
},
"source": [
"Compared with the Perceptron's decision boundary above, this plot looks much more like the optimal solution."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ZLjqJVwV4i8v"
},
"source": [
"# 2. Non-linearly separable case 1"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "vWr5EI7Z4i8v"
},
"source": [
"This is how the data were generated"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "N0CvxHQY4i8v"
},
"outputs": [],
"source": [
"# --- First cluster (vary these to explore their effect) ---\n",
"nbpts1 = 50                    # number of points in the first cluster\n",
"mu1 = [-2, 2]                  # mean of the first cluster (a vector, as we are in 2D) -- move it around\n",
"sigma1 = [[1.5, 0], [0, 1.9]]  # covariance matrix of the first cluster -- zero off-diagonal terms, so not correlated\n",
"\n",
"# --- Second cluster ---\n",
"nbpts2 = 50                        # number of points in the second cluster\n",
"mu2 = [2, -2]                      # mean of the second cluster -- move it around\n",
"sigma2 = [[1.2, 0.2], [0.2, 2.3]]  # covariance matrix of the second cluster -- non-zero off-diagonal terms, so correlated\n",
"\n",
"# Draw the Gaussian samples, first cluster first\n",
"data1 = np.random.multivariate_normal(mean=mu1, cov=sigma1, size=nbpts1)\n",
"data2 = np.random.multivariate_normal(mean=mu2, cov=sigma2, size=nbpts2)\n",
"\n",
"# Stack the two clusters by row: the first cluster's rows come first\n",
"data = np.vstack((data1, data2))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "vp-O_tko4i8w"
},
"source": [
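"Load the data you used (the saved arrays `data1.npy` and `classes1.npy`); this replaces the freshly generated sample from the previous cell."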
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NP0znSEt4i8w"
},
"outputs": [],
"source": [
"# Saved points and their class labels; the relative paths are resolved against the current working directory\n",
"data_file = 'data1.npy'\n",
"label_file = 'classes1.npy'\n",
"\n",
"data = np.load(data_file)\n",
"classes = np.load(label_file)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 430
},
"id": "D7g4i5Bm4i8w",
"outputId": "d86e6c36-41e4-44d6-af3a-72e2b157ba37"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {}
}
],
"source": [
"# Scatter plot of the data, one colour per class\n",
"plt.figure(1)\n",
"\n",
"for label, colour in ((0, 'red'), (1, 'blue')):\n",
"    members = data[classes == label]\n",
"    plt.scatter(members[:, 0], members[:, 1], c=colour, s=14)\n",
"\n",
"# Keep the axis limits: the decision-region plots build their prediction grid from them\n",
"axes = plt.gca()\n",
"x_min, x_max = axes.get_xlim()\n",
"y_min, y_max = axes.get_ylim()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4g02cxAk4i8w"
},
"source": [
"## Prioritising large margin separating majority of data points:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 447
},
"id": "5xUNI-v74i8w",
"outputId": "0ce4de2b-8ffc-4478-eb09-5102c036ee7d"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x7b1ea2c17970>"
]
},
"metadata": {},
"execution_count": 18
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {}
}
],
"source": [
"# Linear SVM with a small C (the large-margin setting of this section)\n",
"clf = SVC(kernel='linear', C=0.01)\n",
"clf.fit(data, classes)\n",
"\n",
"# Plotting decision regions\n",
"\n",
"# Generate a meshgrid over which to make predictions (axis limits come from the data scatter plot)\n",
"xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))\n",
"\n",
"# Predict a class for every grid node, then fold the predictions back into the grid's shape\n",
"Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)\n",
"\n",
"plt.figure(2)\n",
"plt.contourf(xx, yy, Z, alpha=0.4)  # Will reveal decision boundary\n",
"plt.scatter(data[classes == 0, 0], data[classes == 0, 1], c='red', s=12)\n",
"plt.scatter(data[classes == 1, 0], data[classes == 1, 1], c='blue', s=12)\n",
"plt.show()  # draw the figure here so the cell does not echo the last scatter's PathCollection repr"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "oPfYxRy74i8x"
},
"source": [
"Here the decision boundary is roughly midway between the two main clusters of points, but there are 4 points classified incorrectly and 5 points very close to the decision boundary."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Ps4zuOzZ4i8x"
},
"source": [
"## Prioritising points being on correct side of margin:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 447
},
"id": "wu88ZJ6O4i8x",
"outputId": "b1973719-96b1-4d7e-b87e-aa2dd119909a"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x7b1edc724580>"
]
},
"metadata": {},
"execution_count": 19
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {}
}
],
"source": [
"# Linear SVM with a large C (the setting of this section that prioritises points being on the correct side)\n",
"clf = SVC(kernel='linear', C=100)\n",
"clf.fit(data, classes)\n",
"\n",
"# Plotting decision regions\n",
"\n",
"# Generate a meshgrid over which to make predictions (axis limits come from the data scatter plot)\n",
"xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))\n",
"\n",
"# Predict a class for every grid node, then fold the predictions back into the grid's shape\n",
"Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)\n",
"\n",
"plt.figure(2)\n",
"plt.contourf(xx, yy, Z, alpha=0.4)  # Will reveal decision boundary\n",
"plt.scatter(data[classes == 0, 0], data[classes == 0, 1], c='red', s=12)\n",
"plt.scatter(data[classes == 1, 0], data[classes == 1, 1], c='blue', s=12)\n",
"plt.show()  # draw the figure here so the cell does not echo the last scatter's PathCollection repr"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "aU3GIm1Q4i8y"
},
"source": [
"Here the decision boundary has ended up closer to the blue cluster, but only 2 points are classified incorrectly, and there are only really 3 points very close to the decision boundary."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wCMPgtl44i8y"
},
"source": [
"# 3. Non-linearly separable case 2\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "gAb1elWK4i8y"
},
"outputs": [],
"source": [
"def distance_feature(x1, x2, centre=(5, 5)):\n",
"    \"\"\"Return the Euclidean distance of the point(s) (x1, x2) from `centre`.\n",
"\n",
"    x1, x2 : coordinates; scalars or NumPy arrays of matching shape.\n",
"    centre : (cx, cy) reference point. Defaults to (5, 5), the value that used to be\n",
"             hard-coded, so existing two-argument calls behave exactly as before.\n",
"    \"\"\"\n",
"    cx, cy = centre\n",
"    return np.sqrt((x1 - cx)**2 + (x2 - cy)**2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "EWBG6Baw4i8y"
},
"outputs": [],
"source": [
"nbpts = 100\n",
"\n",
"# Points drawn uniformly over the square [0, 10) x [0, 10)\n",
"data = np.random.uniform(low=0, high=10, size=(nbpts, 2))\n",
"\n",
"# Class 1 when distance_feature is strictly below 3, class 0 otherwise.\n",
"# One vectorised comparison replaces the per-point loop; labels are integers, as in section 1.\n",
"classes = (distance_feature(data[:, 0], data[:, 1]) < 3).astype(int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 430
},
"id": "w15OXk0k4i8z",
"outputId": "24f495c7-d1a1-4381-ff64-6f7aae3e03c9"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {}
}
],
"source": [
"# Scatter plot of the data, one colour per class\n",
"plt.figure(1)\n",
"\n",
"for label, colour in ((0, 'red'), (1, 'blue')):\n",
"    members = data[classes == label]\n",
"    plt.scatter(members[:, 0], members[:, 1], c=colour, s=14)\n",
"\n",
"# Keep the axis limits: the decision-region plots build their prediction grid from them\n",
"axes = plt.gca()\n",
"x_min, x_max = axes.get_xlim()\n",
"y_min, y_max = axes.get_ylim()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "sU4k9qpk4i8z"
},
"outputs": [],
"source": [
"# Feature matrix: the two original coordinates plus the distance feature as a third column\n",
"dist_col = distance_feature(data[:, 0], data[:, 1])\n",
"X = np.column_stack((data, dist_col))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 447
},
"id": "xl2qGrjm4i8z",
"outputId": "1ed842f9-57cf-4ef4-a7b5-7698fb40e52b"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x7b1ea2bd3df0>"
]
},
"metadata": {},
"execution_count": 24
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {}
}
],
"source": [
"# Linear SVM trained on the three-column feature matrix X\n",
"clf = SVC(kernel='linear', C=100)\n",
"clf.fit(X, classes)\n",
"\n",
"# Plotting decision regions\n",
"\n",
"# Generate a meshgrid over which to make predictions (axis limits come from the data scatter plot)\n",
"xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))\n",
"\n",
"# The classifier expects three columns, so compute the distance feature for every grid node too\n",
"zz = distance_feature(xx.ravel(), yy.ravel())  # already 1-D, no further ravel needed\n",
"Z = clf.predict(np.c_[xx.ravel(), yy.ravel(), zz]).reshape(xx.shape)\n",
"\n",
"plt.figure(2)\n",
"plt.contourf(xx, yy, Z, alpha=0.4)  # Will reveal decision boundary\n",
"plt.scatter(data[classes == 0, 0], data[classes == 0, 1], c='red', s=12)\n",
"plt.scatter(data[classes == 1, 0], data[classes == 1, 1], c='blue', s=12)\n",
"plt.show()  # draw the figure here so the cell does not echo the last scatter's PathCollection repr"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 447
},
"id": "34HHaEAE4i8z",
"outputId": "626ef7ff-aa30-4eff-9fec-11893bcaa8a7"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x7b1ea2a72e00>"
]
},
"metadata": {},
"execution_count": 25
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {}
}
],
"source": [
"# Linear SVM trained on the three-column feature matrix X, this time with a small C\n",
"clf = SVC(kernel='linear', C=0.05)\n",
"clf.fit(X, classes)\n",
"\n",
"# Plotting decision regions\n",
"\n",
"# Generate a meshgrid over which to make predictions (axis limits come from the data scatter plot)\n",
"xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))\n",
"\n",
"# The classifier expects three columns, so compute the distance feature for every grid node too\n",
"zz = distance_feature(xx.ravel(), yy.ravel())  # already 1-D, no further ravel needed\n",
"Z = clf.predict(np.c_[xx.ravel(), yy.ravel(), zz]).reshape(xx.shape)\n",
"\n",
"plt.figure(2)\n",
"plt.contourf(xx, yy, Z, alpha=0.4)  # Will reveal decision boundary\n",
"plt.scatter(data[classes == 0, 0], data[classes == 0, 1], c='red', s=12)\n",
"plt.scatter(data[classes == 1, 0], data[classes == 1, 1], c='blue', s=12)\n",
"plt.show()  # draw the figure here so the cell does not echo the last scatter's PathCollection repr"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "cmit_l2E4i80"
},
"source": [
"With the smaller value of C (0.05 instead of 100), the decision boundary moves in towards the centre. There are more data points further out, so you get fewer points close to the boundary if it shrinks in towards the centre."
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "m057KFMW5Ge1"
},
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"colab": {
"provenance": []
}
},
"nbformat": 4,
"nbformat_minor": 0
}