You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1526 lines
467 KiB

2 years ago
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"___\n",
"\n",
"<a href='http://www.pieriandata.com'><img src='../Pierian_Data_Logo.png'/></a>\n",
"___\n",
"<center><em>Copyright by Pierian Data Inc.</em></center>\n",
"<center><em>For more information, visit us at <a href='http://www.pieriandata.com'>www.pieriandata.com</a></em></center>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Hierarchical Clustering"
]
},
{
"cell_type": "code",
"execution_count": 217,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## The Data"
]
},
{
"cell_type": "code",
"execution_count": 218,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('../DATA/cluster_mpg.csv')"
]
},
{
"cell_type": "code",
"execution_count": 219,
"metadata": {},
"outputs": [],
"source": [
"df = df.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 220,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mpg</th>\n",
" <th>cylinders</th>\n",
" <th>displacement</th>\n",
" <th>horsepower</th>\n",
" <th>weight</th>\n",
" <th>acceleration</th>\n",
" <th>model_year</th>\n",
" <th>origin</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18.0</td>\n",
" <td>8</td>\n",
" <td>307.0</td>\n",
" <td>130.0</td>\n",
" <td>3504</td>\n",
" <td>12.0</td>\n",
" <td>70</td>\n",
" <td>usa</td>\n",
" <td>chevrolet chevelle malibu</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>15.0</td>\n",
" <td>8</td>\n",
" <td>350.0</td>\n",
" <td>165.0</td>\n",
" <td>3693</td>\n",
" <td>11.5</td>\n",
" <td>70</td>\n",
" <td>usa</td>\n",
" <td>buick skylark 320</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18.0</td>\n",
" <td>8</td>\n",
" <td>318.0</td>\n",
" <td>150.0</td>\n",
" <td>3436</td>\n",
" <td>11.0</td>\n",
" <td>70</td>\n",
" <td>usa</td>\n",
" <td>plymouth satellite</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>16.0</td>\n",
" <td>8</td>\n",
" <td>304.0</td>\n",
" <td>150.0</td>\n",
" <td>3433</td>\n",
" <td>12.0</td>\n",
" <td>70</td>\n",
" <td>usa</td>\n",
" <td>amc rebel sst</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>17.0</td>\n",
" <td>8</td>\n",
" <td>302.0</td>\n",
" <td>140.0</td>\n",
" <td>3449</td>\n",
" <td>10.5</td>\n",
" <td>70</td>\n",
" <td>usa</td>\n",
" <td>ford torino</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mpg cylinders displacement horsepower weight acceleration \\\n",
"0 18.0 8 307.0 130.0 3504 12.0 \n",
"1 15.0 8 350.0 165.0 3693 11.5 \n",
"2 18.0 8 318.0 150.0 3436 11.0 \n",
"3 16.0 8 304.0 150.0 3433 12.0 \n",
"4 17.0 8 302.0 140.0 3449 10.5 \n",
"\n",
" model_year origin name \n",
"0 70 usa chevrolet chevelle malibu \n",
"1 70 usa buick skylark 320 \n",
"2 70 usa plymouth satellite \n",
"3 70 usa amc rebel sst \n",
"4 70 usa ford torino "
]
},
"execution_count": 220,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 221,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mpg</th>\n",
" <th>cylinders</th>\n",
" <th>displacement</th>\n",
" <th>horsepower</th>\n",
" <th>weight</th>\n",
" <th>acceleration</th>\n",
" <th>model_year</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>392.000000</td>\n",
" <td>392.000000</td>\n",
" <td>392.000000</td>\n",
" <td>392.000000</td>\n",
" <td>392.000000</td>\n",
" <td>392.000000</td>\n",
" <td>392.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>23.445918</td>\n",
" <td>5.471939</td>\n",
" <td>194.411990</td>\n",
" <td>104.469388</td>\n",
" <td>2977.584184</td>\n",
" <td>15.541327</td>\n",
" <td>75.979592</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>7.805007</td>\n",
" <td>1.705783</td>\n",
" <td>104.644004</td>\n",
" <td>38.491160</td>\n",
" <td>849.402560</td>\n",
" <td>2.758864</td>\n",
" <td>3.683737</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>9.000000</td>\n",
" <td>3.000000</td>\n",
" <td>68.000000</td>\n",
" <td>46.000000</td>\n",
" <td>1613.000000</td>\n",
" <td>8.000000</td>\n",
" <td>70.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>17.000000</td>\n",
" <td>4.000000</td>\n",
" <td>105.000000</td>\n",
" <td>75.000000</td>\n",
" <td>2225.250000</td>\n",
" <td>13.775000</td>\n",
" <td>73.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>22.750000</td>\n",
" <td>4.000000</td>\n",
" <td>151.000000</td>\n",
" <td>93.500000</td>\n",
" <td>2803.500000</td>\n",
" <td>15.500000</td>\n",
" <td>76.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>29.000000</td>\n",
" <td>8.000000</td>\n",
" <td>275.750000</td>\n",
" <td>126.000000</td>\n",
" <td>3614.750000</td>\n",
" <td>17.025000</td>\n",
" <td>79.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>46.600000</td>\n",
" <td>8.000000</td>\n",
" <td>455.000000</td>\n",
" <td>230.000000</td>\n",
" <td>5140.000000</td>\n",
" <td>24.800000</td>\n",
" <td>82.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mpg cylinders displacement horsepower weight \\\n",
"count 392.000000 392.000000 392.000000 392.000000 392.000000 \n",
"mean 23.445918 5.471939 194.411990 104.469388 2977.584184 \n",
"std 7.805007 1.705783 104.644004 38.491160 849.402560 \n",
"min 9.000000 3.000000 68.000000 46.000000 1613.000000 \n",
"25% 17.000000 4.000000 105.000000 75.000000 2225.250000 \n",
"50% 22.750000 4.000000 151.000000 93.500000 2803.500000 \n",
"75% 29.000000 8.000000 275.750000 126.000000 3614.750000 \n",
"max 46.600000 8.000000 455.000000 230.000000 5140.000000 \n",
"\n",
" acceleration model_year \n",
"count 392.000000 392.000000 \n",
"mean 15.541327 75.979592 \n",
"std 2.758864 3.683737 \n",
"min 8.000000 70.000000 \n",
"25% 13.775000 73.000000 \n",
"50% 15.500000 76.000000 \n",
"75% 17.025000 79.000000 \n",
"max 24.800000 82.000000 "
]
},
"execution_count": 221,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 222,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"usa 245\n",
"japan 79\n",
"europe 68\n",
"Name: origin, dtype: int64"
]
},
"execution_count": 222,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['origin'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 223,
"metadata": {},
"outputs": [],
"source": [
"df_w_dummies = pd.get_dummies(df.drop('name',axis=1))"
]
},
{
"cell_type": "code",
"execution_count": 224,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mpg</th>\n",
" <th>cylinders</th>\n",
" <th>displacement</th>\n",
" <th>horsepower</th>\n",
" <th>weight</th>\n",
" <th>acceleration</th>\n",
" <th>model_year</th>\n",
" <th>origin_europe</th>\n",
" <th>origin_japan</th>\n",
" <th>origin_usa</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18.0</td>\n",
" <td>8</td>\n",
" <td>307.0</td>\n",
" <td>130.0</td>\n",
" <td>3504</td>\n",
" <td>12.0</td>\n",
" <td>70</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>15.0</td>\n",
" <td>8</td>\n",
" <td>350.0</td>\n",
" <td>165.0</td>\n",
" <td>3693</td>\n",
" <td>11.5</td>\n",
" <td>70</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18.0</td>\n",
" <td>8</td>\n",
" <td>318.0</td>\n",
" <td>150.0</td>\n",
" <td>3436</td>\n",
" <td>11.0</td>\n",
" <td>70</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>16.0</td>\n",
" <td>8</td>\n",
" <td>304.0</td>\n",
" <td>150.0</td>\n",
" <td>3433</td>\n",
" <td>12.0</td>\n",
" <td>70</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>17.0</td>\n",
" <td>8</td>\n",
" <td>302.0</td>\n",
" <td>140.0</td>\n",
" <td>3449</td>\n",
" <td>10.5</td>\n",
" <td>70</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>387</th>\n",
" <td>27.0</td>\n",
" <td>4</td>\n",
" <td>140.0</td>\n",
" <td>86.0</td>\n",
" <td>2790</td>\n",
" <td>15.6</td>\n",
" <td>82</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>388</th>\n",
" <td>44.0</td>\n",
" <td>4</td>\n",
" <td>97.0</td>\n",
" <td>52.0</td>\n",
" <td>2130</td>\n",
" <td>24.6</td>\n",
" <td>82</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>389</th>\n",
" <td>32.0</td>\n",
" <td>4</td>\n",
" <td>135.0</td>\n",
" <td>84.0</td>\n",
" <td>2295</td>\n",
" <td>11.6</td>\n",
" <td>82</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>390</th>\n",
" <td>28.0</td>\n",
" <td>4</td>\n",
" <td>120.0</td>\n",
" <td>79.0</td>\n",
" <td>2625</td>\n",
" <td>18.6</td>\n",
" <td>82</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>391</th>\n",
" <td>31.0</td>\n",
" <td>4</td>\n",
" <td>119.0</td>\n",
" <td>82.0</td>\n",
" <td>2720</td>\n",
" <td>19.4</td>\n",
" <td>82</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>392 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" mpg cylinders displacement horsepower weight acceleration \\\n",
"0 18.0 8 307.0 130.0 3504 12.0 \n",
"1 15.0 8 350.0 165.0 3693 11.5 \n",
"2 18.0 8 318.0 150.0 3436 11.0 \n",
"3 16.0 8 304.0 150.0 3433 12.0 \n",
"4 17.0 8 302.0 140.0 3449 10.5 \n",
".. ... ... ... ... ... ... \n",
"387 27.0 4 140.0 86.0 2790 15.6 \n",
"388 44.0 4 97.0 52.0 2130 24.6 \n",
"389 32.0 4 135.0 84.0 2295 11.6 \n",
"390 28.0 4 120.0 79.0 2625 18.6 \n",
"391 31.0 4 119.0 82.0 2720 19.4 \n",
"\n",
" model_year origin_europe origin_japan origin_usa \n",
"0 70 0 0 1 \n",
"1 70 0 0 1 \n",
"2 70 0 0 1 \n",
"3 70 0 0 1 \n",
"4 70 0 0 1 \n",
".. ... ... ... ... \n",
"387 82 0 0 1 \n",
"388 82 1 0 0 \n",
"389 82 0 0 1 \n",
"390 82 0 0 1 \n",
"391 82 0 0 1 \n",
"\n",
"[392 rows x 10 columns]"
]
},
"execution_count": 224,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_w_dummies"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"-----"
]
},
{
"cell_type": "code",
"execution_count": 225,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import MinMaxScaler"
]
},
{
"cell_type": "code",
"execution_count": 226,
"metadata": {},
"outputs": [],
"source": [
"scaler = MinMaxScaler()"
]
},
{
"cell_type": "code",
"execution_count": 227,
"metadata": {},
"outputs": [],
"source": [
"scaled_data = scaler.fit_transform(df_w_dummies)"
]
},
{
"cell_type": "code",
"execution_count": 228,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.2393617 , 1. , 0.61757106, ..., 0. , 0. ,\n",
" 1. ],\n",
" [0.15957447, 1. , 0.72868217, ..., 0. , 0. ,\n",
" 1. ],\n",
" [0.2393617 , 1. , 0.64599483, ..., 0. , 0. ,\n",
" 1. ],\n",
" ...,\n",
" [0.61170213, 0.2 , 0.17312661, ..., 0. , 0. ,\n",
" 1. ],\n",
" [0.50531915, 0.2 , 0.13436693, ..., 0. , 0. ,\n",
" 1. ],\n",
" [0.58510638, 0.2 , 0.13178295, ..., 0. , 0. ,\n",
" 1. ]])"
]
},
"execution_count": 228,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scaled_data"
]
},
{
"cell_type": "code",
"execution_count": 229,
"metadata": {},
"outputs": [],
"source": [
"scaled_df = pd.DataFrame(scaled_data,columns=df_w_dummies.columns)"
]
},
{
"cell_type": "code",
"execution_count": 230,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAxwAAAHXCAYAAAA2t1xWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAACMqUlEQVR4nO3deZxcZZn28d/dSzp7OgsJIQmEJSAQIAiGTSWgOIBo1EEWHQRkjOOAyztuoKPgwgyog4PioFEQwiCLypJBlE0QEMIOAcIWSCArgexrJ919v3+cc1LVSXd19cbz9Knry6c/VJ86VXXVSXX3uc+zmbsjIiIiIiLSE6pCBxARERERkfxSwSEiIiIiIj1GBYeIiIiIiPQYFRwiIiIiItJjVHCIiIiIiEiPUcEhIiIiIiI9RgWHiIiIiIgAYGZXmtkyM3uujfvNzH5mZnPNbLaZvbu95+yxgsPMjjWzl9Iw5/bU64iIiIiISLe5Cji2xP3HARPSr2nA5e09YY8UHGZWDfwiDbQPcKqZ7dMTryUiIiIiIt3D3e8HVpTYZSowwxOzgHozG13qOXuqhWMyMNfdX3P3zcD1aTgREREREem9xgALir5fmG5rU807GOSQtnb+yLBvew/l6HVuefuY0BGisfQT14aOEI17X98pdIRo7DZwQ+gI0bjwudrQEaJw+6ofh44gErXGprtCR4hKddUUC52hHE3N93X7+XFN9VGfJ+kGlZnu7tO7+3W2e92efoG2mNk00je8Y//JDK3bI1QUidTI/TeHjhCN4/q9ETpCNJavGBA6QjTWe3PoCCIi0oukxUVXC4xFwLii78em29rUUwVHu0GK37BaOKQ1a17RJGqZhW8OCR0hGss39Q0dIRprqkp1sRURkV6tuQcuKnXPqdVM4Bwzu56kB9Nqd19S6gE9VXA8Bkwws11JCo1TgE+1tfNSX9lDMaQ327C+T+gIIlEb7SNCR4jCU6EDiIjkiJldB0wBRpjZQuB8oBbA3X8J3A4cD8wFNgBntvecPVJwuHujmZ0D3AFUA1e6+/Nt7T/Q1UVCttfUpBaOzLKN/UJHiEZtlRpEM69UzQ0dQUREekpPtHCUwd1Pbed+B87uyHP22BgOd7+dpAJq15tVi3sqhvRiO192aOgI0djlyTmhI0TDl64OHSEa6/99eegIIiIi7Qo2aLzYRnQCIdvzXXYJHSEedXWhE8Tj5vtCJ4jGyo2vhY4gIiI9JVALR0+IouAQadVKje3J2Nq1oSNEo3lzU+gI0djcqJ8REZHc8vx0Ie5SwWFmVwInAMvcfWK6bRhwAzAemA+c5K5R4dIJQ4eGThCN5j4aQJ+pGqrxLJlRgw4KHSEKi1fdHzqCiIiU0NVRuVcBx26z7VzgHnefANyTfi8iIiIiIuVqbu7+r0C61MLh7veb2fhtNk8lmUoL4GrgPuCbpZ6nH1pjQLZX9X93hI4QDQ2ULnjr7obQEaLx5tonQkcQERFpV0+M4RhVtPjHUmBUD7yGVIKNWmk8s/5xjeHILHhrh9ARoqEuVQl1qRKRXNKg8fK4u5tZqyNezGwaMA1geP/9GFS3c09GkV6o4ZFloSNEY81bWl07U1OVn1/AXaUTbRGRHFPBUdKbZjba3ZeY2Wig1bNGd58OTAfYZ9in8zMMX7pN3YcnhI4Qjbf/Y0XoCNGoH7ApdIRoDB24b+gIUVi5rs11ZUVEJAI9UXDMBE4HLkr/f2t7D9A6HNKa5qPfHzpCNCYesj50hGhcftjjoSNEo1/N8NARoqBpEEUkl9TCkTCz60gGiI8ws4XA+SSFxo1mdhbwOnBSe89zzo6TuxIjV44c1m59VjFes6dCR4jG+2uPDB0hGm9t3hg6QjTOG6uiHOCL6lomrWhsuit0hGjUVB8TOkJU3LeEjlBxujpL1alt3PWBjjzPMvWQ2Oqaw3UwMpfNOTp0hGgs2qDF7jIHDxsYOkI0BtXk5+qXSHfTSXaBiq9eSi
0c3WvOKk1zKdszC50gHtU6Flttzs/vXxERkTaZ5+cPXhQFxy4D60JHkAhV6SR7q8UNG0JHiEj/0AGiMWBEfv4YiYhIfnW64DCzccAMknU2HJju7pea2TDgBmA8MB84yd1Ljuk7epT60mVOe0jTn2Zmb7k+dIRoHN5naugI0Xi0UTMSZfZft3/oCFG4aK/vcO5LPwgdQyKkrkQF6mJW0GvGcOSoS1VVFx7bCHzV3fcBDgXONrN9gHOBe9x9AnBP+r2IiEiPULEhrVGxUaBiQ0LrdAtHupr4kvT2WjN7ARgDTCWZuQrgauA+4JulnmtdY1fqHpH8u2PVT0JHiEb/vlokNPPVzwwKHSEK5347dIJ46CS7QCfZ0us152eZum4Zw2Fm44EDgUeAUWkxArCUpMtVSetVcEgr+tUOCx0hGofWfzV0hGjs0r9f6AjReP5urUAhIiLx63LBYWYDgT8CX3H3NVY0tZC7u5m1Wp6Z2TRgGsCJO36EQ4ce3NUokjPNvaWP5Tugrqo6dIRoLNqgqaMzf1msolxEJLdyNIajqwv/1ZIUG9e6+03p5jfNbLS7LzGz0cCy1h7r7tOB6QCX7/c9T8adixSo4CgYUBPFhHJR0MJ/BcfutCJ0hCic+1LoBCIiPUAFB1jSlHEF8IK7X1J010zgdJIVx08H2l06e4AWr5JWDKkZFzpCNPYaooIj8/LyVaEjROM/nxsdOoKIiEi7unIWcwRwGvCsmT2dbvsWSaFxo5mdBbwOnNTeEz26XN1FMlppvOAXLxwUOkI0nlyuz0XmwL5jQkeIxoHqUQXADW+FThAPDZSW1mgygV5KC/+Buz8ItLU02wc68lyXL9IMPJlL588MHSEaF//XNaEjRKPqo4eGjhCPeQtDJ4jGd85Ry5eIiMRPf60kWkv/rtnLMju9a37oCNHwxZqZKbNj33YnARQRkd5KYzi6V23NkNARJEJ9+zWGjhCPnXYInSAeKji2emVtW43MleVLO3+Hn72hxf9EJGdytA5Hpy8hm1lfM3vUzJ4xs+fN7Hvp9l3N7BEzm2tmN5hZn+6LKyIi0pKKDRGRuHWlhaMBONrd16XT4z5oZn8G/g34qbtfb2a/BM4CLi/5RJuXdiGG5NWG9apVM8OefS10hGjYpN1DR4jGN9/9YugIUfjZG6ETiIj0AHWpShb1A9al39amXw4cDXwq3X41cAHtFBx/PPjczsbInSOHtTuLcMWYveWB0BGicfgDU0NHiMarVQ+HjhCNf95h/9ARolA/YO/QEaKxav0LoSNEQzMzFWj2spZc63y947q68F818ASwB/AL4FVglbtnne8XAu3OYTmguqkrMSSn9uozJXSEaNSaBtBn6ptHho4QjR375ufql0h300l2gYqvXkotHAl3bwImmVk9cDPwrnIfa2bTgGkAn9rpBN477OCuRMkRraKcWcSc0BGisXvNkaEjRGNL4+bQEaLxyjqtYSTSFp1kF6j4aqm3tHCYCo6W3H2Vmd0LHAbUm1lN2soxFljUxmOmA9MBThl5vv/tzfwc1K7Qwn8Ff1lwVOgI0Xh8eegE8TiwWivQZ97cqN+bIm3RSXaBii8JrdMFh5ntAGxJi41+wDHAxcC9wInA9cDpQLuDEl7domViZXubmjXlZ2ZAFBNYx+FjYzeEjhCNBRv6ho4QhT+sDp1ARKQHeH6mxe3Kacxo4Op0HEcVcKO732Zmc4DrzeyHwFPAFe090UAf0IUYebOu/V0qxFitw7HVcyvVdSaztlHVl4iISG/SlVmqZgMHtrL9NWByR55rVJ/+nY2RQyo4ZHuLNqqrXabJVXCIiEgF0BiO7rW+UVeyZXtvN+iqfmZMPx2LTLPr94W09N97fIKvzL0pdIwo1A/YW1Pjikh0oig4REREOkvFRoGKDZEcUQtHQTqG43FgkbufYGa7kgwYH06yRsdp7l5yHsvneKarMXJj59u+FjpCNP75gt+GjhCNk17ROhyZwXvnZxBdV33spz
uFjiAiIj2lOT9/77qjhePLwAvA4PT7i4Gfuvv1ZvZL4CzaWWlcpDU17989dIRoDFj7SugI0ajZbXjoCNFY7w2hI4i
"text/plain": [
"<Figure size 1080x576 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(15,8))\n",
"sns.heatmap(scaled_df,cmap='magma');"
]
},
{
"cell_type": "code",
"execution_count": 231,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<seaborn.matrix.ClusterGrid at 0x236b1aaabb0>"
]
},
"execution_count": 231,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAsgAAALJCAYAAACp99XTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAACVLUlEQVR4nOzdeZycVZX/8c/pJUlnDySQFcK+iAghLMoOooALijiK/lQEjDoi4jYqLjA6zqgoMziuLYuiCCKLMggqKsoiIGFfwhoCJISEEEL2pJfz+6OeYBGSXip1c+s5+b5fr37R/dRTVd+iO92n7nPuvebuiIiIiIhIRVPuACIiIiIijUQFsoiIiIhIFRXIIiIiIiJVVCCLiIiIiFRRgSwiIiIiUkUFsoiIiIhIldAFspmdb2bzzez+9dxuZvZdM3vMzO41sykbO6OIiIiINJbQBTLwU+DIHm4/Ctih+JgG/HAjZBIRERGRBha6QHb3G4CFPZxyDHChV9wKjDSzcRsnnYiIiIg0otAFch9MAJ6u+np2cUxERERENlEt/Tm5Y8HMhtqXesCY7T5MpTVijXZ3b8+VR0RERETKr18FMt1diWLUpiiGN6QgngNMqvp6YnFMRERERDZR/SuQuzoTxcjmKuAUM7sE2Bd40d3nZs4ka5k6derngUG5c0jDWDl9+vRv5A4hIiJx9atA9pIVyGZ2MXAIMNrMZgNnAK0A7v4j4BrgaOAxYDnwwTxJpReDpk+ffmbuENIYpk6dembuDCIiEls/R5A7EsVIw92P7+V2Bz62keKIvESj4htksorkmmjkXUSkjzb1FguRXDQqLhuV3lSIiPRd6BYLEREREZH+Ct1iISIiIiLSX2qxEBERERGpogJZRERERKRK/3qQu9ViISIiIiKxaQRZRERERKSKJumJiIiIiFTRCLKIiIiISJX+FcidKpBFZP20Q2BD0w6EjUk7HIo0oH5uFKIWCxHpkXYIFOkHvWkRaUxqsRARERERqaICWURERESkigpkEREREZEqKpBFRERERKqoQBYRERERqaICWURERESkSuh1kM3sSOAcoBk4192/sdbtWwE/A0YW53ze3a/Z2DlFREREpHH0cwS5K1GM+jOzZuD7wBHAbOB2M7vK3R+sOu1LwKXu/kMz2xW4Bpi80cOKiIiISMOI3GKxD/CYu88EMLNLgGOA6gLZgeHF5yOAZzZqQhERERFpOGFHkIEJwNNVX88G9l3rnDOBP5rZx4EhwOs3TjQRERERaVSl7kE2s2nAtKpD7e7e3o+HOB74qbt/x8xeC/zczHZz9+66BhURERGR0uhXgewNNoJcFMPrK4jnAJOqvp5YHKt2EnBk8Vi3mNkgYDQwv85RRURERKQkIvcg3w7sYGbbUCmM3w28Z61zngIOB35qZrsAg4DnNmpKEREREWko/WyxaKwR5J64e6eZnQL8gcoSbue7+wNm9lVgurtfBXwa+ImZfZLKhL0T3N3zpRYRERGR3CJP0qNY0/iatY59perzB4H9N3YuEREREWlcYUeQRUQEpk6d+nkq7WPSmCZPnTr1zNwhZL1WTp8+/Ru9nybRhB5BFhERBk2fPv3M3CFEykhvXjZd/VvFQiPIIiIiIhKcRpBFRERERKqoB1lEREREpIpGkEVEREREqqgHWURERESkikaQRURERESq9HMEuTtVDhERERGRhqBJeiIiIiIiVfpXIHd7ohgiIiIiIo2hfy0WXWqxEBEREZHY+tlioQJZRERERGLTJD0RERERkSr9LJDVgywiIiIisfWzxUIFsoiIiIjEphFkEREREZEqTf052Tu9oT56Y2ZHmtnDZvaYmX1+Pef8i5k9aGYPmNkv+/P/Q0RERETi6ecIcqoY9WdmzcD3gSOA2cDtZnaVuz9Ydc4OwBeA/d39BTPbIk9aEREREWkUYQtkYB/gMXefCWBmlwDHAA9WnfMh4Pvu/gKAu8/f6ClFREREpKH0q0
DuLleBPAF4uurr2cC+a52zI4CZ3Qw0A2e6++83TjwRERERaUT93EnPUuWoiZlNA6ZVHWp39/Z+PEQLsANwCDARuMHMXu3ui+oWUkRERERKpZ8jyI1VIBfF8PoK4jnApKqvJxbHqs0GbnP3DuAJM3uESsF8e72zioiIiEg59K9AbrAR5F7cDuxgZttQKYzfDbxnrXN+AxwPXGBmo6m0XMzcmCFFREREpLGUegS5J+7eaWanAH+g0l98vrs/YGZfBaa7+1XFbW8wsweBLuCz7v58vtQiIiIiklvkEWTc/RrgmrWOfaXqcwc+VXyIiIiIiMQukEVERERE+qufBXK/Nt4TERERESmdfhXIXRpBFhEREZHgNIIsIiIiIlJFPcgiIiIiIlX612LRrRFkERERqY+pU6d+HhiUO0cPJk+dOvXM3CF6sXL69OnfyB0imn4WyBpBFhERkboZNH369DNzhyizEhTwpdS/FgsVyCIiIiISnFosRERERESq9K9Ado0gi4iIiEhsGkEWEREREanSvwIZjSCLiIiISGz9KpA71WIhIiIiIsFpBFlEREREpIoKZBERERGRKiqQRURERESq9K8H2VQgi4iIiEhs/Vq3ravBPnpjZkea2cNm9piZfb6H895hZm5mU/v6/0JEREREYupfi0WJRpDNrBn4PnAEMBu43cyucvcH1zpvGPAJ4LaNn1JEREREGk0/WyxSxUhiH+Axd58JYGaXAMcAD6513teAbwKf3bjxRKQspk6d+nlgUO4cNZo8derUM3OHqNHK6dOnfyN3CBHZ9ESepDcBeLrq69nAvtUnmNkUYJK7/87MVCCLyPoMmj59+pm5Q2xqSlzYi0jJlXoE2cymAdOqDrW7e3sf79sEnA2ckCCaiIiIbKI28lWnjXmVaJO5qtPPHuRUMWpTFMPrK4jnAJOqvp5YHFtjGLAb8Fer9FaPBa4ys7e6+/QEcUVERGTTEPKq06Z0Vad/I8ipUqRxO7CDmW1DpTB+N/CeNTe6+4vA6DVfm9lfgc+oOBYRERHZtJV6BLkn7t5pZqcAfwCagfPd/QEz+yow3d2vyptQRERERBpRvwrk7lQpEnH3a4Br1jr2lfWce8jGyCQiIiIijS3sCLKIiIiISC0i9yCLiIiIiPSbRpBFRERERKr0c6MQEREREZHY+tli4alyiIiIiIg0BI0gi4iIiIhU6edW0xpBFhEREZHYNIIsIiIiIlKlnwWyRpBFREREJDZN0hMRERERqaIWCxERERGRKmqxEBHZRE2dOvXzwKDcOXoweerUqWfmDtGDldOnT/9G7hAiUn8qkEVENl2Dpk+ffmbuEGXV4MW7iGwA9SCLiIiIiFTRCLKIiISyEVtHNmYLiNo5RDYiFcgiIhJNuNYRtXOIbFz9K5BdBbKIiIiIxNbUn5O76G6oj96Y2ZFm9rCZPWZmn1/H7Z8yswfN7F4z+7OZbd2f/x8iIiIiEk/YSXpm1gx8HzgCmA3cbmZXufuDVafdBUx19+Vm9lHgW8C7Nn5aEREREWkUkXuQ9wEec/eZAGZ2CXAM8FKB7O7XV51/K/D/NmpCEREREWk4/exB7r2toYFMAJ6u+no2sG8P558EXJs0kYiIiIg0vFKPIJvZNGBa1aF2d2+v4XH+HzAVOLhe2URERESknEo9glwUw+sriOcAk6q+nlgcexkzez3wReBgd19V95AiIiIiUiqlHkHuxe3ADma2DZXC+N3Ae6pPMLM9gR8DR7r7/I0fUUREREQaTalHkHvi7p1mdgrwB6AZON/dHzCzrwLT3f0q4CxgKPBrMwN4yt3fmi20iIiIiGTXzxHk8hTIAO5+DXDNWse+UvX56zd6KBERERFpaNpJT0RERESkSj83CinXCLKIiIiISH+F7UEWEREREalFvwrkbhXIIiIiIhKcRpBFRERERKqEXsVCRERERKS/NIIsIiIiIlJFBbKIiIiISBUVyCIiIiIiVVQgi4iIiIhU6WeB3JUqh4
iIiIhIQzDvx/bREzfbraH2mp698H7LnUFEREREYunfCHK3WixEREREJC8zOxI4B2gGznX3b9Tz8ftVIDsNNYAsIiI
"text/plain": [
"<Figure size 720x720 with 4 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"sns.clustermap(scaled_df,row_cluster=False)"
]
},
{
"cell_type": "code",
"execution_count": 232,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<seaborn.matrix.ClusterGrid at 0x236a3d03280>"
]
},
"execution_count": 232,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAsgAAALJCAYAAACp99XTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAChwUlEQVR4nOzde5zcdXX/8dfZSzZXEiAhJCGw3MECAkbUIhVBFK1KtYqgoniLtVov9Yb2V0m1tnipFi+13QooFrUUQakCihcKqCALcg8ock0IhEvuySZ7Ob8/5rtxMpmZndmd735mzryfPvJg9zvf+c5ZN5k585nzOcfcHRERERERKehIHYCIiIiISDNRgiwiIiIiUkQJsoiIiIhIESXIIiIiIiJFlCCLiIiIiBRRgiwiIiIiUiR0gmxm55vZajO7s8LtZmZfMrP7zOx2Mzt6smMUERERkeYSOkEGvgGcXOX2lwIHZn+WAl+bhJhEREREpImFTpDd/Vrg6SqnnAJc6AU3AHPMbMHkRCciIiIizSh0glyDRcAjRd+vyI6JiIiISJvqqufkwSfvb6q51FPm7f9OCqURo/rcvS9VPCIiIiLS+upKkBkZzimM8cmS4YkkxCuBxUXf75UdExEREZE2VV+CPDyUUxjJXA68x8y+CzwHWOfuqxLHJCIiIiIJ1ZUge4slyGb2HeB4YK6ZrQDOBroB3P3fgSuAlwH3AZuBt6SJVERERESahbnXXla87aFbmqsGeZ+jLXUMIiIiIhJLu5dYiIiIiIjsIHSJhYiIiIhIvepcQR7MKQwRERERkeagEgsRERERkSJKkEVEREREitRXgzyiEgsRERERiU0ryCIiIiIiRbRJT0RERESkiFaQRURERESK1JcgDylBFhEREZHY6hwUohILEREREYlNJRYiIiIiIkWUIIuIiIiIFFGCLCIiIiJSRAmyiIiIiEgRJcgiIiIiIkWUIIuIiIiIFAndB9nMTgbOBTqBr7v7OSW37w18E5iTnXOWu18x2XGKiIiISPOocwV5OKcwGs/MOoGvAicBK4CbzOxyd7+76LT/B1zs7l8zs2cAVwC9kx6siIiIiDSNyCUWxwD3ufv9AGb2XeAUoDhBdmCX7OvZwKOTGqGIiIiINJ2wK8jAIuCRou9XAM8pOWcZ8BMz+xtgBvCiyQlNRERERJpVS9cgm9lSYGnRoT5376vjEqcD33D3fzGz5wHfMrPD3H2koYGKiIiISMuoK0H2JltBzpLhSgnxSmBx0fd7ZceKvQ04ObvWr81sKjAXWN3gUEVERESkRUSuQb4JONDM9qWQGJ8GvL7knIeBE4FvmNmhwFTgiUmNUkRERESaSp0lFs21glyNuw+Z2XuAH1No4Xa+u99lZp8E+t39cuCDwH+a2QcobNg70909XdQiIiIiklrkTXpkPY2vKDn2iaKv7waOney4RERERKR5hV1BFhEREREZj9AryCIiIiIi9aqvi4VWkEVEREQkOK0gi4iIiIgUUQ2yiIiIiEgRrSCLiIiIiBRRDbKIiIiISBGtIIuIiIiIFKlzBXkkrzhERERERJqCNumJiIiIiBSpL0Ee8ZzCEBERERFpDvWVWAyrxEJEREREYquzxEIJsoiIiIjEpk16IiIiIiJF6kyQVYMsIiIiIrHVWWKhBFlEREREYtMKsoiIiIhIkY56TvYhb6o/YzGzk83sXjO7z8zOqnDOqWZ2t5ndZWbfruf/DxERERGJp84V5LzCaDwz6wS+CpwErABuMrPL3f3uonMOBD4GHOvua8xsjzTRioiIiEizCJsgA8cA97n7/QBm9l3gFODuonPeAXzV3dcAuPvqSY9SRERERJpKXQnySGslyIuAR4q+XwE8p+ScgwDM7JdAJ7DM3a+anPBEREREpBnVOUnP8opjXMxsKbC06FCfu/fVcYku4EDgeGAv4FozO9zd1zYsSBERERFpKXWuIDdXgpwlw5US4pXA4qLv98qOFV
sB3Ojug8ADZvY7CgnzTY2OVURERERaQ30JcpOtII/hJuBAM9uXQmJ8GvD6knO+D5wOXGBmcymUXNw/mUGKiIiISHNp6RXkatx9yMzeA/yYQn3x+e5+l5l9Euh398uz215sZncDw8CH3f2pdFGLiIiISGrmXvvwj4eOflFTTQrZ55aftk7GLiIiIiItIXKJhYiIiIhI3epMkOsavCciIiIi0nLqSpCHtYIsIiIiIsFpBVlEREREpIhqkEVEREREitRXYjGiFWQRERERia3OBFkryCIiIiISW30lFkqQRURERCQ4lViIiIiIiBSpL0F2rSCLiIiISGxaQRYRERERKVJfgoxWkEVEREQktroS5CGVWIiIiIhIcFpBFhEREREpogRZRERERKSIEmQRERERkSL11SCbEmQRERERia2uvm3DTfZnLGZ2spnda2b3mdlZVc77SzNzM1tS6/8XIiIiIhJTfSUWLbSCbGadwFeBk4AVwE1mdrm7311y3izgfcCNkx+liIiIiDSbOkss8gojF8cA97n7/QBm9l3gFODukvM+BXwG+PDkhiciIiIizajOEgtrqj9jWAQ8UvT9iuzYdmZ2NLDY3X9Uz/8PIiIiIhJXS68gm9lSYGnRoT5376vxvh3AF4AzcwhNRERERFpUnTXIeYUxPlkyXCkhXgksLvp+r+zYqFnAYcA1Vqit3hO43Mxe6e79OYQrIiIiIi2gvhXkvKLIx03AgWa2L4XE+DTg9aM3uvs6YO7o92Z2DfAhJcciIiIi7a2lV5CrcfchM3sP8GOgEzjf3e8ys08C/e5+edoIRURERKQZ1ZUgj+QVRU7c/QrgipJjn6hw7vGTEZOIiIiINLewK8giIiIiIuMRuQZZRERERKRuWkEWERERESlSX4KcVxQiIiIiIk2izhILzysOEREREZGmoBVkEREREZEidY6a1gqyiIiIiMSmFWQRERERkSJ1JshaQRYRERGR2LRJT0RERESkiEosRERERESKqMRCRERERKSIEmQRERERkSKqQRYRERERKaIVZBERERGRIkqQRURERESK1JcguxJkEREREYmto56Thxlpqj9jMbOTzexeM7vPzM4qc/vfmtndZna7mf3MzPap5/8PEREREYkn7CY9M+sEvgqcBKwAbjKzy9397qLTfgsscffNZvYu4LPA6yY/WhERERFpFpFrkI8B7nP3+wHM7LvAKcD2BNndf1F0/g3AGyc1QhERERFpOnXWII9d1tBEFgGPFH2/AnhOlfPfBlyZa0QiIiIi0vRaegXZzJYCS4sO9bl73ziu80ZgCfCCRsUmIiIiIq2ppVeQs2S4UkK8Elhc9P1e2bEdmNmLgL8DXuDuWxsepIiIiIi0lJZeQR7DTcCBZrYvhcT4NOD1xSeY2VHAfwAnu/vqyQ9RRERERJpNS68gV+PuQ2b2HuDHQCdwvrvfZWafBPrd/XLgc8BM4H/MDOBhd39lsqBFREREJDnzOoZ/HL/Xi5pqCfmaFT+11DGIiIiISCyapCciIiIiUqTOQSGtU2IhIiIiIjIeYWuQRURERETGo64EeUQJsoiIiIgEpxVkEREREZEidfZBVoIsIiIiIrFpBVlEREREpIgSZBERERGRIkqQRURERESKKEEWERERESlSZ4I8nFccIiIiIiJNQSvIIiIiIiJF6kuQR5Qgi4iIiEhaZnY+8HJgtbsflh37FHAKMAKsBs5090fHc/2Oek72JvufiIiIiLSlbwAnlxz7nLsf4e5HAj8EPjHei6vEQkRERERairtfa2a9JcfWF307A8a/mqoSCxmXJQuOC7eE/+s7vpk6BKmDb9mQOoSGu+yYz6YOIRef9vtTh5CLu59+OHUI0ua2PHpd6hBy0T13Pxvvfc3s08CbgHXAC8d7Ha0gt5glS5acBUxNHce0jtmpQ5AajTz6+9Qh5GJk+Q2pQ2i4V3w85r+rT386dQQi0krMbCmwtOhQn7v31XJfd/874O/M7GPAe4
CzxxND6BVkMzsZOBfoBL7u7ueU3N4DXAg8C3gKeJ27PzjZcdZpan9//7I8LlxP8r1lZFseIUgOOhYemDqEXNiue6Y
"text/plain": [
"<Figure size 720x720 with 4 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"sns.clustermap(scaled_df,col_cluster=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using Scikit-Learn"
]
},
{
"cell_type": "code",
"execution_count": 233,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.cluster import AgglomerativeClustering"
]
},
{
"cell_type": "code",
"execution_count": 234,
"metadata": {},
"outputs": [],
"source": [
"model = AgglomerativeClustering(n_clusters=4)"
]
},
{
"cell_type": "code",
"execution_count": 235,
"metadata": {},
"outputs": [],
"source": [
"cluster_labels = model.fit_predict(scaled_df)"
]
},
{
"cell_type": "code",
"execution_count": 236,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 0, 0, 0, 3, 2, 2, 2,\n",
" 2, 2, 0, 1, 1, 1, 1, 3, 0, 3, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,\n",
" 0, 0, 0, 0, 0, 2, 2, 2, 3, 3, 2, 0, 3, 0, 2, 0, 0, 1, 1, 1, 1, 1,\n",
" 1, 1, 1, 1, 3, 1, 1, 1, 1, 2, 2, 2, 2, 0, 3, 3, 0, 3, 1, 1, 1, 1,\n",
" 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 2, 1, 1, 1, 1, 0, 3, 0, 3,\n",
" 3, 0, 0, 2, 1, 1, 2, 2, 2, 2, 1, 2, 3, 1, 0, 0, 0, 3, 0, 3, 0, 0,\n",
" 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 0, 2, 2, 3, 3, 2, 0, 0, 0, 0,\n",
" 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 3, 0, 0, 0, 3, 2, 3, 0, 2, 0, 2,\n",
" 2, 2, 2, 3, 2, 2, 0, 0, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2, 3, 0,\n",
" 0, 0, 0, 2, 3, 3, 0, 2, 1, 2, 3, 2, 1, 1, 1, 1, 3, 0, 2, 0, 3, 1,\n",
" 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 2, 0, 3, 0, 0, 0, 3, 2, 3, 2, 3,\n",
" 2, 0, 3, 3, 3, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1,\n",
" 0, 3, 3, 0, 3, 0, 0, 3, 2, 2, 2, 2, 2, 3, 0, 0, 0, 0, 0, 1, 1, 1,\n",
" 1, 1, 1, 1, 1, 2, 3, 0, 0, 2, 1, 2, 1, 0, 0, 3, 2, 0, 0, 0, 0, 2,\n",
" 3, 0, 3, 0, 0, 0, 0, 2, 3, 3, 3, 3, 3, 0, 3, 2, 2, 2, 2, 3, 3, 2,\n",
" 3, 3, 2, 3, 0, 0, 0, 0, 0, 3, 0, 3, 3, 3, 3, 3, 0, 0, 0, 2, 3, 3,\n",
" 3, 3, 2, 2, 3, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 3, 0, 0,\n",
" 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0], dtype=int64)"
]
},
"execution_count": 236,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cluster_labels"
]
},
{
"cell_type": "code",
"execution_count": 237,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:xlabel='mpg', ylabel='weight'>"
]
},
"execution_count": 237,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAB+0AAALaCAYAAAD0lsEwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAB7CAAAewgFu0HU+AAEAAElEQVR4nOz9d3ic13nve//WM4MZ9F5YUFhQ2cSqTvVmS7LkJstxYstxtpNzclKdtpOTnGTvJE6c16ln77yxE1t24kSybLmoWMUqtEQ1UiLFCoJgAwkQvQMDYOZ51vkDAAWSKDNDAAOQ38914eLMPOtec2MwAHHhXutexlorAAAAAAAAAAAAAAAw/5xEJwAAAAAAAAAAAAAAwJWKoj0AAAAAAAAAAAAAAAlC0R4AAAAAAAAAAAAAgAShaA8AAAAAAAAAAAAAQIJQtAcAAAAAAAAAAAAAIEEo2gMAAAAAAAAAAAAAkCAU7QEAAAAAAAAAAAAASBCK9gAAAAAAAAAAAAAAJAhFewAAAAAAAAAAAAAAEoSiPQAAAAAAAAAAAAAACULRHgAAAAAAAAAAAACABKFoDwAAAAAAAAAAAABAglC0BwAAAAAAAAAAAAAgQSjaAwAAAAAAAAAAAACQIBTtAQAAAAAAAAAAAABIEIr2AAAAAAAAAAAAAAAkiD/RCeDyYowJSlo/drdNkpvAdAAAAAAAAAAAAABgNvkkFYzd3m+tHb7UCSnaY7atl7Qr0UkAAAAAAAAAAAAAwBzbJmn3pU5Ce3wAAAAAAAAAAAAAABKEnfaYbW3jN9555x0tXbo0kbkAAAAAAAAAAAAAwKw5e/asrr766vG7bdONjRZFe8y2c2fYL126VMXFxYnMBQAAAAAAAAAAAADmijvzkJnRHh8AAAAAAAAAAAAAgAShaA8AAAAAAAAAAAAAQIJQtAcAAAAAAAAAAAAAIEEo2gMAAAAAAAAAAAAAkCAU7QEAAAAAAAAAAAAASBCK9gAAAAAAAAAAAAAAJAhFewAAAAAAAAAAAAAAEoSiPQAAAAAAAAAAAAAACULRHgAAAAAAAAAAAACABKFoDwAAAAAAAAAAAABAgvgTnQAAAAAAAAAAAAAALBZDQ0Pq7u7W4OCgXNdNdDqIgc/nUyAQUGZmptLT0+U4C2OPO0V7AAAAAAAAAAAAAJiBtVZnz55VT09PolNBnCKRiIaHh9XX1ydjjJYvX66MjIxEp0XRHgAAAAAAAAAAAABm0tHRcVHB3u+n3LqYuK4ra62k0UUYjY2NC6Jwz7sIAAAAAAAAAAAAAKYxMjKitra2c/cLCwuVnZ0tn8+XwKwQK2utBgcH1dnZqf7+/nOF+8rKyoS2yl8YTfoBAAAAAAAAAAAAYIHq7+8/dzsvL095eXkU7BchY4zS0tJUXFys9PR0SaOF/Ilf30SgaA8AAAAAAAAAAAAA0xgYGDh3OzMzM4GZYDYYY5Sbm3vufm9vbwKzoWgPAJgH1vPOnREjja5as56XwIwAAAAAAAAAAIjeyMiIpNFibzAYTHA2mA2pqakyxkj64OubKJxpDwCYM57ryvH51HemVW37j2qkd0AyRsHMNBVsqFBGcdG5MQAAAAAAAAAALFTe2EY0n893rtCLxc0YI5/Pp0gkItd1E5oLRXsAwJyw1qr72Bmd3vGuhjp6Lrretu+oUvKzVXLLVmWvKpZx+CUHAAAAAAAAAABceWiPDwCYddZatbx3WEeffHnSgv24UHu36r7/ktr21cl6dspxAAAAAAAAAAAAlyuK9gCAWeW5nrqPndGpF9+OLsBanXj+TfU2nJXncs49AAAAAAAAAAC4slC0BwDMKsfnqPH1vbEFWavGnXvl+PhvCQAAAAAAAAAAXFmojgAAZo31rAZaOjTQ3B5zbN/pFoU6emQtbfIBAAAAAAAAAMCVg6I9AGDWWGvVfqA+7vj2A/WyHi3yAQAAAAAAAADAlYOiPQBg1hjHKNwfijs+PBCSMW
YWMwIAAAAAAAAAAIvdqVOn9KUvfUnV1dVKS0tTbm6utm3bpr/5m7/R4OBgotO7ZP5EJwAAuMw4l1B0p2APAAAAAAAAAAAmeOqpp/TzP//z6u3tPffY4OCgdu/erd27d+tf//Vf9cwzz6i8vDyBWV4adtoDAGaN9TwlZ2fEHR/MSudMewAAAAAAAAAAIEnas2ePPvWpT6m3t1fp6en6i7/4C73xxht66aWX9N/+23+TJNXV1enee+9VX19fgrONHzvtAQCzxvH5VHBVlRrf2CfFWHw3jlHhVZVyfL45yg4AAAAAAAAAgMuLdV154RFZz5VxfHKSAjKX0d/Zf+M3fkOhUEh+v18vvPCCrrvuunPXbrvtNlVUVOj3fu/3VFdXp69+9av60z/908QlewnYaQ8AmFXBzDRlrVwec1x2Ran8qclzkBEAAAAAAAAAAJcPa63C/b3qO3VMXQf3qKfuoHrra9VTd1BdB/eo79Qxhft7F31n23feeUevvfaaJOkLX/jCeQX7cV/60pdUU1MjSfqHf/gHhcPhec1xtlC0BwDMKs/1VHrrVjlJ0Tdz8QWTVHLzlph35wMAAAAAAAAAcCWJDA6ot+6g+o7XKdzTNemYcE+X+o7XqbfuoCKDA/Oc4ez54Q9/eO725z//+UnHOI6jz372s5Kk7u5uvfLKK/OR2qyjaA8AmFWOz1FybpYqP3GHfIGkGcf7ggFVffJOBbMyZBz+WwIAAAAAAAAAYDLhvh71Hj8id3goqvHu8JB6jx9RuK9njjObG6+//rokKS0tTVu2bJly3M0333zu9s6dO+c8r7lAdQQAMOscn6PM4iKtfeR+5a9dPen5OcbvU/76cq175H6lLS2Q4+O/JAAAAAAAAAAAJhMZHFDfqWOS58UW6HnqO3VsUe64P3z4sCSpvLxcfv/U3X2rq6svillsou9dDABADIzPUXJ2plbdt11ld16rjoPHNNI3IBmjQEaq8teulhNIkqxlhz0AAAAAAAAAAFOw1mrg9InYC/bjPE8DZ04qs2KNjDGzm9wcGRoaUnt7uySpuLh42rE5OTlKS0vTwMCATp8+PR/pzTqK9gCAOWOc0f/8/ckBFWyslMaPrDeSM777fpH8ggAAAAAAAAAAQCJEBvqibok/FXcopMhAn5LSM2cpq7nV19d37nZ6evqM48eL9v39/XOZ1pxhayMAYF44Pp8c/9jHJO3yAQAAAAAAAADAxYY62mZlnuFZmmc+DA19sEghEAjMOD4YDEqSQqHQnOU0lyjaAwAAAAAAAAAAAMACZF1X4Z6uWZlrpKdL1nVnZa65lpycfO72yMjIjOOHh4clSSkpKXOW01yiaA8AAAAAAAAAAAAAC5AXnrlgncj55kpGRsa529G0vB8YGJAUXSv9heiKLdobY2yUH69GMdeHjDE/MMacMcYMj/37A2PMh2LIx2+M+RVjzGvGmDZjTMgYc8wY8y/GmLUxzJNvjPkfxph9xpjesY99Y4/lRTsPAAAAAAAAAAAAgMSy3uzujJ/t+eZKcnKy8vJGS5tnzpyZdmxXV9e5on1JScmc5zYXrtii/WwwxjjGmH+V9KykByUtlxQY+/dBSc8aY75ujJn2dTbG5Et6Q9I/S7pRUr6kZEmrJH1R0rvGmF+KIp9rJO2X9MeS1kvKGPtYP/bYfmPM1TF/osBlzvM8eRF39MP1Ep0OAAAAAAAAAACAJMk4vgU931xas2aNJKm+vl6RSGTKcbW1tedu19TUzHlec4Gi/WihfP00H5+fJvYvJH1h7PYeSZ+WdPXYv3vGHv8lSX8+1QTGGJ+kH0jaNvbQk5I+JOkaSb8uqVVSUNK/TLdz3xhTIukpSUskRSR9RdJNYx9fGXtsqaSnjDHF03xOwBXBWivrefJcV11HTql510E17zqojsPH5YYjsp4na22i0wQAAAAAAAAAAFcwJykwi7OZWZ5vbt14442SRlvfv/vuu1OO27Fjx7nbN9xww5znNRf8iU5gAW
i11h6INcgYUynpd8bu7pZ0k7U2NHZ/lzHmx5J2SNoq6XeNMd+w1tZPMtXnNLq7XpL+t7X2Vydce8cY8xNJ70rKlPS
"text/plain": [
"<Figure size 2400x800 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(12,4),dpi=200)\n",
"sns.scatterplot(data=df,x='mpg',y='weight',hue=cluster_labels)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Exploring Number of Clusters with Dendrograms\n",
"\n",
"Make sure to read the documentation online!\n",
"https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.dendrogram.html\n",
"\n",
"#### Assuming every point starts as its own cluster"
]
},
{
"cell_type": "code",
"execution_count": 238,
"metadata": {},
"outputs": [],
"source": [
"model = AgglomerativeClustering(n_clusters=None,distance_threshold=0)"
]
},
{
"cell_type": "code",
"execution_count": 239,
"metadata": {},
"outputs": [],
"source": [
"cluster_labels = model.fit_predict(scaled_df)"
]
},
{
"cell_type": "code",
"execution_count": 240,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([247, 252, 360, 302, 326, 381, 384, 338, 300, 279, 217, 311, 377,\n",
" 281, 232, 334, 272, 375, 354, 333, 317, 345, 329, 289, 305, 383,\n",
" 290, 205, 355, 269, 202, 144, 245, 297, 386, 358, 199, 337, 330,\n",
" 339, 293, 352, 283, 196, 253, 168, 378, 331, 201, 268, 256, 361,\n",
" 250, 197, 246, 371, 324, 230, 203, 261, 380, 376, 308, 389, 332,\n",
" 306, 236, 391, 350, 274, 288, 313, 231, 298, 100, 295, 210, 248,\n",
" 187, 390, 373, 266, 307, 379, 212, 357, 191, 314, 208, 249, 343,\n",
" 294, 374, 322, 323, 362, 188, 296, 369, 286, 251, 229, 244, 285,\n",
" 349, 365, 259, 213, 276, 215, 222, 204, 359, 287, 166, 387, 291,\n",
" 220, 216, 260, 129, 367, 340, 346, 301, 342, 228, 388, 370, 218,\n",
" 255, 327, 347, 278, 271, 258, 282, 318, 273, 123, 172, 382, 363,\n",
" 356, 195, 280, 239, 364, 267, 351, 186, 257, 277, 299, 127, 366,\n",
" 234, 385, 192, 372, 292, 233, 270, 263, 133, 165, 161, 198, 97,\n",
" 315, 134, 207, 147, 175, 262, 348, 98, 214, 48, 353, 177, 325,\n",
" 128, 284, 275, 182, 184, 145, 344, 321, 200, 149, 240, 241, 235,\n",
" 226, 160, 341, 193, 320, 101, 224, 162, 243, 146, 99, 185, 119,\n",
" 219, 209, 265, 221, 335, 66, 121, 316, 319, 254, 264, 124, 336,\n",
" 304, 206, 106, 148, 368, 122, 164, 131, 142, 95, 173, 194, 152,\n",
" 138, 157, 110, 159, 107, 312, 328, 225, 150, 211, 140, 163, 242,\n",
" 116, 81, 93, 96, 72, 189, 303, 167, 73, 115, 143, 132, 181,\n",
" 141, 103, 170, 130, 49, 83, 309, 120, 82, 227, 310, 151, 117,\n",
" 104, 109, 57, 75, 79, 169, 71, 84, 153, 35, 47, 238, 180,\n",
" 74, 237, 176, 190, 139, 125, 135, 156, 108, 171, 136, 53, 23,\n",
" 67, 94, 113, 112, 41, 70, 174, 61, 102, 40, 64, 65, 60,\n",
" 118, 223, 137, 63, 86, 155, 178, 36, 31, 88, 87, 58, 54,\n",
" 114, 111, 158, 78, 92, 50, 26, 17, 85, 183, 80, 42, 69,\n",
" 32, 154, 51, 20, 76, 34, 179, 68, 39, 59, 33, 56, 126,\n",
" 19, 15, 37, 89, 62, 77, 29, 38, 105, 52, 28, 90, 46,\n",
" 55, 43, 9, 91, 18, 16, 25, 7, 45, 27, 44, 8, 30,\n",
" 22, 24, 21, 10, 4, 14, 13, 12, 11, 5, 6, 2, 3,\n",
" 1, 0], dtype=int64)"
]
},
"execution_count": 240,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cluster_labels"
]
},
{
"cell_type": "code",
"execution_count": 241,
"metadata": {},
"outputs": [],
"source": [
"from scipy.cluster.hierarchy import dendrogram\n",
"from scipy.cluster import hierarchy"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Linkage Model"
]
},
{
"cell_type": "code",
"execution_count": 242,
"metadata": {},
"outputs": [],
"source": [
"linkage_matrix = hierarchy.linkage(model.children_)"
]
},
{
"cell_type": "code",
"execution_count": 243,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 67. , 161. , 1.41421356, 2. ],\n",
" [ 10. , 45. , 1.41421356, 2. ],\n",
" [ 47. , 99. , 1.41421356, 2. ],\n",
" ...,\n",
" [340. , 777. , 56.40035461, 389. ],\n",
" [332. , 778. , 58.69412236, 390. ],\n",
" [349. , 779. , 75.32595834, 391. ]])"
]
},
"execution_count": 243,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"linkage_matrix"
]
},
{
"cell_type": "code",
"execution_count": 244,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABH8AAAI+CAYAAAAl/6ZxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAABsHklEQVR4nO3dedweVX338e8vkLAFCHtQQBBxQdFbCaDE5QZUFBUUFRUVcEtrW1vUR8Sl6qO10j6tS1tLvd1C1YgLKtSqiEjcBQncGlQURBFpErAsJhEJhPP88TuT69yTufZtZq7P+/XixZXrnmvmzDlnzpz5zZkzFkIQAAAAAAAA6mneuBMAAAAAAACA4SH4AwAAAAAAUGMEfwAAAAAAAGqM4A8AAAAAAECNEfwBAAAAAACoMYI/AAAAAAAANbbtKDe25557hgMPPHCUmwQAAAAAAKi1VatW/T6EsFezv480+HPggQfqyiuvHOUmAQAAAAAAas3Mbmz1dx77AgAAAAAAqDGCPwAAAAAAADVG8AcAAAAAAKDGCP4AAAAAAADUGMEfAAAAAACAGiP4AwAAAAAAUGMEfwAAAAAAAGqM4A8AAAAAAECNEfwBAAAAAACoMYI/AAAAAAAANUbwBwAAAAAAoMYI/gAAAAAAANQYwR8AAAAAAIAaI/gDAAAAAABQYwR/AAAAAAAAaozgDwAAAAAAQI0R/AEAAAAAAKgxgj8AAAAAAAA1RvAHAAAAAACgxgj+AAAAAAAA1BjBHwAAAAAAgBoj+AMAAAAAAFBj245yY7feKk1Pd7bsqadKy5YNNTkAAAAAAAC1N9KRP7fdJs3Otl9udlZasWLYqQEAAAAAAKi/kY78kaSpKWnlytbLdDo6CAAAAAAAAK0x5w8AAAAAAECNEfwBAAAAAACoMYI/AAAAAAAANUbwBwAAAAAAoMYI/gAAAAAAANQYwR8AAAAAAIAaI/gDAAAAAABQYwR/AAAAAAAAaozgDwAAAAAAQI0R/AEAAAAAAKgxgj8AAAAAAAA1RvAHAAAAAACgxgj+AAAAAAAA1BjBHwAAAAAAgBoj+AMAAAAAAFBjBH8AAAAAAABqjOAPAAAAAABAjRH8AQAAAAAAqDGCPwAAAAAAADVG8AcAAAAAAKDGCP4AAAAAAADUGMEfAAAAAACAGiP4AwAAAAAAUGMEfwAAAAAAAGqM4A8AAAAAAECNEfwBAAAAAACoMYI/AAAAAAAANUbwBwAAAAAAoMYI/gAAAAAAANQYwR8AAAAAAIAaI/gDAAAAAABQYwR/AAAAAAAAaqxt8MfMHmJms8l/fzCzM81sdzO7xMyui//fbRQJBgAAAAAAQOfaBn9CCL8IIUyFEKYkHS7pj5K+KOlsSZeGEA6RdGn8NwAAAAAAAEqk28e+jpP0qxDCjZJOknRe/P48Sc8eYLoAAAAAAAAwAN0Gf14o6dPx8z4hhDXx81pJ+xT9wMyWmdmVZnblPffc02MyAQAAAAAA0IuOgz9mtkDSiZI+l/9bCCFICkW/CyHMhBCWhBCWzJ8/v+eEAgAAAAAAoHvdjPx5uqSrQgjr4r/Xmdm+khT/f8ugEwcAAAAAAID+dBP8eZEaj3xJ0kWSTo+fT5d04aASBQAAAAAAgMHoKPhjZjtJeoqkLyRfnyPpKWZ2naQnx38DAAAAAACgRLbtZKEQwkZJe+S++1/5278AAAAAAABQUt2+7QsAAAAAAAAVQvAHAAAAAACgxgj+AAAAAAAA1BjBHwAAAAAAgBoj+AMAAAAAAFBjBH8AAAAAAABqjOAPAAAAAABAjRH8AQAAAAAAqDGCPwAAAAAAADVG8AcAAAAAAKDGCP4AAAAAAADUGMEfAAAAAACAGiP4AwAAAAAAUGMEfwAAAAAAAGqM4A8AAAAAAECNEfwBAAAAAACoMYI/AAAAAAAANUbwBwAAAAAAoMYI/gAAAAAAANQYwR8AAAAAAIAaI/gDAAAAAABQYwR/AA
AAAAAAaozgDwAAAAAAQI0R/AEAAAAAAKgxgj8AAAAAAAA1RvAHAAAAAACgxgj+AAAAAAAA1BjBHwAAAAAAgBoj+AMAAAAAAFBjBH8AAAAAAABqjOAPAAAAAABAjRH8AQAAAAAAqDGCPwAAAAAAADVG8AcAAAAAAKDGCP4AAAAAAADUGMEfAAAAAACAGiP4AwAAAAAAUGMEfwAAAAAAAGqM4A8AAAAAAECNEfwBAAAAAACoMYI/AAAAAAAANUbwBwAAAAAAoMYI/gAAAAAAANQYwR8AAAAAAIAaI/gDAAAAAABQYwR/AAAAAAAAaozgDwAAAAAAQI0R/AEAAAAAAKgxgj8AAAAAAAA1RvAHAAAAAACgxgj+AAAAAAAA1BjBHwAAAAAAgBoj+AMAAAAAAFBjBH8AAAAAAABqjOAPAAAAAABAjRH8AQAAAAAAqDGCPwAAAAAAADVG8AcAAAAAAKDGCP4AAAAAAADUWEfBHzNbZGafN7NrzeznZvY4M9vdzC4xs+vi/3cbdmIBAAAAAADQnU5H/nxA0tdCCA+V9ChJP5d0tqRLQwiHSLo0/hsAAAAAAAAl0jb4Y2a7SnqipI9KUghhUwjhDkknSTovLnaepGcPJ4kAAAAAAADoVScjfw6SdKukj5vZ1Wb2ETPbSdI+IYQ1cZm1kvYZViIBAAAAAADQm06CP9tKeoykc0MIj5a0UblHvEIIQVIo+rGZLTOzK83synvuuaff9AIAAAAAAKALnQR/fifpdyGEy+O/Py8PBq0zs30lKf7/lqIfhxBmQghLQghL5s+fP4g0AwAAAAAAoENtgz8hhLWSbjKzh8SvjpP0M0kXSTo9fne6pAuHkkIAAAAAAAD0bNsOl3uNpE+Z2QJJN0h6mTxw9Fkze4WkGyWdMpwkAgAAAAAAoFcdBX9CCLOSlhT86biBpgYAAAAAAAAD1cmcPwAAAAAAAKgogj8AAAAAAAA1RvAHAAAAAACgxjqd8HngZmakFSuK/zY76/+fnt76b6eeKi1bNqxUAQAAAAAA1MvYRv6sWNEI8uRNTfl/ebOzzQNGAAAAAAAA2NrYRv5IHuBZubLz5YtGAgEAAAAAAKA55vwBAAAAAACoMYI/AAAAAAAANUbwBwAAAAAAoMYI/gAAAAAAANQYwR8AAAAAAIAaI/gDAAAAAABQYwR/AAAAAAAAaozgDwAAAAAAQI0R/AEAAAAAAKgxgj8AAAAAAAA1RvAHAAAAAACgxgj+AAAAAAAA1BjBHwAAAAAAgBrbdtwJkKSZGWnFivbLzc76/6en2y976qnSsmX9pAoAAAAAAKD6SjHyZ8WKRmCnlakp/6+d2dnOgkkAAAAAAAB1V4qRP5IHdVauHMy6OhkZBAAAAAAAMAlKMfIHAAAAAAAAw0HwBwAAAAAAoMYI/gAAAAAAANQYwR8AAAAAAIAaI/gDAAAAAABQYwR/AAAAAAAAaozgDwAAAAAAQI0R/AEAAAAAAKgxgj8AAAAAAAA1RvAHAAAAAACgxgj+AAAAAAAA1BjBHwAAAAAAgBoj+AMAAAAAAFBjBH8AAAAAAABqjOAPAAAAAABAjRH8AQAAAAAAqDGCPwAAAAAAADVG8AcAAAAAAKDGCP4AAAAAAADUGMEfAAAAAACAGiP4AwAAAAAAUGMEfwAAAAAAAGqM4A8AAAAAAECNEfwBAAAAAACoMYI/AAAAAAAANUbwBwAAAAAAoMYI/gAAAAAAANQYwR8AAAAAAIAaI/gDAAAAAABQYwR/AAAAAAAAaozgDwAAAAAAQI0R/AEAAAAAAKgxgj8AAAAAAAA1RvAHAAAAAACgxgj+AAAAAAAA1BjBHwAAAAAAgBoj+AMAAAAAAFBjBH8AAAAAAABqbNtOFjKz30haL2mzpHtDCEvMbHdJn5F0oKTfSDolhHD7cJIJAAAAAACAXnQz8ueYEMJUCGFJ/PfZki4NIRwi6dL4bwAAAAAAAJRIP499nSTpvPj5PEnP7js1AAAAAA
AAGKhOgz9B0tfNbJWZLYvf7RNCWBM/r5W0z8BTBwAAAAAAgL50NOePpMeHEG42s70lXWJm16Z/DCEEMwtFP4zBomW
"text/plain": [
"<Figure size 1440x720 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(20,10))\n",
"# Warning! This plot will take awhile!!\n",
"dn = hierarchy.dendrogram(linkage_matrix)"
]
},
{
"cell_type": "code",
"execution_count": 245,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABH4AAAJKCAYAAABajoKYAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAxl0lEQVR4nO3dfZRtaV0f+O9DX1qJ7dii176thDRGokGgC/uKokYvbxEcI2jQ6M1iGoO5w8SoJMaIzqwxk8kaycwY42QZ8PoSWmMFECHdyhJomr6A8bUbSlp5CYqNgn27OyrKi6Ft+M0fe1d3UdTlVtXZu+qcpz6ftXrVOafq/s7T++y9zz7f8+zfblUVAAAAAPrzgMMeAAAAAADzEPwAAAAAdErwAwAAANApwQ8AAABApwQ/AAAAAJ0S/AAAAAB06thBPtlnfuZn1lVXXXWQTwkAAADQtVtvvfW/VdXxnX53oMHPVVddlVtuueUgnxIAAACga621d1/od071AgAAAOiU4AcAAACgU4IfAAAAgE4JfgAAAAA6JfgBAAAA6JTgBwAAAKBTgh8AAACATgl+AAAAADol+AEAAADolOAHAAAAoFOCHwAAAIBOCX4AAAAAOiX4AQAAAOiU4AcAAACgU4IfAAAAgE4JfgAAAAA6JfgBAAAA6JTgBwAAAKBTgh8AAACATgl+AAAAADol+AEAAADolOAHAAAAoFOCHwAAAIBOHTvsAezk7Nlkff2wR3GwTp9Ozpw57FEAAAAAPVnKGT/r68nGxmGP4uBsbBy9oAsAAACY31LO+EmStbXk3LnDHsXBOHXqsEcAAAAA9GgpZ/wAAAAAsDjBDwAAAECnBD8AAAAAnRL8AAAAAHRK8AMAAADQKcEPAAAAQKcEPwAAAACdEvwAAAAAdErwAwAAANApwQ8AAABApwQ/AAAAAJ0S/AAAAAB0SvADAAAA0CnBDwAAAECnBD8AAAAAnRL8AAAAAHRK8AMAAADQKcEPAAAAQKcEPwAAAACdEvwAAAAAdErwAwAAANApwQ8AAABApwQ/AAAAAJ0S/AAAAAB0SvADAAAA0CnBDwAAAECnBD8AAAAAnRL8AAAAAHRK8AMAAADQKcEPAAAAQKcEPwAAAACdumjw01r7/Nbaxpb//ry19tzW2oNbaze21t45/vz0gxgwAAAAALtz0eCnqt5RVWtVtZbkmiQfSvKKJM9LclNVPTzJTeN9AAAAAJbEXk/1emKS36uqdyd5WpLrxsevS/L0CccFAAAAwIL2Gvx8c5L/NN6+oqruGG+fT3LFTv+gtXamtXZLa+2Wu+++e5/DBAAAAGCvdh38tNYuTfJ1SX5u+++qqpLUTv+uqs5W1cmqOnn8+PF9DxQAAACAvdnLjJ+nJnlTVd053r+ztXZlkow/75p6cAAAAADs316Cn2/J/ad5JckNSa4db1+b5PqpBgUAAADA4nYV/LTWPiXJk5O8fMvDz0/y5NbaO5M8abwPAAAAwJI4tps/qqoPJvmMbY/9cYarfAEAAACwhPZ6VS8AAAAAVoTgBwAAAKBTgh8AAACATgl+AAAAADol+AEAAADolOAHAAAAoFOCHwAAAIBOCX4AAAAAOiX4AQAAAOiU4AcAAACgU4IfAAAAgE4JfgAAAAA6JfgBAAAA6JTgBwAAAKBTgh8AAACATgl+AAAAADol+AEAAADolOAHAAAAoFOCHwAAAIBOCX4AAAAAOiX4AQAAAOiU4AcAAACgU4IfAAAAgE4JfgAAAAA6JfgBAAAA6JTgBwAAAKBTgh8AAACATgl+AAAAADol+AEAAADolOAHAAAAoFOCHwAAAIBOCX4AAAAAOiX4AQAAAOiU4AcAAACgU4IfAAAAgE4JfgAAAAA6JfgBAAAA6JTgBwAAAKBTgh8AAACATgl+AAAAADol+AEAAADolOAHAAAAoFOCHwAAAIBOCX4AAAAAOiX4AQAAAOiU4AcAAACgU4IfAAAAgE4Jfg
AAAAA6JfgBAAAA6JTgBwAAAKBTgh8AAACATgl+AAAAADol+AEAAADolOAHAAAAoFOCHwAAAIBOCX4AAAAAOiX4AQAAAOiU4AcAAACgU4IfAAAAgE7tKvhprV3eWntZa+3trbW3tdYe11p7cGvtxtbaO8efnz73YAEAAADYvd3O+PmRJK+qqi9IcnWStyV5XpKbqurhSW4a7wMAAACwJC4a/LTWPi3JVyb5ySSpqnuq6n1JnpbkuvHPrkvy9HmGCAAAAMB+7GbGz8OS3J3kP7TW3txa+4nW2qckuaKq7hj/5nySK+YaJAAAAAB7t5vg51iSL0rygqp6TJIPZttpXVVVSWqnf9xaO9Nau6W1dsvdd9+96HgBAAAA2KXdBD/vSfKeqvr18f7LMgRBd7bWrkyS8eddO/3jqjpbVSer6uTx48enGDMAAAAAu3DR4Keqzif5w9ba548PPTHJW5PckOTa8bFrk1w/ywgBAAAA2Jdju/y770jys621S5O8K8m3ZgiNXtpae3aSdyf5pnmGCAAAAMB+7Cr4qaqNJCd3+NUTJx0NAAAAAJPZTY8fAAAAAFaQ4AcAAACgU4IfAAAAgE7ttrlzV86eTdbXD3sU99vYGH6eOnWYo/h4p08nZ84c9igAAACA/TqSM37W1+8PW5bB2trw3zLZ2FiucAwAAADYuyM54ycZgpZz5w57FMtr2WYfAQAAAHt3JGf8AAAAABwFgh8AAACATgl+AAAAADol+AEAAADolOAHAAAAoFOCHwAAAIBOCX4AAAAAOiX4AQAAAOiU4AcAAACgU4IfAAAAgE4JfgAAAAA6JfgBAAAA6JTgBwAAAKBTxw57AL05ezZZXz/sUSxuY2P4eerUYY5iGqdPJ2fOHPYoAAAA4OCZ8TOx9fX7Q5NVtrY2/LfqNjb6COIAAABgP8z4mcHaWnLu3GGPgqSPGUsAAACwX2b8AAAAAHRK8AMAAADQKcEPAAAAQKcEPwAAAACdEvwAAAAAdErwAwAAANApwQ8AAABApwQ/AAAAAJ0S/AAAAAB0SvADAAAA0CnBDwAAAECnBD8AAAAAnRL8AAAAAHRK8AMAAADQKcEPAAAAQKcEPwAAAACdEvwAAAAAdErwAwAAANApwQ8AAABApwQ/AAAAAJ0S/AAAAAB0SvADAAAA0CnBDwAAAECnBD8AAAAAnRL8AAAAAHRK8AMAAADQKcEPAAAAQKcEPwAAAACdEvwAAAAAdErwAwAAANApwQ8AAABApwQ/AAAAAJ0S/AAAAAB0SvADAAAA0CnBDwAAAECnBD8AAAAAnRL8AAAAAHRK8AMAAADQqWO7+aPW2u1J3p/kI0nuraqTrbUHJ3lJkquS3J7km6rqT+cZJgAAAAB7tZcZP4+vqrWqOjnef16Sm6rq4UluGu8DAAAAsCQWOdXraUmuG29fl+TpC48GAAAAgMnsNvipJK9prd3aWjszPnZFVd0x3j6f5IrJRwcAAADAvu2qx0+Sr6iq97bWPivJja21t2/9ZVVVa612+odjUHQmSR760IcuNFgAAAAAdm9XM36q6r3jz7uSvCLJY5Pc2Vq7MknGn3dd4N+eraqTVXXy+PHj04waAAAAgIu6aPDTWvuU1tqnbt5O8reT/HaSG5JcO/7ZtUmun2uQAAAAAOzdbk71uiLJK1prm3+/XlWvaq39ZpKXttaeneTdSb5pvmECAAAAsFcXDX6q6l1Jrt7h8T9O8sQ5BgUAAADA4ha5nDsAAAAAS0zwAwAAANApwQ8AAABApwQ/AAAAAJ0S/AAAAAB0ajeXc2dJnL31bNZvWz/sYayUjfP/Nkly6kXPPdRxrJLTjzqdM9ecOexhAAAAMAHBzwpZv209G+c3snZi7bCHsjLWnvfcwx7CStk4v5Ekgh8AAIBOCH5WzNqJtZx71rnDHgadOvWiU4c9BAAAACYk+GElOM3tYGzO+BEAzc8pdQAAwEHQ3JmVsHmaG/NaO7HmVMIDsHF+Q5AJAAAcCD
N+WBlOc6MXZlQBAAAHxYwfAAAAgE6Z8QMciqPct+mo91LS3wgAAA6OGT/AoTjKfZuOci8l/Y0AAOBgmfEDHBp9m46
"text/plain": [
"<Figure size 1440x720 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(20,10))\n",
"dn = hierarchy.dendrogram(linkage_matrix,truncate_mode='lastp',p=48)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Choosing a Threshold Distance\n",
"\n",
"**What is the distance between two points?**"
]
},
{
"cell_type": "code",
"execution_count": 246,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mpg</th>\n",
" <th>cylinders</th>\n",
" <th>displacement</th>\n",
" <th>horsepower</th>\n",
" <th>weight</th>\n",
" <th>acceleration</th>\n",
" <th>model_year</th>\n",
" <th>origin_europe</th>\n",
" <th>origin_japan</th>\n",
" <th>origin_usa</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>392.000000</td>\n",
" <td>392.000000</td>\n",
" <td>392.000000</td>\n",
" <td>392.000000</td>\n",
" <td>392.000000</td>\n",
" <td>392.000000</td>\n",
" <td>392.000000</td>\n",
" <td>392.000000</td>\n",
" <td>392.000000</td>\n",
" <td>392.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>0.384200</td>\n",
" <td>0.494388</td>\n",
" <td>0.326646</td>\n",
" <td>0.317768</td>\n",
" <td>0.386897</td>\n",
" <td>0.448888</td>\n",
" <td>0.498299</td>\n",
" <td>0.173469</td>\n",
" <td>0.201531</td>\n",
" <td>0.625000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.207580</td>\n",
" <td>0.341157</td>\n",
" <td>0.270398</td>\n",
" <td>0.209191</td>\n",
" <td>0.240829</td>\n",
" <td>0.164218</td>\n",
" <td>0.306978</td>\n",
" <td>0.379136</td>\n",
" <td>0.401656</td>\n",
" <td>0.484742</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.212766</td>\n",
" <td>0.200000</td>\n",
" <td>0.095607</td>\n",
" <td>0.157609</td>\n",
" <td>0.173589</td>\n",
" <td>0.343750</td>\n",
" <td>0.250000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.365691</td>\n",
" <td>0.200000</td>\n",
" <td>0.214470</td>\n",
" <td>0.258152</td>\n",
" <td>0.337539</td>\n",
" <td>0.446429</td>\n",
" <td>0.500000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>0.531915</td>\n",
" <td>1.000000</td>\n",
" <td>0.536822</td>\n",
" <td>0.434783</td>\n",
" <td>0.567550</td>\n",
" <td>0.537202</td>\n",
" <td>0.750000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mpg cylinders displacement horsepower weight \\\n",
"count 392.000000 392.000000 392.000000 392.000000 392.000000 \n",
"mean 0.384200 0.494388 0.326646 0.317768 0.386897 \n",
"std 0.207580 0.341157 0.270398 0.209191 0.240829 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.212766 0.200000 0.095607 0.157609 0.173589 \n",
"50% 0.365691 0.200000 0.214470 0.258152 0.337539 \n",
"75% 0.531915 1.000000 0.536822 0.434783 0.567550 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" acceleration model_year origin_europe origin_japan origin_usa \n",
"count 392.000000 392.000000 392.000000 392.000000 392.000000 \n",
"mean 0.448888 0.498299 0.173469 0.201531 0.625000 \n",
"std 0.164218 0.306978 0.379136 0.401656 0.484742 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.343750 0.250000 0.000000 0.000000 0.000000 \n",
"50% 0.446429 0.500000 0.000000 0.000000 1.000000 \n",
"75% 0.537202 0.750000 0.000000 0.000000 1.000000 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 "
]
},
"execution_count": 246,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scaled_df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 247,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"320"
]
},
"execution_count": 247,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scaled_df['mpg'].idxmax()"
]
},
{
"cell_type": "code",
"execution_count": 248,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"28"
]
},
"execution_count": 248,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scaled_df['mpg'].idxmin()"
]
},
{
"cell_type": "code",
"execution_count": 249,
"metadata": {},
"outputs": [],
"source": [
"# https://stackoverflow.com/questions/1401712/how-can-the-euclidean-distance-be-calculated-with-numpy\n",
"a = scaled_df.iloc[320]\n",
"b = scaled_df.iloc[28]\n",
"dist = np.linalg.norm(a-b)"
]
},
{
"cell_type": "code",
"execution_count": 250,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2.3852929970374714"
]
},
"execution_count": 250,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dist"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Max possible distance?\n",
"\n",
"Recall Euclidean distance: https://en.wikipedia.org/wiki/Euclidean_distance"
]
},
{
"cell_type": "code",
"execution_count": 251,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10"
]
},
"execution_count": 251,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(scaled_df.columns)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Creating a Model Based on Distance Threshold\n",
"\n",
"* distance_threshold\n",
" * The linkage distance threshold at or above which clusters will not be merged."
]
},
{
"cell_type": "code",
"execution_count": 252,
"metadata": {},
"outputs": [],
"source": [
"model = AgglomerativeClustering(n_clusters=None,distance_threshold=2)"
]
},
{
"cell_type": "code",
"execution_count": 253,
"metadata": {},
"outputs": [],
"source": [
"cluster_labels = model.fit_predict(scaled_data)"
]
},
{
"cell_type": "code",
"execution_count": 254,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 4, 4,\n",
" 4, 1, 0, 0, 0, 0, 0, 4, 3, 3, 3, 3, 1, 7, 1, 4, 4,\n",
" 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 4, 7, 4, 4, 7, 0, 0,\n",
" 0, 1, 1, 0, 7, 1, 7, 0, 7, 7, 3, 3, 3, 3, 3, 3, 3,\n",
" 3, 3, 1, 3, 3, 3, 3, 0, 0, 0, 0, 7, 1, 1, 7, 1, 3,\n",
" 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 0,\n",
" 3, 3, 3, 3, 4, 1, 7, 1, 1, 7, 4, 0, 3, 3, 0, 0, 0,\n",
" 0, 3, 0, 10, 3, 4, 4, 4, 1, 7, 1, 7, 4, 4, 4, 3, 3,\n",
" 3, 3, 3, 0, 0, 0, 1, 1, 7, 0, 0, 1, 1, 0, 4, 4, 4,\n",
" 4, 5, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 1, 7, 4, 7, 1,\n",
" 0, 1, 4, 0, 4, 0, 0, 0, 0, 1, 0, 0, 7, 7, 0, 5, 5,\n",
" 5, 5, 4, 4, 4, 4, 7, 7, 0, 1, 9, 4, 9, 4, 0, 1, 1,\n",
" 7, 0, 5, 8, 10, 0, 5, 5, 5, 5, 1, 2, 8, 7, 1, 5, 5,\n",
" 5, 5, 9, 9, 9, 9, 5, 5, 5, 5, 0, 7, 1, 7, 2, 2, 1,\n",
" 0, 10, 0, 10, 8, 2, 1, 6, 1, 5, 5, 5, 9, 9, 9, 7, 9,\n",
" 9, 9, 9, 9, 9, 5, 9, 5, 5, 2, 10, 10, 2, 10, 2, 2, 10,\n",
" 0, 0, 0, 0, 8, 1, 9, 9, 2, 9, 9, 5, 5, 5, 5, 5, 5,\n",
" 5, 5, 8, 1, 2, 2, 8, 5, 8, 5, 2, 2, 1, 8, 2, 9, 9,\n",
" 2, 8, 6, 2, 6, 2, 2, 2, 9, 8, 6, 6, 6, 6, 6, 2, 6,\n",
" 8, 8, 8, 8, 6, 6, 8, 10, 10, 8, 6, 2, 2, 2, 9, 2, 6,\n",
" 2, 6, 6, 6, 6, 6, 2, 2, 2, 8, 6, 6, 6, 6, 8, 8, 10,\n",
" 10, 9, 5, 9, 9, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 2, 2,\n",
" 6, 6, 6, 6, 6, 6, 9, 9, 2, 9, 6, 2, 2, 2, 8, 2, 2,\n",
" 2], dtype=int64)"
]
},
"execution_count": 254,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cluster_labels"
]
},
{
"cell_type": "code",
"execution_count": 255,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=int64)"
]
},
"execution_count": 255,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.unique(cluster_labels)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Linkage Matrix\n",
"\n",
"Source: https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html#scipy.cluster.hierarchy.linkage\n",
"\n",
" A (n-1) by 4 matrix Z is returned. At the i-th iteration, clusters with indices Z[i, 0] and Z[i, 1] are combined to form cluster n + i. A cluster with an index less than n corresponds to one of the original observations. The distance between clusters Z[i, 0] and Z[i, 1] is given by Z[i, 2]. The fourth value Z[i, 3] represents the number of original observations in the newly formed cluster."
]
},
{
"cell_type": "code",
"execution_count": 256,
"metadata": {},
"outputs": [],
"source": [
"linkage_matrix = hierarchy.linkage(model.children_)"
]
},
{
"cell_type": "code",
"execution_count": 257,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 67. , 161. , 1.41421356, 2. ],\n",
" [ 10. , 45. , 1.41421356, 2. ],\n",
" [ 47. , 99. , 1.41421356, 2. ],\n",
" ...,\n",
" [340. , 777. , 56.40035461, 389. ],\n",
" [332. , 778. , 58.69412236, 390. ],\n",
" [349. , 779. , 75.32595834, 391. ]])"
]
},
"execution_count": 257,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"linkage_matrix"
]
},
{
"cell_type": "code",
"execution_count": 258,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABH4AAAJCCAYAAAC23QD1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAj50lEQVR4nO3de7Sld13f8c9XxoByMUHGJCtBg4ooQjnIiFhbjQYsVCusiiwYtImiY62o1C41XhdWvNBWxXZ5m8olXo6ACCtZFm1pytjaCzqBAyEEBGJYBjLJiImEiCD46x/7GTk5nJNz3bPnfOf1Wmuvs/dzmfM7efLsfeY9z6XGGAEAAACgn09a9AAAAAAAmA/hBwAAAKAp4QcAAACgKeEHAAAAoCnhBwAAAKAp4QcAAACgqQOn85s95CEPGZdccsnp/JYAAAAArV133XV/McY4uN680xp+Lrnkkhw/fvx0fksAAACA1qrqPRvNc6oXAAAAQFPCDwAAAEBTwg8AAABAU8IPAAAAQFPCDwAAAEBTwg8AAABAU8IPAAAAQFPCDwAAAEBTwg8AAABAU8IPAAAAQFPCDwAAAEBTwg8AAABAU8IPAAAAQFPCDwAAAEBTwg8AAABAU8IPAAAAQFPCDwAAAEBTwg8AAABAU8IPAAAAQFPCDwAAAEBTwg8AAABAU8IPAAAAQFPCDwAAAEBTBxY9gP3o6NFkeXnRo4Czw+HDyZEjix4FAADA/uSInx1YXk5WVhY9CuhvZUVkBQAA2A1H/OzQ0lJy7NiiRwG9XXrpokcAAACwvzniBwAAAKAp4QcAAACgKeEHAAAAoCnhBwAAAKAp4QcAAACgKeEHAAAAoCnhBwAAAKAp4QcAAACgKeEHAAAAoCnhBwAAAKAp4QcAAACgKeEHAAAAoCnhBwAAAKAp4QcAAACgKeEHAAAAoCnhBwAAAKAp4QcAAACgKeEHAAAAoCnhBwAAAKAp4QcAAACgKeEHAAAAoCnhBwAAAKAp4QcAAACgKeEHAAAAoCnhBwAAAKAp4QcAAACgKeEHAAAAoCnhBwAAAKAp4QcAAACgKeEHAAAAoCnhBwAAAKCpTcNPVT2iqlZWPT5QVc+rqgdX1euq6p3T1/NOx4ABAAAA2JpNw88Y4x1jjKUxxlKSxyX56ySvSXJlkmvHGA9Pcu30GgAAAIAzxHZP9bosybvHGO9J8tQkV03Tr0rytD0cFwAAAAC7tN3w88wkvz09P3+Mcev0/ESS8/dsVAAAAADs2pbDT1Wdk+TrkvzO2nljjJFkbLDekao6XlXHT548ueOBAgAAALA92zni5ylJ3jjGuG16fVtVXZgk09fb11tpjHF0jHFojHHo4MGDuxstAAAAAFu2nfDzrHz8NK8kuSbJ5dPzy5NcvVeDAgAAAGD3thR+qur+SZ6U5NWrJv9MkidV1TuTPHF6DQAAAMAZ4sBWFhpj3J3k09dMe39md/kCAAAA4Ay03bt6AQAAALBPCD8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATQk/AAAAAE
0JPwAAAABNCT8AAAAATQk/AAAAAE0JPwAAAABNCT8AAAAATW0p/FTVuVX1qqp6e1XdWFVfWlUPrqrXVdU7p6/nzXuwAAAAAGzdVo/4+YUkfzDG+Pwkj0lyY5Irk1w7xnh4kmun1wAAAACcITYNP1X1aUm+PMmLk2SM8ZExxp1Jnprkqmmxq5I8bT5DBAAAAGAntnLEz8OSnEzy0qp6U1X9WlXdP8n5Y4xbp2VOJDl/XoMEAAAAYPu2En4OJPmiJL88xnhskruz5rSuMcZIMtZbuaqOVNXxqjp+8uTJ3Y4XAAAAgC3aSvi5JcktY4w3TK9flVkIuq2qLkyS6evt6608xjg6xjg0xjh08ODBvRgzAAAAAFuwafgZY5xI8udV9Yhp0mVJ3pbkmiSXT9MuT3L1XEYIAAAAwI4c2OJy35Xkt6rqnCQ3JfnmzKLRK6vqOUnek+QZ8xkiAAAAADuxpfAzxlhJcmidWZft6WgAAAAA2DNbucYPAAAAAPuQ8AMAAADQlPADAAAA0NRWL+4MZ7SjR5Pl5UWPgr22sjL7eumlixwF83D4cHLkyKJHAQAA/TnihxaWlz8eCehjaWn2oJeVFaEWAABOF0f80MbSUnLs2KJHAWzGEVwAAHD6OOIHAAAAoCnhBwAAAKAp4QcAAACgKeEHAAAAoCnhBwAAAKAp4QcAAACgKeEHAAAAoCnhBwAAAKAp4QcAAACgKeEHAAAAoCnhBwAAAKAp4QcAAACgKeEHAAAAoKkDix4A0NPRo8ny8qJHwZloZWX29dJLFzkKzlSHDydHjix6FAAAfTjiB5iL5eWP/wUfVltamj1grZUVwRgAYK854geYm6Wl5NixRY8C2C8cBQYAsPcc8QMAAADQlPADAAAA0JTwAwAAANCU8AMAAADQlPADAAAA0JTwAwAAANCU8AMAAADQlPADAAAA0JTwAwAAANCU8AMAAADQlPADAAAA0JTwAwAAANCU8AMAAADQlPADAAAA0JTwAwAAANCU8AMAAADQlPADAAAA0JTwAwAAANCU8AMAAADQlPADAAAA0JTwAwAAANCU8AMAAADQlPADAAAA0JTwAwAAANCU8AMAAADQlPADAAAA0JTwAwAAANCU8AMAAADQlPADAAAA0JTwAwAAANCU8AMAAADQlPADAAAA0JTwAwAAANCU8AMAAADQlPADAAAA0JTwAwAAANCU8AMAAADQlPADAAAA0NSBrSxUVTcnuSvJx5J8dIxxqKoenOQVSS5JcnOSZ4wx7pjPMAEAAADYru0c8fOVY4ylMcah6fWVSa4dYzw8ybXTawAAAADOELs51eupSa6anl+V5Gm7Hg0AAAAAe2ar4Wck+W9VdV1VHZmmnT/GuHV6fiLJ+Xs+OgAAAAB2bEvX+Enyj8YY762qz0jyuqp6++qZY4xRVWO9FadQdCRJPvMzP3NXgwUAAABg67Z0xM8Y473T19uTvCbJ45PcVlUXJsn09fYN1j06xjg0xjh08ODBvRk1AAAAAJvaNPxU1f2r6oGnnif56iRvTXJNksunxS5PcvW8BgkAAADA9m3lVK/zk7ymqk4tvzzG+IOq+pMkr6yq5yR5T5JnzG+YAAAAAGzXpuFnjHFTksesM/39SS6bx6AAAAAA2L3d3M4dAAAAgDOY8AMAAADQlPADAAAA0JTwAwAAANCU8AMAAADQ1FZu5w5AE0evO5rl65cXPQxY18qJFyVJLn3Z8xY6DtjI4UcfzpHHHVn0MABgW4QfgLPI8vXLWTmxkqULlhY9FPgES1c+b9FDgA2tnFhJEuEHgH1H+AE4yyxdsJRjVxxb9DAA9pVLX3bpoocAADsi/AAA+4bTFVmUU0f8CEAsgtMMgd1wcWcAYN84dboinG5LFyw5TZaFWDmxIngDu+KIHwBgX3G6InA2cZQZsFuO+AEAAABoyhE/AADAjrju1vy5vtT8uYYS3TniBwAA2BHX3Zo/15eaL9dQ4mzgiB8AAGDHXHeL/cyRVJ
wNHPEDAAAA0JTwAwAAANCU8AMAAADQlPADAAAA0JTwAwAAANCU8AMAAADQlPADAAAA0JTwAwAAANCU8AMAAADQlPA
"text/plain": [
"<Figure size 1440x720 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(20,10))\n",
"dn = hierarchy.dendrogram(linkage_matrix,truncate_mode='lastp',p=11)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 1
}