diff --git a/DBSCAN_1-zmiany_df.ipynb b/DBSCAN_1-zmiany_df.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..e73fa9a39778f2f7174671da8c26cde128aeb617
--- /dev/null
+++ b/DBSCAN_1-zmiany_df.ipynb
@@ -0,0 +1,4517 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pyodbc\n",
+    "import pandas as pd\n",
+    "from sqlalchemy import create_engine\n",
+    "import urllib\n",
+    "import seaborn as sns\n",
+    "from matplotlib import pyplot as plt\n",
+    "import numpy as np\n",
+    "import random"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "params = urllib.parse.quote_plus(\"DRIVER={ODBC Driver 17 for SQL Server};\"\n",
+    "                                 #\"SERVER=dbserver.mif.pg.gda.pl,1433;\"\n",
+    "                                 \"SERVER=127.0.0.1,1433;\"\n",
+    "                                 \"DATABASE=silkycoders;\"\n",
+    "                                 \"UID=;\"\n",
+    "                                 \"PWD=\")\n",
+    "\n",
+    "engine = create_engine(\"mssql+pyodbc:///?odbc_connect={}\".format(params))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query = \"\"\"SELECT rfid.*, it.*, sub.*, cl.*, dep.*, br.*\n",
+    "    FROM rfid.Logs rfid \n",
+    "    JOIN rfid.EanEpc ean \n",
+    "    ON rfid.EPC = ean.EPC \n",
+    "    JOIN dw.Item it  \n",
+    "    ON ean.EAN = it.EAN \n",
+    "    JOIN dw.Subclass sub \n",
+    "    ON sub.SubclassID = it.SubclassID\n",
+    "    JOIN dw.Class cl\n",
+    "    ON sub.ClassID = cl.ClassID\n",
+    "    JOIN dw.Department dep\n",
+    "    ON dep.DepartmentID = cl.DepartmentID\n",
+    "    JOIN dw.Brand br\n",
+    "    ON dep.BrandID = br.BrandID\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_sql_query(query, engine)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def prepare_timestamp(df : pd.DataFrame):\n",
+    "    \n",
+    "    dt = df.sort_values(by=\"TIMESTAMP\").reset_index(drop=True)\n",
+    "    dt[\"HOUR\"] = dt.TIMESTAMP.astype('datetime64[ns]').dt.hour.astype(int)\n",
+    "    dt[\"MIN\"] = dt.TIMESTAMP.astype('datetime64[ns]').dt.minute.astype(int)\n",
+    "    dt[\"SEC\"] = dt.TIMESTAMP.astype('datetime64[ns]').dt.second.astype(int)\n",
+    "    dt[\"MICROSEC\"] = dt.TIMESTAMP.astype('datetime64[ns]').dt.microsecond.astype(int)\n",
+    "\n",
+    "    dt[\"MILISEC\"] = dt.MICROSEC/1000 + dt.SEC*1000 + dt.MIN*60000 + dt.HOUR*3600000\n",
+    "    dt[\"TIME_MS\"] = dt.MILISEC - dt.MILISEC[0]\n",
+    "    \n",
+    "    dt['TIME_PER_MEASUREMENT_MS'] = 0\n",
+    "    dt['NUMBER_OF_SIGNALS'] = 0\n",
+    "    dt['LENGTH_OF_MEASUREMENT'] = 0\n",
+    "    \n",
+    "    for m in dt.MEASUREMENT.unique():\n",
+    "        filtr = (dt.MEASUREMENT == m)\n",
+    "        dt.loc[filtr,'TIME_PER_MEASUREMENT_MS'] = dt[filtr].MILISEC - dt[filtr].MILISEC.iloc[0]\n",
+    "        dt.loc[filtr, \"NUMBER_OF_SIGNALS\"] = len(dt[filtr])\n",
+    "        dt.loc[filtr, 'LENGTH_OF_MEASUREMENT'] = dt[filtr].TIME_PER_MEASUREMENT_MS.max()\n",
+    "        \n",
+    "    dt[\"TIME_KMS\"] = np.floor(dt.TIME_MS/1000)    \n",
+    "    dt = dt.merge(dt.groupby(['EPC','TIME_KMS'])[\"PROXIMITY\"].max().reset_index(name=\"MAX_PROXIMITY_KMS\"), how=\"left\",\n",
+    "                                 on = ['EPC','TIME_KMS'])\n",
+    "    dt = dt.merge(dt.groupby(['EPC','TIME_KMS'])[\"PROXIMITY\"].sum().reset_index(name=\"SUM_PROXIMITY_KMS\"), how=\"left\",\n",
+    "                                 on = ['EPC','TIME_KMS'])\n",
+    "    return dt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = prepare_timestamp(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv('df.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 502689 entries, 0 to 502688\n",
+      "Data columns (total 36 columns):\n",
+      " #   Column                   Non-Null Count   Dtype  \n",
+      "---  ------                   --------------   -----  \n",
+      " 0   EPC                      502689 non-null  object \n",
+      " 1   PROXIMITY                502689 non-null  float64\n",
+      " 2   TIMESTAMP                502689 non-null  object \n",
+      " 3   MEASUREMENT              502689 non-null  int64  \n",
+      " 4   ITEMID                   502689 non-null  int64  \n",
+      " 5   EAN                      502689 non-null  int64  \n",
+      " 6   StyleColorSize           502689 non-null  object \n",
+      " 7   StyleColor               502689 non-null  object \n",
+      " 8   Size                     502689 non-null  object \n",
+      " 9   SubclassID               502689 non-null  int64  \n",
+      " 10  ItemSeason               502689 non-null  object \n",
+      " 11  FashionLevel             369997 non-null  object \n",
+      " 12  SubclassID.1             502689 non-null  int64  \n",
+      " 13  SubclassName             502689 non-null  object \n",
+      " 14  ClassID                  502689 non-null  int64  \n",
+      " 15  ClassID.1                502689 non-null  int64  \n",
+      " 16  ClassName                502689 non-null  object \n",
+      " 17  DepartmentID             502689 non-null  int64  \n",
+      " 18  DepartmentID.1           502689 non-null  int64  \n",
+      " 19  DepartmentName           502689 non-null  object \n",
+      " 20  BrandID                  502689 non-null  int64  \n",
+      " 21  BrandID.1                502689 non-null  int64  \n",
+      " 22  BrandName                502689 non-null  object \n",
+      " 23  Active                   502689 non-null  bool   \n",
+      " 24  HOUR                     502689 non-null  int64  \n",
+      " 25  MIN                      502689 non-null  int64  \n",
+      " 26  SEC                      502689 non-null  int64  \n",
+      " 27  MICROSEC                 502689 non-null  int64  \n",
+      " 28  MILISEC                  502689 non-null  float64\n",
+      " 29  TIME_MS                  502689 non-null  float64\n",
+      " 30  TIME_PER_MEASUREMENT_MS  502689 non-null  float64\n",
+      " 31  NUMBER_OF_SIGNALS        502689 non-null  int64  \n",
+      " 32  LENGTH_OF_MEASUREMENT    502689 non-null  int64  \n",
+      " 33  TIME_KMS                 502689 non-null  float64\n",
+      " 34  MAX_PROXIMITY_KMS        502689 non-null  float64\n",
+      " 35  SUM_PROXIMITY_KMS        502689 non-null  float64\n",
+      "dtypes: bool(1), float64(7), int64(17), object(11)\n",
+      "memory usage: 134.7+ MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "df.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>PROXIMITY</th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>ITEMID</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>SubclassID</th>\n",
+       "      <th>SubclassID.1</th>\n",
+       "      <th>ClassID</th>\n",
+       "      <th>ClassID.1</th>\n",
+       "      <th>DepartmentID</th>\n",
+       "      <th>DepartmentID.1</th>\n",
+       "      <th>...</th>\n",
+       "      <th>SEC</th>\n",
+       "      <th>MICROSEC</th>\n",
+       "      <th>MILISEC</th>\n",
+       "      <th>TIME_MS</th>\n",
+       "      <th>TIME_PER_MEASUREMENT_MS</th>\n",
+       "      <th>NUMBER_OF_SIGNALS</th>\n",
+       "      <th>LENGTH_OF_MEASUREMENT</th>\n",
+       "      <th>TIME_KMS</th>\n",
+       "      <th>MAX_PROXIMITY_KMS</th>\n",
+       "      <th>SUM_PROXIMITY_KMS</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>502689.000000</td>\n",
+       "      <td>502689.000000</td>\n",
+       "      <td>5.026890e+05</td>\n",
+       "      <td>5.026890e+05</td>\n",
+       "      <td>502689.000000</td>\n",
+       "      <td>502689.000000</td>\n",
+       "      <td>502689.000000</td>\n",
+       "      <td>502689.000000</td>\n",
+       "      <td>502689.000000</td>\n",
+       "      <td>502689.000000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>502689.000000</td>\n",
+       "      <td>502689.000000</td>\n",
+       "      <td>5.026890e+05</td>\n",
+       "      <td>5.026890e+05</td>\n",
+       "      <td>502689.000000</td>\n",
+       "      <td>502689.000000</td>\n",
+       "      <td>502689.000000</td>\n",
+       "      <td>502689.00000</td>\n",
+       "      <td>502689.000000</td>\n",
+       "      <td>502689.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>-75.406017</td>\n",
+       "      <td>23.412382</td>\n",
+       "      <td>2.169862e+06</td>\n",
+       "      <td>5.902835e+12</td>\n",
+       "      <td>83.920704</td>\n",
+       "      <td>83.920704</td>\n",
+       "      <td>18.231477</td>\n",
+       "      <td>18.231477</td>\n",
+       "      <td>2.609574</td>\n",
+       "      <td>2.609574</td>\n",
+       "      <td>...</td>\n",
+       "      <td>29.193547</td>\n",
+       "      <td>499773.110213</td>\n",
+       "      <td>3.721192e+07</td>\n",
+       "      <td>2.018186e+06</td>\n",
+       "      <td>100132.210719</td>\n",
+       "      <td>15383.906986</td>\n",
+       "      <td>199835.398777</td>\n",
+       "      <td>2017.68607</td>\n",
+       "      <td>-72.497318</td>\n",
+       "      <td>-398.108291</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>5.698062</td>\n",
+       "      <td>12.175284</td>\n",
+       "      <td>7.798483e+04</td>\n",
+       "      <td>7.380986e+07</td>\n",
+       "      <td>142.489244</td>\n",
+       "      <td>142.489244</td>\n",
+       "      <td>8.844056</td>\n",
+       "      <td>8.844056</td>\n",
+       "      <td>0.937828</td>\n",
+       "      <td>0.937828</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.223297</td>\n",
+       "      <td>288469.414710</td>\n",
+       "      <td>1.121487e+06</td>\n",
+       "      <td>1.121487e+06</td>\n",
+       "      <td>81859.831696</td>\n",
+       "      <td>8217.121271</td>\n",
+       "      <td>101049.072703</td>\n",
+       "      <td>1121.48684</td>\n",
+       "      <td>5.893956</td>\n",
+       "      <td>262.167663</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>-110.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>2.028742e+06</td>\n",
+       "      <td>5.902691e+12</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>3.519374e+07</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>4597.000000</td>\n",
+       "      <td>53538.000000</td>\n",
+       "      <td>0.00000</td>\n",
+       "      <td>-100.500000</td>\n",
+       "      <td>-2629.400000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>-79.900000</td>\n",
+       "      <td>13.000000</td>\n",
+       "      <td>2.113407e+06</td>\n",
+       "      <td>5.902805e+12</td>\n",
+       "      <td>11.000000</td>\n",
+       "      <td>11.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.000000</td>\n",
+       "      <td>250000.000000</td>\n",
+       "      <td>3.624898e+07</td>\n",
+       "      <td>1.055248e+06</td>\n",
+       "      <td>38108.000000</td>\n",
+       "      <td>8533.000000</td>\n",
+       "      <td>127122.000000</td>\n",
+       "      <td>1055.00000</td>\n",
+       "      <td>-76.400000</td>\n",
+       "      <td>-515.900000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>-75.700000</td>\n",
+       "      <td>24.000000</td>\n",
+       "      <td>2.155604e+06</td>\n",
+       "      <td>5.902806e+12</td>\n",
+       "      <td>82.000000</td>\n",
+       "      <td>82.000000</td>\n",
+       "      <td>16.000000</td>\n",
+       "      <td>16.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>29.000000</td>\n",
+       "      <td>500000.000000</td>\n",
+       "      <td>3.719645e+07</td>\n",
+       "      <td>2.002711e+06</td>\n",
+       "      <td>78477.000000</td>\n",
+       "      <td>13321.000000</td>\n",
+       "      <td>176026.000000</td>\n",
+       "      <td>2002.00000</td>\n",
+       "      <td>-72.900000</td>\n",
+       "      <td>-342.900000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>-71.900000</td>\n",
+       "      <td>33.000000</td>\n",
+       "      <td>2.226340e+06</td>\n",
+       "      <td>5.902852e+12</td>\n",
+       "      <td>82.000000</td>\n",
+       "      <td>82.000000</td>\n",
+       "      <td>25.000000</td>\n",
+       "      <td>25.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>44.000000</td>\n",
+       "      <td>749000.000000</td>\n",
+       "      <td>3.815973e+07</td>\n",
+       "      <td>2.965991e+06</td>\n",
+       "      <td>139431.000000</td>\n",
+       "      <td>22217.000000</td>\n",
+       "      <td>265127.000000</td>\n",
+       "      <td>2965.00000</td>\n",
+       "      <td>-68.400000</td>\n",
+       "      <td>-225.700000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>-38.900000</td>\n",
+       "      <td>43.000000</td>\n",
+       "      <td>2.304122e+06</td>\n",
+       "      <td>5.902975e+12</td>\n",
+       "      <td>630.000000</td>\n",
+       "      <td>630.000000</td>\n",
+       "      <td>41.000000</td>\n",
+       "      <td>41.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>59.000000</td>\n",
+       "      <td>999000.000000</td>\n",
+       "      <td>3.912875e+07</td>\n",
+       "      <td>3.935013e+06</td>\n",
+       "      <td>435771.000000</td>\n",
+       "      <td>35350.000000</td>\n",
+       "      <td>435771.000000</td>\n",
+       "      <td>3935.00000</td>\n",
+       "      <td>-38.900000</td>\n",
+       "      <td>-52.300000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>8 rows × 24 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           PROXIMITY    MEASUREMENT        ITEMID           EAN  \\\n",
+       "count  502689.000000  502689.000000  5.026890e+05  5.026890e+05   \n",
+       "mean      -75.406017      23.412382  2.169862e+06  5.902835e+12   \n",
+       "std         5.698062      12.175284  7.798483e+04  7.380986e+07   \n",
+       "min      -110.000000       1.000000  2.028742e+06  5.902691e+12   \n",
+       "25%       -79.900000      13.000000  2.113407e+06  5.902805e+12   \n",
+       "50%       -75.700000      24.000000  2.155604e+06  5.902806e+12   \n",
+       "75%       -71.900000      33.000000  2.226340e+06  5.902852e+12   \n",
+       "max       -38.900000      43.000000  2.304122e+06  5.902975e+12   \n",
+       "\n",
+       "          SubclassID   SubclassID.1        ClassID      ClassID.1  \\\n",
+       "count  502689.000000  502689.000000  502689.000000  502689.000000   \n",
+       "mean       83.920704      83.920704      18.231477      18.231477   \n",
+       "std       142.489244     142.489244       8.844056       8.844056   \n",
+       "min        10.000000      10.000000      10.000000      10.000000   \n",
+       "25%        11.000000      11.000000      10.000000      10.000000   \n",
+       "50%        82.000000      82.000000      16.000000      16.000000   \n",
+       "75%        82.000000      82.000000      25.000000      25.000000   \n",
+       "max       630.000000     630.000000      41.000000      41.000000   \n",
+       "\n",
+       "        DepartmentID  DepartmentID.1  ...            SEC       MICROSEC  \\\n",
+       "count  502689.000000   502689.000000  ...  502689.000000  502689.000000   \n",
+       "mean        2.609574        2.609574  ...      29.193547  499773.110213   \n",
+       "std         0.937828        0.937828  ...      17.223297  288469.414710   \n",
+       "min         2.000000        2.000000  ...       0.000000       0.000000   \n",
+       "25%         2.000000        2.000000  ...      14.000000  250000.000000   \n",
+       "50%         2.000000        2.000000  ...      29.000000  500000.000000   \n",
+       "75%         3.000000        3.000000  ...      44.000000  749000.000000   \n",
+       "max         6.000000        6.000000  ...      59.000000  999000.000000   \n",
+       "\n",
+       "            MILISEC       TIME_MS  TIME_PER_MEASUREMENT_MS  NUMBER_OF_SIGNALS  \\\n",
+       "count  5.026890e+05  5.026890e+05            502689.000000      502689.000000   \n",
+       "mean   3.721192e+07  2.018186e+06            100132.210719       15383.906986   \n",
+       "std    1.121487e+06  1.121487e+06             81859.831696        8217.121271   \n",
+       "min    3.519374e+07  0.000000e+00                 0.000000        4597.000000   \n",
+       "25%    3.624898e+07  1.055248e+06             38108.000000        8533.000000   \n",
+       "50%    3.719645e+07  2.002711e+06             78477.000000       13321.000000   \n",
+       "75%    3.815973e+07  2.965991e+06            139431.000000       22217.000000   \n",
+       "max    3.912875e+07  3.935013e+06            435771.000000       35350.000000   \n",
+       "\n",
+       "       LENGTH_OF_MEASUREMENT      TIME_KMS  MAX_PROXIMITY_KMS  \\\n",
+       "count          502689.000000  502689.00000      502689.000000   \n",
+       "mean           199835.398777    2017.68607         -72.497318   \n",
+       "std            101049.072703    1121.48684           5.893956   \n",
+       "min             53538.000000       0.00000        -100.500000   \n",
+       "25%            127122.000000    1055.00000         -76.400000   \n",
+       "50%            176026.000000    2002.00000         -72.900000   \n",
+       "75%            265127.000000    2965.00000         -68.400000   \n",
+       "max            435771.000000    3935.00000         -38.900000   \n",
+       "\n",
+       "       SUM_PROXIMITY_KMS  \n",
+       "count      502689.000000  \n",
+       "mean         -398.108291  \n",
+       "std           262.167663  \n",
+       "min         -2629.400000  \n",
+       "25%          -515.900000  \n",
+       "50%          -342.900000  \n",
+       "75%          -225.700000  \n",
+       "max           -52.300000  \n",
+       "\n",
+       "[8 rows x 24 columns]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'whiskers': [<matplotlib.lines.Line2D at 0x215c384d970>,\n",
+       "  <matplotlib.lines.Line2D at 0x215c384dcd0>],\n",
+       " 'caps': [<matplotlib.lines.Line2D at 0x215c4d13070>,\n",
+       "  <matplotlib.lines.Line2D at 0x215c4d133d0>],\n",
+       " 'boxes': [<matplotlib.lines.Line2D at 0x215c384d610>],\n",
+       " 'medians': [<matplotlib.lines.Line2D at 0x215c4d13730>],\n",
+       " 'fliers': [<matplotlib.lines.Line2D at 0x215c4d13a90>],\n",
+       " 'means': []}"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAD6CAYAAAC/KwBlAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAXaklEQVR4nO3dYYyd1X3n8e+vONqibU1tGJDjIWsUnNVCpXXFlYOUN9lQ2VZa1UQCxSu1+IUlR4hIqbZSFfrGBLRSWCWlQiuQyIIwtBuwaCusKJR1oFW0EjGMUxowBHkkaHCw8FTjEvIGyea/L+6ZzZ3pcHxnjMdh8v1Ij+4z/+ecM899Yf/mec5z70lVIUnSB/m1C30CkqRfbgaFJKnLoJAkdRkUkqQug0KS1GVQSJK6xg6KJBcl+cck32k/35Hkp0lebNvnR9renmQ6yWtJto/Ur0vyUjt2b5K0+r9L8nirH06yaaTP7iTH2rb7Q3nXkqSxrVlC268ArwJrR2r3VNU3RhsluQbYBVwLfBz4XpJPVdUZ4H5gL/AD4LvADuApYA9wqqquTrILuBv4YpL1wD5gABRwJMnBqjr1QSd52WWX1aZNm5bwtiRJR44c+Zeqmljs2FhBkWQS+D3gvwP/7SzNdwKPVdV7wOtJpoGtSd4A1lbVc23MR4AbGQbFTuCO1v8J4H+2q43twKGqmm19DjEMl29/0C/ftGkTU1NT47wtSVKT5J8/6Ni4t57+AvhT4P0F9S8n+VGSh5Ksa7WNwJsjbY632sa2v7A+r09VnQbeAS7tjCVJWiFnDYokvw+crKojCw7dD3wS2AKcAL4512WRYapTX26f0XPcm2QqydTMzMwiXSRJyzXOFcVngD9ot44eAz6X5C+r6u2qOlNV7wPfAra29seBK0f6TwJvtfrkIvV5fZKsAS4BZjtjzVNVD1TVoKoGExOL3mKTJC3TWYOiqm6vqsmq2sRwkvrZqvrDJBtGmn0BeLntHwR2tSeZrgI2A89X1Qng3STXt/mHW4AnR/rMPdF0U/sdBTwNbEuyrt3a2tZqkqQVspSnnhb6H0m2MLwV9AbwJYCqOprkAPAKcBq4rT3xBHAr8DBwMcNJ7Kda/UHg0TbxPcswkKiq2SR3AS+0dnfOTWxLklZGVtvXjA8Gg/KpJ0lamiRHqmqw2DE/mS1J6jIoJEld5zJHIf1Ka99Ac96tttvD+ugxKKRlWs5/4En8j18fOd56kiR1GRSSpC6DQpLUZVBIkroMCklSl0EhSeoyKCRJXQaFJKnLoJAkdRkUkqQug0KS1GVQSJK6DApJUtfYQZHkoiT/mOQ77ef1SQ4lOdZe1420vT3JdJLXkmwfqV+X5KV27N62djZtfe3HW/1wkk0jfXa333EsyW4kSStqKVcUXwFeHfn5q8AzVbUZeKb9TJJrGK55fS2wA7gvyUWtz/3AXmBz23a0+h7gVFVdDdwD3N3GWg/sAz4NbAX2jQaSJOn8GysokkwCvwf8r5HyTmB/298P3DhSf6yq3quq14FpYGuSDcDaqnquhl/I/8iCPnNjPQHc0K42tgOHqmq2qk4Bh/hFuEiSVsC4VxR/Afwp8P5I7YqqOgHQXi9v9Y3AmyPtjrfaxra/sD6vT1WdBt4BLu2MJUlaIWcNiiS/D5ysqiNjjrnY+pDVqS+3z+g57k0ylWRqZmZmzNOUJI1jnCuKzwB/kOQN4DHgc0n+Eni73U6ivZ5s7Y8DV470nwTeavXJRerz+iRZA1wCzHbGmqeqHqiqQVUNJiYmxnhLkqRxnTUoqur2qpqsqk0MJ6mfrao/BA4Cc08h7QaebPsHgV3tSaarGE5aP99uT72b5Po2/3DLgj5zY93UfkcBTwPbkqxrk9jbWk2StELWnEPfrwMHkuwBfgLcDFBVR5McAF4BTgO3VdWZ1udW4GHgYuCptgE8CDyaZJrhlcSuNtZskruAF1q7O6tq9hzOWZK0RBn+4b5
6DAaDmpqautCnIS0qCavt35xWhyRHqmqw2DE/mS1J6jIoJEldBoUkqcugkCR1GRSSpC6DQpLUZVBIkroMCklSl0EhSeoyKCRJXQaFJKnLoJAkdRkUkqQug0KS1GVQSJK6DApJUpdBIUnqOmtQJPn1JM8n+ackR5N8rdXvSPLTJC+27fMjfW5PMp3ktSTbR+rXJXmpHbu3rZ1NW1/78VY/nGTTSJ/dSY61bTeSpBU1zprZ7wGfq6qfJ/kY8H+TzK11fU9VfWO0cZJrGK55fS3wceB7ST7V1s2+H9gL/AD4LrCD4brZe4BTVXV1kl3A3cAXk6wH9gEDoIAjSQ5W1alze9uSpHGd9Yqihn7efvxY23qL/u4EHquq96rqdWAa2JpkA7C2qp6r4aLBjwA3jvTZ3/afAG5oVxvbgUNVNdvC4RDDcJEkrZCx5iiSXJTkReAkw/+4D7dDX07yoyQPJVnXahuBN0e6H2+1jW1/YX1en6o6DbwDXNoZS5K0QsYKiqo6U1VbgEmGVwe/zfA20ieBLcAJ4JuteRYbolNfbp//L8neJFNJpmZmZjrvRJK0VEt66qmq/hX4B2BHVb3dAuR94FvA1tbsOHDlSLdJ4K1Wn1ykPq9PkjXAJcBsZ6yF5/VAVQ2qajAxMbGUtyRJOotxnnqaSPJbbf9i4HeBH7c5hzlfAF5u+weBXe1JpquAzcDzVXUCeDfJ9W3+4RbgyZE+c0803QQ82+Yxnga2JVnXbm1tazVJ0goZ56mnDcD+JBcxDJYDVfWdJI8m2cLwVtAbwJcAqupokgPAK8Bp4Lb2xBPArcDDwMUMn3aae3rqQeDRJNMMryR2tbFmk9wFvNDa3VlVs8t/u5KkpcrwD/fVYzAY1NTU1IU+DWlRSVht/+a0OiQ5UlWDxY75yWxJUpdBIUnqMigkSV0GhSSpy6CQJHUZFJKkLoNCktRlUEiSugwKSVKXQSFJ6jIoJEldBoUkqcugkCR1GRSSpC6DQpLUZVBIkroMCklS1zhrZv96kueT/FOSo0m+1urrkxxKcqy9rhvpc3uS6SSvJdk+Ur8uyUvt2L1t7Wza+tqPt/rhJJtG+uxuv+NYkt1IklbUOFcU7wGfq6r/DGwBdiS5Hvgq8ExVbQaeaT+T5BqGa15fC+wA7mvrbQPcD+wFNrdtR6vvAU5V1dXAPcDdbaz1wD7g08BWYN9oIEmSzr+zBkUN/bz9+LG2FbAT2N/q+4Eb2/5O4LGqeq+qXgemga1JNgBrq+q5Gi4a/MiCPnNjPQHc0K42tgOHqmq2qk4Bh/hFuEiSVsBYcxRJLkryInCS4X/ch4ErquoEQHu9vDXfCLw50v14q21s+wvr8/pU1WngHeDSzlgLz29vkqkkUzMzM+O8JUnSmMYKiqo6U1VbgEmGVwe/3WmexYbo1JfbZ/T8HqiqQVUNJiYmOqcmSVqqJT31VFX/CvwDw9s/b7fbSbTXk63ZceDKkW6TwFutPrlIfV6fJGuAS4DZzliSpBUyzlNPE0l+q+1fDPwu8GPgIDD3FNJu4Mm2fxDY1Z5kuorhpPXz7fbUu0mub/MPtyzoMzfWTcCzbR7jaWBbknVtEntbq0mSVsiaMdpsAPa3J5d+DThQVd9J8hxwIMke4CfAzQBVdTTJAeAV4DRwW1WdaWPdCjwMXAw81TaAB4FHk0wzvJLY1caaTXIX8EJrd2dVzZ7LG5YkLU2Gf7ivHoPBoKampi70aUiLSsJq+zen1SHJkaoaLHbMT2ZLkroMCklSl0EhSeoyKCRJXQaFJKnLoJAkdRkUkqQug0KS1GVQSJK6DApJUpdBIUnqMigkSV3jfHus9Cth/fr1nDp16rz/nuG37J8/69atY3bWL1nWh8egkJpTp06tim92Pd9BpF893nqSJHUZFJKkLoNCktQ1zprZVyb5+ySvJjma5CutfkeSnyZ5sW2fH+lze5LpJK8l2T5Svy7JS+3YvW3tbNr62o+3+uEkm0b67E5yrG27kSS
tqHEms08Df1JVP0zym8CRJIfasXuq6hujjZNcw3DN62uBjwPfS/Kptm72/cBe4AfAd4EdDNfN3gOcqqqrk+wC7ga+mGQ9sA8YANV+98GqOv+PpkiSgDGuKKrqRFX9sO2/C7wKbOx02Qk8VlXvVdXrwDSwNckGYG1VPVfDR0seAW4c6bO/7T8B3NCuNrYDh6pqtoXDIYbhIklaIUuao2i3hH4HONxKX07yoyQPJVnXahuBN0e6HW+1jW1/YX1en6o6DbwDXNoZa+F57U0ylWRqZmZmKW9JknQWYwdFkt8A/hr446r6GcPbSJ8EtgAngG/ONV2ke3Xqy+3zi0LVA1U1qKrBxMRE721IkpZorKBI8jGGIfFXVfU3AFX1dlWdqar3gW8BW1vz48CVI90ngbdafXKR+rw+SdYAlwCznbEkSStknKeeAjwIvFpVfz5S3zDS7AvAy23/ILCrPcl0FbAZeL6qTgDvJrm+jXkL8ORIn7knmm4Cnm3zGE8D25Ksa7e2trWaJGmFjPPU02eAPwJeSvJiq/0Z8F+TbGF4K+gN4EsAVXU0yQHgFYZPTN3WnngCuBV4GLiY4dNOT7X6g8CjSaYZXknsamPNJrkLeKG1u7Oq/BIbSVpBWQ3fbTNqMBjU1NTUhT4NfQQlWTXf9bQa3odWVpIjVTVY7JifzJYkdRkUkqQug0KS1GVQSJK6DApJUpdBIUnqMigkSV0GhSSpy6CQJHUZFJKkLoNCktRlUEiSugwKSVKXQSFJ6jIoJEldBoUkqWucpVCvTPL3SV5NcjTJV1p9fZJDSY6113UjfW5PMp3ktSTbR+rXJXmpHbu3LYlKWzb18VY/nGTTSJ/d7XccS7IbSdKKGueK4jTwJ1X1n4DrgduSXAN8FXimqjYDz7Sfacd2AdcCO4D7klzUxrof2MtwHe3N7TjAHuBUVV0N3APc3cZaD+wDPg1sBfaNBpIk6fw7a1BU1Ymq+mHbfxd4FdgI7AT2t2b7gRvb/k7gsap6r6peB6aBrUk2AGur6rkartP4yII+c2M9AdzQrja2A4eqaraqTgGH+EW4SJJWwJLmKNotod8BDgNXVNUJGIYJcHlrthF4c6Tb8Vbb2PYX1uf1qarTwDvApZ2xJEkrZOygSPIbwF8Df1xVP+s1XaRWnfpy+4ye294kU0mmZmZmOqcmSVqqsYIiyccYhsRfVdXftPLb7XYS7fVkqx8HrhzpPgm81eqTi9Tn9UmyBrgEmO2MNU9VPVBVg6oaTExMjPOWJEljGueppwAPAq9W1Z+PHDoIzD2FtBt4cqS+qz3JdBXDSevn2+2pd5Nc38a8ZUGfubFuAp5t8xhPA9uSrGuT2NtaTZK0QtaM0eYzwB8BLyV5sdX+DPg6cCDJHuAnwM0AVXU0yQHgFYZPTN1WVWdav1uBh4GLgafaBsMgejTJNMMriV1trNkkdwEvtHZ3VtXs8t6qJGk5MvzDffUYDAY1NTV1oU9DH0FJWA3/HlbL+9DKSnKkqgaLHfOT2ZKkLoNCktRlUEiSugwKSVKXQSFJ6jIoJEldBoUkqcugkCR1jfPJbOlXQu1bC3dccqFP45zVvrUX+hS0yhgUUpOv/WxVfKI5CXXHhT4LrSbeepIkdRkUkqQug0KS1GVQSJK6DApJUpdBIUnqMigkSV3jrJn9UJKTSV4eqd2R5KdJXmzb50eO3Z5kOslrSbaP1K9L8lI7dm9bN5u2tvbjrX44yaaRPruTHGvb3JrakqQVNM4VxcPAjkXq91TVlrZ9FyDJNQzXu7629bkvyUWt/f3AXmBz2+bG3AOcqqqrgXuAu9tY64F9wKeBrcC+JOuW/A4lSefkrEFRVd8HZsccbyfwWFW9V1WvA9PA1iQbgLVV9VwNP/r6CHDjSJ/9bf8J4IZ2tbEdOFRVs1V1CjjE4oElSTqPzmWO4stJftRuTc39pb8ReHOkzfFW29j2F9bn9amq08A7wKWdsSRJK2i5QXE/8ElgC3AC+GarZ5G
21akvt888SfYmmUoyNTMz0zltSdJSLSsoqurtqjpTVe8D32I4hwDDv/qvHGk6CbzV6pOL1Of1SbIGuIThra4PGmux83mgqgZVNZiYmFjOW5IkfYBlBUWbc5jzBWDuiaiDwK72JNNVDCetn6+qE8C7Sa5v8w+3AE+O9Jl7oukm4Nk2j/E0sC3JunZra1urSZJW0Fm/ZjzJt4HPApclOc7wSaTPJtnC8FbQG8CXAKrqaJIDwCvAaeC2qjrThrqV4RNUFwNPtQ3gQeDRJNMMryR2tbFmk9wFvNDa3VlV406qS5I+JFkN378/ajAY1NTU1IU+DX0EJVk961GsgvehlZXkSFUNFjvmJ7MlSV0GhSSpy6CQJHUZFJKkLoNCktRlUEiSugwKSVLXWT9wJ/0qacukfKStW+e38evDZVBIzUp8SM0Pw+mjyFtPkqQug0KS1GVQSJK6DApJUpdBIUnqMigkSV0GhSSpy6CQJHWdNSiSPJTkZJKXR2rrkxxKcqy9rhs5dnuS6SSvJdk+Ur8uyUvt2L1t7Wza+tqPt/rhJJtG+uxuv+NYkrl1tSVJK2icK4qHgR0Lal8FnqmqzcAz7WeSXMNwzetrW5/7klzU+twP7AU2t21uzD3Aqaq6GrgHuLuNtZ7h+tyfBrYC+0YDSZK0Ms4aFFX1fWB2QXknsL/t7wduHKk/VlXvVdXrwDSwNckGYG1VPVfD7y94ZEGfubGeAG5oVxvbgUNVNVtVp4BD/NvAkiSdZ8udo7iiqk4AtNfLW30j8OZIu+OttrHtL6zP61NVp4F3gEs7Y0mSVtCHPZm92FdvVqe+3D7zf2myN8lUkqmZmZmxTlSSNJ7lBsXb7XYS7fVkqx8HrhxpNwm81eqTi9Tn9UmyBriE4a2uDxrr36iqB6pqUFWDiYmJZb4lSdJilhsUB4G5p5B2A0+O1He1J5muYjhp/Xy7PfVukuvb/MMtC/rMjXUT8Gybx3ga2JZkXZvE3tZqkqQVdNb1KJJ8G/gscFmS4wyfRPo6cCDJHuAnwM0AVXU0yQHgFeA0cFtVnWlD3crwCaqLgafaBvAg8GiSaYZXErvaWLNJ7gJeaO3urKqFk+qSpPMsq20RlcFgUFNTUxf6NKRFuXCRflklOVJVg8WO+clsSVKXQSFJ6jIoJEldBoUkqcugkCR1GRSSpC6DQpLUZVBIkroMCklSl0EhSeoyKCRJXQaFJKnLoJAkdRkUkqQug0KS1GVQSJK6DApJUtc5BUWSN5K8lOTFJFOttj7JoSTH2uu6kfa3J5lO8lqS7SP169o400nubetq09befrzVDyfZdC7nK0laug/jiuK/VNWWkSX0vgo8U1WbgWfazyS5huF62NcCO4D7klzU+twP7AU2t21Hq+8BTlXV1cA9wN0fwvlKkpbgfNx62gnsb/v7gRtH6o9V1XtV9TowDWxNsgFYW1XP1XAx4UcW9Jkb6wnghrmrDelCS7LkbTn9pAvtXIOigP+T5EiSva12RVWdAGivl7f6RuDNkb7HW21j219Yn9enqk4D7wCXnuM5Sx+KqlqRTbrQ1pxj/89U1VtJLgcOJflxp+1ifxpVp97rM3/gYUjtBfjEJz7RP2NJ0pKc0xVFVb3VXk8CfwtsBd5ut5Norydb8+PAlSPdJ4G3Wn1ykfq8PknWAJcAs4ucxwNVNaiqwcTExLm8JUnSAssOiiT/Pslvzu0D24CXgYPA7tZsN/Bk2z8I7GpPMl3FcNL6+XZ76t0k17f5h1sW9Jkb6ybg2fJaXJJW1LnceroC+Ns22bYG+N9V9XdJXgAOJNkD/AS4GaCqjiY5ALwCnAZuq6ozbaxbgYeBi4Gn2gbwIPBokmmGVxK7zuF8JUnLkNX2B/pgMKipqakLfRqS9JGS5MjIxxzm8ZPZkqQug0KS1GVQSJK6Vt0cRZIZ4J8v9HlIH+Ay4F8u9ElIi/gPVbXo5wtWXVBIv8ySTH3QhKH0y8pbT5KkLoNCktRlUEgr64ELfQL
SUjlHIUnq8opCktRlUEgrIMlDSU4meflCn4u0VAaFtDIe5hdL/EofKQaFtAKq6vssspaK9FFgUEiSugwKSVKXQSFJ6jIoJEldBoW0ApJ8G3gO+I9JjrelgqWPBD+ZLUnq8opCktRlUEiSugwKSVKXQSFJ6jIoJEldBoUkqcugkCR1GRSSpK7/B9G71F0NbH01AAAAAElFTkSuQmCC\n",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "plt.boxplot(df['LENGTH_OF_MEASUREMENT'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Remove the longest measurements (LENGTH_OF_MEASUREMENT > 200000)\n",
+    "df.drop(df.loc[df['LENGTH_OF_MEASUREMENT'] > 200000].index, inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>PROXIMITY</th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>ITEMID</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>SubclassID</th>\n",
+       "      <th>SubclassID.1</th>\n",
+       "      <th>ClassID</th>\n",
+       "      <th>ClassID.1</th>\n",
+       "      <th>DepartmentID</th>\n",
+       "      <th>DepartmentID.1</th>\n",
+       "      <th>...</th>\n",
+       "      <th>SEC</th>\n",
+       "      <th>MICROSEC</th>\n",
+       "      <th>MILISEC</th>\n",
+       "      <th>TIME_MS</th>\n",
+       "      <th>TIME_PER_MEASUREMENT_MS</th>\n",
+       "      <th>NUMBER_OF_SIGNALS</th>\n",
+       "      <th>LENGTH_OF_MEASUREMENT</th>\n",
+       "      <th>TIME_KMS</th>\n",
+       "      <th>MAX_PROXIMITY_KMS</th>\n",
+       "      <th>SUM_PROXIMITY_KMS</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>317762.000000</td>\n",
+       "      <td>317762.000000</td>\n",
+       "      <td>3.177620e+05</td>\n",
+       "      <td>3.177620e+05</td>\n",
+       "      <td>317762.000000</td>\n",
+       "      <td>317762.000000</td>\n",
+       "      <td>317762.000000</td>\n",
+       "      <td>317762.000000</td>\n",
+       "      <td>317762.000000</td>\n",
+       "      <td>317762.000000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>317762.000000</td>\n",
+       "      <td>317762.000000</td>\n",
+       "      <td>3.177620e+05</td>\n",
+       "      <td>3.177620e+05</td>\n",
+       "      <td>317762.000000</td>\n",
+       "      <td>317762.000000</td>\n",
+       "      <td>317762.000000</td>\n",
+       "      <td>317762.000000</td>\n",
+       "      <td>317762.000000</td>\n",
+       "      <td>317762.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>-75.526452</td>\n",
+       "      <td>22.776858</td>\n",
+       "      <td>2.170711e+06</td>\n",
+       "      <td>5.902836e+12</td>\n",
+       "      <td>82.639686</td>\n",
+       "      <td>82.639686</td>\n",
+       "      <td>18.142610</td>\n",
+       "      <td>18.142610</td>\n",
+       "      <td>2.603486</td>\n",
+       "      <td>2.603486</td>\n",
+       "      <td>...</td>\n",
+       "      <td>29.378906</td>\n",
+       "      <td>499539.922961</td>\n",
+       "      <td>3.717748e+07</td>\n",
+       "      <td>1.983745e+06</td>\n",
+       "      <td>66756.283605</td>\n",
+       "      <td>10103.277673</td>\n",
+       "      <td>133222.336198</td>\n",
+       "      <td>1983.244840</td>\n",
+       "      <td>-72.536124</td>\n",
+       "      <td>-404.190290</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>5.583369</td>\n",
+       "      <td>12.801744</td>\n",
+       "      <td>7.820521e+04</td>\n",
+       "      <td>7.406653e+07</td>\n",
+       "      <td>141.056329</td>\n",
+       "      <td>141.056329</td>\n",
+       "      <td>8.830015</td>\n",
+       "      <td>8.830015</td>\n",
+       "      <td>0.932835</td>\n",
+       "      <td>0.932835</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.290876</td>\n",
+       "      <td>288325.166786</td>\n",
+       "      <td>1.183041e+06</td>\n",
+       "      <td>1.183041e+06</td>\n",
+       "      <td>44987.211264</td>\n",
+       "      <td>3113.779828</td>\n",
+       "      <td>39296.957914</td>\n",
+       "      <td>1183.041729</td>\n",
+       "      <td>5.831107</td>\n",
+       "      <td>267.914649</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>-110.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>2.028742e+06</td>\n",
+       "      <td>5.902691e+12</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>3.519374e+07</td>\n",
+       "      <td>0.000000e+00</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>4597.000000</td>\n",
+       "      <td>53538.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>-98.000000</td>\n",
+       "      <td>-2629.400000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>-79.900000</td>\n",
+       "      <td>12.000000</td>\n",
+       "      <td>2.113407e+06</td>\n",
+       "      <td>5.902805e+12</td>\n",
+       "      <td>11.000000</td>\n",
+       "      <td>11.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.000000</td>\n",
+       "      <td>249000.000000</td>\n",
+       "      <td>3.617897e+07</td>\n",
+       "      <td>9.852318e+05</td>\n",
+       "      <td>29587.000000</td>\n",
+       "      <td>8027.000000</td>\n",
+       "      <td>101041.000000</td>\n",
+       "      <td>985.000000</td>\n",
+       "      <td>-76.400000</td>\n",
+       "      <td>-527.300000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>-75.700000</td>\n",
+       "      <td>24.000000</td>\n",
+       "      <td>2.155605e+06</td>\n",
+       "      <td>5.902806e+12</td>\n",
+       "      <td>82.000000</td>\n",
+       "      <td>82.000000</td>\n",
+       "      <td>16.000000</td>\n",
+       "      <td>16.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>29.000000</td>\n",
+       "      <td>500000.000000</td>\n",
+       "      <td>3.714444e+07</td>\n",
+       "      <td>1.950704e+06</td>\n",
+       "      <td>59599.000000</td>\n",
+       "      <td>9887.000000</td>\n",
+       "      <td>138579.000000</td>\n",
+       "      <td>1950.000000</td>\n",
+       "      <td>-72.900000</td>\n",
+       "      <td>-350.700000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>-71.900000</td>\n",
+       "      <td>34.000000</td>\n",
+       "      <td>2.226340e+06</td>\n",
+       "      <td>5.902852e+12</td>\n",
+       "      <td>82.000000</td>\n",
+       "      <td>82.000000</td>\n",
+       "      <td>25.000000</td>\n",
+       "      <td>25.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>44.000000</td>\n",
+       "      <td>748000.000000</td>\n",
+       "      <td>3.834786e+07</td>\n",
+       "      <td>3.154128e+06</td>\n",
+       "      <td>97761.750000</td>\n",
+       "      <td>12660.000000</td>\n",
+       "      <td>168403.000000</td>\n",
+       "      <td>3154.000000</td>\n",
+       "      <td>-68.400000</td>\n",
+       "      <td>-226.100000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>-41.000000</td>\n",
+       "      <td>42.000000</td>\n",
+       "      <td>2.304122e+06</td>\n",
+       "      <td>5.902975e+12</td>\n",
+       "      <td>630.000000</td>\n",
+       "      <td>630.000000</td>\n",
+       "      <td>41.000000</td>\n",
+       "      <td>41.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>59.000000</td>\n",
+       "      <td>999000.000000</td>\n",
+       "      <td>3.908956e+07</td>\n",
+       "      <td>3.895821e+06</td>\n",
+       "      <td>189705.000000</td>\n",
+       "      <td>15444.000000</td>\n",
+       "      <td>189705.000000</td>\n",
+       "      <td>3895.000000</td>\n",
+       "      <td>-41.000000</td>\n",
+       "      <td>-52.300000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>8 rows × 24 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           PROXIMITY    MEASUREMENT        ITEMID           EAN  \\\n",
+       "count  317762.000000  317762.000000  3.177620e+05  3.177620e+05   \n",
+       "mean      -75.526452      22.776858  2.170711e+06  5.902836e+12   \n",
+       "std         5.583369      12.801744  7.820521e+04  7.406653e+07   \n",
+       "min      -110.000000       1.000000  2.028742e+06  5.902691e+12   \n",
+       "25%       -79.900000      12.000000  2.113407e+06  5.902805e+12   \n",
+       "50%       -75.700000      24.000000  2.155605e+06  5.902806e+12   \n",
+       "75%       -71.900000      34.000000  2.226340e+06  5.902852e+12   \n",
+       "max       -41.000000      42.000000  2.304122e+06  5.902975e+12   \n",
+       "\n",
+       "          SubclassID   SubclassID.1        ClassID      ClassID.1  \\\n",
+       "count  317762.000000  317762.000000  317762.000000  317762.000000   \n",
+       "mean       82.639686      82.639686      18.142610      18.142610   \n",
+       "std       141.056329     141.056329       8.830015       8.830015   \n",
+       "min        10.000000      10.000000      10.000000      10.000000   \n",
+       "25%        11.000000      11.000000      10.000000      10.000000   \n",
+       "50%        82.000000      82.000000      16.000000      16.000000   \n",
+       "75%        82.000000      82.000000      25.000000      25.000000   \n",
+       "max       630.000000     630.000000      41.000000      41.000000   \n",
+       "\n",
+       "        DepartmentID  DepartmentID.1  ...            SEC       MICROSEC  \\\n",
+       "count  317762.000000   317762.000000  ...  317762.000000  317762.000000   \n",
+       "mean        2.603486        2.603486  ...      29.378906  499539.922961   \n",
+       "std         0.932835        0.932835  ...      17.290876  288325.166786   \n",
+       "min         2.000000        2.000000  ...       0.000000       0.000000   \n",
+       "25%         2.000000        2.000000  ...      14.000000  249000.000000   \n",
+       "50%         2.000000        2.000000  ...      29.000000  500000.000000   \n",
+       "75%         3.000000        3.000000  ...      44.000000  748000.000000   \n",
+       "max         6.000000        6.000000  ...      59.000000  999000.000000   \n",
+       "\n",
+       "            MILISEC       TIME_MS  TIME_PER_MEASUREMENT_MS  NUMBER_OF_SIGNALS  \\\n",
+       "count  3.177620e+05  3.177620e+05            317762.000000      317762.000000   \n",
+       "mean   3.717748e+07  1.983745e+06             66756.283605       10103.277673   \n",
+       "std    1.183041e+06  1.183041e+06             44987.211264        3113.779828   \n",
+       "min    3.519374e+07  0.000000e+00                 0.000000        4597.000000   \n",
+       "25%    3.617897e+07  9.852318e+05             29587.000000        8027.000000   \n",
+       "50%    3.714444e+07  1.950704e+06             59599.000000        9887.000000   \n",
+       "75%    3.834786e+07  3.154128e+06             97761.750000       12660.000000   \n",
+       "max    3.908956e+07  3.895821e+06            189705.000000       15444.000000   \n",
+       "\n",
+       "       LENGTH_OF_MEASUREMENT       TIME_KMS  MAX_PROXIMITY_KMS  \\\n",
+       "count          317762.000000  317762.000000      317762.000000   \n",
+       "mean           133222.336198    1983.244840         -72.536124   \n",
+       "std             39296.957914    1183.041729           5.831107   \n",
+       "min             53538.000000       0.000000         -98.000000   \n",
+       "25%            101041.000000     985.000000         -76.400000   \n",
+       "50%            138579.000000    1950.000000         -72.900000   \n",
+       "75%            168403.000000    3154.000000         -68.400000   \n",
+       "max            189705.000000    3895.000000         -41.000000   \n",
+       "\n",
+       "       SUM_PROXIMITY_KMS  \n",
+       "count      317762.000000  \n",
+       "mean         -404.190290  \n",
+       "std           267.914649  \n",
+       "min         -2629.400000  \n",
+       "25%          -527.300000  \n",
+       "50%          -350.700000  \n",
+       "75%          -226.100000  \n",
+       "max           -52.300000  \n",
+       "\n",
+       "[8 rows x 24 columns]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ 1,  2,  3,  4,  5,  7,  6, 10, 11, 12, 13, 14, 16, 18, 17, 19, 20,\n",
+       "       21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 35, 36, 38, 37, 40, 39, 41,\n",
+       "       42], dtype=int64)"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.MEASUREMENT.unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 ,  RK485-99X\n",
+      "2 ,  RK485-99X\n",
+      "3 ,  RK485-99X\n",
+      "4 ,  RK485-99X\n",
+      "5 ,  RK485-99X\n",
+      "7 ,  RK485-99X\n",
+      "6 ,  RK485-99X\n",
+      "10 ,  RK485-99X\n",
+      "11 ,  RK485-99X\n",
+      "12 ,  RK485-99X\n",
+      "13 ,  RK485-99X\n",
+      "14 ,  RK485-99X\n",
+      "16 ,  RK485-99X\n",
+      "18 ,  RK485-99X\n",
+      "17 ,  RK485-99X\n",
+      "19 ,  RK485-99X\n",
+      "20 ,  RK485-99X\n",
+      "21 ,  RK485-99X\n",
+      "23 ,  RK485-99X\n",
+      "24 ,  RK485-99X\n",
+      "26 ,  RK485-99X\n",
+      "27 ,  RK485-99X\n",
+      "28 ,  RK485-99X\n",
+      "29 ,  RK485-99X\n",
+      "31 ,  RK485-99X\n",
+      "32 ,  RK485-99X\n",
+      "34 ,  RK485-99X\n",
+      "35 ,  RK485-99X\n",
+      "36 ,  RK485-99X\n",
+      "38 ,  RK485-99X\n",
+      "37 ,  RK485-99X\n",
+      "40 ,  RK485-99X\n",
+      "39 ,  RK485-99X\n",
+      "41 ,  RK485-99X\n",
+      "42 ,  RK485-99X\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Print every (measurement, StyleColor) pair that was read through exactly one EPC\n",
+    "for measurement in df.MEASUREMENT.unique():\n",
+    "    in_measurement = df[df['MEASUREMENT'] == measurement]\n",
+    "    for style_color in in_measurement.StyleColor.unique():\n",
+    "        epcs = in_measurement.loc[in_measurement['StyleColor'] == style_color, 'EPC']\n",
+    "        if epcs.unique().size == 1:\n",
+    "            print(measurement, ', ', style_color)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# RK485-99X shows up with a single EPC in the check above, so drop that whole StyleColor\n",
+    "single_tag_style = df['StyleColor'] == 'RK485-99X'\n",
+    "df.drop(index=df.index[single_tag_style], inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Check whether any clip (EPC) is assigned to more than one item (EAN)\n",
+    "for measurement in df.MEASUREMENT.unique():\n",
+    "    in_measurement = df[df['MEASUREMENT'] == measurement]\n",
+    "    for epc in in_measurement.EPC.unique():\n",
+    "        eans = in_measurement.loc[in_measurement['EPC'] == epc, 'EAN']\n",
+    "        if eans.unique().size > 1:\n",
+    "            print(measurement, ', ', epc)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# For testing the model itself: RH797-81X readings taken at or before 10:30\n",
+    "_early_rh797_81x = (df.TIMESTAMP <= '2021-10-26T10:30:00.000') & (df.StyleColor == 'RH797-81X')\n",
+    "\n",
+    "# Several measurements together\n",
+    "test_1 = df[_early_rh797_81x & df.MEASUREMENT.isin([2, 3, 4, 26, 28])]\n",
+    "\n",
+    "# One frame per single measurement\n",
+    "test1 = df[_early_rh797_81x & (df.MEASUREMENT == 1)]\n",
+    "test2 = df[_early_rh797_81x & (df.MEASUREMENT == 2)]\n",
+    "test3 = df[_early_rh797_81x & (df.MEASUREMENT == 3)]\n",
+    "test4 = df[_early_rh797_81x & (df.MEASUREMENT == 4)]\n",
+    "test5 = df[_early_rh797_81x & (df.MEASUREMENT == 5)]\n",
+    "test6 = df[_early_rh797_81x & (df.MEASUREMENT == 6)]\n",
+    "test9 = df[_early_rh797_81x & (df.MEASUREMENT == 9)]\n",
+    "test12 = df[_early_rh797_81x & (df.MEASUREMENT == 12)]\n",
+    "test22 = df[_early_rh797_81x & (df.MEASUREMENT == 22)]\n",
+    "test24 = df[_early_rh797_81x & (df.MEASUREMENT == 24)]\n",
+    "test25 = df[_early_rh797_81x & (df.MEASUREMENT == 25)]\n",
+    "test21 = df[_early_rh797_81x & (df.MEASUREMENT == 21)]\n",
+    "test29 = df[_early_rh797_81x & (df.MEASUREMENT == 29)]\n",
+    "test28 = df[_early_rh797_81x & (df.MEASUREMENT == 28)]\n",
+    "\n",
+    "test29"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(10,8))\n",
+    "sns.scatterplot(x=\"TIME_MS\", y=\"SUM_PROXIMITY_KMS\", hue='EAN', data=test6)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(10,8))\n",
+    "sns.scatterplot(x=\"TIME_MS\", y=\"SUM_PROXIMITY_KMS\", hue='EAN', data=test_1[test_1.MEASUREMENT == 3])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(10,8))\n",
+    "sns.scatterplot(x=\"TIME_MS\", y=\"SUM_PROXIMITY_KMS\", hue='EPC', data=test_1[test_1.MEASUREMENT == 4])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(10,8))\n",
+    "sns.scatterplot(x=\"TIME_MS\", y=\"SUM_PROXIMITY_KMS\", hue='EPC', data=test_1[test_1.MEASUREMENT == 26])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(10,8))\n",
+    "sns.scatterplot(x=\"TIME_MS\", y=\"SUM_PROXIMITY_KMS\", hue='EAN', data=test_1[test_1.MEASUREMENT == 28])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Następnie tworzymy intuicyjny podział na zbiór treningowy i testowy: pomiary z godziny 10:30 lub wcześniejsze traktujemy jako zbiór treningowy, natomiast te po godzinie 10:30 – jako zbiór testowy."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Readings at or before the cutoff form the training set, later ones the test set\n",
+    "_cutoff = '2021-10-26T10:30:00.000'\n",
+    "_model_columns = ['SUM_PROXIMITY_KMS', 'TIME_MS', 'EAN', 'MEASUREMENT', 'StyleColor', 'EPC']\n",
+    "train = df.loc[df.TIMESTAMP <= _cutoff, _model_columns]\n",
+    "test = df.loc[df.TIMESTAMP > _cutoff, _model_columns]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "from sklearn.cluster import DBSCAN\n",
+    "from sklearn import metrics\n",
+    "from sklearn.datasets import make_blobs\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.preprocessing import normalize\n",
+    "from sklearn.neighbors import NearestNeighbors\n",
+    "import plotly.express as px\n",
+    "from kneed import KneeLocator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Test set for a few style-colours (readings at or before 10:30)\n",
+    "test0 = df[(df.TIMESTAMP <= '2021-10-26T10:30:00.000')\n",
+    "           & df.StyleColor.isin(['RH797-81X', 'SL171-99X', 'RH797-59X'])]\n",
+    "caly1 = test0[['SUM_PROXIMITY_KMS', 'TIME_MS', 'EAN', 'MEASUREMENT', 'StyleColor', 'EPC']]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def c1(x, *, eps_floor=6000, knee_divisor=8, max_min_samples=70):\n",
+    "    \"\"\"Flag EANs whose tag readings DBSCAN labels mostly as noise.\n",
+    "\n",
+    "    Every (StyleColor, MEASUREMENT) group of ``x`` is clustered with DBSCAN\n",
+    "    on the (SUM_PROXIMITY_KMS, TIME_MS) plane. An EPC is reported when more\n",
+    "    than half of its readings in the group carry the noise label (-1).\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    x : pandas.DataFrame\n",
+    "        Needs the columns SUM_PROXIMITY_KMS, TIME_MS, EAN, MEASUREMENT,\n",
+    "        StyleColor and EPC.\n",
+    "    eps_floor : float, keyword-only\n",
+    "        Lower bound for the DBSCAN ``eps`` radius.\n",
+    "    knee_divisor : float, keyword-only\n",
+    "        The knee of the k-distance curve is divided by this value to get\n",
+    "        the candidate ``eps``.\n",
+    "    max_min_samples : int, keyword-only\n",
+    "        Upper bound for DBSCAN ``min_samples`` (and for the k used in the\n",
+    "        k-distance curve).\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    pandas.DataFrame\n",
+    "        One row per flagged EPC with the columns MEASUREMENT, EAN and\n",
+    "        StyleColor; empty when nothing is flagged.\n",
+    "    \"\"\"\n",
+    "    features = ['SUM_PROXIMITY_KMS', 'TIME_MS']\n",
+    "    # Rows are collected in a list and turned into a frame once at the end;\n",
+    "    # DataFrame.append was removed in pandas 2.0 and copies on every call.\n",
+    "    flagged = []\n",
+    "\n",
+    "    for style_color in x.StyleColor.unique():\n",
+    "        style_df = x[x.StyleColor == style_color]\n",
+    "\n",
+    "        for measurement in style_df.MEASUREMENT.unique():\n",
+    "            sample = style_df[style_df.MEASUREMENT == measurement]\n",
+    "            if len(sample) < 2:\n",
+    "                # The k-distance query below needs at least two points, and a\n",
+    "                # lone reading would be its own cluster anyway (min_samples 1).\n",
+    "                continue\n",
+    "\n",
+    "            points = sample[features].to_numpy()\n",
+    "            epc_counts = sample['EPC'].value_counts()\n",
+    "\n",
+    "            # Half of the rarest tag's reading count, capped at max_min_samples.\n",
+    "            half_rarest = min(int(epc_counts.min()) // 2, max_min_samples)\n",
+    "            # DBSCAN counts the point itself as a neighbour, so 0 and 1 behave the\n",
+    "            # same; a plain int >= 1 is what current scikit-learn accepts.\n",
+    "            min_samples = max(1, half_rarest)\n",
+    "            neighbors = max(2, half_rarest)\n",
+    "\n",
+    "            knn = NearestNeighbors(n_neighbors=neighbors).fit(points)\n",
+    "            distances, _ = knn.kneighbors(points)\n",
+    "            # Distance to the k-th neighbour of every point, largest first.\n",
+    "            k_distances = sorted(distances[:, neighbors - 1], reverse=True)\n",
+    "\n",
+    "            kneedle = KneeLocator(range(1, len(k_distances) + 1),  # x values\n",
+    "                                  k_distances,  # y values\n",
+    "                                  S=1.0,  # value suggested in the paper (original author's note)\n",
+    "                                  curve='convex',\n",
+    "                                  direction='decreasing')\n",
+    "\n",
+    "            # knee_y is None when no knee is found; fall back to the floor then.\n",
+    "            knee_y = kneedle.knee_y\n",
+    "            eps = eps_floor if knee_y is None else max(eps_floor, knee_y / knee_divisor)\n",
+    "\n",
+    "            # A single fit_predict call; the model used to be fitted twice.\n",
+    "            labels = DBSCAN(eps=eps, min_samples=min_samples,\n",
+    "                            algorithm='auto').fit_predict(points)\n",
+    "\n",
+    "            # Number of noise readings per EPC (empty when there is no noise).\n",
+    "            noise_counts = sample.loc[labels == -1, 'EPC'].value_counts()\n",
+    "            for epc, noise_count in noise_counts.items():\n",
+    "                if noise_count > 0.5 * epc_counts.loc[epc]:\n",
+    "                    ean = sample.loc[sample['EPC'] == epc, 'EAN'].iloc[0]\n",
+    "                    flagged.append({'MEASUREMENT': measurement, 'EAN': ean,\n",
+    "                                    'StyleColor': style_color})\n",
+    "\n",
+    "    return pd.DataFrame(flagged, columns=['MEASUREMENT', 'EAN', 'StyleColor'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Split the training set into six batches of style-colours\n",
+    "train1 = train[train.StyleColor.isin(['RH267-85J', 'RJ369-87X', 'RM119-93X',\n",
+    "                                      'RS483-99X', 'SB281-90M'])]\n",
+    "\n",
+    "train2 = train[train.StyleColor.isin(['RV167-MLC', 'RV462-87X', 'QJ677-33X',\n",
+    "                                      'RH797-00X', 'RH267-55J'])]\n",
+    "\n",
+    "train3 = train[train.StyleColor.isin(['SL171-99X', 'SO133-09M', 'RB254-00X',\n",
+    "                                      'SF078-MLC', 'QY337-00X'])]\n",
+    "\n",
+    "train4 = train[train.StyleColor.isin(['SP095-59X', 'RN633-00X', 'RH267-59J',\n",
+    "                                      'RV167-87X'])]\n",
+    "\n",
+    "train5 = train[train.StyleColor.isin(['RJ365-09M', 'RH797-59X', 'SP090-90X',\n",
+    "                                      'RH797-99X', 'RJ371-59M'])]\n",
+    "\n",
+    "train6 = train[train.StyleColor.isin(['RV462-99X', 'RH797-81X', 'QZ555-20X',\n",
+    "                                      'RJ371-53M', 'RS054-99X'])]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [MEASUREMENT, EAN, StyleColor]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(train1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [MEASUREMENT, EAN, StyleColor]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(train2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>18</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>11</td>\n",
+       "      <td>5902690542745</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>11</td>\n",
+       "      <td>5902690542769</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          18  5902851852638  SO133-09M\n",
+       "1          11  5902690542745  QY337-00X\n",
+       "2          11  5902690542769  QY337-00X"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(train3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [MEASUREMENT, EAN, StyleColor]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(train4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [MEASUREMENT, EAN, StyleColor]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(train5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>21</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>24</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>26</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>28</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          21  5902805820447  RH797-81X\n",
+       "1          24  5902805820447  RH797-81X\n",
+       "2          26  5902805820447  RH797-81X\n",
+       "3          28  5902805820447  RH797-81X"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(train6)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>18</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>11</td>\n",
+       "      <td>5902690542745</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>11</td>\n",
+       "      <td>5902690542769</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>21</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>24</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>26</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>28</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          18  5902851852638  SO133-09M\n",
+       "1          11  5902690542745  QY337-00X\n",
+       "2          11  5902690542769  QY337-00X\n",
+       "3          21  5902805820447  RH797-81X\n",
+       "4          24  5902805820447  RH797-81X\n",
+       "5          26  5902805820447  RH797-81X\n",
+       "6          28  5902805820447  RH797-81X"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851414515</td>\n",
+       "      <td>SL171-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902690542769</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>29</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>34</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851852614</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852614</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   MEASUREMENT            EAN StyleColor\n",
+       "0           38  5902851414515  SL171-99X\n",
+       "1           42  5902690542769  QY337-00X\n",
+       "2           29  5902805820447  RH797-81X\n",
+       "3           32  5902805820447  RH797-81X\n",
+       "4           34  5902805820447  RH797-81X\n",
+       "5           38  5902851852614  SO133-09M\n",
+       "6           38  5902851852638  SO133-09M\n",
+       "7           38  5902851852638  SO133-09M\n",
+       "8           42  5902851852638  SO133-09M\n",
+       "9           42  5902851852614  SO133-09M\n",
+       "10          42  5902851852638  SO133-09M"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 268,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>22</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>25</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>24</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>26</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>30</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>28</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          22  5902805820447  RH797-81X\n",
+       "1          25  5902805820447  RH797-81X\n",
+       "2          24  5902805820447  RH797-81X\n",
+       "3          26  5902805820447  RH797-81X\n",
+       "4          30  5902805820447  RH797-81X\n",
+       "5          28  5902805820447  RH797-81X"
+      ]
+     },
+     "execution_count": 268,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(caly1,0.7)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 225,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>22</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>21</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>25</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>24</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>26</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>30</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>28</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          22  5902805820447  RH797-81X\n",
+       "1          21  5902805820447  RH797-81X\n",
+       "2          25  5902805820447  RH797-81X\n",
+       "3          24  5902805820447  RH797-81X\n",
+       "4          26  5902805820447  RH797-81X\n",
+       "5          30  5902805820447  RH797-81X\n",
+       "6          28  5902805820447  RH797-81X"
+      ]
+     },
+     "execution_count": 225,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(caly1, 0.5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Empty result frame with the three columns used to record flagged items.\n",
+    "outliery = pd.DataFrame(columns=['MEASUREMENT', 'EAN', 'StyleColor'])\n",
+    "# Keep only the rows of df whose StyleColor is 'RH797-81X'.\n",
+    "DF = df[df['StyleColor'] == 'RH797-81X']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Rows of DF with MEASUREMENT equal to 38.\n",
+    "proba = DF[DF['MEASUREMENT'] == 38]\n",
+    "# Two-column feature matrix (SUM_PROXIMITY_KMS, TIME_MS), one row per row of proba.\n",
+    "X = np.asarray(proba.loc[:, ['SUM_PROXIMITY_KMS', 'TIME_MS']]).reshape(-1, 2)\n",
+    "# Half of the smallest per-EAN row count, rounded down (stays a NumPy float).\n",
+    "# NOTE(review): the name says EPC but the count is taken per EAN - confirm which is intended.\n",
+    "minimal_epc = np.floor(proba['EAN'].value_counts().min() / 2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cap minimal_epc (and the neighbour count) at 70.\n",
+    "# Keep minimal_epc a NumPy scalar: a later cell calls minimal_epc.astype(int),\n",
+    "# which a plain Python int does not support.\n",
+    "if minimal_epc > 70:\n",
+    "    minimal_epc = np.float64(70)\n",
+    "    neighbors = 70"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3035684754501F8000B5B6E6    140\n",
+       "3035684754501F4000B5B6E5    135\n",
+       "3035684754501F0000B5B614    130\n",
+       "3035684754501F0000B5B632     90\n",
+       "3035684754501F8000B5B6A5     80\n",
+       "Name: EPC, dtype: int64"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "proba['EPC'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "67.0"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "minimal_epc"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Neighbour count for the k-distance curve: at least 2.\n",
+    "# int() accepts NumPy scalars and plain ints alike, so a capped plain-int minimal_epc also works.\n",
+    "neighbors = max(2, int(minimal_epc))\n",
+    "X_embedded = proba[['TIME_MS','SUM_PROXIMITY_KMS']]\n",
+    "nbrs = NearestNeighbors(n_neighbors=neighbors).fit(X_embedded)\n",
+    "distances, indices = nbrs.kneighbors(X_embedded)\n",
+    "# Last column of the neighbour distances for every point, largest first (input for knee detection).\n",
+    "distance_desc = sorted(distances[:, neighbors - 1], reverse=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Find the knee of the descending distance curve.\n",
+    "kneedle = KneeLocator(\n",
+    "    range(1, len(distance_desc) + 1),  # x values: 1-based rank of each distance\n",
+    "    distance_desc,                     # y values: the sorted distances\n",
+    "    S=1.0,                             # parameter suggested from paper\n",
+    "    curve=\"convex\",                    # curve shape read from the figure\n",
+    "    direction=\"decreasing\",            # curve direction read from the figure\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "5000"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# eps for DBSCAN: one eighth of the knee distance, but never below 5000.\n",
+    "# knee_y is None when KneeLocator finds no knee; use the floor then instead of raising on None / 8.\n",
+    "eps = 5000 if kneedle.knee_y is None else max(5000, kneedle.knee_y / 8)\n",
+    "eps"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# min_samples must be an integer for scikit-learn's parameter validation; minimal_epc is a float.\n",
+    "db = DBSCAN(eps=eps, min_samples=int(minimal_epc), algorithm='brute')\n",
+    "# Fit once: fit_predict both fits the model (setting db.labels_) and returns the labels.\n",
+    "y_pred = db.fit_predict(X)\n",
+    "# One CLUSTER label per row of proba, aligned on proba's index (-1 marks noise).\n",
+    "clusters = pd.DataFrame(y_pred, columns=['CLUSTER'], index=proba.index)\n",
+    "# proba with the CLUSTER column attached.\n",
+    "calosc = pd.concat([proba, clusters], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Flag an EPC when more than 60% of its rows in proba were labelled noise (CLUSTER == -1).\n",
+    "if (db.labels_ == -1).any():\n",
+    "    a = calosc[calosc.CLUSTER == -1]['EPC'].value_counts()\n",
+    "    for b in a.index:\n",
+    "        epc_rows = proba[proba['EPC'] == b]\n",
+    "        if a[b] > 0.6 * len(epc_rows):\n",
+    "            # Record the measurement and style of the rows actually analysed\n",
+    "            # instead of hard-coded values left over from another experiment.\n",
+    "            flagged = pd.DataFrame([{'MEASUREMENT': epc_rows.MEASUREMENT.iloc[0],\n",
+    "                                     'EAN': epc_rows.EAN.iloc[0],\n",
+    "                                     'StyleColor': epc_rows.StyleColor.iloc[0]}])\n",
+    "            # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.\n",
+    "            outliery = pd.concat([outliery, flagged], ignore_index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3035684754501F4000B5B6E5    39\n",
+       "3035684754501F8000B5B6E6    31\n",
+       "3035684754501F8000B5B6A5    10\n",
+       "3035684754501F0000B5B632    10\n",
+       "3035684754501F0000B5B614     6\n",
+       "Name: EPC, dtype: int64"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [MEASUREMENT, EAN, StyleColor]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "outliery"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Wybrane StyleColor\n",
+    "# Selected StyleColor values: each z* frame keeps the rows of test for five styles.\n",
+    "z1 = test[test.StyleColor.isin(['RH267-85J', 'RJ369-87X', 'RM119-93X', 'RS483-99X', 'SB281-90M'])]\n",
+    "\n",
+    "z2 = test[test.StyleColor.isin(['RV167-MLC', 'RV462-87X', 'QJ677-33X', 'RH797-00X', 'RH267-55J'])]\n",
+    "\n",
+    "z3 = test[test.StyleColor.isin(['SL171-99X', 'SO133-09M', 'RB254-00X', 'SF078-MLC', 'QY337-00X'])]\n",
+    "\n",
+    "z4 = test[test.StyleColor.isin(['SP095-59X', 'RN633-00X', 'RH267-59J', 'RV167-87X', 'RK485-99X'])]\n",
+    "\n",
+    "z5 = test[test.StyleColor.isin(['RJ365-09M', 'RH797-59X', 'SP090-90X', 'RH797-99X', 'RJ371-59M'])]\n",
+    "\n",
+    "z6 = test[test.StyleColor.isin(['RV462-99X', 'RH797-81X', 'QZ555-20X', 'RJ371-53M', 'RS054-99X'])]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 84,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [MEASUREMENT, EAN, StyleColor]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 84,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(z1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 183,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851414515</td>\n",
+       "      <td>SL171-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>29</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>34</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851852614</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852614</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          38  5902851414515  SL171-99X\n",
+       "1          29  5902805820447  RH797-81X\n",
+       "2          32  5902805820447  RH797-81X\n",
+       "3          34  5902805820447  RH797-81X\n",
+       "4          38  5902851852614  SO133-09M\n",
+       "5          38  5902851852638  SO133-09M\n",
+       "6          42  5902851852638  SO133-09M\n",
+       "7          42  5902851852614  SO133-09M\n",
+       "8          42  5902851852638  SO133-09M"
+      ]
+     },
+     "execution_count": 183,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 186,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851414515</td>\n",
+       "      <td>SL171-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>29</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>34</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851852614</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852614</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          38  5902851414515  SL171-99X\n",
+       "1          29  5902805820447  RH797-81X\n",
+       "2          32  5902805820447  RH797-81X\n",
+       "3          34  5902805820447  RH797-81X\n",
+       "4          38  5902851852614  SO133-09M\n",
+       "5          38  5902851852638  SO133-09M\n",
+       "6          42  5902851852638  SO133-09M\n",
+       "7          42  5902851852614  SO133-09M\n",
+       "8          42  5902851852638  SO133-09M"
+      ]
+     },
+     "execution_count": 186,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 277,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>24</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>28</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          24  5902805820447  RH797-81X\n",
+       "1          28  5902805820447  RH797-81X"
+      ]
+     },
+     "execution_count": 277,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# for /5\n",
+    "c1(train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 278,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>29</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          29  5902805820447  RH797-81X\n",
+       "1          32  5902805820447  RH797-81X\n",
+       "2          42  5902851852638  SO133-09M"
+      ]
+     },
+     "execution_count": 278,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 280,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>24</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>28</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          24  5902805820447  RH797-81X\n",
+       "1          28  5902805820447  RH797-81X"
+      ]
+     },
+     "execution_count": 280,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# for /6\n",
+    "c1(train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 281,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>29</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>34</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          29  5902805820447  RH797-81X\n",
+       "1          32  5902805820447  RH797-81X\n",
+       "2          34  5902805820447  RH797-81X\n",
+       "3          42  5902851852638  SO133-09M"
+      ]
+     },
+     "execution_count": 281,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 283,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>11</td>\n",
+       "      <td>5902690542745</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>11</td>\n",
+       "      <td>5902690542769</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>24</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>28</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          11  5902690542745  QY337-00X\n",
+       "1          11  5902690542769  QY337-00X\n",
+       "2          24  5902805820447  RH797-81X\n",
+       "3          28  5902805820447  RH797-81X"
+      ]
+     },
+     "execution_count": 283,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# eps min 5000 / 5\n",
+    "# \n",
+    "c1(train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 284,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851414515</td>\n",
+       "      <td>SL171-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902690542769</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>29</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>34</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851852614</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          38  5902851414515  SL171-99X\n",
+       "1          42  5902690542769  QY337-00X\n",
+       "2          29  5902805820447  RH797-81X\n",
+       "3          32  5902805820447  RH797-81X\n",
+       "4          34  5902805820447  RH797-81X\n",
+       "5          38  5902851852614  SO133-09M\n",
+       "6          38  5902851852638  SO133-09M\n",
+       "7          42  5902851852638  SO133-09M"
+      ]
+     },
+     "execution_count": 284,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# c=0.6\n",
+    "c1(test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 317,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>11</td>\n",
+       "      <td>5902690542745</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>11</td>\n",
+       "      <td>5902690542769</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>24</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>28</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          11  5902690542745  QY337-00X\n",
+       "1          11  5902690542769  QY337-00X\n",
+       "2          24  5902805820447  RH797-81X\n",
+       "3          28  5902805820447  RH797-81X"
+      ]
+     },
+     "execution_count": 317,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 318,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851414515</td>\n",
+       "      <td>SL171-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>29</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>34</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          38  5902851414515  SL171-99X\n",
+       "1          29  5902805820447  RH797-81X\n",
+       "2          32  5902805820447  RH797-81X\n",
+       "3          34  5902805820447  RH797-81X\n",
+       "4          42  5902851852638  SO133-09M"
+      ]
+     },
+     "execution_count": 318,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c1(test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def c2(x, *, eps_divisor=5, min_eps=5000, algorithm='auto', max_min_samples=70):\n",
+    "    \"\"\"Report tags (EPC) whose reads are mostly DBSCAN noise.\n",
+    "\n",
+    "    Each (StyleColor, MEASUREMENT) slice of ``x`` is clustered with DBSCAN on\n",
+    "    the TIME_MS and SUM_PROXIMITY_KMS columns. ``eps`` is the knee of the\n",
+    "    descending k-th nearest-neighbour distance curve divided by\n",
+    "    ``eps_divisor``, but never below ``min_eps``. An EPC is reported when more\n",
+    "    than half of its reads in the slice are labelled as noise (-1).\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    x : pandas.DataFrame\n",
+    "        Must provide the columns StyleColor, MEASUREMENT, EPC, EAN, TIME_MS\n",
+    "        and SUM_PROXIMITY_KMS.\n",
+    "    eps_divisor : number, default 5\n",
+    "        Divisor applied to the knee distance to obtain ``eps``.\n",
+    "    min_eps : number, default 5000\n",
+    "        Lower bound for ``eps``; also used when no knee is detected.\n",
+    "    algorithm : str, default 'auto'\n",
+    "        Passed through to ``DBSCAN(algorithm=...)``.\n",
+    "    max_min_samples : int, default 70\n",
+    "        Upper bound for the DBSCAN ``min_samples`` value.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    pandas.DataFrame\n",
+    "        Columns MEASUREMENT, EAN, StyleColor; one row per reported EPC.\n",
+    "    \"\"\"\n",
+    "    features = ['TIME_MS', 'SUM_PROXIMITY_KMS']\n",
+    "    rows = []\n",
+    "\n",
+    "    for style_color in x.StyleColor.unique():\n",
+    "        style_df = x[x.StyleColor == style_color]\n",
+    "\n",
+    "        for measurement in style_df.MEASUREMENT.unique():\n",
+    "            proba = style_df[style_df.MEASUREMENT == measurement]\n",
+    "            reads_per_epc = proba['EPC'].value_counts()\n",
+    "            if reads_per_epc.empty:\n",
+    "                # No tag identifiers in this slice, so nothing can be reported.\n",
+    "                continue\n",
+    "\n",
+    "            # Half of the rarest tag's read count, capped from above. DBSCAN\n",
+    "            # needs an integer min_samples >= 1, hence the int() and the floor of 1.\n",
+    "            min_samples = int(min(max_min_samples, max(1, reads_per_epc.min() // 2)))\n",
+    "            neighbors = max(2, min_samples)\n",
+    "\n",
+    "            if len(proba) < neighbors:\n",
+    "                # Only reachable for a single-read slice: kneighbors() would raise,\n",
+    "                # and one point with min_samples=1 can never be noise anyway.\n",
+    "                continue\n",
+    "\n",
+    "            # One feature matrix serves both the k-distance curve and DBSCAN;\n",
+    "            # Euclidean distances do not depend on the column order.\n",
+    "            X = np.asarray(proba[features]).reshape(-1, 2)\n",
+    "\n",
+    "            nbrs = NearestNeighbors(n_neighbors=neighbors).fit(X)\n",
+    "            distances, _ = nbrs.kneighbors(X)\n",
+    "            distance_desc = sorted(distances[:, neighbors - 1], reverse=True)\n",
+    "\n",
+    "            kneedle = KneeLocator(range(1, len(distance_desc) + 1),  # x values\n",
+    "                                  distance_desc,  # y values\n",
+    "                                  S=1.0,  # parameter suggested from paper\n",
+    "                                  curve='convex',  # parameter from figure\n",
+    "                                  direction='decreasing')  # parameter from figure\n",
+    "\n",
+    "            # knee_y is None when no knee is detected; fall back to the floor.\n",
+    "            knee_y = kneedle.knee_y\n",
+    "            eps = min_eps if knee_y is None else max(min_eps, knee_y / eps_divisor)\n",
+    "\n",
+    "            # A single fit is enough; fit_predict returns the cluster labels.\n",
+    "            labels = DBSCAN(eps=eps, min_samples=min_samples,\n",
+    "                            algorithm=algorithm).fit_predict(X)\n",
+    "\n",
+    "            # Noise reads per tag, compared with that tag's total reads in the slice.\n",
+    "            noise_per_epc = proba.loc[labels == -1, 'EPC'].value_counts()\n",
+    "            for epc, noise_reads in noise_per_epc.items():\n",
+    "                if noise_reads > 0.5 * reads_per_epc[epc]:\n",
+    "                    rows.append({'MEASUREMENT': measurement,\n",
+    "                                 'EAN': proba.loc[proba['EPC'] == epc, 'EAN'].iloc[0],\n",
+    "                                 'StyleColor': style_color})\n",
+    "\n",
+    "    # Rows are collected in a list because DataFrame.append no longer exists in pandas 2.\n",
+    "    return pd.DataFrame(rows, columns=['MEASUREMENT', 'EAN', 'StyleColor'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>21</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>24</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>26</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>28</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          21  5902805820447  RH797-81X\n",
+       "1          24  5902805820447  RH797-81X\n",
+       "2          26  5902805820447  RH797-81X\n",
+       "3          28  5902805820447  RH797-81X"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c2(train6)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>29</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>34</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          29  5902805820447  RH797-81X\n",
+       "1          32  5902805820447  RH797-81X\n",
+       "2          34  5902805820447  RH797-81X"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c2(z6)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>18</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>11</td>\n",
+       "      <td>5902690542745</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>11</td>\n",
+       "      <td>5902690542769</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>21</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>24</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>26</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>28</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          18  5902851852638  SO133-09M\n",
+       "1          11  5902690542745  QY337-00X\n",
+       "2          11  5902690542769  QY337-00X\n",
+       "3          21  5902805820447  RH797-81X\n",
+       "4          24  5902805820447  RH797-81X\n",
+       "5          26  5902805820447  RH797-81X\n",
+       "6          28  5902805820447  RH797-81X"
+      ]
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c2(train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851414515</td>\n",
+       "      <td>SL171-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902690542769</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902690542745</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>29</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>34</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851852614</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852614</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   MEASUREMENT            EAN StyleColor\n",
+       "0           38  5902851414515  SL171-99X\n",
+       "1           42  5902690542769  QY337-00X\n",
+       "2           42  5902690542745  QY337-00X\n",
+       "3           29  5902805820447  RH797-81X\n",
+       "4           32  5902805820447  RH797-81X\n",
+       "5           34  5902805820447  RH797-81X\n",
+       "6           38  5902851852614  SO133-09M\n",
+       "7           38  5902851852638  SO133-09M\n",
+       "8           38  5902851852638  SO133-09M\n",
+       "9           42  5902851852638  SO133-09M\n",
+       "10          42  5902851852614  SO133-09M\n",
+       "11          42  5902851852638  SO133-09M"
+      ]
+     },
+     "execution_count": 46,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c2(test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def c3(x, *, eps_divisor=4, min_eps=5000, algorithm='kd_tree', max_min_samples=70):\n",
+    "    \"\"\"Report tags (EPC) whose reads are mostly DBSCAN noise (knee / 4, kd_tree).\n",
+    "\n",
+    "    Each (StyleColor, MEASUREMENT) slice of ``x`` is clustered with DBSCAN on\n",
+    "    the TIME_MS and SUM_PROXIMITY_KMS columns. ``eps`` is the knee of the\n",
+    "    descending k-th nearest-neighbour distance curve divided by\n",
+    "    ``eps_divisor``, but never below ``min_eps``. An EPC is reported when more\n",
+    "    than half of its reads in the slice are labelled as noise (-1).\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    x : pandas.DataFrame\n",
+    "        Must provide the columns StyleColor, MEASUREMENT, EPC, EAN, TIME_MS\n",
+    "        and SUM_PROXIMITY_KMS.\n",
+    "    eps_divisor : number, default 4\n",
+    "        Divisor applied to the knee distance to obtain ``eps``.\n",
+    "    min_eps : number, default 5000\n",
+    "        Lower bound for ``eps``; also used when no knee is detected.\n",
+    "    algorithm : str, default 'kd_tree'\n",
+    "        Passed through to ``DBSCAN(algorithm=...)``.\n",
+    "    max_min_samples : int, default 70\n",
+    "        Upper bound for the DBSCAN ``min_samples`` value.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    pandas.DataFrame\n",
+    "        Columns MEASUREMENT, EAN, StyleColor; one row per reported EPC.\n",
+    "    \"\"\"\n",
+    "    features = ['TIME_MS', 'SUM_PROXIMITY_KMS']\n",
+    "    rows = []\n",
+    "\n",
+    "    for style_color in x.StyleColor.unique():\n",
+    "        style_df = x[x.StyleColor == style_color]\n",
+    "\n",
+    "        for measurement in style_df.MEASUREMENT.unique():\n",
+    "            proba = style_df[style_df.MEASUREMENT == measurement]\n",
+    "            reads_per_epc = proba['EPC'].value_counts()\n",
+    "            if reads_per_epc.empty:\n",
+    "                # No tag identifiers in this slice, so nothing can be reported.\n",
+    "                continue\n",
+    "\n",
+    "            # Half of the rarest tag's read count, capped from above. DBSCAN\n",
+    "            # needs an integer min_samples >= 1, hence the int() and the floor of 1.\n",
+    "            min_samples = int(min(max_min_samples, max(1, reads_per_epc.min() // 2)))\n",
+    "            neighbors = max(2, min_samples)\n",
+    "\n",
+    "            if len(proba) < neighbors:\n",
+    "                # Only reachable for a single-read slice: kneighbors() would raise,\n",
+    "                # and one point with min_samples=1 can never be noise anyway.\n",
+    "                continue\n",
+    "\n",
+    "            # One feature matrix serves both the k-distance curve and DBSCAN;\n",
+    "            # Euclidean distances do not depend on the column order.\n",
+    "            X = np.asarray(proba[features]).reshape(-1, 2)\n",
+    "\n",
+    "            nbrs = NearestNeighbors(n_neighbors=neighbors).fit(X)\n",
+    "            distances, _ = nbrs.kneighbors(X)\n",
+    "            distance_desc = sorted(distances[:, neighbors - 1], reverse=True)\n",
+    "\n",
+    "            kneedle = KneeLocator(range(1, len(distance_desc) + 1),  # x values\n",
+    "                                  distance_desc,  # y values\n",
+    "                                  S=1.0,  # parameter suggested from paper\n",
+    "                                  curve='convex',  # parameter from figure\n",
+    "                                  direction='decreasing')  # parameter from figure\n",
+    "\n",
+    "            # knee_y is None when no knee is detected; fall back to the floor.\n",
+    "            knee_y = kneedle.knee_y\n",
+    "            eps = min_eps if knee_y is None else max(min_eps, knee_y / eps_divisor)\n",
+    "\n",
+    "            # A single fit is enough; fit_predict returns the cluster labels.\n",
+    "            labels = DBSCAN(eps=eps, min_samples=min_samples,\n",
+    "                            algorithm=algorithm).fit_predict(X)\n",
+    "\n",
+    "            # Noise reads per tag, compared with that tag's total reads in the slice.\n",
+    "            noise_per_epc = proba.loc[labels == -1, 'EPC'].value_counts()\n",
+    "            for epc, noise_reads in noise_per_epc.items():\n",
+    "                if noise_reads > 0.5 * reads_per_epc[epc]:\n",
+    "                    rows.append({'MEASUREMENT': measurement,\n",
+    "                                 'EAN': proba.loc[proba['EPC'] == epc, 'EAN'].iloc[0],\n",
+    "                                 'StyleColor': style_color})\n",
+    "\n",
+    "    # Rows are collected in a list because DataFrame.append no longer exists in pandas 2.\n",
+    "    return pd.DataFrame(rows, columns=['MEASUREMENT', 'EAN', 'StyleColor'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>18</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>11</td>\n",
+       "      <td>5902690542745</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>11</td>\n",
+       "      <td>5902690542769</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>21</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>24</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>28</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          18  5902851852638  SO133-09M\n",
+       "1          11  5902690542745  QY337-00X\n",
+       "2          11  5902690542769  QY337-00X\n",
+       "3          21  5902805820447  RH797-81X\n",
+       "4          24  5902805820447  RH797-81X\n",
+       "5          28  5902805820447  RH797-81X"
+      ]
+     },
+     "execution_count": 51,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c3(train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851414515</td>\n",
+       "      <td>SL171-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902690542769</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902690542745</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>29</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852614</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          38  5902851414515  SL171-99X\n",
+       "1          42  5902690542769  QY337-00X\n",
+       "2          42  5902690542745  QY337-00X\n",
+       "3          29  5902805820447  RH797-81X\n",
+       "4          32  5902805820447  RH797-81X\n",
+       "5          38  5902851852638  SO133-09M\n",
+       "6          42  5902851852638  SO133-09M\n",
+       "7          42  5902851852614  SO133-09M\n",
+       "8          42  5902851852638  SO133-09M"
+      ]
+     },
+     "execution_count": 52,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c3(test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def c4(x, *, max_min_samples=70, min_eps=5000):\n",
+    "    \"\"\"Report, per StyleColor and MEASUREMENT, the EAN whose tag dominates the DBSCAN noise.\n",
+    "\n",
+    "    Each (StyleColor, MEASUREMENT) slice of ``x`` is clustered with DBSCAN on\n",
+    "    the SUM_PROXIMITY_KMS / TIME_MS columns. ``min_samples`` is half of the\n",
+    "    smallest per-EPC row count (capped at ``max_min_samples``) and ``eps`` is\n",
+    "    half of the knee of the sorted k-distance curve (never below ``min_eps``).\n",
+    "    A slice yields one row when it has more noise points than ``min_samples``\n",
+    "    and a single EPC owns more than half of the noise points that carry an EPC.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    x : pandas.DataFrame\n",
+    "        Needs the columns StyleColor, MEASUREMENT, EPC, EAN,\n",
+    "        SUM_PROXIMITY_KMS and TIME_MS.\n",
+    "    max_min_samples : int, default 70\n",
+    "        Upper bound for ``min_samples`` and for the k of the k-distance curve.\n",
+    "    min_eps : float, default 5000\n",
+    "        Lower bound for DBSCAN ``eps``; also used when no knee is found.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    pandas.DataFrame\n",
+    "        Object-dtype frame with columns MEASUREMENT, EAN and StyleColor.\n",
+    "    \"\"\"\n",
+    "    columns = ['MEASUREMENT', 'EAN', 'StyleColor']\n",
+    "    # Rows are gathered in a list and turned into a frame once at the end:\n",
+    "    # DataFrame.append copied the frame on every call and no longer exists in pandas 2.\n",
+    "    found = []\n",
+    "\n",
+    "    for style_color in x.StyleColor.unique():\n",
+    "        style_rows = x[x.StyleColor == style_color]\n",
+    "\n",
+    "        for measurement in style_rows.MEASUREMENT.unique():\n",
+    "            sample = style_rows[style_rows.MEASUREMENT == measurement]\n",
+    "            epc_counts = sample['EPC'].value_counts()\n",
+    "            if len(sample) < 2 or epc_counts.empty:\n",
+    "                # The k-distance query asks for at least 2 neighbours, so a\n",
+    "                # one-row slice cannot be analysed (it used to raise).\n",
+    "                continue\n",
+    "\n",
+    "            min_samples = min(int(max_min_samples), int(epc_counts.min() // 2))\n",
+    "            neighbors = max(2, min_samples)\n",
+    "\n",
+    "            # Column order does not affect Euclidean distances, so one array\n",
+    "            # serves both the neighbour search and DBSCAN.\n",
+    "            features = np.asarray(sample[['SUM_PROXIMITY_KMS', 'TIME_MS']])\n",
+    "\n",
+    "            knn = NearestNeighbors(n_neighbors=neighbors).fit(features)\n",
+    "            distances, _ = knn.kneighbors(features)\n",
+    "            k_distances = sorted(distances[:, neighbors - 1], reverse=True)\n",
+    "\n",
+    "            kneedle = KneeLocator(range(1, len(k_distances) + 1),  # x values\n",
+    "                                  k_distances,  # y values\n",
+    "                                  S=1.0,  # parameter suggested from paper\n",
+    "                                  curve='convex',  # parameter from figure\n",
+    "                                  direction='decreasing')  # parameter from figure\n",
+    "\n",
+    "            # knee_y is None when the curve has no knee; fall back to the floor.\n",
+    "            knee_y = kneedle.knee_y\n",
+    "            eps = min_eps if knee_y is None else max(min_eps, knee_y / 2)\n",
+    "\n",
+    "            # min_samples must be a positive integer for current scikit-learn.\n",
+    "            # Every point is its own neighbour, so 1 labels points exactly as 0 did.\n",
+    "            dbscan = DBSCAN(eps=eps, min_samples=max(1, min_samples), algorithm='auto')\n",
+    "            labels = dbscan.fit_predict(features)  # single fit; the extra fit() was redundant\n",
+    "\n",
+    "            is_noise = labels == -1\n",
+    "            if int(is_noise.sum()) <= min_samples:\n",
+    "                continue\n",
+    "\n",
+    "            # value_counts sorts descending, so position 0 is the most frequent noisy EPC.\n",
+    "            noise_epcs = sample.loc[is_noise, 'EPC'].value_counts()\n",
+    "            if noise_epcs.empty:\n",
+    "                continue\n",
+    "\n",
+    "            if 2 * noise_epcs.iloc[0] > noise_epcs.sum():\n",
+    "                top_epc = noise_epcs.index[0]\n",
+    "                found.append({'MEASUREMENT': measurement,\n",
+    "                              'EAN': sample.loc[sample['EPC'] == top_epc, 'EAN'].iloc[0],\n",
+    "                              'StyleColor': style_color})\n",
+    "\n",
+    "    # dtype=object matches what the append-based version used to return.\n",
+    "    return pd.DataFrame(found, columns=columns, dtype=object)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>6</td>\n",
+       "      <td>5902805533040</td>\n",
+       "      <td>RH267-85J</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>24</td>\n",
+       "      <td>5902805533040</td>\n",
+       "      <td>RH267-85J</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>4</td>\n",
+       "      <td>5902851445731</td>\n",
+       "      <td>RS483-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>7</td>\n",
+       "      <td>5902851445700</td>\n",
+       "      <td>RS483-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>11</td>\n",
+       "      <td>5902851445731</td>\n",
+       "      <td>RS483-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>28</td>\n",
+       "      <td>5902805303681</td>\n",
+       "      <td>RJ369-87X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>1</td>\n",
+       "      <td>5902805431803</td>\n",
+       "      <td>RM119-93X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>5</td>\n",
+       "      <td>5902805431797</td>\n",
+       "      <td>RM119-93X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>6</td>\n",
+       "      <td>5902805431803</td>\n",
+       "      <td>RM119-93X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>12</td>\n",
+       "      <td>5902851535913</td>\n",
+       "      <td>RV167-MLC</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>2</td>\n",
+       "      <td>5902975217986</td>\n",
+       "      <td>RV462-87X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>4</td>\n",
+       "      <td>5902851414508</td>\n",
+       "      <td>SL171-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>7</td>\n",
+       "      <td>5902851414508</td>\n",
+       "      <td>SL171-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>12</td>\n",
+       "      <td>5902851414508</td>\n",
+       "      <td>SL171-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>4</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>7</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>14</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>20</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>2</td>\n",
+       "      <td>5902690542769</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>4</td>\n",
+       "      <td>5902690542745</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>13</td>\n",
+       "      <td>5902690542769</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>16</td>\n",
+       "      <td>5902690542745</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>23</td>\n",
+       "      <td>5902690542745</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>24</td>\n",
+       "      <td>5902805219685</td>\n",
+       "      <td>RN633-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>1</td>\n",
+       "      <td>5902805533255</td>\n",
+       "      <td>RH267-59J</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>2</td>\n",
+       "      <td>5902805533255</td>\n",
+       "      <td>RH267-59J</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>13</td>\n",
+       "      <td>5902805533255</td>\n",
+       "      <td>RH267-59J</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>21</td>\n",
+       "      <td>5902805533255</td>\n",
+       "      <td>RH267-59J</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>24</td>\n",
+       "      <td>5902805533255</td>\n",
+       "      <td>RH267-59J</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>26</td>\n",
+       "      <td>5902805444698</td>\n",
+       "      <td>RJ365-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>21</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>23</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>28</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>33</th>\n",
+       "      <td>3</td>\n",
+       "      <td>5902805385823</td>\n",
+       "      <td>RJ371-53M</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   MEASUREMENT            EAN StyleColor\n",
+       "0            6  5902805533040  RH267-85J\n",
+       "1           24  5902805533040  RH267-85J\n",
+       "2            4  5902851445731  RS483-99X\n",
+       "3            7  5902851445700  RS483-99X\n",
+       "4           11  5902851445731  RS483-99X\n",
+       "5           28  5902805303681  RJ369-87X\n",
+       "6            1  5902805431803  RM119-93X\n",
+       "7            5  5902805431797  RM119-93X\n",
+       "8            6  5902805431803  RM119-93X\n",
+       "9           12  5902851535913  RV167-MLC\n",
+       "10           2  5902975217986  RV462-87X\n",
+       "11           4  5902851414508  SL171-99X\n",
+       "12           7  5902851414508  SL171-99X\n",
+       "13          12  5902851414508  SL171-99X\n",
+       "14           4  5902851852638  SO133-09M\n",
+       "15           7  5902851852638  SO133-09M\n",
+       "16          14  5902851852638  SO133-09M\n",
+       "17          20  5902851852638  SO133-09M\n",
+       "18           2  5902690542769  QY337-00X\n",
+       "19           4  5902690542745  QY337-00X\n",
+       "20          13  5902690542769  QY337-00X\n",
+       "21          16  5902690542745  QY337-00X\n",
+       "22          23  5902690542745  QY337-00X\n",
+       "23          24  5902805219685  RN633-00X\n",
+       "24           1  5902805533255  RH267-59J\n",
+       "25           2  5902805533255  RH267-59J\n",
+       "26          13  5902805533255  RH267-59J\n",
+       "27          21  5902805533255  RH267-59J\n",
+       "28          24  5902805533255  RH267-59J\n",
+       "29          26  5902805444698  RJ365-09M\n",
+       "30          21  5902805820447  RH797-81X\n",
+       "31          23  5902805820447  RH797-81X\n",
+       "32          28  5902805820447  RH797-81X\n",
+       "33           3  5902805385823  RJ371-53M"
+      ]
+     },
+     "execution_count": 61,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c4(train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>41</td>\n",
+       "      <td>5902805532999</td>\n",
+       "      <td>RH267-55J</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902851414515</td>\n",
+       "      <td>SL171-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>34</td>\n",
+       "      <td>5902851414515</td>\n",
+       "      <td>SL171-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>42</td>\n",
+       "      <td>5902851414515</td>\n",
+       "      <td>SL171-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902805431803</td>\n",
+       "      <td>RM119-93X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>40</td>\n",
+       "      <td>5902805431803</td>\n",
+       "      <td>RM119-93X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902851445700</td>\n",
+       "      <td>RS483-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>37</td>\n",
+       "      <td>5902851445700</td>\n",
+       "      <td>RS483-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>40</td>\n",
+       "      <td>5902851445731</td>\n",
+       "      <td>RS483-99X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902690542745</td>\n",
+       "      <td>QY337-00X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>36</td>\n",
+       "      <td>5902805820423</td>\n",
+       "      <td>RH797-59X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>37</td>\n",
+       "      <td>5902805303681</td>\n",
+       "      <td>RJ369-87X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>29</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902805533255</td>\n",
+       "      <td>RH267-59J</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>38</td>\n",
+       "      <td>5902805533255</td>\n",
+       "      <td>RH267-59J</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>40</td>\n",
+       "      <td>5902805533255</td>\n",
+       "      <td>RH267-59J</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>29</td>\n",
+       "      <td>5902975236994</td>\n",
+       "      <td>SF078-MLC</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>36</td>\n",
+       "      <td>5902975236994</td>\n",
+       "      <td>SF078-MLC</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>41</td>\n",
+       "      <td>5902975236994</td>\n",
+       "      <td>SF078-MLC</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>39</td>\n",
+       "      <td>5902851852638</td>\n",
+       "      <td>SO133-09M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902805820546</td>\n",
+       "      <td>RH797-00X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   MEASUREMENT            EAN StyleColor\n",
+       "0           41  5902805532999  RH267-55J\n",
+       "1           32  5902851414515  SL171-99X\n",
+       "2           34  5902851414515  SL171-99X\n",
+       "3           42  5902851414515  SL171-99X\n",
+       "4           32  5902805431803  RM119-93X\n",
+       "5           40  5902805431803  RM119-93X\n",
+       "6           32  5902851445700  RS483-99X\n",
+       "7           37  5902851445700  RS483-99X\n",
+       "8           40  5902851445731  RS483-99X\n",
+       "9           32  5902690542745  QY337-00X\n",
+       "10          36  5902805820423  RH797-59X\n",
+       "11          37  5902805303681  RJ369-87X\n",
+       "12          29  5902805820447  RH797-81X\n",
+       "13          32  5902805820447  RH797-81X\n",
+       "14          32  5902805533255  RH267-59J\n",
+       "15          38  5902805533255  RH267-59J\n",
+       "16          40  5902805533255  RH267-59J\n",
+       "17          29  5902975236994  SF078-MLC\n",
+       "18          36  5902975236994  SF078-MLC\n",
+       "19          41  5902975236994  SF078-MLC\n",
+       "20          39  5902851852638  SO133-09M\n",
+       "21          32  5902805820546  RH797-00X"
+      ]
+     },
+     "execution_count": 62,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c4(test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def c5(x, *, max_min_samples=70, min_eps=5000):\n",
+    "    \"\"\"Report every EAN whose tag has most of its readings labelled as DBSCAN noise.\n",
+    "\n",
+    "    Each (StyleColor, MEASUREMENT) slice of ``x`` is clustered with DBSCAN\n",
+    "    (kd_tree) on the SUM_PROXIMITY_KMS / TIME_MS columns. ``min_samples`` is\n",
+    "    half of the smallest per-EPC row count (capped at ``max_min_samples``) and\n",
+    "    ``eps`` is half of the knee of the sorted k-distance curve (never below\n",
+    "    ``min_eps``). An EPC is reported when more than half of its readings in\n",
+    "    the slice are noise points.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    x : pandas.DataFrame\n",
+    "        Needs the columns StyleColor, MEASUREMENT, EPC, EAN,\n",
+    "        SUM_PROXIMITY_KMS and TIME_MS.\n",
+    "    max_min_samples : int, default 70\n",
+    "        Upper bound for ``min_samples`` and for the k of the k-distance curve.\n",
+    "    min_eps : float, default 5000\n",
+    "        Lower bound for DBSCAN ``eps``; also used when no knee is found.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    pandas.DataFrame\n",
+    "        Object-dtype frame with columns MEASUREMENT, EAN and StyleColor,\n",
+    "        one row per reported EPC.\n",
+    "    \"\"\"\n",
+    "    columns = ['MEASUREMENT', 'EAN', 'StyleColor']\n",
+    "    # Rows are gathered in a list and turned into a frame once at the end:\n",
+    "    # DataFrame.append copied the frame on every call and no longer exists in pandas 2.\n",
+    "    found = []\n",
+    "\n",
+    "    for style_color in x.StyleColor.unique():\n",
+    "        style_rows = x[x.StyleColor == style_color]\n",
+    "\n",
+    "        for measurement in style_rows.MEASUREMENT.unique():\n",
+    "            sample = style_rows[style_rows.MEASUREMENT == measurement]\n",
+    "            epc_counts = sample['EPC'].value_counts()\n",
+    "            if len(sample) < 2 or epc_counts.empty:\n",
+    "                # The k-distance query asks for at least 2 neighbours, so a\n",
+    "                # one-row slice cannot be analysed (it used to raise).\n",
+    "                continue\n",
+    "\n",
+    "            min_samples = min(int(max_min_samples), int(epc_counts.min() // 2))\n",
+    "            neighbors = max(2, min_samples)\n",
+    "\n",
+    "            # Column order does not affect Euclidean distances, so one array\n",
+    "            # serves both the neighbour search and DBSCAN.\n",
+    "            features = np.asarray(sample[['SUM_PROXIMITY_KMS', 'TIME_MS']])\n",
+    "\n",
+    "            knn = NearestNeighbors(n_neighbors=neighbors).fit(features)\n",
+    "            distances, _ = knn.kneighbors(features)\n",
+    "            k_distances = sorted(distances[:, neighbors - 1], reverse=True)\n",
+    "\n",
+    "            kneedle = KneeLocator(range(1, len(k_distances) + 1),  # x values\n",
+    "                                  k_distances,  # y values\n",
+    "                                  S=1.0,  # parameter suggested from paper\n",
+    "                                  curve='convex',  # parameter from figure\n",
+    "                                  direction='decreasing')  # parameter from figure\n",
+    "\n",
+    "            # knee_y is None when the curve has no knee; fall back to the floor.\n",
+    "            knee_y = kneedle.knee_y\n",
+    "            eps = min_eps if knee_y is None else max(min_eps, knee_y / 2)\n",
+    "\n",
+    "            # min_samples must be a positive integer for current scikit-learn.\n",
+    "            # Every point is its own neighbour, so 1 labels points exactly as 0 did.\n",
+    "            dbscan = DBSCAN(eps=eps, min_samples=max(1, min_samples), algorithm='kd_tree')\n",
+    "            labels = dbscan.fit_predict(features)  # single fit; the extra fit() was redundant\n",
+    "\n",
+    "            noise_epcs = sample.loc[labels == -1, 'EPC'].value_counts()\n",
+    "            for epc, noise_readings in noise_epcs.items():\n",
+    "                # Compare against the number of readings of this EPC in the slice.\n",
+    "                # The old code took the non-null count of whichever column came first.\n",
+    "                if noise_readings > 0.5 * epc_counts[epc]:\n",
+    "                    found.append({'MEASUREMENT': measurement,\n",
+    "                                  'EAN': sample.loc[sample['EPC'] == epc, 'EAN'].iloc[0],\n",
+    "                                  'StyleColor': style_color})\n",
+    "\n",
+    "    # dtype=object matches what the append-based version used to return.\n",
+    "    return pd.DataFrame(found, columns=columns, dtype=object)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>28</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          28  5902805820447  RH797-81X"
+      ]
+     },
+     "execution_count": 64,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c5(train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MEASUREMENT</th>\n",
+       "      <th>EAN</th>\n",
+       "      <th>StyleColor</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>29</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>32</td>\n",
+       "      <td>5902805820447</td>\n",
+       "      <td>RH797-81X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  MEASUREMENT            EAN StyleColor\n",
+       "0          29  5902805820447  RH797-81X\n",
+       "1          32  5902805820447  RH797-81X"
+      ]
+     },
+     "execution_count": 65,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c5(test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/df.csv b/df.csv
new file mode 100644
index 0000000000000000000000000000000000000000..5232a92fa78cdb90a1c586efb0e29f7f80f75f17
Binary files /dev/null and b/df.csv differ