From 6b1bacd106e3229bc7ea2a762ee22a4238ab77f8 Mon Sep 17 00:00:00 2001
From: szpwski <szpwski@gmail.com>
Date: Mon, 3 Jan 2022 12:54:40 +0100
Subject: [PATCH] add sum of proximity

---
 caly_kod.ipynb | 136 +++++++++++++++++++++++--------------------------
 1 file changed, 65 insertions(+), 71 deletions(-)

diff --git a/caly_kod.ipynb b/caly_kod.ipynb
index 1c16446..2997511 100644
--- a/caly_kod.ipynb
+++ b/caly_kod.ipynb
@@ -3,7 +3,6 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "id": "1d75aa04",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -20,13 +19,12 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "id": "18dbc1d5",
    "metadata": {},
    "outputs": [],
    "source": [
     "params = urllib.parse.quote_plus(\"DRIVER={ODBC Driver 17 for SQL Server};\"\n",
-    "                                 \"SERVER=dbserver.mif.pg.gda.pl,1433;\"\n",
-    "                                 #\"SERVER=127.0.0.1,1433;\"\n",
+    "                                 #\"SERVER=dbserver.mif.pg.gda.pl,1433;\"\n",
+    "                                 \"SERVER=127.0.0.1,1433;\"\n",
     "                                 \"DATABASE=silkycoders;\"\n",
     "                                 \"UID=LOGIN;\"\n",
     "                                 \"PWD=HASLO\")\n",
@@ -36,8 +34,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
-   "id": "15363396",
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -59,8 +56,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
-   "id": "0339aab0",
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -69,8 +65,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
-   "id": "25a42792",
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -98,13 +93,14 @@
     "    dt[\"TIME_KMS\"] = np.floor(dt.TIME_MS/1000)    \n",
     "    dt = dt.merge(dt.groupby(['EPC','TIME_KMS'])[\"PROXIMITY\"].max().reset_index(name=\"MAX_PROXIMITY_KMS\"), how=\"left\",\n",
     "                                 on = ['EPC','TIME_KMS'])\n",
+    "    dt = dt.merge(dt.groupby(['EPC','TIME_KMS'])[\"PROXIMITY\"].sum().reset_index(name=\"SUM_PROXIMITY_KMS\"), how=\"left\",\n",
+    "                                 on = ['EPC','TIME_KMS'])\n",
     "    return dt"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
-   "id": "93baae83",
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -113,8 +109,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
-   "id": "4204fa64",
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -123,7 +118,7 @@
      "text": [
       "<class 'pandas.core.frame.DataFrame'>\n",
       "Int64Index: 502689 entries, 0 to 502688\n",
-      "Data columns (total 35 columns):\n",
+      "Data columns (total 37 columns):\n",
       " #   Column                   Non-Null Count   Dtype  \n",
       "---  ------                   --------------   -----  \n",
       " 0   EPC                      502689 non-null  object \n",
@@ -158,11 +153,13 @@
       " 29  TIME_MS                  502689 non-null  float64\n",
       " 30  TIME_PER_MEASUREMENT_MS  502689 non-null  float64\n",
       " 31  NUMBER_OF_SIGNALS        502689 non-null  int64  \n",
-      " 32  LENGTH_OF_MEASUREMENT    502689 non-null  int64  \n",
+      " 32  LENGTH_OF_MEASUREMENT    502689 non-null  float64\n",
       " 33  TIME_KMS                 502689 non-null  float64\n",
-      " 34  MAX_PROXIMITY_KMS        502689 non-null  float64\n",
-      "dtypes: bool(1), float64(6), int32(4), int64(12), object(12)\n",
-      "memory usage: 127.0+ MB\n"
+      " 34  MAX_PROXIMITY_KMS_x      502689 non-null  float64\n",
+      " 35  MAX_PROXIMITY_KMS_y      502689 non-null  float64\n",
+      " 36  SUM_PROXIMITY_KMS        502689 non-null  float64\n",
+      "dtypes: bool(1), float64(9), int32(4), int64(11), object(12)\n",
+      "memory usage: 134.7+ MB\n"
      ]
     }
    ],
@@ -172,8 +169,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
-   "id": "05f9fc46",
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -208,8 +204,6 @@
        "      <th>Size</th>\n",
        "      <th>SubclassID</th>\n",
        "      <th>...</th>\n",
-       "      <th>MIN</th>\n",
-       "      <th>SEC</th>\n",
        "      <th>MICROSEC</th>\n",
        "      <th>MILISEC</th>\n",
        "      <th>TIME_MS</th>\n",
@@ -217,7 +211,9 @@
        "      <th>NUMBER_OF_SIGNALS</th>\n",
        "      <th>LENGTH_OF_MEASUREMENT</th>\n",
        "      <th>TIME_KMS</th>\n",
-       "      <th>MAX_PROXIMITY_KMS</th>\n",
+       "      <th>MAX_PROXIMITY_KMS_x</th>\n",
+       "      <th>MAX_PROXIMITY_KMS_y</th>\n",
+       "      <th>SUM_PROXIMITY_KMS</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -234,16 +230,16 @@
        "      <td>L</td>\n",
        "      <td>86</td>\n",
        "      <td>...</td>\n",
-       "      <td>46</td>\n",
-       "      <td>33</td>\n",
        "      <td>735000</td>\n",
        "      <td>35193735.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>9495</td>\n",
-       "      <td>103956</td>\n",
+       "      <td>103956.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>-66.8</td>\n",
+       "      <td>-66.8</td>\n",
+       "      <td>-274.6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -258,16 +254,16 @@
        "      <td>XS</td>\n",
        "      <td>630</td>\n",
        "      <td>...</td>\n",
-       "      <td>46</td>\n",
-       "      <td>33</td>\n",
        "      <td>747000</td>\n",
        "      <td>35193747.0</td>\n",
        "      <td>12.0</td>\n",
        "      <td>12.0</td>\n",
        "      <td>9495</td>\n",
-       "      <td>103956</td>\n",
+       "      <td>103956.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>-60.8</td>\n",
+       "      <td>-60.8</td>\n",
+       "      <td>-244.2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -282,16 +278,16 @@
        "      <td>M</td>\n",
        "      <td>11</td>\n",
        "      <td>...</td>\n",
-       "      <td>46</td>\n",
-       "      <td>33</td>\n",
        "      <td>751000</td>\n",
        "      <td>35193751.0</td>\n",
        "      <td>16.0</td>\n",
        "      <td>16.0</td>\n",
        "      <td>9495</td>\n",
-       "      <td>103956</td>\n",
+       "      <td>103956.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>-74.4</td>\n",
+       "      <td>-74.4</td>\n",
+       "      <td>-300.2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -306,16 +302,16 @@
        "      <td>XL</td>\n",
        "      <td>82</td>\n",
        "      <td>...</td>\n",
-       "      <td>46</td>\n",
-       "      <td>33</td>\n",
        "      <td>754000</td>\n",
        "      <td>35193754.0</td>\n",
        "      <td>19.0</td>\n",
        "      <td>19.0</td>\n",
        "      <td>9495</td>\n",
-       "      <td>103956</td>\n",
+       "      <td>103956.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>-78.0</td>\n",
+       "      <td>-78.0</td>\n",
+       "      <td>-318.9</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -330,20 +326,20 @@
        "      <td>38</td>\n",
        "      <td>53</td>\n",
        "      <td>...</td>\n",
-       "      <td>46</td>\n",
-       "      <td>33</td>\n",
        "      <td>758000</td>\n",
        "      <td>35193758.0</td>\n",
        "      <td>23.0</td>\n",
        "      <td>23.0</td>\n",
        "      <td>9495</td>\n",
-       "      <td>103956</td>\n",
+       "      <td>103956.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>-71.1</td>\n",
+       "      <td>-71.1</td>\n",
+       "      <td>-214.1</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>5 rows × 35 columns</p>\n",
+       "<p>5 rows × 37 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
@@ -354,43 +350,49 @@
        "3  30356847541DA78000B5BA4D      -78.0  2021-10-26T09:46:33.754            1   \n",
        "4  30356847542A2B0000B5B215      -71.1  2021-10-26T09:46:33.758            1   \n",
        "\n",
-       "    ITEMID            EAN StyleColorSize StyleColor Size  SubclassID  ... MIN  \\\n",
-       "0  2127269  5902805533040    RH267-85J-L  RH267-85J    L          86  ...  46   \n",
-       "1  2217401  5902851445731   RS483-99X-XS  RS483-99X   XS         630  ...  46   \n",
-       "2  2227511  5902851547602    SB281-90M-M  SB281-90M    M          11  ...  46   \n",
-       "3  2104706  5902805303667   RJ369-87X-XL  RJ369-87X   XL          82  ...  46   \n",
-       "4  2117629  5902805431803   RM119-93X-38  RM119-93X   38          53  ...  46   \n",
+       "    ITEMID            EAN StyleColorSize StyleColor Size  SubclassID  ...  \\\n",
+       "0  2127269  5902805533040    RH267-85J-L  RH267-85J    L          86  ...   \n",
+       "1  2217401  5902851445731   RS483-99X-XS  RS483-99X   XS         630  ...   \n",
+       "2  2227511  5902851547602    SB281-90M-M  SB281-90M    M          11  ...   \n",
+       "3  2104706  5902805303667   RJ369-87X-XL  RJ369-87X   XL          82  ...   \n",
+       "4  2117629  5902805431803   RM119-93X-38  RM119-93X   38          53  ...   \n",
+       "\n",
+       "  MICROSEC     MILISEC  TIME_MS TIME_PER_MEASUREMENT_MS  NUMBER_OF_SIGNALS  \\\n",
+       "0   735000  35193735.0      0.0                     0.0               9495   \n",
+       "1   747000  35193747.0     12.0                    12.0               9495   \n",
+       "2   751000  35193751.0     16.0                    16.0               9495   \n",
+       "3   754000  35193754.0     19.0                    19.0               9495   \n",
+       "4   758000  35193758.0     23.0                    23.0               9495   \n",
        "\n",
-       "  SEC  MICROSEC     MILISEC  TIME_MS  TIME_PER_MEASUREMENT_MS  \\\n",
-       "0  33    735000  35193735.0      0.0                      0.0   \n",
-       "1  33    747000  35193747.0     12.0                     12.0   \n",
-       "2  33    751000  35193751.0     16.0                     16.0   \n",
-       "3  33    754000  35193754.0     19.0                     19.0   \n",
-       "4  33    758000  35193758.0     23.0                     23.0   \n",
+       "   LENGTH_OF_MEASUREMENT TIME_KMS  MAX_PROXIMITY_KMS_x  MAX_PROXIMITY_KMS_y  \\\n",
+       "0               103956.0      0.0                -66.8                -66.8   \n",
+       "1               103956.0      0.0                -60.8                -60.8   \n",
+       "2               103956.0      0.0                -74.4                -74.4   \n",
+       "3               103956.0      0.0                -78.0                -78.0   \n",
+       "4               103956.0      0.0                -71.1                -71.1   \n",
        "\n",
-       "  NUMBER_OF_SIGNALS  LENGTH_OF_MEASUREMENT  TIME_KMS MAX_PROXIMITY_KMS  \n",
-       "0              9495                 103956       0.0             -66.8  \n",
-       "1              9495                 103956       0.0             -60.8  \n",
-       "2              9495                 103956       0.0             -74.4  \n",
-       "3              9495                 103956       0.0             -78.0  \n",
-       "4              9495                 103956       0.0             -71.1  \n",
+       "  SUM_PROXIMITY_KMS  \n",
+       "0            -274.6  \n",
+       "1            -244.2  \n",
+       "2            -300.2  \n",
+       "3            -318.9  \n",
+       "4            -214.1  \n",
        "\n",
-       "[5 rows x 35 columns]"
+       "[5 rows x 37 columns]"
       ]
      },
-     "execution_count": 47,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "df.head()"
+    "df[df.MEASUREMENT == 1].head()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 88,
-   "id": "6b4cd796",
    "metadata": {},
    "outputs": [
     {
@@ -614,7 +616,6 @@
   {
    "cell_type": "code",
    "execution_count": 67,
-   "id": "362630f1",
    "metadata": {},
    "outputs": [
     {
@@ -903,7 +904,6 @@
   },
   {
    "cell_type": "markdown",
-   "id": "b01e5c4c",
    "metadata": {},
    "source": [
     "## EDA"
@@ -912,7 +912,6 @@
   {
    "cell_type": "code",
    "execution_count": 125,
-   "id": "fb8764f0",
    "metadata": {},
    "outputs": [
     {
@@ -936,7 +935,6 @@
   {
    "cell_type": "code",
    "execution_count": 123,
-   "id": "5bd6b37a",
    "metadata": {},
    "outputs": [
     {
@@ -961,7 +959,6 @@
   {
    "cell_type": "code",
    "execution_count": 126,
-   "id": "f796d170",
    "metadata": {},
    "outputs": [
     {
@@ -985,7 +982,6 @@
   {
    "cell_type": "code",
    "execution_count": 127,
-   "id": "4fcb7ddf",
    "metadata": {},
    "outputs": [
     {
@@ -1009,7 +1005,6 @@
   {
    "cell_type": "code",
    "execution_count": 144,
-   "id": "4cbcf0eb",
    "metadata": {},
    "outputs": [
     {
@@ -1038,7 +1033,6 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "3039f4ab",
    "metadata": {},
    "outputs": [],
    "source": []
@@ -1060,7 +1054,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.8"
+   "version": "3.8.3"
   }
  },
  "nbformat": 4,
-- 
GitLab