diff --git a/caly_kod.ipynb b/caly_kod.ipynb index 1c16446d5270432e7c79aba0a89425d8eeff0b84..2997511c20dfe158d154009770a6c87287ee5d18 100644 --- a/caly_kod.ipynb +++ b/caly_kod.ipynb @@ -3,7 +3,6 @@ { "cell_type": "code", "execution_count": 1, - "id": "1d75aa04", "metadata": {}, "outputs": [], "source": [ @@ -20,13 +19,12 @@ { "cell_type": "code", "execution_count": 2, - "id": "18dbc1d5", "metadata": {}, "outputs": [], "source": [ "params = urllib.parse.quote_plus(\"DRIVER={ODBC Driver 17 for SQL Server};\"\n", - " \"SERVER=dbserver.mif.pg.gda.pl,1433;\"\n", - " #\"SERVER=127.0.0.1,1433;\"\n", + " #\"SERVER=dbserver.mif.pg.gda.pl,1433;\"\n", + " \"SERVER=127.0.0.1,1433;\"\n", " \"DATABASE=silkycoders;\"\n", " \"UID=LOGIN;\"\n", " \"PWD=HASLO\")\n", @@ -36,8 +34,7 @@ }, { "cell_type": "code", - "execution_count": 24, - "id": "15363396", + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -59,8 +56,7 @@ }, { "cell_type": "code", - "execution_count": 25, - "id": "0339aab0", + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -69,8 +65,7 @@ }, { "cell_type": "code", - "execution_count": 44, - "id": "25a42792", + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -98,13 +93,14 @@ " dt[\"TIME_KMS\"] = np.floor(dt.TIME_MS/1000) \n", " dt = dt.merge(dt.groupby(['EPC','TIME_KMS'])[\"PROXIMITY\"].max().reset_index(name=\"MAX_PROXIMITY_KMS\"), how=\"left\",\n", " on = ['EPC','TIME_KMS'])\n", + " dt = dt.merge(dt.groupby(['EPC','TIME_KMS'])[\"PROXIMITY\"].sum().reset_index(name=\"SUM_PROXIMITY_KMS\"), how=\"left\",\n", + " on = ['EPC','TIME_KMS'])\n", " return dt" ] }, { "cell_type": "code", - "execution_count": 45, - "id": "93baae83", + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -113,8 +109,7 @@ }, { "cell_type": "code", - "execution_count": 46, - "id": "4204fa64", + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -123,7 +118,7 @@ "text": [ "<class 'pandas.core.frame.DataFrame'>\n", "Int64Index: 502689 entries, 0 to 502688\n", - "Data columns (total 35 columns):\n", + "Data columns (total 37 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 EPC 502689 non-null object \n", @@ -158,11 +153,13 @@ " 29 TIME_MS 502689 non-null float64\n", " 30 TIME_PER_MEASUREMENT_MS 502689 non-null float64\n", " 31 NUMBER_OF_SIGNALS 502689 non-null int64 \n", - " 32 LENGTH_OF_MEASUREMENT 502689 non-null int64 \n", + " 32 LENGTH_OF_MEASUREMENT 502689 non-null float64\n", " 33 TIME_KMS 502689 non-null float64\n", - " 34 MAX_PROXIMITY_KMS 502689 non-null float64\n", - "dtypes: bool(1), float64(6), int32(4), int64(12), object(12)\n", - "memory usage: 127.0+ MB\n" + " 34 MAX_PROXIMITY_KMS_x 502689 non-null float64\n", + " 35 MAX_PROXIMITY_KMS_y 502689 non-null float64\n", + " 36 SUM_PROXIMITY_KMS 502689 non-null float64\n", + "dtypes: bool(1), float64(9), int32(4), int64(11), object(12)\n", + "memory usage: 134.7+ MB\n" ] } ], @@ -172,8 +169,7 @@ }, { "cell_type": "code", - "execution_count": 47, - "id": "05f9fc46", + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -208,8 +204,6 @@ " <th>Size</th>\n", " <th>SubclassID</th>\n", " <th>...</th>\n", - " <th>MIN</th>\n", - " <th>SEC</th>\n", " <th>MICROSEC</th>\n", " <th>MILISEC</th>\n", " <th>TIME_MS</th>\n", @@ -217,7 +211,9 @@ " <th>NUMBER_OF_SIGNALS</th>\n", " <th>LENGTH_OF_MEASUREMENT</th>\n", " <th>TIME_KMS</th>\n", - " <th>MAX_PROXIMITY_KMS</th>\n", + " <th>MAX_PROXIMITY_KMS_x</th>\n", + " <th>MAX_PROXIMITY_KMS_y</th>\n", + " <th>SUM_PROXIMITY_KMS</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", @@ -234,16 +230,16 @@ " <td>L</td>\n", " <td>86</td>\n", " <td>...</td>\n", - " <td>46</td>\n", - " <td>33</td>\n", " <td>735000</td>\n", " <td>35193735.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>9495</td>\n", - " <td>103956</td>\n", + " <td>103956.0</td>\n", " <td>0.0</td>\n", " <td>-66.8</td>\n", + " <td>-66.8</td>\n", + " <td>-274.6</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", @@ -258,16 +254,16 @@ " <td>XS</td>\n", " <td>630</td>\n", " <td>...</td>\n", - " <td>46</td>\n", - " <td>33</td>\n", " <td>747000</td>\n", " <td>35193747.0</td>\n", " <td>12.0</td>\n", " <td>12.0</td>\n", " <td>9495</td>\n", - " <td>103956</td>\n", + " <td>103956.0</td>\n", " <td>0.0</td>\n", " <td>-60.8</td>\n", + " <td>-60.8</td>\n", + " <td>-244.2</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", @@ -282,16 +278,16 @@ " <td>M</td>\n", " <td>11</td>\n", " <td>...</td>\n", - " <td>46</td>\n", - " <td>33</td>\n", " <td>751000</td>\n", " <td>35193751.0</td>\n", " <td>16.0</td>\n", " <td>16.0</td>\n", " <td>9495</td>\n", - " <td>103956</td>\n", + " <td>103956.0</td>\n", " <td>0.0</td>\n", " <td>-74.4</td>\n", + " <td>-74.4</td>\n", + " <td>-300.2</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", @@ -306,16 +302,16 @@ " <td>XL</td>\n", " <td>82</td>\n", " <td>...</td>\n", - " <td>46</td>\n", - " <td>33</td>\n", " <td>754000</td>\n", " <td>35193754.0</td>\n", " <td>19.0</td>\n", " <td>19.0</td>\n", " <td>9495</td>\n", - " <td>103956</td>\n", + " <td>103956.0</td>\n", " <td>0.0</td>\n", " <td>-78.0</td>\n", + " <td>-78.0</td>\n", + " <td>-318.9</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", @@ -330,20 +326,20 @@ " <td>38</td>\n", " <td>53</td>\n", " <td>...</td>\n", - " <td>46</td>\n", - " <td>33</td>\n", " <td>758000</td>\n", " <td>35193758.0</td>\n", " <td>23.0</td>\n", " <td>23.0</td>\n", " <td>9495</td>\n", - " <td>103956</td>\n", + " <td>103956.0</td>\n", " <td>0.0</td>\n", " <td>-71.1</td>\n", + " <td>-71.1</td>\n", + " <td>-214.1</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>5 rows Ă 35 columns</p>\n", + "<p>5 rows Ă 37 columns</p>\n", "</div>" ], "text/plain": [ @@ -354,43 +350,49 @@ "3 30356847541DA78000B5BA4D -78.0 2021-10-26T09:46:33.754 1 \n", "4 30356847542A2B0000B5B215 -71.1 2021-10-26T09:46:33.758 1 \n", "\n", - " ITEMID EAN StyleColorSize StyleColor Size SubclassID ... MIN \\\n", - "0 2127269 5902805533040 RH267-85J-L RH267-85J L 86 ... 46 \n", - "1 2217401 5902851445731 RS483-99X-XS RS483-99X XS 630 ... 46 \n", - "2 2227511 5902851547602 SB281-90M-M SB281-90M M 11 ... 46 \n", - "3 2104706 5902805303667 RJ369-87X-XL RJ369-87X XL 82 ... 46 \n", - "4 2117629 5902805431803 RM119-93X-38 RM119-93X 38 53 ... 46 \n", + " ITEMID EAN StyleColorSize StyleColor Size SubclassID ... \\\n", + "0 2127269 5902805533040 RH267-85J-L RH267-85J L 86 ... \n", + "1 2217401 5902851445731 RS483-99X-XS RS483-99X XS 630 ... \n", + "2 2227511 5902851547602 SB281-90M-M SB281-90M M 11 ... \n", + "3 2104706 5902805303667 RJ369-87X-XL RJ369-87X XL 82 ... \n", + "4 2117629 5902805431803 RM119-93X-38 RM119-93X 38 53 ... \n", + "\n", + " MICROSEC MILISEC TIME_MS TIME_PER_MEASUREMENT_MS NUMBER_OF_SIGNALS \\\n", + "0 735000 35193735.0 0.0 0.0 9495 \n", + "1 747000 35193747.0 12.0 12.0 9495 \n", + "2 751000 35193751.0 16.0 16.0 9495 \n", + "3 754000 35193754.0 19.0 19.0 9495 \n", + "4 758000 35193758.0 23.0 23.0 9495 \n", "\n", - " SEC MICROSEC MILISEC TIME_MS TIME_PER_MEASUREMENT_MS \\\n", - "0 33 735000 35193735.0 0.0 0.0 \n", - "1 33 747000 35193747.0 12.0 12.0 \n", - "2 33 751000 35193751.0 16.0 16.0 \n", - "3 33 754000 35193754.0 19.0 19.0 \n", - "4 33 758000 35193758.0 23.0 23.0 \n", + " LENGTH_OF_MEASUREMENT TIME_KMS MAX_PROXIMITY_KMS_x MAX_PROXIMITY_KMS_y \\\n", + "0 103956.0 0.0 -66.8 -66.8 \n", + "1 103956.0 0.0 -60.8 -60.8 \n", + "2 103956.0 0.0 -74.4 -74.4 \n", + "3 103956.0 0.0 -78.0 -78.0 \n", + "4 103956.0 0.0 -71.1 -71.1 \n", "\n", - " NUMBER_OF_SIGNALS LENGTH_OF_MEASUREMENT TIME_KMS MAX_PROXIMITY_KMS \n", - "0 9495 103956 0.0 -66.8 \n", - "1 9495 103956 0.0 -60.8 \n", - "2 9495 103956 0.0 -74.4 \n", - "3 9495 103956 0.0 -78.0 \n", - "4 9495 103956 0.0 -71.1 \n", + " SUM_PROXIMITY_KMS \n", + "0 -274.6 \n", + "1 -244.2 \n", + "2 -300.2 \n", + "3 -318.9 \n", + "4 -214.1 \n", "\n", - "[5 rows x 35 columns]" + "[5 rows x 37 columns]" ] }, - "execution_count": 47, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df.head()" + "df[df.MEASUREMENT == 1].head()" ] }, { "cell_type": "code", "execution_count": 88, - "id": "6b4cd796", "metadata": {}, "outputs": [ { @@ -614,7 +616,6 @@ { "cell_type": "code", "execution_count": 67, - "id": "362630f1", "metadata": {}, "outputs": [ { @@ -903,7 +904,6 @@ }, { "cell_type": "markdown", - "id": "b01e5c4c", "metadata": {}, "source": [ "## EDA" @@ -912,7 +912,6 @@ { "cell_type": "code", "execution_count": 125, - "id": "fb8764f0", "metadata": {}, "outputs": [ { @@ -936,7 +935,6 @@ { "cell_type": "code", "execution_count": 123, - "id": "5bd6b37a", "metadata": {}, "outputs": [ { @@ -961,7 +959,6 @@ { "cell_type": "code", "execution_count": 126, - "id": "f796d170", "metadata": {}, "outputs": [ { @@ -985,7 +982,6 @@ { "cell_type": "code", "execution_count": 127, - "id": "4fcb7ddf", "metadata": {}, "outputs": [ { @@ -1009,7 +1005,6 @@ { "cell_type": "code", "execution_count": 144, - "id": "4cbcf0eb", "metadata": {}, "outputs": [ { @@ -1038,7 +1033,6 @@ { "cell_type": "code", "execution_count": null, - "id": "3039f4ab", "metadata": {}, "outputs": [], "source": [] @@ -1060,7 +1054,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.8.3" } }, "nbformat": 4,