{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Labelling data and comparing classification algorithms" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "from sklearn import model_selection, preprocessing\n", "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import (\n", " accuracy_score,\n", " classification_report,\n", " confusion_matrix,\n", ")\n", "from sklearn.naive_bayes import GaussianNB\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.neural_network import MLPClassifier\n", "from sklearn.tree import DecisionTreeClassifier" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# import data\n", "df = pd.read_csv(\"data/SCADA_downtime_merged.csv\", skip_blank_lines=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# list of turbines to plot\n", "list1 = [1]\n", "# list1 = list(df['turbine_id'].unique())\n", "# sort turbines in ascending order\n", "# list1 = sorted(list1, key=int)\n", "# list of categories to plot\n", "list2 = [10, 11]\n", "# list2 = list(df['TurbineCategory_id'].unique())\n", "# list2 = [g for g in list2 if g >= 0]\n", "# sort categories in ascending order\n", "# list2 = sorted(list2, key=int)\n", "# categories to remove from plot\n", "# list2 = [m for m in list2 if m not in (1, 12, 13, 14, 15, 17, 21, 22)]" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Results for Turbine 1 with Turbine Category 11\n", "LR: 0.682084 (0.006069)\n", "LDA: 0.765452 (0.005270)\n", "KNN5: 0.889947 (0.002742)\n", "KNN15: 0.888914 (0.003590)\n", "DT: 0.851390 (0.003076)\n", "RF: 0.899010 (0.002075)\n", "NB: 0.683200 (0.006298)\n", "MLP: 0.890631 (0.004285)\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEVCAYAAADn6Y5lAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAGGhJREFUeJzt3XuUZWV95vHvk+bmBdquoUUFhsZItBkvnaTCrIlGJY4KiYrGjNKSEVg4hFkgLjSJRJyhidOJM47xEnAIIwSNtmhUDMwYIQ5EB6Mj1aYFGkTbFqVBY2O3tsitu/nNH2cXHoq6nGpO1alT+/tZ66w6e+/33fu3z+U5u969q06qCklSe/zCoAuQJM0vg1+SWsbgl6SWMfglqWUMfklqGYNfklrG4NesJLk0yX+Zo3WfkOTqaZa/MMmWudj2sEvytiQfHHQdGg4GvyaV5B+SbE+y73xts6o+WlUv6aqhkjxtvrafjjOT3JTkZ0m2JPmbJM+arxr2VFX9aVW9YdB1aDgY/HqEJCuA3wAKeMU8bXOv+djODN4HvAk4ExgBfgn4DPDbgyxqJgvksdMQMfg1mdcDXwEuBU6crmGSP0ry/SR3JnlD91F6kqVJPpxka5LvJnl7kl9olp2U5EtJ3pPkR8CaZt51zfIvNpv4epK7k7y2a5tvSfLDZrsnd82/NMkHkvxd0+dLSZ6U5L3Nby/fSPLLU+zHEcDpwOqquqaq7q+qe5rfQt45y/35cZLNSX69mX97U++JE2q9MMnfJ/lpki8kOaxr+fuafjuSrE/yG13L1iT5ZJKPJNkBnNTM+0izfL9m2Y+aWq5PclCz7ClJrkiyLcmmJP9hwno/0ezjT5NsTDI63fOv4WTwazKvBz7a3F46HhoTJTkGeDPwb4GnAS+c0OQvgKXAU4EXNOs9uWv5vwY2AwcBa7s7VtXzm7vPqarHV9XHm+knNes8GDgFuCDJsq6urwHeDhwI3A98GfhaM/1J4M+n2OcXAVuq6qtTLO91f24A/gWwDrgM+DU6j83vAecneXxX+xOAdzS1baDzeI+7HlhF5zePdcDfJNmva/lxzf48YUI/6HxYLwUObWo5Dbi3WXYZsAV4CvC7wJ8m+c2uvq9o2jwBuAI4f5rHQ0PK4NfDJHkecBjwiapaD3wbeN0UzV8D/FVVbayqe4A1XetZAhwP/HFV/bSqbgPeDfz7rv53VtVfVNWuqrqX3uwE/qSqdlbVZ4G7gad3Lb+8qtZX1X3A5cB9VfXhqtoNfByY9IifTkB+f6qN9rg/36mqv+ra1qFNrfdX1dXAA3Q+BMb976r6YlXdD5wD/JskhwJU1Ueq6kfNY/NuYN8J+/nlqvpMVT04yWO3s9mfp1XV7ubx2NGs+7nAW6vqvqraAHyQzgfYuOuq6rPNPvw18JypHhMNL4NfE50IXF1VdzXT65h6uOcpwO1d0933DwT2Br7bNe+7dI7UJ2vfqx9V1a6u6XuA7qPof+66f+8k091tH7Ze4MnTbLeX/Zm4Lapquu0/tP9VdTewjc5jSpI/SHJLkp8k+TGdI/gDJ+s7ib8GrgIua4bg/luSvZt1b6uqn06zDz/oun8PsJ/nEBYfg18PSfIYOkfxL0jygyQ/AM4CnpNksiO/7wOHdE0f2nX/LjpHnod1zfuXwB1d0wvpX8P+H+CQaca0e9mf2Xro8WqGgEaAO5vx/D+i81wsq6onAD8B0tV3yseu+W3ovKo6Evh14GV0jurvBEaS7N/HfdAQMvjV7ZXAbuBIOuPLq4CVwP/l4cMB4z4BnJxkZZLHAv9pfEEzVPAJYG2S/ZsTl28GPjKLev6Zznj6nKuqbwEfAD6Wzt8L7NOcJD0+ydl92p+JfivJ85LsQ2es/ytVdTuwP7AL2ArsleQ/Awf0utIkRyd5VjM8tYPOB9aDzbr/EfizZt+eTec8yaPZBw0hg1/dTqQzZv+9qvrB+I3OCb4TJv7KX1V/B7wfuBbYROdKIOicVAV4I/AzOidwr6MzbHTJLOpZA3youTLlNXu4T7NxJp19vQD4MZ3zG68CrmyWP9r9mWgdcC6dIZ5fpXMCGDrDNJ8DvklnKOY+Zjcs9iQ6J353ALcAX6Az/AOwGlhB5+j/cuDcqvr8o9gHDaH4RSzqlyQrgZuAfSeMw2uCJJfSuYro7YOuRe3jEb8elSSvSrJvc0nlfwWuNPSlhc3g16P1+8AP6QyL7Ab+42DLkTQTh3okqWU84pekljH4JallDH5JahmDX5JaxuCXpJYx+CWpZQx+SWoZg1+SWsbgl6SWMfglqWUMfklqGYNfklrG4JekljH4Jall9pq5yfw78MADa8WKFYMuQ5KGxvr16++qquW9tF2Qwb9ixQrGxsYGXYYkDY0k3+21rUM9ktQyBr8ktYzBL0ktY/BLUssY/JLUMga/JLWMwS9JLdNT8Cc5JsmtSTYlOXuS5cuSXJ7khiRfTfLMXvtKkubXjMGfZAlwAXAscCSwOsmRE5q9DdhQVc8GXg+8bxZ9JS0wSXq+afj0csR/FLCpqjZX1QPAZcBxE9ocCVwDUFXfAFYkOajHvpIWmKp6xG26+RouvQT/wcDtXdNbmnndvg78DkCSo4DDgEN67EvT79QkY0nGtm7d2lv1kh6VkZGRWR3Z99JuZGRkwHulmfTrf/W8E3hfkg3AjcA/Abtns4Kqugi4CGB0dNTDCGkebDtzN3BAn9c6q7e+BqCX4L8DOLRr+pBm3kOqagdwMkA6hwbfATYDj5mpr6TByXk7+j5ck4Ra09dVDrXZngeZj+GzXoL/euCIJIfTCe3jgdd1N0jyBOCeZhz/DcAXq2pHkhn7Shqsfp+gXbZsWV/XN+wmC/IkAz0/MmPwV9WuJGcAVwFLgEuqamOS05rlFwIrgQ8lKWAjcMp0fedmVyTN1lThM5sPA0/wDp8sxCdtdHS0/H/8C8OwBMCw1CnB3BzxJ1lfVaO9tF2QX8SihWMh/po6mWGpU1oIDH49ZGRkhO3bt/fUttcj7GXLlrFt27ZHU5Y0PNYs7alZnXtAz2076/3JHhY0OYNfD9m+ffucXOHRb35AaaEalqukDH49ZNZHIb2us8+89lx6dDy5q4fMxdH5XBxJz9GJMc8H6FEb5GvTk7vaI72+YBdCSHrtuRaqYXhtGvya1lQv4snmz9eHwTB9QKldhuW1afBrWsMSnAvxA0paqPwGLi0K4/8ieN26dRx++OFcc801PPDAA1xzzTUcfvjhrFu3zn8jLDUMfi0qa9eu5eKLL+boo49m77335uijj+biiy9m7dq1gy5NWjC8qkeLypIlS7jvvvvYe++9H5q3c+dO9ttvP3bv9pJNzb/5+u+cs7mqxyN+LSorV67kuuuue9i86667jpUrVw6oIrXdZN9aNt1tPhj8WlTOOeccTjnlFK699lp27tzJtddeyymnnMI555wz6NKkBcOrerSorF69GoA3vvGN3HLLLaxcuZK1a9c+NF+SY/yStCg4xi9JmpLBL0ktY/BLUssY/JLUMga/JLWMwS9JLWPwS1LLGPyS1DIGvyS1jMEvSS1j8EtSyxj8ktQyBr8ktYzBL0ktY/BLUssY/JLUMn4DlzSPZvPF2wvxS5K0OBj80jyaLMyTGPKaVw71SFLL9BT8SY5JcmuSTUnOnmT50iRXJvl6ko1JTu5adluSG5NsSOIX6UrSgM041JNkCXAB8GJgC3B9kiuq6uauZqcDN1fVy5MsB25N8tGqeqBZfnRV3dXv4qUFa83SnpvWuQf03n7NT/awIOnnehnjPwrYVFWbAZJcBhwHdAd/Afunc+bq8cA2YFefa5WGRs7b0fdx+yTUmr6uUi3Vy1DPwcDtXdNbmnndzgdWAncCNwJvqqoHm2UFfD7J+iSnTrWRJKcmGUsytnXr1p53QJI0O/06uftSYAPwFGAVcH6SA5plz6uqVcCxwOlJnj/ZCqrqoqoararR5cuX96ksaXCS9PW2bNmyQe+SFolegv8O4NCu6UOaed1OBj5dHZuA7wDPAKiqO5qfPwQupzN0JC1qVdXzrdf227ZtG/BeabHoJfivB45IcniSfYDjgSsmtPke8CKAJAcBTwc2J3lckv2b+Y8DXgLc1K/ipWEz2ZH8dPOluTDjyd2q2pXkDOAqYAlwSVVtTHJas/xC4B3ApUluBAK8taruSvJU4PLmRbwXsK6qPjdH+yIteP6hlhaCLMQX4ujoaI2Necm/JPUqyfqqGu2lrX+5K0ktY/BLUssY/JLUMga/JLWMwS9JLWPwS1LLGPyS1DIGvyS1jMEvSS1j8EtSyxj8ktQyBr8ktYzBL0ktY/BLUssY/JLUMga/JLWMwS9JLWPwS1LLGPyS1DIGvyS1jMEvSS1j8EtSyxj8ktQyBr8ktYzBL0ktY/BLUssY/JLUMga/JLWMwS9JLWPwS1LLGPyS1DIGvyS1jMEvSS1j8EtSy/QU/EmOSXJrkk1Jzp5k+dIkVyb5epKNSU7uta8kaX7NGPxJlgAXAMcCRwKrkxw5odnpwM1V9RzghcC7k+zTY19J0jzq5Yj/KGBTVW2uqgeAy4DjJrQpYP8kAR4PbAN29dhXkjSPegn+g4Hbu6a3NPO6nQ+sBO4EbgTeVFUP9tgXgCSnJhlLMrZ169Yey5ckzVa/Tu6+FNgAPAVYBZyf5IDZrKCqLqqq0aoaXb58eZ/KkiRN1Evw3wEc2jV9SDOv28nAp6tjE/Ad4Bk99pUkzaNegv964IgkhyfZBzgeuGJCm+8BLwJIchDwdGBzj30lSfNor5kaVNWuJGcAVwFLgEuqamOS05rlFwLvAC5NciMQ4K1VdRfAZH3nZlckSb1IVQ26hkcYHR2tsbGxQZchSUMjyfqqGu2lrX+5K0ktY/BLUssY/JLUMga/JLWMwS9JLWPwS1LLGPyS1DIGvyS1jMEvSS1j8EtSyxj8ktQyBr8ktYzBL0ktY/BLUssY/JLUMga/JLWMwS9JLTPjVy9qbiSZVfuF+E1pkoaTwT8gkwV5EgNe0pxzqEeSWsbgnwcjIyMkmfEG9NQuCSMjIwPeK0nDyqGeebB9+/a+D+HM9hyBJI3ziF+SWsbgl6SWcahnHtS5B8Capf1fpyTtAYN/HuS8HXMyxl9r+rpKSS1h8M+Tfp+MXbZsWV/XJ6k9DP550OvRvn/AJWk+eHJXklrG4JekljH4JallDH5JahmDX5Jaxqt6BmSqyzunmu/VPpL6pacj/iTHJLk1yaYkZ0+y/A+TbGhuNyXZnWSkWXZbkhubZWP93oFhVVWzuklSv8x4xJ9kCXAB8GJgC3B9kiuq6ubxNlX1LuBdTfuXA2dV1bau1RxdVXf1tXJJ0h7p5Yj/KGBTVW2uqgeAy4Djpmm/GvhYP4qTJPVfL8F/MHB71/SWZt4jJHkscAzwqa7ZBXw+yfokp061kSSnJhlLMrZ169YeypIk7Yl+X9XzcuBLE4Z5nldVq4BjgdOTPH+yjlV1UVWNVtXo8uXL+1yWJGlcL8F/B3Bo1/QhzbzJHM+EYZ6quqP5+UPgcjpDR5KkAekl+K8HjkhyeJJ96IT7FRMbJVkKvAD42655j0uy//h94CXATf0oXJK0Z2a8qqeqdiU5A7gKWAJcUlUbk5zWLL+wafoq4Oqq+llX94OAy5tr0/cC1lXV5/q5A5Kk2clCvEZ8dHS0xsa85F+SepVkfVWN9tLWf9kgSS1j8EtSyxj8ktQyBr8ktYzBL0ktY/BLUssY/JLUMga/JLWMwS9JLWPwS1LLGPyS1DIGvyS1jMEvSS1j8EtSyxj8ktQyBr8ktYzBL0ktY/BLUssY/JLUMga/JLWMwS9JLWPwS1LLGPyS1DIGvyS1jMEvSS1j8EtSyxj8ktQyBr8ktYzBL0ktY/BLUssY/JLUMga/JLWMwS9JLWPwS1LL9BT8SY5JcmuSTUnOnmT5HybZ0NxuSrI7yUgvfSVJ82vG4E+yBLgAOBY4Elid5MjuNlX1rqpaVVWrgD8GvlBV23rpK0maX70c8R8FbKqqzVX1AHAZcNw07VcDH9vDvpKkOdZL8B8M3N41vaWZ9whJHgscA3xqD/qemmQsydjWrVt7KEuStCf6fXL35cCXqmrbbDtW1UVVNVpVo8uXL+9zWZKkcb0E/x3AoV3ThzTzJnM8Px/mmW1fSdI86CX4rweOSHJ4kn3ohPsVExslWQq8APjb2faVJM2fvWZqUFW7kpwBXAUsAS6pqo1JTmuWX9g0fRVwdVX9bKa+/d4JSVLvUlWDruERRkdHa2xsbI/6JplV+4W4/5Jm5nv94ZKsr6rRXtrOeMQ/bKZ6cpMs+ideahPf63vOf9kgSS1j8EtSyxj8ktQyBr8ktYzBL2nBGxkZIUlPN6CndiMjIwPeq8FZdFf1SFp8tm/f3vcrdWZ7OehiMtRH/B4FSNLsDfURv0cBUjvUuQfAmqX9X2dLDXXw+2KQ2iHn7ZiTg7xa09dVDo2hDn5fDJI0e0M9xi9Jmr2hPuKX1B79Pv+2bNmyvq5vmAx98PtikBa/2Qzp+k/aZjbUwe+LQZJmzzF+SWoZg1+SWsbgl6SWGeox/slMd7J3smWO+0vDyff6nlt0we+TK7WD7/U951CPJLWMwS9JLWPwS1LLGPyS1DIGvyS1jMEvSS1j8EtSyxj8ktQyWYh/BJFkK/DdPq/2QOCuPq+z34ahRrDOfrPO/hqGOueixsOqankvDRdk8M+FJGNVNTroOqYzDDWCdfabdfbXMNQ56Bod6pGkljH4Jall2hT8Fw26gB4MQ41gnf1mnf01DHUOtMbWjPFLkjradMQvSWIRBn+SuyeZtybJHUk2JLk5yeoFWNe3knw6yZET2hyYZGeS0+a6riS/leSbSQ5rarsnyROnaFtJ3t01/QdJ1jT3T0qytdmvDUnesIBqe36SryXZleR3J2xvd1fNV+xJzTPsz/j6Nyb5epK3JPmFJC/t2u7dSW5t7n+43zXMss6bklyZ5AnN/BVJ7u2qdUOSfQZU43TPcff76htJ/keSecu6praPdE3v1bwf/lczfVKS8yfpd1uSG5PckOTqJE+aqxoXXfBP4z1VtQo4DvjLJHsPuqDGe6pqVVUdAXwcuCZJ97W4/w74CjCnH1ZJXgS8Hzi2qsb/huIu4C1TdLkf+J0kB06x/OPNfq2qqg8uoNq+B5wErJtk2b1dNb/i0dQ8hfH1/yvgxcCxwLlVddX4doEx4IRm+vVzUMNs6nwmsA04vWvZt7seo1VV9cCAapzp9Tf+fj8SeBbwgnmrDH4GPDPJY5rpFwN39Nj36Kp6Np3XwdvmojhoV/ADUFXfAu4Blg26lomq6uPA1cDrumavphNwByc5ZC62m+T5wP8EXlZV3+5adAnw2iQjk3TbRecE1VlzUdNc1VZVt1XVDcCDc1Fvr6rqh8CpwBmZ7jsEB+/LwMGDLmISvb7+9gH2A7bPeUUP91ngt5v7q4GPzbL/F4Gn9bWiLq0L/iS/AnyreeMtRF8DngGQ5FDgyVX1VeATwGvnYHv7Ap8BXllV35iw7G46AfumKfpeAJyQZOkky17d/Nr6yWY/FlJtU9mvGQb6SpJXzr7c2amqzcAS4IkztR2EJEuAFwHdw16/2DXMc8GAShs33XN8VpINwPeBb1bVhvktjcuA45PsBzwb+H+z7P8y4Ma+V9VoU/CflWQjnSdg7aCLmUb30d9r6QQ+dF5IczHcsxP4R+CUKZa/Hzgxyf4TF1TVDuDDwJkTFl0JrKiqZwF/D3xoAdU2ncOq6lfo/Mb13iS/OMt6F4vHNKH5A+AgOs/huO6hntMn7z4/ZniOx4d6ngg8Lsnx81zbDcAKOu/Zz86i67XNY38A8GdzUBrQruB/TzO2+mrg4uaTeCH6ZeCW5v5q4KQkt9E56np2kiP6vL0HgdcARyV5xJhiVf2Yznj4VG/y99IJ5sd19flRVd3fTH4Q+NWFUtt0quqO5udm4B/oPBdzJslTgd3AQvvt894mNA+jcyAy0ICfwbTPcVXtBD4HPH8+i2pcAfx3ZjfMc/T4+Z3m9T0n2hT8AFTVFXROnJw46FomSvJq4CXAx5L8EvD4qjq4qlZU1Qo6RwB9P+qvqnvojEeekGSyo+s/B34f2GuSvtvo/FbyUL8kT+5q8gp+/kE28NqmkmRZkn2b+wcCzwVu3tO6e9jecuBC4PxaoH9M0zz2ZwJvSfKIx3chmOk5bs6fPBf49mTL59glwHlVNWdDNntqMQb/Y5Ns6bq9eZI2fwK8eT4v8ZqmrrOa8dJvAb8H/GZVbaUT8JdPWMenmKOre5o30DHA25O8YsKyu5pa9p2i+7vp/LfBcWeOX7JIJzhOWii1Jfm1JFvoXC31l83wH8BKYKyp+VrgnVXV7+B/zPjlnMDn6ZzIP6/P2+irqvon4Abm+KqyR2ni6w9+PsZ/E53zKB+Y76KqaktVvX+KxSdNyIM5uXBjKv7lriS1zGI84pckTcPgl6SWMfglqWUMfklqGYNfklrG4JekljH4JallDH5Japn/D5vj9lbqJ3FZAAAAAElFTkSuQmCC", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "for x in list1:\n", " # filter only data for turbine x\n", " dfx = df[(df[\"turbine_id\"] == x)].copy()\n", "\n", " for y in list2:\n", " # copying fault to new column (mins) (fault when turbine category id\n", " # is y)\n", " def f(c):\n", " if c[\"TurbineCategory_id\"] == y:\n", " return 0\n", " else:\n", " return 1\n", "\n", " dfx[\"mins\"] = dfx.apply(f, axis=1)\n", "\n", " # sort values by timestamp in descending order\n", " dfx = dfx.sort_values(by=\"timestamp\", ascending=False)\n", "\n", " # reset index\n", " dfx.reset_index(drop=True, inplace=True)\n", "\n", " # assigning value to first cell if it's not 0\n", " if dfx.loc[0, \"mins\"] == 0:\n", " dfx.set_value(0, \"mins\", 0)\n", " else:\n", " dfx.set_value(0, \"mins\", 999999999)\n", "\n", " # using previous value's row to evaluate time\n", " for i, e in enumerate(dfx[\"mins\"]):\n", " if e == 1:\n", " dfx.at[i, \"mins\"] = dfx.at[i - 1, \"mins\"] + 10\n", "\n", " # sort in ascending order\n", " dfx = dfx.sort_values(by=\"timestamp\")\n", "\n", " # reset index\n", " dfx.reset_index(drop=True, inplace=True)\n", "\n", " # convert to hours and round to nearest hour\n", " dfx[\"hours\"] = dfx[\"mins\"].astype(np.int64)\n", " dfx[\"hours\"] = dfx[\"hours\"] / 60\n", " dfx[\"hours\"] = round(dfx[\"hours\"]).astype(np.int64)\n", "\n", " # > 48 hours - label as normal (9999)\n", " def f1(c):\n", " if c[\"hours\"] > 48:\n", " return 9999\n", " else:\n", " return c[\"hours\"]\n", "\n", " dfx[\"hours\"] = dfx.apply(f1, axis=1)\n", "\n", " # filter out curtailment - curtailed when turbine is pitching outside\n", " # 0deg <= normal <= 3.5deg\n", " def f2(c):\n", " if (\n", " 0 <= c[\"pitch\"] <= 3.5\n", " or c[\"hours\"] != 9999\n", " or (\n", " (c[\"pitch\"] > 3.5 or c[\"pitch\"] < 0)\n", " and (\n", " c[\"ap_av\"] <= (0.1 * dfx[\"ap_av\"].max())\n", " or c[\"ap_av\"] >= (0.9 * dfx[\"ap_av\"].max())\n", " )\n", " )\n", " ):\n", " return \"normal\"\n", " else:\n", " return \"curtailed\"\n", "\n", " dfx[\"curtailment\"] = dfx.apply(f2, axis=1)\n", "\n", " # filter unusual readings, i.e. for normal operation, power <= 0 in\n", " # operating wind speeds, power > 100 before cut-in, runtime < 600 and\n", " # other downtime categories\n", " def f3(c):\n", " if c[\"hours\"] == 9999 and (\n", " (\n", " 3 < c[\"ws_av\"] < 25\n", " and (\n", " c[\"ap_av\"] <= 0\n", " or c[\"runtime\"] < 600\n", " or c[\"EnvironmentalCategory_id\"] > 1\n", " or c[\"GridCategory_id\"] > 1\n", " or c[\"InfrastructureCategory_id\"] > 1\n", " or c[\"AvailabilityCategory_id\"] == 2\n", " or 12 <= c[\"TurbineCategory_id\"] <= 15\n", " or 21 <= c[\"TurbineCategory_id\"] <= 22\n", " )\n", " )\n", " or (c[\"ws_av\"] < 3 and c[\"ap_av\"] > 100)\n", " ):\n", " # remove unusual readings, i.e., zero power at operating wind\n", " # speeds, power > 0 before cut-in ...\n", " return \"unusual\"\n", " else:\n", " return \"normal\"\n", "\n", " dfx[\"unusual\"] = dfx.apply(f3, axis=1)\n", "\n", " # round to 6 hour intervals\n", " def f4(c):\n", " if 1 <= c[\"hours\"] <= 6:\n", " return 6\n", " elif 7 <= c[\"hours\"] <= 12:\n", " return 12\n", " elif 13 <= c[\"hours\"] <= 18:\n", " return 18\n", " elif 19 <= c[\"hours\"] <= 24:\n", " return 24\n", " elif 25 <= c[\"hours\"] <= 30:\n", " return 30\n", " elif 31 <= c[\"hours\"] <= 36:\n", " return 36\n", " elif 37 <= c[\"hours\"] <= 42:\n", " return 42\n", " elif 43 <= c[\"hours\"] <= 48:\n", " return 48\n", " else:\n", " return c[\"hours\"]\n", "\n", " dfx[\"hours6\"] = dfx.apply(f4, axis=1)\n", "\n", " # change label for unusual and curtailed data\n", " def f5(c):\n", " if c[\"unusual\"] == \"unusual\" or c[\"curtailment\"] == \"curtailed\":\n", " return -9999\n", " else:\n", " return c[\"hours6\"]\n", "\n", " dfx[\"hours_%s\" % y] = dfx.apply(f5, axis=1)\n", "\n", " # drop unnecessary columns\n", " dfx = dfx.drop(\"hours6\", axis=1)\n", " dfx = dfx.drop(\"hours\", axis=1)\n", " dfx = dfx.drop(\"mins\", axis=1)\n", " dfx = dfx.drop(\"curtailment\", axis=1)\n", " dfx = dfx.drop(\"unusual\", axis=1)\n", "\n", " df2 = dfx[\n", " [\n", " \"ap_av\",\n", " \"ws_av\",\n", " \"wd_av\",\n", " \"pitch\",\n", " \"ap_max\",\n", " \"ap_dev\",\n", " \"reactive_power\",\n", " \"rs_av\",\n", " \"gen_sp\",\n", " \"nac_pos\",\n", " \"hours_11\",\n", " ]\n", " ].copy()\n", " df2 = df2.dropna()\n", "\n", " # splitting data set\n", " features = [\n", " \"ap_av\",\n", " \"ws_av\",\n", " \"wd_av\",\n", " \"pitch\",\n", " \"ap_max\",\n", " \"ap_dev\",\n", " \"reactive_power\",\n", " \"rs_av\",\n", " \"gen_sp\",\n", " \"nac_pos\",\n", " ]\n", " X = df2[features]\n", " Y = df2[\"hours_11\"]\n", " Xn = preprocessing.normalize(X)\n", " validation_size = 0.20\n", " seed = 7\n", " X_train, X_validation, Y_train, Y_validation = (\n", " model_selection.train_test_split(\n", " Xn, Y, test_size=validation_size, random_state=seed\n", " )\n", " )\n", "\n", " models = []\n", " models.append((\"LR\", LogisticRegression(class_weight=\"balanced\")))\n", " models.append((\"LDA\", LinearDiscriminantAnalysis()))\n", " models.append((\"KNN5\", KNeighborsClassifier()))\n", " models.append((\"KNN15\", KNeighborsClassifier(n_neighbors=15)))\n", " models.append((\"DT\", DecisionTreeClassifier(class_weight=\"balanced\")))\n", " models.append((\"RF\", RandomForestClassifier(class_weight=\"balanced\")))\n", " models.append((\"NB\", GaussianNB()))\n", " models.append((\"MLP\", MLPClassifier()))\n", "\n", " # evaluate each model in turn\n", " results = []\n", " names = []\n", " msg1 = \"Results for Turbine %s\" % x + \" with Turbine Category %s\" % y\n", " print(msg1)\n", " for name, model in models:\n", " kfold = model_selection.KFold(n_splits=10, random_state=seed)\n", " cv_results = model_selection.cross_val_score(\n", " model, X_train, Y_train, cv=kfold, scoring=\"accuracy\"\n", " )\n", " results.append(cv_results)\n", " names.append(name)\n", " msg = \"{}: {:f} ({:f})\".format(\n", " name, cv_results.mean(), cv_results.std()\n", " )\n", " print(msg)\n", "\n", " # compare algorithms\n", " fig = plt.figure()\n", " fig.suptitle(\"Algorithm Comparison\")\n", " ax = fig.add_subplot(111)\n", " plt.boxplot(results)\n", " ax.set_xticklabels(names)\n", " plt.show()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.903127204326\n", "[[ 369 5 4 1 1 0 0 0 3 37]\n", " [ 3 57 13 1 1 0 2 0 0 202]\n", " [ 5 9 60 2 0 0 0 0 1 160]\n", " [ 1 7 6 50 3 0 0 4 0 140]\n", " [ 0 1 4 3 22 0 1 1 2 124]\n", " [ 0 0 3 0 5 33 2 2 2 98]\n", " [ 0 1 0 0 2 3 31 1 0 124]\n", " [ 4 1 0 3 0 1 1 19 3 114]\n", " [ 1 0 3 1 0 0 1 3 22 107]\n", " [ 4 22 356 9 6 6 7 3 2 14701]]\n", " precision recall f1-score support\n", "\n", " 0 0.95 0.88 0.91 420\n", " 6 0.55 0.20 0.30 279\n", " 12 0.13 0.25 0.17 237\n", " 18 0.71 0.24 0.36 211\n", " 24 0.55 0.14 0.22 158\n", " 30 0.77 0.23 0.35 145\n", " 36 0.69 0.19 0.30 162\n", " 42 0.58 0.13 0.21 146\n", " 48 0.63 0.16 0.25 138\n", " 9999 0.93 0.97 0.95 15116\n", "\n", "avg / total 0.90 0.90 0.89 17012\n", "\n" ] } ], "source": [ "clf2 = RandomForestClassifier(class_weight=\"balanced\")\n", "clf2.fit(X_train, Y_train)\n", "predictions2 = clf2.predict(X_validation)\n", "print(accuracy_score(Y_validation, predictions2))\n", "print(confusion_matrix(Y_validation, predictions2))\n", "print(classification_report(Y_validation, predictions2))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.913825534917\n", "[[ 361 6 2 3 0 0 1 0 0 47]\n", " [ 6 28 7 3 5 1 2 1 0 226]\n", " [ 7 9 40 4 3 0 0 0 0 174]\n", " [ 3 9 11 40 3 1 0 2 1 141]\n", " [ 1 0 0 7 15 3 0 2 0 130]\n", " [ 3 3 2 1 3 28 3 2 1 99]\n", " [ 2 2 0 2 1 3 30 1 1 120]\n", " [ 4 0 2 6 0 1 4 23 3 103]\n", " [ 2 0 2 1 1 0 1 1 19 111]\n", " [ 15 36 19 21 13 15 16 11 8 14962]]\n", " precision recall f1-score support\n", "\n", " 0 0.89 0.86 0.88 420\n", " 6 0.30 0.10 0.15 279\n", " 12 0.47 0.17 0.25 237\n", " 18 0.45 0.19 0.27 211\n", " 24 0.34 0.09 0.15 158\n", " 30 0.54 0.19 0.28 145\n", " 36 0.53 0.19 0.27 162\n", " 42 0.53 0.16 0.24 146\n", " 48 0.58 0.14 0.22 138\n", " 9999 0.93 0.99 0.96 15116\n", "\n", "avg / total 0.89 0.91 0.89 17012\n", "\n" ] } ], "source": [ "clf = KNeighborsClassifier()\n", "clf.fit(X_train, Y_train)\n", "predictions = clf.predict(X_validation)\n", "print(accuracy_score(Y_validation, predictions))\n", "print(confusion_matrix(Y_validation, predictions))\n", "print(classification_report(Y_validation, predictions))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "hours6\n", "0 2204\n", "6 1719\n", "12 1289\n", "18 1070\n", "24 913\n", "30 787\n", "36 756\n", "42 727\n", "48 684\n", "9999 121179\n", "dtype: int64\n" ] } ], "source": [ "print(dfx.groupby(\"hours6\").size())" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" } }, "nbformat": 4, "nbformat_minor": 2 }