{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using TensorFlow backend.\n" ] } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import LabelEncoder\n", "from keras.models import Model\n", "from keras.layers import LSTM, Activation, Dense, Dropout, Input, Embedding\n", "from keras.optimizers import RMSprop\n", "from keras.preprocessing.text import Tokenizer\n", "from keras.preprocessing import sequence\n", "from keras.utils import to_categorical\n", "from keras.callbacks import EarlyStopping\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
v1v2Unnamed: 2Unnamed: 3Unnamed: 4
0hamGo until jurong point, crazy.. Available only ...NaNNaNNaN
1hamOk lar... Joking wif u oni...NaNNaNNaN
2spamFree entry in 2 a wkly comp to win FA Cup fina...NaNNaNNaN
3hamU dun say so early hor... U c already then say...NaNNaNNaN
4hamNah I don't think he goes to usf, he lives aro...NaNNaNNaN
\n", "
" ], "text/plain": [ " v1 v2 Unnamed: 2 \\\n", "0 ham Go until jurong point, crazy.. Available only ... NaN \n", "1 ham Ok lar... Joking wif u oni... NaN \n", "2 spam Free entry in 2 a wkly comp to win FA Cup fina... NaN \n", "3 ham U dun say so early hor... U c already then say... NaN \n", "4 ham Nah I don't think he goes to usf, he lives aro... NaN \n", "\n", " Unnamed: 3 Unnamed: 4 \n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv('spam.csv', delimiter=',', encoding='latin-1')\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 5572 entries, 0 to 5571\n", "Data columns (total 2 columns):\n", "v1 5572 non-null object\n", "v2 5572 non-null object\n", "dtypes: object(2)\n", "memory usage: 87.1+ KB\n" ] } ], "source": [ "df.drop(['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], axis=1, inplace=True)\n", "df.info()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
v1v2
0hamGo until jurong point, crazy.. Available only ...
1hamOk lar... Joking wif u oni...
2spamFree entry in 2 a wkly comp to win FA Cup fina...
3hamU dun say so early hor... U c already then say...
4hamNah I don't think he goes to usf, he lives aro...
\n", "
" ], "text/plain": [ " v1 v2\n", "0 ham Go until jurong point, crazy.. Available only ...\n", "1 ham Ok lar... Joking wif u oni...\n", "2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n", "3 ham U dun say so early hor... U c already then say...\n", "4 ham Nah I don't think he goes to usf, he lives aro..." ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Number of ham and spam messages')" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEWCAYAAABxMXBSAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAGctJREFUeJzt3Xm0HWWd7vHvQ8KgggISERLa0Ird\nzlMEbO2WdgDEVli2KF6HqCi2V1v7Ltux75VBXWrrFWe7aUVAW5F2jCNGBYerAokTIiq5gCQGIZgw\nOV3B3/2j3iNFPOfkFGSfIef7WWuvXfXWW7Xfqr3Pfna9NZxUFZIkTdV2M90ASdLcYnBIkgYxOCRJ\ngxgckqRBDA5J0iAGhyRpEINDU5LklCSvnaHXTpL3J9mU5Nxxpj8zyTdmom1bU5KDkqyb6XZIW2Jw\nzFFJLk1yRZLb9cqek+TsGWzWqDwMeDSwpKr2n+nGSPOdwTG3LQRePNONGCrJgoGz3AW4tKp+NYr2\nSBrG4Jjb3gT8c5JdN5+QZGmSSrKwV3Z2kue04Wcm+T9JTkxydZKLk/xVK1+b5Mokyzdb7B5JVia5\nLslXk9ylt+y/bNM2JvlJkif1pp2S5D1JPpfkV8DfjtPevZOsaPOvSfLcVn408F7gIUmuT3L8RBsj\nyZtbd9YlSR7TK39Wkgtbuy9O8rzetIOSrEvysrbOlyc5IslhSX7a2vOqSV7zsUm+m+Tatt2OG+c9\nWJ7ksiRXJfmX3vTbtG2zKcmPgAdP8jpp79WVSa5J8oMk9+5t33+b5L15W2vbtUlWJ/nr3rTjkvxX\nkg+2ec9Pcvckr2yvtTbJwZO069IkL23t+VWS9yXZM8nn2/K+lGS3Xv0Dk3yzfea+n+Sg3rRntvfn\nuvYePrWV362t0zVtG35kiut2mySntu17YXuP1/Wm753kY0k2tNd7UW/a/klWteVekeQtE22Deamq\nfMzBB3Ap8Cjg48BrW9lzgLPb8FKggIW9ec4GntOGnwncADwLWAC8FrgMeBewI3AwcB2wc6t/Shv/\nmzb9bcA32rTbAWvbshYCDwSuAu7Vm/ca4KF0P1Z2Gmd9vgq8G9gJuD+wAXhkr63fmGRbPBP4PfDc\nti7PB9YDadMfC9wVCPBw4NfAA9u0g9p2eDWwfVvGBuBDwC7AvYDfAn8+wWsfBNynrdd9gSuAIzZ7\nD/4DuA1wP+B3wD3a9DcAXwd2B/YBfgism+B1DgFWA7u29bgHsNeW3ps2/WnAHdt78xLgF2PvAXBc\nW79D2vTTgEuAf+ltj0u28Dn8NrAnsBi4EvgO8IDWlq8Ax7a6i4FfAoe17fXoNr6I7jN0LfAXre5e\n3PT5+XBrz3bt8/GwKa7bG+g+V7sBS4AfjG3ftqzV7X3fAfhz4GLgkDb9W8DT2/DOwIEz/Tc/mx4z\n3gAft/CNuyk47k33pbyI4cFxUW/afVr9PXtlvwTu34ZPAU7vTdsZuJHuC+/JwNc3a9+/974wTgFO\nm2Rd9mnL2qVX9nrglF5btxQca3rjt23rcucJ6n8SeHEbPgj4DbCgje/S5j2gV381LQym8L68FThx\ns/dgSW/6ucBRbfhi4NDetGOYODgeAfwUOBDYbrNpE743EyxrE3C/NnwcsLI37XHA9eNsj10n+Rw+\ntTf+MeA9vfF/BD7Zhl8OfGCz+c8EltMFx9XA3wO32azOacBJ/e04yfbvr9sfg6CNP4ebguMA4LLN\n5n0l8P42/DXgeGCPrfl3u6087Kqa46rqh8BngFfcgtmv6A3/pi1v87Kde+Nre697PbAR2JvuGMQB\nrfvh6iRXA08F7jzevOPYG9hYVdf1yn5G9wt1qn7Ra9uv2+DOAEkek+TbrdvparpfvHv05v1lVd3Y\nhn/TnifbDn+U5IAkZ7XujmuAf9hs2TdrG93eztiy9ubm2+VnE61cVX0FeCfdHuEVSU5KcvtelYne\nG5K8pHXVXNPW/w6btXHzdb1qnO0x7vpPMP9E2+4uwJGbfU4eRrfn9Cu6HyD/AFye5LNJ/rLN9zK6\nvaxzk1yQ5NljC9/Cum2+ffvDdwH23qwtr6LbcwI4Grg78OMk5yX5u0nWf94xOLYNx9J1KfS/aMcO\nJN+2V9b/Ir8l9hkbSLIzXRfLero/yK9W1a69x85V9fzevJPdhnk9sHuSXXplfwb8/Fa2lyQ70v0K\nfjPd3tSuwOfovoi2hg8BK+h+3d8B+LcBy76c3jalW+cJVdXbq+pBdN1ndwde2ps87nvT+vxfDjwJ\n2K2t/zUD2rg1raXb4+h/Tm5XVW8AqKozq+rRdN1UP6br4qOqflFVz62qvYHnAe9uxz22tG6X03VR\njelv67V0XXD9tuxSVYe117yoqp4C3Al4I/DR9M5gnO8Mjm1AVa0BPgK8qFe2ge6L92lJFrRfaXe9\nlS91WJKHJdkBeA1wTlWtpdvjuXuSpyfZvj0enOQeU2z/WuCbwOuT7JTkvnS/+P7zVrYXuv7rHemO\nW9yQ7qD5hAd7b4Fd6PaWfptkf+C/DZj3DOCVSXZLsoSuW2dcbXsekGR7uh8Fv6Xrjhoz0XuzC90x\nnA3AwiSvBm7PzPgg8Lgkh7TP5E7pTk5Y0g6oP759Of+OrrvsRoAkR7btA11XVLVpW1q3/vZdDLyw\nN+1c4NokL28H0RckuXeSB7fXfFqSRVX1B7ouNLj59p7XDI5txwl0/cR9z6X7VfpLul+p37yVr/Eh\nur2bjcCD6LqjaF1MBwNH0e09/ILuV9qOA5b9FLpjAuuBT9AdH1l5K9s71rYX0X2JbKL7Yl9xa5fb\n89+BE5JcR3eg9YwB8x5P1z11CfBF4AOT1L093S/wTW2eX9LtRY0Z972hO4bwebrjIz+jC5zJug1H\npgXZ4XRdQhtaO15K9z20Hd3B7fV06/Bwum0L3dlm5yS5nu69e3FVXcKW1+0EYB3d9v0S8FG6UKJ1\nxT2O7kSMS+hO5ngvXVcXwKHABe0130Z3XOq3W29rzG1jZ51ImqOSnEJ30Pd/znRbZrMkz6cLgIfP\ndFvmOvc4JG2TkuyV5KFJtkvyF3R7NJ+Y6XZtCxZuuYokzUk70J0Wvi/dcYrT6a4V0q1kV5UkaRC7\nqiRJg4y0qyrJpXS3QrgRuKGqliXZne7U0aV0V50+qao2JQnd2QuH0V0k9cyq+k5bznJg7MDfa6vq\n1Mled4899qilS5du9fWRpG3Z6tWrr6qqRVuqNx3HOP62qq7qjb8C+HJVvSHJK9r4y4HHAPu1xwHA\ne+iuRt6d7jTDZXTnb69OsqKqNk30gkuXLmXVqlWjWRtJ2kYlmfDuBX0z0VV1ODC2x3AqcESv/LTq\nfBvYNcledDdfW1lVG1tYrKQ7x1qSNANGHRwFfLHd7viYVrZnVV0O0J7v1MoXc/OLd9a1sonKbybJ\nMe02yKs2bNiwlVdDkjRm1F1VD62q9UnuBKxM8uNJ6o5375yapPzmBVUn0d1Bk2XLlnmqmCSNyEj3\nOKpqfXu+ku7Cm/3p7uy5F3QX6NDdvx+6PYn+TciW0N1+YKJySdIMGFlwJLnd2N1O243LDqb7RzUr\n6O6/T3v+VBteATwjnQOBa1pX1pnAwe1GZbu15Zw5qnZLkiY3yq6qPYFPdGfZshD4UFV9Icl5wBnp\n/iXoZcCRrf7n6E7FXUN3Ou6zAKpqY5LXAOe1eidU1cYRtluSNIlt8srxZcuWlafjStIwSVZX1bIt\n1fPKcUnSIAaHJGkQ7447gQe99LSZboJmodVvesZMN0Gace5xSJIGMTgkSYMYHJKkQQwOSdIgBock\naRCDQ5I0iMEhSRrE4JAkDWJwSJIGMTgkSYMYHJKkQQwOSdIgBockaRCDQ5I0iMEhSRrE4JAkDWJw\nSJIGMTgkSYMYHJKkQQwOSdIgBockaRCDQ5I0iMEhSRrE4JAkDWJwSJIGMTgkSYMYHJKkQQwOSdIg\nBockaRCDQ5I0iMEhSRpk5MGRZEGS7yb5TBvfN8k5SS5K8pEkO7TyHdv4mjZ9aW8Zr2zlP0lyyKjb\nLEma2HTscbwYuLA3/kbgxKraD9gEHN3KjwY2VdXdgBNbPZLcEzgKuBdwKPDuJAumod2SpHGMNDiS\nLAEeC7y3jQd4BPDRVuVU4Ig2fHgbp01/ZKt/OHB6Vf2uqi4B1gD7j7LdkqSJjXqP463Ay4A/tPE7\nAldX1Q1tfB2wuA0vBtYCtOnXtPp/LB9nnj9KckySVUlWbdiwYWuvhySpGVlwJPk74MqqWt0vHqdq\nbWHaZPPcVFB1UlUtq6plixYtGtxeSdLULBzhsh8KPD7JYcBOwO3p9kB2TbKw7VUsAda3+uuAfYB1\nSRYCdwA29srH9OeRJE2zke1xVNUrq2pJVS2lO7j9lap6KnAW8MRWbTnwqTa8oo3Tpn+lqqqVH9XO\nutoX2A84d1TtliRNbpR7HBN5OXB6ktcC3wXe18rfB3wgyRq6PY2jAKrqgiRnAD8CbgBeUFU3Tn+z\nJUkwTcFRVWcDZ7fhixnnrKiq+i1w5ATzvw543ehaKEmaKq8clyQNYnBIkgYxOCRJgxgckqRBDA5J\n0iAGhyRpEINDkjSIwSFJGsTgkCQNYnBIkgYxOCRJgxgckqRBDA5J0iAGhyRpEINDkjSIwSFJGsTg\nkCQNYnBIkgYxOCRJgxgckqRBDA5J0iAGhyRpEINDkjSIwSFJGsTgkCQNYnBIkgYxOCRJgxgckqRB\nDA5J0iAGhyRpEINDkjSIwSFJGsTgkCQNYnBIkgYZWXAk2SnJuUm+n+SCJMe38n2TnJPkoiQfSbJD\nK9+xja9p05f2lvXKVv6TJIeMqs2SpC0b5R7H74BHVNX9gPsDhyY5EHgjcGJV7QdsAo5u9Y8GNlXV\n3YATWz2S3BM4CrgXcCjw7iQLRthuSdIkRhYc1bm+jW7fHgU8AvhoKz8VOKINH97GadMfmSSt/PSq\n+l1VXQKsAfYfVbslSZMb6TGOJAuSfA+4ElgJ/F/g6qq6oVVZByxuw4uBtQBt+jXAHfvl48zTf61j\nkqxKsmrDhg2jWB1JEiMOjqq6saruDyyh20u4x3jV2nMmmDZR+eavdVJVLauqZYsWLbqlTZYkbcG0\nnFVVVVcDZwMHArsmWdgmLQHWt+F1wD4AbfodgI398nHmkSRNs1GeVbUoya5t+DbAo4ALgbOAJ7Zq\ny4FPteEVbZw2/StVVa38qHbW1b7AfsC5o2q3JGlyC7dc5RbbCzi1nQG1HXBGVX0myY+A05O8Fvgu\n8L5W/33AB5KsodvTOAqgqi5IcgbwI+AG4AVVdeMI2y1JmsTIgqOqfgA8YJzyixnnrKiq+i1w5ATL\neh3wuq3dRknScF45LkkaxOCQJA1icEiSBplScCT58lTKJEnbvkkPjifZCbgtsEeS3bjpYrzbA3uP\nuG2SpFloS2dVPQ/4J7qQWM1NwXEt8K4RtkuSNEtNGhxV9TbgbUn+sareMU1tkiTNYlO6jqOq3pHk\nr4Cl/Xmq6rQRtUuSNEtNKTiSfAC4K/A9YOyq7QIMDkmaZ6Z65fgy4J7t3lGSpHlsqtdx/BC48ygb\nIkmaG6a6x7EH8KMk59L9S1gAqurxI2mVJGnWmmpwHDfKRkiS5o6pnlX11VE3RJI0N0z1rKrruOnf\nte4AbA/8qqpuP6qGSZJmp6nucezSH09yBOP8Tw1J0rbvFt0dt6o+CTxiK7dFkjQHTLWr6gm90e3o\nruvwmg5JmoemelbV43rDNwCXAodv9dZIkma9qR7jeNaoGyJJmhum+o+cliT5RJIrk1yR5GNJloy6\ncZKk2WeqB8ffD6yg+78ci4FPtzJJ0jwz1eBYVFXvr6ob2uMUYNEI2yVJmqWmGhxXJXlakgXt8TTg\nl6NsmCRpdppqcDwbeBLwC+By4ImAB8wlaR6a6um4rwGWV9UmgCS7A2+mCxRJ0jwy1T2O+46FBkBV\nbQQeMJomSZJms6kGx3ZJdhsbaXscU91bkSRtQ6b65f+/gW8m+SjdrUaeBLxuZK2SJM1aU71y/LQk\nq+hubBjgCVX1o5G2TJI0K025u6kFhWEhSfPcLbqtuiRp/jI4JEmDGBySpEFGFhxJ9klyVpILk1yQ\n5MWtfPckK5Nc1J53a+VJ8vYka5L8IMkDe8ta3upflGT5qNosSdqyUe5x3AC8pKruARwIvCDJPYFX\nAF+uqv2AL7dxgMcA+7XHMcB74I/XjBwLHED3f86P7V9TIkmaXiMLjqq6vKq+04avAy6kuyX74cCp\nrdqpwBFt+HDgtOp8G9g1yV7AIcDKqtrYrl5fCRw6qnZLkiY3Lcc4kiylu0XJOcCeVXU5dOEC3KlV\nWwys7c22rpVNVL75axyTZFWSVRs2bNjaqyBJakYeHEl2Bj4G/FNVXTtZ1XHKapLymxdUnVRVy6pq\n2aJF/qsQSRqVkQZHku3pQuM/q+rjrfiK1gVFe76yla8D9unNvgRYP0m5JGkGjPKsqgDvAy6sqrf0\nJq0Axs6MWg58qlf+jHZ21YHANa0r60zg4CS7tYPiB7cySdIMGOUdbh8KPB04P8n3WtmrgDcAZyQ5\nGrgMOLJN+xxwGLAG+DXtH0VV1cYkrwHOa/VOaLd1lyTNgJEFR1V9g/GPTwA8cpz6BbxggmWdDJy8\n9VonSbqlvHJckjSIwSFJGsTgkCQNYnBIkgYxOCRJgxgckqRBDA5J0iAGhyRpEINDkjSIwSFJGsTg\nkCQNYnBIkgYxOCRJgxgckqRBDA5J0iAGhyRpEINDkjSIwSFJGsTgkCQNYnBIkgYxOCRJgxgckqRB\nDA5J0iAGhyRpEINDkjSIwSFJGsTgkCQNYnBIkgYxOCRJgxgckqRBDA5J0iAGhyRpEINDkjSIwSFJ\nGmRkwZHk5CRXJvlhr2z3JCuTXNSed2vlSfL2JGuS/CDJA3vzLG/1L0qyfFTtlSRNzSj3OE4BDt2s\n7BXAl6tqP+DLbRzgMcB+7XEM8B7oggY4FjgA2B84dixsJEkzY2TBUVVfAzZuVnw4cGobPhU4old+\nWnW+DeyaZC/gEGBlVW2sqk3ASv40jCRJ02i6j3HsWVWXA7TnO7XyxcDaXr11rWyi8j+R5Jgkq5Ks\n2rBhw1ZvuCSpM1sOjmecspqk/E8Lq06qqmVVtWzRokVbtXGSpJtMd3Bc0bqgaM9XtvJ1wD69ekuA\n9ZOUS5JmyHQHxwpg7Myo5cCneuXPaGdXHQhc07qyzgQOTrJbOyh+cCuTJM2QhaNacJIPAwcBeyRZ\nR3d21BuAM5IcDVwGHNmqfw44DFgD/Bp4FkBVbUzyGuC8Vu+Eqtr8gLskaRqNLDiq6ikTTHrkOHUL\neMEEyzkZOHkrNk2SdCvMloPjkqQ5wuCQJA1icEiSBjE4JEmDGBySpEFGdlaVpNG47IT7zHQTNAv9\n2avPn7bXco9DkjSIwSFJGsTgkCQNYnBIkgYxOCRJgxgckqRBDA5J0iAGhyRpEINDkjSIwSFJGsTg\nkCQNYnBIkgYxOCRJgxgckqRBDA5J0iAGhyRpEINDkjSIwSFJGsTgkCQNYnBIkgYxOCRJgxgckqRB\nDA5J0iAGhyRpEINDkjSIwSFJGsTgkCQNYnBIkgaZM8GR5NAkP0myJskrZro9kjRfzYngSLIAeBfw\nGOCewFOS3HNmWyVJ89OcCA5gf2BNVV1cVf8POB04fIbbJEnz0sKZbsAULQbW9sbXAQf0KyQ5Bjim\njV6f5CfT1Lb5YA/gqpluxGyQNy+f6Sbo5vxsjjk2W2Mpd5lKpbkSHONtkbrZSNVJwEnT05z5Jcmq\nqlo20+2QNudnc2bMla6qdcA+vfElwPoZaoskzWtzJTjOA/ZLsm+SHYCjgBUz3CZJmpfmRFdVVd2Q\n5IXAmcAC4OSqumCGmzWf2AWo2crP5gxIVW25liRJzVzpqpIkzRIGhyRpEINjHkuyNMkPZ7odkuYW\ng0OSNIjBoQVJ/iPJBUm+mOQ2SZ6b5Lwk30/ysSS3BUhySpL3JDkrycVJHp7k5CQXJjllhtdDc1yS\n2yX5bPvc/TDJk5NcmuSNSc5tj7u1uo9Lck6S7yb5UpI9W/lxSU5tn+VLkzwhyb8mOT/JF5JsP7Nr\nuW0wOLQf8K6quhdwNfD3wMer6sFVdT/gQuDoXv3dgEcA/wP4NHAicC/gPknuP60t17bmUGB9Vd2v\nqu4NfKGVX1tV+wPvBN7ayr4BHFhVD6C7d93Lesu5K/BYuvvZfRA4q6ruA/ymletWMjh0SVV9rw2v\nBpYC907y9STnA0+lC4Yxn67uHO7zgSuq6vyq+gNwQZtXuqXOBx7V9jD+uqquaeUf7j0/pA0vAc5s\nn9GXcvPP6Oer6vdteQu4KYDOx8/oVmFw6He94RvpLgo9BXhh+5V2PLDTOPX/sNm8f2COXFCq2amq\nfgo8iO4L/vVJXj02qV+tPb8DeGf7jD6PcT6j7QfN7+umi9X8jG4lBofGswtweesPfupMN0bzQ5K9\ngV9X1QeBNwMPbJOe3Hv+Vhu+A/DzNuwti6eZ6avx/C/gHOBndL/+dpnZ5mieuA/wpiR/AH4PPB/4\nKLBjknPofug+pdU9DvivJD8Hvg3sO/3Nnb+85YikWSvJpcCyqvJ/bswidlVJkgZxj0OSNIh7HJKk\nQQwOSdIgBockaRCDQ7oVklw/oO5xSf55VMuXpovBIUkaxOCQtrKJ7tza3C/JV5JclOS5vXle2u5I\n/IMkx89As6UpMzikrW+yO7fel+4OrQ8BXp1k7yQH092leH/g/sCDkvzNNLdZmjJvOSJtfUuAjyTZ\nC9gBuKQ37VNV9RvgN0nOoguLhwEHA99tdXamC5KvTV+TpakzOKSt7x3AW6pqRZKD6O6rNGbzK24L\nCPD6qvr36WmedOvYVSVtfZPdufXwJDsluSNwEHAecCbw7CQ7AyRZnORO09VYaSj3OKRb57ZJ1vXG\n38Lkd249F/gs8GfAa6pqPbA+yT2AbyUBuB54GnDl6JsvDee9qiRJg9hVJUkaxOCQJA1icEiSBjE4\nJEmDGBySpEEMDknSIAaHJGmQ/w8HGwM7lDAXhgAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.countplot(df.v1)\n", "plt.xlabel('Label')\n", "plt.title('Number of ham and spam messages')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0],\n", " [0],\n", " [1],\n", " ...,\n", " [0],\n", " [0],\n", " [0]], dtype=int64)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X = df.v2\n", "Y = df.v1\n", "le = LabelEncoder()\n", "Y = le.fit_transform(Y)\n", "Y = Y.reshape(-1,1)\n", "Y" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "max_words = 1000\n", "max_len = 150\n", "tok = Tokenizer(num_words=max_words)\n", "tok.fit_on_texts(X_train)\n", "sequences = tok.texts_to_sequences(X_train)\n", "sequences_matrix = sequence.pad_sequences(sequences, maxlen=max_len)\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def RNN():\n", " inputs = Input(shape=[max_len])\n", " layer = Embedding(max_words, 50, input_length=max_len)(inputs)\n", " layer = LSTM(64)(layer)\n", " layer = Dense(256)(layer)\n", " layer = Activation('relu')(layer)\n", " layer = Dropout(0.5)(layer)\n", " layer = Dense(1)(layer)\n", " layer = Activation('sigmoid')(layer)\n", " model = Model(inputs=inputs, outputs=layer)\n", " return model\n", " " ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "input_1 (InputLayer) (None, 150) 0 \n", "_________________________________________________________________\n", "embedding_1 (Embedding) (None, 150, 50) 50000 \n", "_________________________________________________________________\n", "lstm_1 (LSTM) (None, 64) 29440 \n", "_________________________________________________________________\n", "dense_1 (Dense) (None, 256) 16640 \n", "_________________________________________________________________\n", "activation_1 (Activation) (None, 256) 0 \n", "_________________________________________________________________\n", "dropout_1 (Dropout) (None, 256) 0 \n", "_________________________________________________________________\n", "dense_2 (Dense) (None, 1) 257 \n", "_________________________________________________________________\n", "activation_2 (Activation) (None, 1) 0 \n", "=================================================================\n", "Total params: 96,337\n", "Trainable params: 96,337\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" ] } ], "source": [ "model = RNN()\n", "model.summary()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "model.compile(loss='binary_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train on 3788 samples, validate on 948 samples\n", "Epoch 1/10\n", "3788/3788 [==============================] - 28s 7ms/step - loss: 0.3214 - acc: 0.8783 - val_loss: 0.1130 - val_acc: 0.9705\n", "Epoch 2/10\n", "3788/3788 [==============================] - 24s 6ms/step - loss: 0.0802 - acc: 0.9807 - val_loss: 0.0539 - val_acc: 0.9800\n", "Epoch 3/10\n", "3788/3788 [==============================] - 25s 6ms/step - loss: 0.0447 - acc: 0.9894 - val_loss: 0.0536 - val_acc: 0.9852\n", "Epoch 4/10\n", "3788/3788 [==============================] - 24s 6ms/step - loss: 0.0362 - acc: 0.9900 - val_loss: 0.0489 - val_acc: 0.9842\n", "Epoch 5/10\n", "3788/3788 [==============================] - 25s 7ms/step - loss: 0.0242 - acc: 0.9923 - val_loss: 0.0622 - val_acc: 0.9831\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit(sequences_matrix, Y_train, batch_size=128, epochs=10, validation_split=0.2, callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.0001)])" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "836/836 [==============================] - 2s 2ms/step\n" ] } ], "source": [ "test_sequences = tok.texts_to_sequences(X_test)\n", "test_sequences = sequence.pad_sequences(test_sequences, maxlen=max_len)\n", "accr = model.evaluate(test_sequences, Y_test)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Test set\n", " Loss: 0.056\n", " Accuracy: 0.982\n" ] } ], "source": [ "print('Test set\\n Loss: {:0.3f}\\n Accuracy: {:0.3f}'.format(accr[0], accr[1]))" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[0.00255568]]\n" ] } ], "source": [ "text = 'hi, call me back when you have time.'\n", "seq = tok.texts_to_sequences([text])\n", "seq_matrix = sequence.pad_sequences(seq, maxlen=max_len)\n", "pred = model.predict(seq_matrix)\n", "print(pred)" ] } ], "metadata": { "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.2" } }, "nbformat": 4, "nbformat_minor": 2 }