diff --git a/Assignment/Shreesha Assignment-2 solution.ipynb b/Assignment/Shreesha Assignment-2 solution.ipynb new file mode 100644 index 0000000..7f2a816 --- /dev/null +++ b/Assignment/Shreesha Assignment-2 solution.ipynb @@ -0,0 +1,604 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "#%matplotlib notebook\n", + "%matplotlib inline\n", + "import requests" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "import the dataset into a dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " EmployeeName JobTitle \\\n", + "Id \n", + "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", + "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", + "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", + "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", + "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", + "\n", + " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", + "Id \n", + "1 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n", + "2 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n", + "3 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n", + "4 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n", + "5 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n", + "\n", + " Year Notes Agency Status \n", + "Id \n", + "1 2011 NaN San Francisco NaN \n", + "2 2011 NaN San Francisco NaN \n", + "3 2011 NaN San Francisco NaN \n", + "4 2011 NaN San Francisco NaN \n", + "5 2011 NaN San Francisco NaN \n" + ] + } + ], + "source": [ + "df = pd.read_csv('Salaries.csv',index_col = 'Id')\n", + "print(df.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the column names" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['EmployeeName', 'JobTitle', 'BasePay', 'OvertimePay', 'OtherPay',\n", + " 'Benefits', 'TotalPay', 'TotalPayBenefits', 'Year', 'Notes', 'Agency',\n", + " 'Status'],\n", + " dtype='object')" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the number of rows and cols" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(148654, 12)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the dataframe info (types of data in columns and not null values etc.)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.info" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display stats of the dataframe like count, mean, std, max, 25% etc....." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display null values per column" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "EmployeeName 0\n", + "JobTitle 0\n", + "BasePay 609\n", + "OvertimePay 4\n", + "OtherPay 4\n", + "Benefits 36163\n", + "TotalPay 0\n", + "TotalPayBenefits 0\n", + "Year 0\n", + "Notes 148654\n", + "Agency 0\n", + "Status 148654\n", + "dtype: int64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "null_data = df.isnull().sum()\n", + "null_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "remove columns will all values as NaN" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "null_val_col = null_data[null_data==df.shape[0]].keys()\n", + "null_val_col\n", + "df = df.drop(columns = null_val_col,axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display number of unique values in each column" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "EmployeeName 110811\n", + "JobTitle 2159\n", + "BasePay 109489\n", + "OvertimePay 65998\n", + "OtherPay 83225\n", + "Benefits 98465\n", + "TotalPay 138486\n", + "TotalPayBenefits 142098\n", + "Year 4\n", + "Agency 1\n" + ] + } + ], + "source": [ + "for col in df.columns:\n", + " print(col,df[col].nunique())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "mean of total pay of all people based on year" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Year\n", + "2011 71744.103871\n", + "2012 74113.262265\n", + "2013 77611.443142\n", + "2014 75463.918140\n", + "Name: TotalPay, dtype: float64" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('Year')['TotalPay'].mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "how many people have 0 overtime pay" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "77321" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df[df['OvertimePay']==0]['OvertimePay'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "max, min, mean, median and other stats of TotalPay of people having 0 OvertimePay" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 77321.000000\n", + "mean 60229.348901\n", + "std 49307.912350\n", + "min -618.130000\n", + "25% 13290.450000\n", + "50% 58158.590000\n", + "75% 91115.090000\n", + "max 567595.430000\n", + "Name: TotalPay, dtype: float64" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df['OvertimePay']==0]['TotalPay'].describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "find Id of that person with max TotalPay you got in previous question" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([1], dtype='int64', name='Id')" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df['TotalPay']==567595.430000].index" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "name of employee with total pay benefits = 87619.78" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df[df['TotalPayBenefits']==87619.78])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "how many people have BasePay > 150000 and OvertimePay > 100000" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "12" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df[(df['BasePay']>150000) & (df['OvertimePay']>100000)])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "which job title generally has highest average TotalPayBenefits" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Id\n", + "1 GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY\n", + "Name: JobTitle, dtype: object" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df['TotalPay']==max(df['TotalPay'])]['JobTitle']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How many employees are POLICE" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2512" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# .str.contains()\n", + "len(df[df['JobTitle'].str.contains('POLICE')])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Assignment/assignment-1.ipynb b/Assignment/assignment-1.ipynb new file mode 100644 index 0000000..916d00c --- /dev/null +++ b/Assignment/assignment-1.ipynb @@ -0,0 +1,370 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Assignment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a python list => \\[1,2,3,4,5\\]\n", + "\n", + "Convert it into numpy array and print it" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 2, 3, 4, 5])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = [1,2,3,4,5]\n", + "a = np.array(a)\n", + "a" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a python matrix (3 x 3) => \\[[1,2,3],[4,5,6],[7,8,9]\\]\n", + "\n", + "Convert it into numpy array and print it" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b = [[1,2,3],[4,5,6],[7,8,9]]\n", + "b = np.array(b)\n", + "b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a matrix (3 x 3) using built-in methods (like arange(), reshape() etc.):\n", + "\n", + "\\[ [1,3,5],\n", + "\n", + " [7,9,11],\n", + " \n", + " [13,15,17] \\]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 1, 3, 5],\n", + " [ 7, 9, 11],\n", + " [13, 15, 17]])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = np.arange(1,18,2).reshape(3,3)\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a numpy array with 10 random numbers from 0 to 10 (there should be few numbers greater than 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([3, 8, 8, 5, 1, 8, 2, 6, 5, 6])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d = np.random.randint(low = 0, high = 10, size = 10)\n", + "d" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create numpy array => \\[1,2,3,4,5\\] and convert it to 2D array with 5 rows" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1],\n", + " [2],\n", + " [3],\n", + " [4],\n", + " [5]])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "e = np.array([1,2,3,4,5])\n", + "e.reshape(5,1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print the shape of the above created array" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5,)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "e.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a numpy array with 10 elements in it. Access and print its 3rd, 4th and 9th element." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n", + "5\n", + "9\n" + ] + } + ], + "source": [ + "f = d = np.random.randint(low = 0, high = 10, size = 10)\n", + "print(f[2])\n", + "print(f[3])\n", + "print(f[8])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print alternate elements of that array" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([4, 1, 1, 1, 9])" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f[::2]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Change last 3 elements into 100 using broadcasting and print" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 4, 4, 1, 5, 1, 5, 1, 100, 100, 100])" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f[-3:] = 100\n", + "f" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a 5 x 5 matrix (fill it with any element you like), print it.\n", + "\n", + "Then print the middle (3 x 3) matrix." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 7 14 5 20 8]\n", + " [ 4 3 1 16 11]\n", + " [ 1 10 2 13 3]\n", + " [15 24 2 3 24]\n", + " [18 24 12 5 24]]\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[ 3, 1, 16],\n", + " [10, 2, 13],\n", + " [24, 2, 3]])" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g = np.random.randint(low = 1, high = 25, size = 25).reshape(5,5)\n", + "print(g)\n", + "g[1:4,1:4]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}