{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "843a017a",
"metadata": {},
"source": [
"Cleaning and Prepping Data"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "ba6cd5c4",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Number of Cows | \n",
" Number of Buffaloes | \n",
" Location of Farm | \n",
" Average Daily Milk Production (litres) | \n",
" Milk Collection Centre | \n",
" Yearly Expenditure on Animal Health (INR) | \n",
" Yeary Income from Selling Manure (INR) | \n",
" Primary Feed for Livestock | \n",
" Satisfaction with Government Support | \n",
" Monthly Operating Costs (INR) | \n",
" Monthly Revenue (INR) | \n",
" Use of Automation | \n",
" Number of Family Members/Employees Working at the Farm | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 172 | \n",
" 11 | \n",
" jamnagar | \n",
" 1075 | \n",
" mother dairy | \n",
" 50642 | \n",
" 43224 | \n",
" company products | \n",
" 7.0 | \n",
" 92664 | \n",
" 76967.0 | \n",
" no | \n",
" 38 | \n",
"
\n",
" \n",
" 1 | \n",
" 47 | \n",
" 23 | \n",
" vadodara | \n",
" 350 | \n",
" dudhsagar dairy | \n",
" 99740 | \n",
" 7011 | \n",
" company products | \n",
" 3.0 | \n",
" 40929 | \n",
" 42516.0 | \n",
" no | \n",
" 39 | \n",
"
\n",
" \n",
" 2 | \n",
" 117 | \n",
" 187 | \n",
" rajkot | \n",
" 1520 | \n",
" aavin | \n",
" 95110 | \n",
" 7011 | \n",
" company products | \n",
" 9.0 | \n",
" 82689 | \n",
" 64924.0 | \n",
" yes | \n",
" 12 | \n",
"
\n",
" \n",
" 3 | \n",
" 192 | \n",
" 130 | \n",
" jamnagar | \n",
" 1610 | \n",
" selling privately to consumers | \n",
" 24881 | \n",
" 40605 | \n",
" company products | \n",
" 1.0 | \n",
" 191993 | \n",
" 88905.0 | \n",
" yes | \n",
" 44 | \n",
"
\n",
" \n",
" 4 | \n",
" 323 | \n",
" 98 | \n",
" ahmedabad | \n",
" 2360 | \n",
" verka | \n",
" 54038 | \n",
" 19521 | \n",
" natural plants | \n",
" 5.0 | \n",
" 47018 | \n",
" 82671.0 | \n",
" yes | \n",
" 25 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Number of Cows Number of Buffaloes Location of Farm \\\n",
"0 172 11 jamnagar \n",
"1 47 23 vadodara \n",
"2 117 187 rajkot \n",
"3 192 130 jamnagar \n",
"4 323 98 ahmedabad \n",
"\n",
" Average Daily Milk Production (litres) Milk Collection Centre \\\n",
"0 1075 mother dairy \n",
"1 350 dudhsagar dairy \n",
"2 1520 aavin \n",
"3 1610 selling privately to consumers \n",
"4 2360 verka \n",
"\n",
" Yearly Expenditure on Animal Health (INR) \\\n",
"0 50642 \n",
"1 99740 \n",
"2 95110 \n",
"3 24881 \n",
"4 54038 \n",
"\n",
" Yeary Income from Selling Manure (INR) Primary Feed for Livestock \\\n",
"0 43224 company products \n",
"1 7011 company products \n",
"2 7011 company products \n",
"3 40605 company products \n",
"4 19521 natural plants \n",
"\n",
" Satisfaction with Government Support Monthly Operating Costs (INR) \\\n",
"0 7.0 92664 \n",
"1 3.0 40929 \n",
"2 9.0 82689 \n",
"3 1.0 191993 \n",
"4 5.0 47018 \n",
"\n",
" Monthly Revenue (INR) Use of Automation \\\n",
"0 76967.0 no \n",
"1 42516.0 no \n",
"2 64924.0 yes \n",
"3 88905.0 yes \n",
"4 82671.0 yes \n",
"\n",
" Number of Family Members/Employees Working at the Farm \n",
"0 38 \n",
"1 39 \n",
"2 12 \n",
"3 44 \n",
"4 25 "
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"file_path = '/Users/dhruvtrivedi/Downloads/Final Project Stat 371/Farm_Data_Gujarat.csv'\n",
"farm_data = pd.read_csv(file_path)\n",
"\n",
"# Display the first few rows\n",
"farm_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "e7c6aa30",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"vadodara 60\n",
"jamnagar 59\n",
"surat 55\n",
"ahmedabad 52\n",
"rajkot 51\n",
"Name: Location of Farm, dtype: int64"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Checking the frequency of each category in 'Location of Farm'\n",
"location_counts = farm_data['Location of Farm'].value_counts()\n",
"location_counts\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "4a751c20",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Number of Cows | \n",
" Number of Buffaloes | \n",
" Average Daily Milk Production (litres) | \n",
" Milk Collection Centre | \n",
" Yearly Expenditure on Animal Health (INR) | \n",
" Yeary Income from Selling Manure (INR) | \n",
" Primary Feed for Livestock | \n",
" Satisfaction with Government Support | \n",
" Monthly Operating Costs (INR) | \n",
" Monthly Revenue (INR) | \n",
" Use of Automation | \n",
" Number of Family Members/Employees Working at the Farm | \n",
" ahmedabad | \n",
" jamnagar | \n",
" rajkot | \n",
" surat | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 172 | \n",
" 11 | \n",
" 1075 | \n",
" mother dairy | \n",
" 50642 | \n",
" 43224 | \n",
" company products | \n",
" 7.0 | \n",
" 92664 | \n",
" 76967.0 | \n",
" no | \n",
" 38 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 47 | \n",
" 23 | \n",
" 350 | \n",
" dudhsagar dairy | \n",
" 99740 | \n",
" 7011 | \n",
" company products | \n",
" 3.0 | \n",
" 40929 | \n",
" 42516.0 | \n",
" no | \n",
" 39 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" 117 | \n",
" 187 | \n",
" 1520 | \n",
" aavin | \n",
" 95110 | \n",
" 7011 | \n",
" company products | \n",
" 9.0 | \n",
" 82689 | \n",
" 64924.0 | \n",
" yes | \n",
" 12 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" 192 | \n",
" 130 | \n",
" 1610 | \n",
" selling privately to consumers | \n",
" 24881 | \n",
" 40605 | \n",
" company products | \n",
" 1.0 | \n",
" 191993 | \n",
" 88905.0 | \n",
" yes | \n",
" 44 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" 323 | \n",
" 98 | \n",
" 2360 | \n",
" verka | \n",
" 54038 | \n",
" 19521 | \n",
" natural plants | \n",
" 5.0 | \n",
" 47018 | \n",
" 82671.0 | \n",
" yes | \n",
" 25 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Number of Cows Number of Buffaloes \\\n",
"0 172 11 \n",
"1 47 23 \n",
"2 117 187 \n",
"3 192 130 \n",
"4 323 98 \n",
"\n",
" Average Daily Milk Production (litres) Milk Collection Centre \\\n",
"0 1075 mother dairy \n",
"1 350 dudhsagar dairy \n",
"2 1520 aavin \n",
"3 1610 selling privately to consumers \n",
"4 2360 verka \n",
"\n",
" Yearly Expenditure on Animal Health (INR) \\\n",
"0 50642 \n",
"1 99740 \n",
"2 95110 \n",
"3 24881 \n",
"4 54038 \n",
"\n",
" Yeary Income from Selling Manure (INR) Primary Feed for Livestock \\\n",
"0 43224 company products \n",
"1 7011 company products \n",
"2 7011 company products \n",
"3 40605 company products \n",
"4 19521 natural plants \n",
"\n",
" Satisfaction with Government Support Monthly Operating Costs (INR) \\\n",
"0 7.0 92664 \n",
"1 3.0 40929 \n",
"2 9.0 82689 \n",
"3 1.0 191993 \n",
"4 5.0 47018 \n",
"\n",
" Monthly Revenue (INR) Use of Automation \\\n",
"0 76967.0 no \n",
"1 42516.0 no \n",
"2 64924.0 yes \n",
"3 88905.0 yes \n",
"4 82671.0 yes \n",
"\n",
" Number of Family Members/Employees Working at the Farm ahmedabad \\\n",
"0 38 0 \n",
"1 39 0 \n",
"2 12 0 \n",
"3 44 0 \n",
"4 25 1 \n",
"\n",
" jamnagar rajkot surat \n",
"0 1 0 0 \n",
"1 0 0 0 \n",
"2 0 1 0 \n",
"3 1 0 0 \n",
"4 0 0 0 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Correcting the dummy variables for 'Location of Farm', with 'Vadodara' as the baseline\n",
"location_dummies_corrected = pd.get_dummies(farm_data['Location of Farm']).drop(['vadodara'], axis=1)\n",
"\n",
"# Removing the original 'Location of Farm' column\n",
"transformed_farm_data = farm_data.drop('Location of Farm', axis=1).join(location_dummies_corrected)\n",
"transformed_farm_data.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a02ae613",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Satisfaction_1_4 145\n",
"Satisfaction_8_10 77\n",
"Satisfaction_5_7 70\n",
"Name: satisfaction_with_government_support, dtype: int64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Categorizing 'Satisfaction with Government Support' into three groups\n",
"def categorize_satisfaction(score):\n",
" if 1 <= score <= 4:\n",
" return 'Satisfaction_1_4'\n",
" elif 5 <= score <= 7:\n",
" return 'Satisfaction_5_7'\n",
" elif 8 <= score <= 10:\n",
" return 'Satisfaction_8_10'\n",
"\n",
"# Applying the categorization\n",
"transformed_farm_data['satisfaction_with_government_support'] = farm_data['Satisfaction with Government Support'].apply(categorize_satisfaction)\n",
"\n",
"# Checking the frequency \n",
"satisfaction_counts = transformed_farm_data['satisfaction_with_government_support'].value_counts()\n",
"satisfaction_counts\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "85dca4e7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"kwality limited 36\n",
"aavin 33\n",
"mother dairy 32\n",
"parag milk foods ltd 31\n",
"orissa state cooperative milk producers federation 29\n",
"amul 28\n",
"selling privately to consumers 27\n",
"dudhsagar dairy 24\n",
"karnataka co-operative milk federation 20\n",
"verka 16\n",
"dynamix dairy 16\n",
"milk collection centre 7\n",
"Name: Milk Collection Centre, dtype: int64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Dropping the original 'Satisfaction with Government Support' variable\n",
"transformed_farm_data = transformed_farm_data.drop('Satisfaction with Government Support', axis=1)\n",
"\n",
"# Proceeding with the 'Milk Collection Centre' variable\n",
"# Checking the frequency of each category in 'Milk Collection Centre'\n",
"milk_collection_counts = transformed_farm_data['Milk Collection Centre'].value_counts()\n",
"milk_collection_counts\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "000b2105",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Number of Cows | \n",
" Number of Buffaloes | \n",
" Average Daily Milk Production (litres) | \n",
" Yearly Expenditure on Animal Health (INR) | \n",
" Yeary Income from Selling Manure (INR) | \n",
" Primary Feed for Livestock | \n",
" Monthly Operating Costs (INR) | \n",
" Monthly Revenue (INR) | \n",
" Use of Automation | \n",
" Number of Family Members/Employees Working at the Farm | \n",
" ... | \n",
" amul | \n",
" dudhsagar dairy | \n",
" dynamix dairy | \n",
" karnataka co-operative milk federation | \n",
" milk collection centre | \n",
" mother dairy | \n",
" orissa state cooperative milk producers federation | \n",
" parag milk foods ltd | \n",
" selling privately to consumers | \n",
" verka | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 172 | \n",
" 11 | \n",
" 1075 | \n",
" 50642 | \n",
" 43224 | \n",
" company products | \n",
" 92664 | \n",
" 76967.0 | \n",
" no | \n",
" 38 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 47 | \n",
" 23 | \n",
" 350 | \n",
" 99740 | \n",
" 7011 | \n",
" company products | \n",
" 40929 | \n",
" 42516.0 | \n",
" no | \n",
" 39 | \n",
" ... | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" 117 | \n",
" 187 | \n",
" 1520 | \n",
" 95110 | \n",
" 7011 | \n",
" company products | \n",
" 82689 | \n",
" 64924.0 | \n",
" yes | \n",
" 12 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" 192 | \n",
" 130 | \n",
" 1610 | \n",
" 24881 | \n",
" 40605 | \n",
" company products | \n",
" 191993 | \n",
" 88905.0 | \n",
" yes | \n",
" 44 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" 323 | \n",
" 98 | \n",
" 2360 | \n",
" 54038 | \n",
" 19521 | \n",
" natural plants | \n",
" 47018 | \n",
" 82671.0 | \n",
" yes | \n",
" 25 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 26 columns
\n",
"
"
],
"text/plain": [
" Number of Cows Number of Buffaloes \\\n",
"0 172 11 \n",
"1 47 23 \n",
"2 117 187 \n",
"3 192 130 \n",
"4 323 98 \n",
"\n",
" Average Daily Milk Production (litres) \\\n",
"0 1075 \n",
"1 350 \n",
"2 1520 \n",
"3 1610 \n",
"4 2360 \n",
"\n",
" Yearly Expenditure on Animal Health (INR) \\\n",
"0 50642 \n",
"1 99740 \n",
"2 95110 \n",
"3 24881 \n",
"4 54038 \n",
"\n",
" Yeary Income from Selling Manure (INR) Primary Feed for Livestock \\\n",
"0 43224 company products \n",
"1 7011 company products \n",
"2 7011 company products \n",
"3 40605 company products \n",
"4 19521 natural plants \n",
"\n",
" Monthly Operating Costs (INR) Monthly Revenue (INR) Use of Automation \\\n",
"0 92664 76967.0 no \n",
"1 40929 42516.0 no \n",
"2 82689 64924.0 yes \n",
"3 191993 88905.0 yes \n",
"4 47018 82671.0 yes \n",
"\n",
" Number of Family Members/Employees Working at the Farm ... amul \\\n",
"0 38 ... 0 \n",
"1 39 ... 0 \n",
"2 12 ... 0 \n",
"3 44 ... 0 \n",
"4 25 ... 0 \n",
"\n",
" dudhsagar dairy dynamix dairy karnataka co-operative milk federation \\\n",
"0 0 0 0 \n",
"1 1 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"\n",
" milk collection centre mother dairy \\\n",
"0 0 1 \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"\n",
" orissa state cooperative milk producers federation parag milk foods ltd \\\n",
"0 0 0 \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"\n",
" selling privately to consumers verka \n",
"0 0 0 \n",
"1 0 0 \n",
"2 0 0 \n",
"3 1 0 \n",
"4 0 1 \n",
"\n",
"[5 rows x 26 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"transformed_farm_data['Milk Collection Centre'] = transformed_farm_data['Milk Collection Centre'].replace('Milk Collection Centre', pd.NA)\n",
"\n",
"# Correcting the dummy variables for 'Milk Collection Centre', with the most frequent category ('Kwality Limited') as the baseline\n",
"milk_collection_dummies = pd.get_dummies(transformed_farm_data['Milk Collection Centre']).drop(['kwality limited'], axis=1)\n",
"\n",
"# Removing the original 'Milk Collection Centre' variable\n",
"transformed_farm_data = transformed_farm_data.drop('Milk Collection Centre', axis=1).join(milk_collection_dummies)\n",
"\n",
"# Displaying the first few rows to verify the changes\n",
"transformed_farm_data.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "01b918c4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(company products 142\n",
" natural plants 131\n",
" Name: Primary Feed for Livestock, dtype: int64,\n",
" no 160\n",
" yes 113\n",
" Name: Use of Automation, dtype: int64)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Checking the frequency \n",
"primary_feed_counts = transformed_farm_data['Primary Feed for Livestock'].value_counts()\n",
"\n",
"# Checking the frequency\n",
"automation_counts = transformed_farm_data['Use of Automation'].value_counts()\n",
"\n",
"primary_feed_counts, automation_counts\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "fda822b5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Number of Cows | \n",
" Number of Buffaloes | \n",
" Average Daily Milk Production (litres) | \n",
" Yearly Expenditure on Animal Health (INR) | \n",
" Yeary Income from Selling Manure (INR) | \n",
" Monthly Operating Costs (INR) | \n",
" Monthly Revenue (INR) | \n",
" Number of Family Members/Employees Working at the Farm | \n",
" ahmedabad | \n",
" jamnagar | \n",
" ... | \n",
" dynamix dairy | \n",
" karnataka co-operative milk federation | \n",
" milk collection centre | \n",
" mother dairy | \n",
" orissa state cooperative milk producers federation | \n",
" parag milk foods ltd | \n",
" selling privately to consumers | \n",
" verka | \n",
" natural plants | \n",
" yes | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 172 | \n",
" 11 | \n",
" 1075 | \n",
" 50642 | \n",
" 43224 | \n",
" 92664 | \n",
" 76967.0 | \n",
" 38 | \n",
" 0 | \n",
" 1 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 47 | \n",
" 23 | \n",
" 350 | \n",
" 99740 | \n",
" 7011 | \n",
" 40929 | \n",
" 42516.0 | \n",
" 39 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" 117 | \n",
" 187 | \n",
" 1520 | \n",
" 95110 | \n",
" 7011 | \n",
" 82689 | \n",
" 64924.0 | \n",
" 12 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" 192 | \n",
" 130 | \n",
" 1610 | \n",
" 24881 | \n",
" 40605 | \n",
" 191993 | \n",
" 88905.0 | \n",
" 44 | \n",
" 0 | \n",
" 1 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" 323 | \n",
" 98 | \n",
" 2360 | \n",
" 54038 | \n",
" 19521 | \n",
" 47018 | \n",
" 82671.0 | \n",
" 25 | \n",
" 1 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 26 columns
\n",
"
"
],
"text/plain": [
" Number of Cows Number of Buffaloes \\\n",
"0 172 11 \n",
"1 47 23 \n",
"2 117 187 \n",
"3 192 130 \n",
"4 323 98 \n",
"\n",
" Average Daily Milk Production (litres) \\\n",
"0 1075 \n",
"1 350 \n",
"2 1520 \n",
"3 1610 \n",
"4 2360 \n",
"\n",
" Yearly Expenditure on Animal Health (INR) \\\n",
"0 50642 \n",
"1 99740 \n",
"2 95110 \n",
"3 24881 \n",
"4 54038 \n",
"\n",
" Yeary Income from Selling Manure (INR) Monthly Operating Costs (INR) \\\n",
"0 43224 92664 \n",
"1 7011 40929 \n",
"2 7011 82689 \n",
"3 40605 191993 \n",
"4 19521 47018 \n",
"\n",
" Monthly Revenue (INR) \\\n",
"0 76967.0 \n",
"1 42516.0 \n",
"2 64924.0 \n",
"3 88905.0 \n",
"4 82671.0 \n",
"\n",
" Number of Family Members/Employees Working at the Farm ahmedabad \\\n",
"0 38 0 \n",
"1 39 0 \n",
"2 12 0 \n",
"3 44 0 \n",
"4 25 1 \n",
"\n",
" jamnagar ... dynamix dairy karnataka co-operative milk federation \\\n",
"0 1 ... 0 0 \n",
"1 0 ... 0 0 \n",
"2 0 ... 0 0 \n",
"3 1 ... 0 0 \n",
"4 0 ... 0 0 \n",
"\n",
" milk collection centre mother dairy \\\n",
"0 0 1 \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"\n",
" orissa state cooperative milk producers federation parag milk foods ltd \\\n",
"0 0 0 \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"\n",
" selling privately to consumers verka natural plants yes \n",
"0 0 0 0 0 \n",
"1 0 0 0 0 \n",
"2 0 0 0 1 \n",
"3 1 0 0 1 \n",
"4 0 1 1 1 \n",
"\n",
"[5 rows x 26 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 'Company Products' as the baseline\n",
"primary_feed_dummies = pd.get_dummies(transformed_farm_data['Primary Feed for Livestock']).drop(['company products'], axis=1)\n",
"\n",
"# 'No' as the baseline\n",
"automation_dummies = pd.get_dummies(transformed_farm_data['Use of Automation']).drop(['no'], axis=1)\n",
"\n",
"# Removing the original 'Primary Feed for Livestock' and 'Use of Automation' variables\n",
"transformed_farm_data = transformed_farm_data.drop(['Primary Feed for Livestock', 'Use of Automation'], axis=1)\n",
"\n",
"# Adding the new dummy variables\n",
"transformed_farm_data = transformed_farm_data.join(primary_feed_dummies).join(automation_dummies)\n",
"\n",
"# Display\n",
"transformed_farm_data.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "80561131",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(299, 7)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Total number of rows in the dataset\n",
"total_rows = transformed_farm_data.shape[0]\n",
"\n",
"# Counting the number of rows with NA entries\n",
"rows_with_na = transformed_farm_data.isna().any(axis=1).sum()\n",
"\n",
"total_rows, rows_with_na\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "e00c9f9e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"292"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Removing rows with NA entries\n",
"transformed_farm_data_cleaned = transformed_farm_data.dropna()\n",
"\n",
"# Counting the total number of rows after removing NA entries\n",
"total_rows_after_removal = transformed_farm_data_cleaned.shape[0]\n",
"\n",
"total_rows_after_removal\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}