Original files for the project exceed GitHub's maximum file size limit. Please contact me at [email protected] to access complete code and data files for the project.
# Perform all necessary package imports
from imports import *
# Instantiate the data handler used for all filtering, modelling and plotting
# below (`dh` is presumably provided by the star import above).
data = dh()

# Crisis data filtered by region.
# NOTE(review): southern_asia_df also includes 'South-Eastern Asia', whereas
# southern_asia_rebel_groups_df below filters on 'Southern Asia' only -- confirm
# which region list is intended.
southern_asia_df = data.filter_data(
    region=['Southern Asia', 'South-Eastern Asia'],
)
asia_df = data.filter_data(
    region=['Middle East', 'Southern Asia', 'South-Eastern Asia'],
)

# Regional crisis data restricted to rebel groups (rebel_groups=True).
asia_rebel_groups_df = data.filter_data(
    region=['Middle East', 'Southern Asia', 'South-Eastern Asia'],
    rebel_groups=True,
)
southern_asia_rebel_groups_df = data.filter_data(
    region=['Southern Asia'],
    rebel_groups=True,
)
NOTE: To better understand the predictors, see the documentation in the Understanding_data folder.
# Draw crisis-to-fatalities graphs for the full Asia frame and for the
# rebel-group-only Asia frame, once per graph type.
# NOTE(review): the graph type is passed positionally for 'country_paired' but via
# `type=` for the others, and 'crisis map' contains a space where the other type
# names use underscores -- confirm both against crisis_to_fatalities_graph.
data.crisis_to_fatalities_graph(asia_df, 'country_paired')
data.crisis_to_fatalities_graph(asia_df, type='crisis map')
data.crisis_to_fatalities_graph(asia_rebel_groups_df, 'country_paired')
data.crisis_to_fatalities_graph(asia_rebel_groups_df, type='crisis map')
# Event-paired graphs for the same two frames.
data.crisis_to_fatalities_graph(asia_df, type='event_paired')
data.crisis_to_fatalities_graph(asia_rebel_groups_df, type='event_paired')
# Get data for crises in Afghanistan restricted to rebel groups.
afg_rebel_groups_df = data.filter_data(country=['Afghanistan'], rebel_groups=True)

# Train the random forest; these hyperparameters were found using grid search.
afg_rebel_groups_rf = data.random_forest(
    afg_rebel_groups_df,
    max_depth=30,
    min_samples_leaf=1,
    min_samples_split=10,
    n_estimators=500,
    model_name='afghanistan',
)

# Reload the stored Afghanistan model into the variable that the plots and
# metrics below actually use. The name matches the `model_name` given at
# training time and the lowercase naming of the other stored models.
# NOTE(review): assumes random_forest persists the model under `model_name`
# -- confirm against the data handler.
afg_rebel_groups_rf = data.load_model('afghanistan')

# Normalized confusion matrix for the Afghanistan model, dataset_type='train'.
data.plot_confusion_matrix(
    afg_rebel_groups_df,
    afg_rebel_groups_rf,
    dataset_type='train',
    normalize=True,
)
<matplotlib.axes._subplots.AxesSubplot at 0x7fab923ebe48>
# Normalized confusion matrix for the Afghanistan model, dataset_type='test'.
data.plot_confusion_matrix(
    afg_rebel_groups_df,
    afg_rebel_groups_rf,
    dataset_type='test',
    normalize=True,
)
<matplotlib.axes._subplots.AxesSubplot at 0x7fab444f72b0>
# Report training/testing accuracy and weighted F1, precision and recall for
# the Afghanistan model (see the dict output below).
data.classifier_accuracy(afg_rebel_groups_df, afg_rebel_groups_rf)
{'training_accuracy': 0.8340914013428633,
'testing_accuracy': 0.8232618583495777,
'training_F1_weighted': 0.7805860368800424,
'testing_F1_weighted': 0.7674833570687929,
'training_precision_weighted': 0.8515754941098571,
'testing_precision_weighted': 0.8386940263095153,
'training_recall_weighted': 0.8340914013428633,
'testing_recall_weighted': 0.8232618583495777}
# Load the stored 'southern_asia' model and plot its normalized confusion
# matrix with dataset_type='train'.
southern_asia_rebel_groups_rf = data.load_model('southern_asia')
data.plot_confusion_matrix(
    southern_asia_rebel_groups_df,
    southern_asia_rebel_groups_rf,
    dataset_type='train',
    normalize=True,
)
<matplotlib.axes._subplots.AxesSubplot at 0x7fc528549f28>
# Normalized confusion matrix for the southern Asia model, dataset_type='test'.
data.plot_confusion_matrix(
    southern_asia_rebel_groups_df,
    southern_asia_rebel_groups_rf,
    dataset_type='test',
    normalize=True,
)
<matplotlib.axes._subplots.AxesSubplot at 0x7fc528549ac8>
# Report training/testing accuracy and weighted F1, precision and recall for
# the southern Asia model (see the dict output below).
data.classifier_accuracy(southern_asia_rebel_groups_df, southern_asia_rebel_groups_rf)
{'training_accuracy': 0.8423524022218405,
'testing_accuracy': 0.8268338773406632,
'training_F1_weighted': 0.7898185645638188,
'testing_F1_weighted': 0.7707830293459663,
'training_precision_weighted': 0.8712670574510711,
'testing_precision_weighted': 0.8306234964296919,
'training_recall_weighted': 0.8423524022218405,
'testing_recall_weighted': 0.8268338773406632}
# Load the stored 'asia' model and plot its normalized confusion matrix with
# dataset_type='train'.
asia_rebel_groups_rf = data.load_model('asia')
data.plot_confusion_matrix(
    asia_rebel_groups_df,
    asia_rebel_groups_rf,
    dataset_type='train',
    normalize=True,
)
<matplotlib.axes._subplots.AxesSubplot at 0x7fc539b6a5c0>
# Normalized confusion matrix for the Asia model, dataset_type='test'.
data.plot_confusion_matrix(
    asia_rebel_groups_df,
    asia_rebel_groups_rf,
    dataset_type='test',
    normalize=True,
)
<matplotlib.axes._subplots.AxesSubplot at 0x7fc549f98b00>
# Report training/testing accuracy and weighted F1, precision and recall for
# the Asia model (see the dict output below).
data.classifier_accuracy(asia_rebel_groups_df, asia_rebel_groups_rf)
{'training_accuracy': 0.853942423468457,
'testing_accuracy': 0.84090411558669,
'training_F1_weighted': 0.8372359852046187,
'testing_F1_weighted': 0.8226387702490333,
'training_precision_weighted': 0.8441646669875207,
'testing_precision_weighted': 0.8279729962978163,
'training_recall_weighted': 0.853942423468457,
'testing_recall_weighted': 0.84090411558669}
Create the dataset for Islamic State classification.
# Build the frame for the 'islamic state' actor across the three Asian
# regions, restricted to rebel groups.
isis_df = data.filter_data(
    region=['Southern Asia', 'Middle East', 'South-Eastern Asia'],
    rebel_groups=True,
    actor_name='islamic state',
)

# Load the stored 'isis' model and plot its normalized confusion matrix with
# dataset_type='train'.
isis_rf = data.load_model('isis')
data.plot_confusion_matrix(
    isis_df,
    isis_rf,
    dataset_type='train',
    normalize=True,
)
<matplotlib.axes._subplots.AxesSubplot at 0x7fe8183ffe48>
# Normalized confusion matrix for the Islamic State model, dataset_type='test'.
data.plot_confusion_matrix(
    isis_df,
    isis_rf,
    dataset_type='test',
    normalize=True,
)
<matplotlib.axes._subplots.AxesSubplot at 0x7fe85d373588>
# Report training/testing accuracy and weighted F1, precision and recall for
# the Islamic State model (see the dict output below).
data.classifier_accuracy(isis_df, isis_rf)
{'training_accuracy': 0.925958500276882,
'testing_accuracy': 0.9200664484291787,
'training_F1_weighted': 0.9238511827444541,
'testing_F1_weighted': 0.9177113406155695,
'training_precision_weighted': 0.9330340541392627,
'testing_precision_weighted': 0.9277676133005858,
'training_recall_weighted': 0.925958500276882,
'testing_recall_weighted': 0.9200664484291787}