In [ ]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

Chargement des contours des pays¶

In [ ]:
countries = gpd.read_file("countries.geojson")
In [ ]:
countries.plot()
Out[ ]:
<Axes: >
No description has been provided for this image
In [ ]:
countries
Out[ ]:
name ISO3166-1-Alpha-3 ISO3166-1-Alpha-2 geometry
0 Indonesia IDN ID MULTIPOLYGON (((117.70361 4.16342, 117.70361 4...
1 Malaysia MYS MY MULTIPOLYGON (((117.70361 4.16342, 117.69711 4...
2 Chile CHL CL MULTIPOLYGON (((-69.51009 -17.50659, -69.50611...
3 Bolivia BOL BO POLYGON ((-69.51009 -17.50659, -69.51009 -17.5...
4 Peru PER PE MULTIPOLYGON (((-69.51009 -17.50659, -69.63832...
... ... ... ... ...
253 Macao S.A.R MAC MO MULTIPOLYGON (((113.5586 22.16303, 113.56943 2...
254 Ashmore and Cartier Islands -99 -99 POLYGON ((123.59702 -12.42832, 123.59775 -12.4...
255 Bajo Nuevo Bank (Petrel Is.) -99 -99 POLYGON ((-79.98929 15.79495, -79.98782 15.796...
256 Serranilla Bank -99 -99 POLYGON ((-78.63707 15.86209, -78.64041 15.864...
257 Scarborough Reef -99 -99 POLYGON ((117.75389 15.15437, 117.75569 15.151...

258 rows × 4 columns

On assigne la colonne "name" comme index de la couche des pays

In [ ]:
countries = countries.set_index("name")

Les noms ne correspondent pas à ceux dans les données téléchargées sur Our World in Data, on va les renommer

In [ ]:
countries = countries.rename(index={
    "United States of America": "United States",
    "Democratic Republic of the Congo": "Democratic Republic of Congo",
    "Ivory Coast": "Cote d\'Ivoire",
    "Republic of the Congo": "Congo",
    "Czech Republic": "Czechia",
    "The Bahamas": "Bahamas",
    "Guinea Bissau": "Guinea-Bissau",
    "Federated States of Micronesia": "Micronesia (country)",
    "Macedonia": "North Macedonia",
    "eSwatini": "Eswatini",
    "Republic of Serbia": "Serbia",
    "United Republic of Tanzania": "Tanzania",
    "São Tomé and Principe": "Sao Tome and Principe",
    "Cape Verde": "Cabo Verde"
}
)

Chargement des données¶

Cas 1: On a téléchargé des données pour une seule date¶

In [ ]:
life_expectancy = pd.read_csv("life-expectancy.csv")
In [ ]:
life_expectancy.head()
Out[ ]:
Entity Code Year Period life expectancy at birth time
0 Afghanistan AFG 2023 66.0346 2023
1 Albania ALB 2023 79.6019 2023
2 Algeria DZA 2023 76.2610 2023
3 Andorra AND 2023 84.0406 2023
4 Angola AGO 2023 64.6170 2023
In [ ]:
life_expectancy = life_expectancy.set_index("Entity")
In [ ]:
life_expectancy.head()
Out[ ]:
Code Year Period life expectancy at birth time
Entity
Afghanistan AFG 2023 66.0346 2023
Albania ALB 2023 79.6019 2023
Algeria DZA 2023 76.2610 2023
Andorra AND 2023 84.0406 2023
Angola AGO 2023 64.6170 2023
In [ ]:
countries["life_expectancy"]=np.arange(len(countries))
countries.head()
Out[ ]:
ISO3166-1-Alpha-3 ISO3166-1-Alpha-2 geometry life_expectancy
name
Indonesia IDN ID MULTIPOLYGON (((117.70361 4.16342, 117.70361 4... 0
Malaysia MYS MY MULTIPOLYGON (((117.70361 4.16342, 117.69711 4... 1
Chile CHL CL MULTIPOLYGON (((-69.51009 -17.50659, -69.50611... 2
Bolivia BOL BO POLYGON ((-69.51009 -17.50659, -69.51009 -17.5... 3
Peru PER PE MULTIPOLYGON (((-69.51009 -17.50659, -69.63832... 4
In [ ]:
life_expectancy["Period life expectancy at birth"]
Out[ ]:
Period life expectancy at birth
Entity
Afghanistan 66.0346
Albania 79.6019
Algeria 76.2610
Andorra 84.0406
Angola 64.6170
... ...
Vietnam 74.5883
Western Sahara 71.3850
Yemen 69.2952
Zambia 66.3487
Zimbabwe 62.7748

201 rows × 1 columns


In [ ]:
countries["life_expectancy"]=life_expectancy["Period life expectancy at birth"]
countries.head()
Out[ ]:
ISO3166-1-Alpha-3 ISO3166-1-Alpha-2 geometry life_expectancy
name
Indonesia IDN ID MULTIPOLYGON (((117.70361 4.16342, 117.70361 4... 71.1457
Malaysia MYS MY MULTIPOLYGON (((117.70361 4.16342, 117.69711 4... 76.6571
Chile CHL CL MULTIPOLYGON (((-69.51009 -17.50659, -69.50611... 81.1667
Bolivia BOL BO POLYGON ((-69.51009 -17.50659, -69.51009 -17.5... 68.5814
Peru PER PE MULTIPOLYGON (((-69.51009 -17.50659, -69.63832... 77.7401
In [ ]:
countries.plot(column="life_expectancy", figsize=(12,6), legend=True).set_title("life expectancy")
Out[ ]:
Text(0.5, 1.0, 'life expectancy')
No description has been provided for this image

On fait la même chose pour les autres variables étudiées

In [164]:
countries["population"] = pd.read_csv("population.csv").set_index("Entity")["Population (historical)"]
In [165]:
countries["log_population"] = np.log10(countries["population"])
In [166]:
countries.plot(column="log_population", figsize=(12,6), legend=True).set_title("population (log scale)")
Out[166]:
Text(0.5, 1.0, 'population (log scale)')
No description has been provided for this image
In [169]:
countries["co2_emissions"] = pd.read_csv("co-emissions-per-capita.csv").set_index("Entity")["Annual CO₂ emissions (per capita)"]
countries.plot(column="co2_emissions", figsize=(12,6), legend=True).set_title("Emissions annuelles de CO₂ par habitant")
Out[169]:
Text(0.5, 1.0, 'Emissions annuelles de CO₂ par habitant')
No description has been provided for this image
In [170]:
countries["child_mortality"] = pd.read_csv("child-mortality.csv").set_index("Entity")["Child mortality rate"]
countries.plot(column="child_mortality", figsize=(12,6), legend=True).set_title("Taux de mortalité infantile")
Out[170]:
Text(0.5, 1.0, 'Taux de mortalité infantile')
No description has been provided for this image

Cas 2 : on a téléchargé la série temporelle¶

In [171]:
hdi = pd.read_csv("human-development-index.csv")
In [172]:
hdi.head()
Out[172]:
Entity Code Year Human Development Index World regions according to OWID
0 Afghanistan AFG 1990 0.285 NaN
1 Afghanistan AFG 1991 0.291 NaN
2 Afghanistan AFG 1992 0.301 NaN
3 Afghanistan AFG 1993 0.311 NaN
4 Afghanistan AFG 1994 0.305 NaN
In [173]:
hdi_by_year = hdi.pivot_table(index='Entity', columns='Year', values='Human Development Index')
hdi_by_year.head()
Out[173]:
Year 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 ... 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023
Entity
Afghanistan 0.285 0.291 0.301 0.311 0.305 0.329 0.334 0.338 0.338 0.347 ... 0.497000 0.49600 0.495000 0.4960 0.498000 0.507000 0.501000 0.486000 0.495000 0.496000
Africa NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 0.536723 0.54259 0.546639 0.5504 0.555833 0.560354 0.559465 0.561104 0.571485 0.576059
Albania 0.654 0.638 0.622 0.624 0.629 0.638 0.647 0.645 0.659 0.671 ... 0.797000 0.79700 0.797000 0.7980 0.801000 0.805000 0.794000 0.794000 0.806000 0.810000
Algeria 0.595 0.596 0.601 0.603 0.603 0.608 0.615 0.624 0.634 0.643 ... 0.732000 0.73700 0.743000 0.7460 0.749000 0.753000 0.742000 0.755000 0.761000 0.763000
Andorra NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 0.866000 0.86900 0.872000 0.8730 0.875000 0.876000 0.851000 0.871000 0.893000 0.913000

5 rows × 34 columns

In [174]:
def function_to_rename_columns_name(column_name):
    return f"hdi_{column_name}"

hdi_by_year = hdi_by_year.rename(columns=function_to_rename_columns_name)
hdi_by_year.head()
Out[174]:
Year hdi_1990 hdi_1991 hdi_1992 hdi_1993 hdi_1994 hdi_1995 hdi_1996 hdi_1997 hdi_1998 hdi_1999 ... hdi_2014 hdi_2015 hdi_2016 hdi_2017 hdi_2018 hdi_2019 hdi_2020 hdi_2021 hdi_2022 hdi_2023
Entity
Afghanistan 0.285 0.291 0.301 0.311 0.305 0.329 0.334 0.338 0.338 0.347 ... 0.497000 0.49600 0.495000 0.4960 0.498000 0.507000 0.501000 0.486000 0.495000 0.496000
Africa NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 0.536723 0.54259 0.546639 0.5504 0.555833 0.560354 0.559465 0.561104 0.571485 0.576059
Albania 0.654 0.638 0.622 0.624 0.629 0.638 0.647 0.645 0.659 0.671 ... 0.797000 0.79700 0.797000 0.7980 0.801000 0.805000 0.794000 0.794000 0.806000 0.810000
Algeria 0.595 0.596 0.601 0.603 0.603 0.608 0.615 0.624 0.634 0.643 ... 0.732000 0.73700 0.743000 0.7460 0.749000 0.753000 0.742000 0.755000 0.761000 0.763000
Andorra NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 0.866000 0.86900 0.872000 0.8730 0.875000 0.876000 0.851000 0.871000 0.893000 0.913000

5 rows × 34 columns

In [175]:
hdi_by_year.isna().sum(axis=0)
Out[175]:
0
Year
hdi_1990 60
hdi_1991 60
hdi_1992 60
hdi_1993 59
hdi_1994 59
hdi_1995 51
hdi_1996 51
hdi_1997 51
hdi_1998 51
hdi_1999 46
hdi_2000 24
hdi_2001 24
hdi_2002 23
hdi_2003 22
hdi_2004 20
hdi_2005 7
hdi_2006 7
hdi_2007 6
hdi_2008 5
hdi_2009 6
hdi_2010 2
hdi_2011 1
hdi_2012 1
hdi_2013 1
hdi_2014 1
hdi_2015 1
hdi_2016 1
hdi_2017 1
hdi_2018 1
hdi_2019 1
hdi_2020 1
hdi_2021 1
hdi_2022 1
hdi_2023 0

In [176]:
countries[["hdi_2023", "hdi_2013"]]=hdi_by_year[["hdi_2023", "hdi_2013"]]
In [177]:
countries
Out[177]:
ISO3166-1-Alpha-3 ISO3166-1-Alpha-2 geometry life_expectancy population log_population child_mortality co2_emissions hdi_2023 hdi_2013
name
Indonesia IDN ID MULTIPOLYGON (((117.70361 4.16342, 117.70361 4... 71.1457 281190068.0 8.449000 2.06 2.711182 0.728 0.690
Malaysia MYS MY MULTIPOLYGON (((117.70361 4.16342, 117.69711 4... 76.6571 35126295.0 7.545632 0.81 7.865810 0.819 0.791
Chile CHL CL MULTIPOLYGON (((-69.51009 -17.50659, -69.50611... 81.1667 19658833.0 7.293558 0.72 3.947550 0.878 0.845
Bolivia BOL BO POLYGON ((-69.51009 -17.50659, -69.51009 -17.5... 68.5814 12244161.0 7.087929 2.31 2.010797 0.733 0.699
Peru PER PE MULTIPOLYGON (((-69.51009 -17.50659, -69.63832... 77.7401 33845616.0 7.529502 1.58 1.951944 0.794 0.757
... ... ... ... ... ... ... ... ... ... ...
Macao S.A.R MAC MO MULTIPOLYGON (((113.5586 22.16303, 113.56943 2... NaN NaN NaN NaN NaN NaN NaN
Ashmore and Cartier Islands -99 -99 POLYGON ((123.59702 -12.42832, 123.59775 -12.4... NaN NaN NaN NaN NaN NaN NaN
Bajo Nuevo Bank (Petrel Is.) -99 -99 POLYGON ((-79.98929 15.79495, -79.98782 15.796... NaN NaN NaN NaN NaN NaN NaN
Serranilla Bank -99 -99 POLYGON ((-78.63707 15.86209, -78.64041 15.864... NaN NaN NaN NaN NaN NaN NaN
Scarborough Reef -99 -99 POLYGON ((117.75389 15.15437, 117.75569 15.151... NaN NaN NaN NaN NaN NaN NaN

258 rows × 10 columns

In [178]:
countries.plot(column='hdi_2023', figsize=(12,6), legend=True).set_title("Human Development Index (2023)")
Out[178]:
Text(0.5, 1.0, 'Human Development Index (2023)')
No description has been provided for this image

Gestion des valeurs NaN¶

La plupart des méthodes d'analyse statistiques ne supportent pas les NaN, par exemple l'ACP.

Si l'analyse de ces unités spatiales est importante, on peut leur attribuer une valeur, par exemple la moyenne pour chaque variable. Si les variables avec un NaN sont corrélées avec une variable pour laquelle la valeur est connue, on peut inférer avec une régression linéaire.

Dans notre cas, on va simplement ignorer les individus avec des NaN, avec la méthode dropna().

In [179]:
countries = countries.dropna()
countries
Out[179]:
ISO3166-1-Alpha-3 ISO3166-1-Alpha-2 geometry life_expectancy population log_population child_mortality co2_emissions hdi_2023 hdi_2013
name
Indonesia IDN ID MULTIPOLYGON (((117.70361 4.16342, 117.70361 4... 71.1457 281190068.0 8.449000 2.06 2.711182 0.728 0.690
Malaysia MYS MY MULTIPOLYGON (((117.70361 4.16342, 117.69711 4... 76.6571 35126295.0 7.545632 0.81 7.865810 0.819 0.791
Chile CHL CL MULTIPOLYGON (((-69.51009 -17.50659, -69.50611... 81.1667 19658833.0 7.293558 0.72 3.947550 0.878 0.845
Bolivia BOL BO POLYGON ((-69.51009 -17.50659, -69.51009 -17.5... 68.5814 12244161.0 7.087929 2.31 2.010797 0.733 0.699
Peru PER PE MULTIPOLYGON (((-69.51009 -17.50659, -69.63832... 77.7401 33845616.0 7.529502 1.58 1.951944 0.794 0.757
... ... ... ... ... ... ... ... ... ... ...
Nauru NRU NR POLYGON ((166.93881 -0.49041, 166.95558 -0.497... 62.1094 11900.0 4.075547 0.89 4.933193 0.703 0.640
Micronesia (country) FSM FM MULTIPOLYGON (((163.02605 5.34089, 163.03045 5... 67.1983 112646.0 5.051716 2.31 1.278669 0.615 0.603
Vanuatu VUT VU MULTIPOLYGON (((169.84034 -20.1408, 169.86052 ... 71.4769 320422.0 5.505722 1.68 0.588156 0.621 0.592
Palau PLW PW MULTIPOLYGON (((134.2715 7.07453, 134.27931 7.... 69.2690 17751.0 4.249223 2.23 12.180102 0.786 0.791
Bahrain BHR BH MULTIPOLYGON (((50.55161 26.19424, 50.59474 26... 81.2835 1569674.0 6.195809 0.86 24.710030 0.899 0.846

186 rows × 10 columns

In [180]:
countries.plot()
Out[180]:
<Axes: >
No description has been provided for this image

Exploration du jeu¶

In [182]:
countries.describe()# Affiche des statistiques univariées pour chaque variable
Out[182]:
life_expectancy population log_population child_mortality co2_emissions hdi_2023 hdi_2013
count 186.000000 1.860000e+02 186.000000 186.000000 186.000000 186.000000 186.000000
mean 73.048182 4.303034e+07 6.854228 2.478011 4.360875 0.740656 0.710382
std 7.025400 1.534759e+08 0.937179 2.469786 5.568655 0.150070 0.154175
min 54.462300 9.837000e+03 3.992863 0.210000 0.055592 0.388000 0.359000
25% 67.450850 2.538054e+06 6.404222 0.610000 0.808994 0.621250 0.583250
50% 73.456050 9.495406e+06 6.977426 1.560000 2.862610 0.762500 0.739000
75% 78.125250 3.354244e+07 7.525590 3.845000 5.346318 0.856750 0.829250
max 84.712300 1.438070e+09 9.157780 11.480000 40.127865 0.972000 0.953000
In [183]:
pd.plotting.scatter_matrix(countries, figsize=(10, 10))
Out[183]:
array([[<Axes: xlabel='life_expectancy', ylabel='life_expectancy'>,
        <Axes: xlabel='population', ylabel='life_expectancy'>,
        <Axes: xlabel='log_population', ylabel='life_expectancy'>,
        <Axes: xlabel='child_mortality', ylabel='life_expectancy'>,
        <Axes: xlabel='co2_emissions', ylabel='life_expectancy'>,
        <Axes: xlabel='hdi_2023', ylabel='life_expectancy'>,
        <Axes: xlabel='hdi_2013', ylabel='life_expectancy'>],
       [<Axes: xlabel='life_expectancy', ylabel='population'>,
        <Axes: xlabel='population', ylabel='population'>,
        <Axes: xlabel='log_population', ylabel='population'>,
        <Axes: xlabel='child_mortality', ylabel='population'>,
        <Axes: xlabel='co2_emissions', ylabel='population'>,
        <Axes: xlabel='hdi_2023', ylabel='population'>,
        <Axes: xlabel='hdi_2013', ylabel='population'>],
       [<Axes: xlabel='life_expectancy', ylabel='log_population'>,
        <Axes: xlabel='population', ylabel='log_population'>,
        <Axes: xlabel='log_population', ylabel='log_population'>,
        <Axes: xlabel='child_mortality', ylabel='log_population'>,
        <Axes: xlabel='co2_emissions', ylabel='log_population'>,
        <Axes: xlabel='hdi_2023', ylabel='log_population'>,
        <Axes: xlabel='hdi_2013', ylabel='log_population'>],
       [<Axes: xlabel='life_expectancy', ylabel='child_mortality'>,
        <Axes: xlabel='population', ylabel='child_mortality'>,
        <Axes: xlabel='log_population', ylabel='child_mortality'>,
        <Axes: xlabel='child_mortality', ylabel='child_mortality'>,
        <Axes: xlabel='co2_emissions', ylabel='child_mortality'>,
        <Axes: xlabel='hdi_2023', ylabel='child_mortality'>,
        <Axes: xlabel='hdi_2013', ylabel='child_mortality'>],
       [<Axes: xlabel='life_expectancy', ylabel='co2_emissions'>,
        <Axes: xlabel='population', ylabel='co2_emissions'>,
        <Axes: xlabel='log_population', ylabel='co2_emissions'>,
        <Axes: xlabel='child_mortality', ylabel='co2_emissions'>,
        <Axes: xlabel='co2_emissions', ylabel='co2_emissions'>,
        <Axes: xlabel='hdi_2023', ylabel='co2_emissions'>,
        <Axes: xlabel='hdi_2013', ylabel='co2_emissions'>],
       [<Axes: xlabel='life_expectancy', ylabel='hdi_2023'>,
        <Axes: xlabel='population', ylabel='hdi_2023'>,
        <Axes: xlabel='log_population', ylabel='hdi_2023'>,
        <Axes: xlabel='child_mortality', ylabel='hdi_2023'>,
        <Axes: xlabel='co2_emissions', ylabel='hdi_2023'>,
        <Axes: xlabel='hdi_2023', ylabel='hdi_2023'>,
        <Axes: xlabel='hdi_2013', ylabel='hdi_2023'>],
       [<Axes: xlabel='life_expectancy', ylabel='hdi_2013'>,
        <Axes: xlabel='population', ylabel='hdi_2013'>,
        <Axes: xlabel='log_population', ylabel='hdi_2013'>,
        <Axes: xlabel='child_mortality', ylabel='hdi_2013'>,
        <Axes: xlabel='co2_emissions', ylabel='hdi_2013'>,
        <Axes: xlabel='hdi_2023', ylabel='hdi_2013'>,
        <Axes: xlabel='hdi_2013', ylabel='hdi_2013'>]], dtype=object)
No description has been provided for this image
In [184]:
countries.iloc[:,3:].corr()#J'enlève les colones de code et de géométrie pour calculer les corrélations
Out[184]:
life_expectancy population log_population child_mortality co2_emissions hdi_2023 hdi_2013
life_expectancy 1.000000 0.014349 -0.002132 -0.877801 0.481439 0.904435 0.900622
population 0.014349 1.000000 0.441910 -0.003487 0.010886 -0.011176 -0.038145
log_population -0.002132 0.441910 1.000000 0.148135 -0.071339 -0.113981 -0.124327
child_mortality -0.877801 -0.003487 0.148135 1.000000 -0.421016 -0.862051 -0.861931
co2_emissions 0.481439 0.010886 -0.071339 -0.421016 1.000000 0.540600 0.538467
hdi_2023 0.904435 -0.011176 -0.113981 -0.862051 0.540600 1.000000 0.987957
hdi_2013 0.900622 -0.038145 -0.124327 -0.861931 0.538467 0.987957 1.000000

ACP¶

Centrer-réduire les variables :

In [185]:
countries_cr = (countries.iloc[:,3:] - countries.iloc[:,3:].mean(axis=0))/countries.iloc[:,3:].std(axis=0)
countries_cr
Out[185]:
life_expectancy population log_population child_mortality co2_emissions hdi_2023 hdi_2013
name
Indonesia -0.270801 1.551773 1.701673 -0.169250 -0.296246 -0.084333 -0.132199
Malaysia 0.513696 -0.051500 0.737750 -0.675366 0.629404 0.522051 0.522902
Chile 1.155595 -0.152281 0.468779 -0.711807 -0.074223 0.915201 0.873155
Bolivia -0.635805 -0.200593 0.249366 -0.068026 -0.422019 -0.051016 -0.073824
Peru 0.667851 -0.059845 0.720539 -0.363599 -0.432587 0.355462 0.302373
... ... ... ... ... ... ... ...
Nauru -1.557033 -0.280294 -2.964942 -0.642975 0.102775 -0.250923 -0.456507
Micronesia (country) -0.832676 -0.279638 -1.923339 -0.068026 -0.553492 -0.837316 -0.696494
Vanuatu -0.223657 -0.278284 -1.438899 -0.323109 -0.677492 -0.797335 -0.767842
Palau -0.537931 -0.280256 -2.779624 -0.100418 1.404150 0.302153 0.522902
Bahrain 1.172221 -0.270145 -0.702554 -0.655122 3.654232 1.055136 0.879641

186 rows × 7 columns

In [186]:
from sklearn.decomposition import PCA
pca = PCA()
countries_transformed = pca.fit_transform(countries_cr)
countries_transformed
Out[186]:
array([[-0.38782093,  2.2577136 , -0.27033415, ..., -0.1075837 ,
        -0.43054918,  0.01581441],
       [ 1.20067973,  0.5528224 ,  0.16633161, ..., -0.20558946,
        -0.20109657,  0.00503812],
       [ 1.67981178,  0.33923759, -0.67661612, ...,  0.11003823,
         0.21270873,  0.02796701],
       ...,
       [-0.82181178, -1.25417263, -0.44619198, ..., -0.78633218,
         0.18600237, -0.02108663],
       [ 0.82865724, -2.15334798,  1.48835281, ...,  0.28458681,
        -0.27605793, -0.14799218],
       [ 2.97659196, -0.46833059,  2.87956824, ..., -0.29321703,
         0.35584879,  0.11548285]])

On va remettre countries_transformed dans un geodataframe pour en permettre la visualisation cartographique.

In [187]:
countries_transformed_gdf = gpd.GeoDataFrame(countries_transformed, index=countries.index, geometry=countries.geometry, crs=countries.crs)
countries_transformed_gdf
Out[187]:
0 1 2 3 4 5 6 geometry
name
Indonesia -0.387821 2.257714 -0.270334 -0.008092 -0.107584 -0.430549 0.015814 MULTIPOLYGON (((117.70361 4.16342, 117.70361 4...
Malaysia 1.200680 0.552822 0.166332 -0.592792 -0.205589 -0.201097 0.005038 MULTIPOLYGON (((117.70361 4.16342, 117.69711 4...
Chile 1.679812 0.339238 -0.676616 -0.487564 0.110038 0.212709 0.027967 MULTIPOLYGON (((-69.51009 -17.50659, -69.50611...
Bolivia -0.473525 -0.034564 -0.311355 -0.194320 0.045559 -0.577952 0.034945 POLYGON ((-69.51009 -17.50659, -69.51009 -17.5...
Peru 0.613848 0.518250 -0.737999 -0.537023 -0.034877 0.158002 0.035943 MULTIPOLYGON (((-69.51009 -17.50659, -69.63832...
... ... ... ... ... ... ... ... ...
Nauru -0.535845 -2.413022 0.479206 2.095558 -0.443317 -1.015918 0.172378 POLYGON ((166.93881 -0.49041, 166.95558 -0.497...
Micronesia (country) -1.136291 -1.647075 -0.119571 1.298190 -0.484725 -0.144627 -0.092071 MULTIPOLYGON (((163.02605 5.34089, 163.03045 5...
Vanuatu -0.821812 -1.254173 -0.446192 0.941192 -0.786332 0.186002 -0.021087 MULTIPOLYGON (((169.84034 -20.1408, 169.86052 ...
Palau 0.828657 -2.153348 1.488353 1.650089 0.284587 -0.276058 -0.147992 MULTIPOLYGON (((134.2715 7.07453, 134.27931 7....
Bahrain 2.976592 -0.468331 2.879568 -0.114218 -0.293217 0.355849 0.115483 MULTIPOLYGON (((50.55161 26.19424, 50.59474 26...

186 rows × 8 columns

Taux de variance expliqué cumulé

In [188]:
plt.figure(figsize=(10,10))
plt.plot(np.cumsum(pca.explained_variance_ratio_))
Out[188]:
[<matplotlib.lines.Line2D at 0x7fc1904c80e0>]
No description has been provided for this image

Là par exemple, la première composante principale explique environ 60% de la variance, et la somme des 3 premières environ 90%. Si on garde 3 composantes, on garde "90% de l'information" en n'utilisant que 3 variables au lieu de 7.

In [189]:
pca.components_[0]
Out[189]:
array([ 0.46899691, -0.0141802 , -0.06995472, -0.45681452,  0.31120297,
        0.48464288,  0.484295  ])

La première composante oppose l'esperance de vie, les emissions de CO2 et l'hdi au 2 dates d'un côté à la mortalité infantile.

In [190]:
pca.components_[1]
Out[190]:
array([ 0.08856495,  0.70726013,  0.7007067 , -0.00339905,  0.02488523,
        0.01782879, -0.00088282])

La deuxième se concentre surtout sur la population et log population

In [191]:
countries_transformed_gdf.plot(column=0, figsize=(12,6), legend=True).set_title("Première composante principale")
Out[191]:
Text(0.5, 1.0, 'Première composante principale')
No description has been provided for this image

On voit que les pays equatoriaux sont associés avec une faible valeur de la première CP, c'est-à-dire à une faible espérance de vie et un faible hdi, et un taux de mortalité infantile plus élevé.

In [192]:
countries_transformed_gdf.plot(column=1, figsize=(12,6), legend=True).set_title("Seconde composante principale")
Out[192]:
Text(0.5, 1.0, 'Seconde composante principale')
No description has been provided for this image

Avec cette composante principale, des pays comme l'Inde ou la Chine ont des valeurs élevées parce qu'elles ont une population importante.

Valeurs pour la France :

In [193]:
countries_transformed_gdf.loc["France"]
Out[193]:
France
0 2.136315
1 0.983079
2 -0.820932
3 -0.693335
4 0.250692
5 0.224911
6 -0.013748
geometry MULTIPOLYGON (((-54.111527 2.11427, -54.134908...

La France a une valeur positive pour la première CP, donc elle a plutôt une bonne espérance de vie, un haut taux d'emissions de CO2... Elle a une valeur au dessus de 0 pour la 2ème CP, mais pas très haute : elle est un peu plus peuplée que la moyenne (68 millions vs 43 millions d'habitants)

Calculer la corrélation entre une variable et une composante principale

In [194]:
np.corrcoef(countries_cr["life_expectancy"], countries_transformed_gdf[0])
Out[194]:
array([[1.        , 0.94254968],
       [0.94254968, 1.        ]])

On peut vérifier que c'est la même valeur qu'obtenu avec la formule suivante :

In [195]:
pca.components_[0][0] * np.sqrt(pca.explained_variance_[0])
Out[195]:
np.float64(0.9425496753669508)

Cercle des corrélations

In [196]:
plt.figure(figsize =(10, 10))
ax = plt.subplot()
plt.xlim(-1.3,1.3)
plt.ylim(-1.3,1.3)
ax.add_patch(plt.Circle((0, 0), radius = 1, color='black', fill=False))
for i, variable in enumerate(list(countries_cr.columns)):
    plt.plot([0, pca.components_[0][i] * np.sqrt(pca.explained_variance_[0])], [0, pca.components_[1][i] * np.sqrt(pca.explained_variance_[1])], color='red')
    plt.text(pca.components_[0][i] * np.sqrt(pca.explained_variance_[0]), pca.components_[1][i] * np.sqrt(pca.explained_variance_[1]), variable)
No description has been provided for this image

On retrouve les interprétations faites précédemments :

  • Les variables hdi 2013 et 2023, l'esperance de vie et les emissions sont corrélées entre elles
  • Elles ne sont pas corrélées à la population (angle proche de 90°)
  • Elles ont une corrélation négatives avec la mortalité infantile

Classification non supervisée¶

In [221]:
from scipy.cluster.hierarchy import dendrogram, linkage
Z = linkage(countries_transformed_gdf.iloc[:,:3], 'ward')#Sur les 3 premières composantes de l'ACP
plt.figure(figsize=(25, 8))
ax=plt.subplot()
dn = dendrogram(Z)

country_names = countries_transformed_gdf.iloc[dn["leaves"]].index
ax.set_xticklabels(country_names, rotation=90)

# Set colors for the tick labels
colors = ["red" if name == "France" else "black" for name in country_names]

# Apply colors to the tick labels
tick_labels = ax.get_xticklabels()
for tick_label, color in zip(tick_labels, colors):
    tick_label.set_color(color)

plt.show()
No description has been provided for this image

Les pays les moins similaires sont ceux du groupe orange

In [222]:
from sklearn.cluster import AgglomerativeClustering

cah = AgglomerativeClustering(n_clusters=2, linkage="ward")
cah_class_2 = cah.fit_predict(countries_cr).astype(int)
cah_class_2 = gpd.GeoDataFrame(pd.Series(cah_class_2, index=countries.index), geometry=countries.geometry)
cah_class_2.plot(column=0, figsize=(12,5), legend=True)
Out[222]:
<Axes: >
No description has been provided for this image
In [236]:
countries_transformed_gdf.plot(kind="scatter", x=0, y=1, c=cah_class_2[0], figsize=(12,5), cmap="viridis")
Out[236]:
<Axes: xlabel='0', ylabel='1'>
No description has been provided for this image
In [233]:
from sklearn.cluster import AgglomerativeClustering

cah = AgglomerativeClustering(n_clusters=5, linkage="ward")
cah_class_5 = cah.fit_predict(countries_cr).astype(int)
cah_class_5 = gpd.GeoDataFrame(pd.Series(cah_class_5, index=countries.index), geometry=countries.geometry)
cah_class_5.plot(column=0, figsize=(12,5), legend=True)
Out[233]:
<Axes: >
No description has been provided for this image
In [235]:
countries_transformed_gdf.plot(kind="scatter", x=0, y=1, c=cah_class_5[0], figsize=(12,5), cmap="viridis")
Out[235]:
<Axes: xlabel='0', ylabel='1'>
No description has been provided for this image
In [255]:
from sklearn.cluster import KMeans

k_means = KMeans(n_clusters=2)
k_means_class_2 = k_means.fit_predict(countries_cr).astype(int)
k_means_class_2 = gpd.GeoDataFrame(pd.Series(k_means_class_2, index=countries.index), geometry=countries.geometry)
k_means_class_2.plot(column=0, figsize=(12,5), legend=True)
Out[255]:
<Axes: >
No description has been provided for this image
In [256]:
from sklearn.cluster import KMeans

k_means = KMeans(n_clusters=5)
k_means_class_5 = k_means.fit_predict(countries_cr).astype(int)
k_means_class_5 = gpd.GeoDataFrame(pd.Series(k_means_class_5, index=countries.index), geometry=countries.geometry)
k_means_class_5.plot(column=0, figsize=(12,5), legend=True)
Out[256]:
<Axes: >
No description has been provided for this image
In [250]:
inertie_totale = ((countries_cr - countries_cr.mean(axis=0))**2).sum().sum()
inertie_totale
Out[250]:
np.float64(1295.0)
In [257]:
for i, clustering in enumerate([cah_class_2, k_means_class_2, cah_class_5, k_means_class_5]):
    inertie_inter = 0
    for c in range(max(clustering[0])+1):
        centre_c = countries_cr.loc[clustering[0]==c].mean()
        inertie_inter += ((countries_cr.mean(axis=0) - centre_c)**2).sum().sum()
    Q = inertie_inter / inertie_totale
    print("La qualité du clustering", i, "est", Q)
La qualité du clustering 0 est 0.006609839407358844
La qualité du clustering 1 est 0.005332036190339916
La qualité du clustering 2 est 0.093074028932465
La qualité du clustering 3 est 0.09009296871477224

Classification supervisée¶

In [258]:
countries["political_regime"] = pd.read_csv("political-regime.csv").set_index("Entity")["Political regime"]
countries.plot(column="political_regime", figsize=(12,6), legend=True).set_title("Régime politique")
/usr/local/lib/python3.12/dist-packages/geopandas/geodataframe.py:1968: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
Out[258]:
Text(0.5, 1.0, 'Régime politique')
No description has been provided for this image

Quel est le nombre de pays par régime politique ?

In [259]:
countries["political_regime"].value_counts()
Out[259]:
count
political_regime
2.0 53
1.0 53
3.0 33
0.0 28

In [262]:
for variable in ["life_expectancy", "population", "co2_emissions", "child_mortality", "hdi_2013", "hdi_2023"]:
    countries.boxplot(column=variable, by="political_regime", figsize=(12,6))
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [286]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
clf = RandomForestClassifier()#construct the classifier
X_train, X_test, y_train, y_test = train_test_split(
	countries_cr[~countries["political_regime"].isna()],#On ne prend pas les pays pour lesquels "political_regime" vaut NaN, on pourra justement utiliser le modèle pour essayer de prédire pour ces pays
  countries[~countries["political_regime"].isna()]["political_regime"],
	train_size=0.7)
clf.fit(X_train, y_train)#train the classifier
y_pred = clf.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
[[2 2 5 0]
 [4 7 5 0]
 [1 4 7 2]
 [0 0 3 9]]
              precision    recall  f1-score   support

         0.0       0.29      0.22      0.25         9
         1.0       0.54      0.44      0.48        16
         2.0       0.35      0.50      0.41        14
         3.0       0.82      0.75      0.78        12

    accuracy                           0.49        51
   macro avg       0.50      0.48      0.48        51
weighted avg       0.51      0.49      0.49        51

Globalement, le modèle n'est pas très bon puisqu'il se trompe pour plus de la moitié des cas. On peut voir que la classe sur laquelle le modèle s'en sort le mieux est "démocratie libérale", avec un f1-score de 78% avec quelques confusion avec la classe "démocratie éléctorale". Au contraire, il a du mal à détecter les autocraties fermées, qui sont minoritaires dans le jeu d'entraînement.

La première cause de ces mauvaises performances est que les variables choisies ne permettent pas assez de séparer les classes. Par ailleurs, le nombre de pays est assez faible ce qui rend l'apprentissage difficile.

In [295]:
results = gpd.GeoDataFrame({"y_pred":y_pred,"y_true":y_test},index=X_test.index, geometry=countries.loc[X_test.index, "geometry"])
results
Out[295]:
y_pred y_true geometry
name
Costa Rica 2.0 3.0 MULTIPOLYGON (((-83.6965 10.93659, -83.68687 1...
Belgium 3.0 3.0 POLYGON ((2.5218 51.08754, 2.542 51.09687, 2.5...
Mauritania 1.0 1.0 MULTIPOLYGON (((-8.68238 27.28542, -8.48441 27...
Israel 3.0 3.0 POLYGON ((34.24835 31.21145, 34.2644 31.22419,...
South Korea 3.0 3.0 MULTIPOLYGON (((128.36492 38.62434, 128.39478 ...
Malawi 1.0 2.0 MULTIPOLYGON (((34.96462 -11.57356, 34.65125 -...
Spain 3.0 3.0 MULTIPOLYGON (((-5.34073 35.84736, -5.3629 35....
Austria 3.0 2.0 POLYGON ((16.94504 48.60417, 16.95434 48.5574,...
Cuba 2.0 0.0 MULTIPOLYGON (((-75.09501 19.89723, -75.09495 ...
Eritrea 2.0 0.0 MULTIPOLYGON (((43.11769 12.70791, 42.90036 12...
Jordan 2.0 0.0 POLYGON ((35.61176 32.6819, 35.61233 32.68154,...
Malta 3.0 2.0 MULTIPOLYGON (((14.54802 35.89004, 14.56316 35...
Uruguay 2.0 3.0 POLYGON ((-57.60279 -30.19052, -57.58684 -30.2...
Latvia 2.0 3.0 POLYGON ((27.35294 57.5276, 27.52817 57.52848,...
Peru 2.0 2.0 MULTIPOLYGON (((-69.51009 -17.50659, -69.63832...
Turkey 2.0 1.0 MULTIPOLYGON (((43.44043 41.10659, 43.43629 41...
Finland 3.0 3.0 MULTIPOLYGON (((28.95408 69.02726, 28.83346 68...
Hungary 2.0 1.0 POLYGON ((22.8776 47.94674, 22.86117 47.93382,...
North Macedonia 2.0 2.0 POLYGON ((20.56715 41.87318, 20.5903 41.85473,...
Angola 2.0 1.0 MULTIPOLYGON (((13.0737 -4.63532, 13.06533 -4....
Laos 2.0 0.0 POLYGON ((102.11866 22.39755, 102.12542 22.383...
Qatar 0.0 0.0 POLYGON ((50.80787 24.74665, 50.82667 24.74868...
Russia 0.0 1.0 MULTIPOLYGON (((87.81632 49.16584, 87.71638 49...
Georgia 2.0 2.0 POLYGON ((41.5512 42.40646, 41.50017 42.64057,...
South Sudan 0.0 0.0 POLYGON ((35.92084 4.61933, 35.85654 4.6196, 3...
Bulgaria 1.0 2.0 POLYGON ((26.33336 41.71304, 26.29491 41.71032...
Mozambique 0.0 1.0 MULTIPOLYGON (((32.11388 -26.84001, 32.1174 -2...
Comoros 2.0 1.0 MULTIPOLYGON (((43.78875 -12.30804, 43.79705 -...
Benin 0.0 1.0 POLYGON ((3.5964 11.69577, 3.57439 11.67304, 3...
El Salvador 1.0 1.0 MULTIPOLYGON (((-90.09831 13.7314, -90.11431 1...
Solomon Islands 2.0 2.0 MULTIPOLYGON (((160.50367 -11.73602, 160.56446...
Mexico 0.0 2.0 MULTIPOLYGON (((-97.13927 25.96581, -97.16747 ...
Myanmar 1.0 0.0 MULTIPOLYGON (((92.57588 21.97757, 92.58384 21...
Syria 2.0 0.0 POLYGON ((35.75759 32.74435, 35.7842 32.77795,...
Panama 2.0 2.0 MULTIPOLYGON (((-82.5736 9.5762, -82.56188 9.5...
Papua New Guinea 1.0 1.0 MULTIPOLYGON (((140.97446 -2.60052, 140.98732 ...
Italy 3.0 3.0 MULTIPOLYGON (((7.02208 45.92526, 7.06694 45.8...
Netherlands 3.0 3.0 MULTIPOLYGON (((7.19459 53.24502, 7.19747 53.2...
Senegal 1.0 2.0 POLYGON ((-12.26413 14.77494, -12.25651 14.745...
Denmark 3.0 3.0 MULTIPOLYGON (((8.66078 54.89631, 8.66879 54.9...
Algeria 0.0 1.0 POLYGON ((-4.82161 24.99506, -4.99519 25.10209...
United Kingdom 3.0 3.0 MULTIPOLYGON (((-7.2471 55.06932, -7.25674 55....
Jamaica 2.0 2.0 POLYGON ((-76.26374 18.01236, -76.25678 17.996...
Haiti 1.0 0.0 MULTIPOLYGON (((-71.75744 19.71011, -71.74861 ...
Cote d'Ivoire 1.0 1.0 MULTIPOLYGON (((-7.98966 10.16199, -7.9709 10....
Gabon 2.0 1.0 MULTIPOLYGON (((13.29457 2.16106, 13.29488 2.1...
Iraq 1.0 1.0 POLYGON ((44.76614 37.14192, 44.75254 37.11314...
Togo 1.0 1.0 POLYGON ((-0.16611 11.13498, -0.11506 11.12466...
Poland 2.0 2.0 POLYGON ((18.8332 49.51026, 18.83743 49.52695,...
Guinea-Bissau 1.0 1.0 MULTIPOLYGON (((-13.72828 12.67339, -13.73631 ...
Liberia 1.0 2.0 POLYGON ((-11.47619 6.91942, -11.44449 6.93394...
In [285]:
results.plot(column="y_pred", figsize=(12,6), legend=True).set_title("Régime politique prédit")
results.plot(column="y_true", figsize=(12,6), legend=True).set_title("Régime politique réel")
Out[285]:
Text(0.5, 1.0, 'Régime politique réel')
No description has been provided for this image
No description has been provided for this image

On peut utiliser le modèle (même s'il n'est pas très bon) pour prédire le régime politique des pays pour lesquels on a pas de données

In [290]:
predictions_pays_regimes_inconnus = gpd.GeoDataFrame(
    {"regime prédit":clf.predict(countries_cr[countries["political_regime"].isna()])},
    index=countries[countries["political_regime"].isna()].index,
    geometry=countries[countries["political_regime"].isna()].geometry
    )
predictions_pays_regimes_inconnus
Out[290]:
regime prédit geometry
name
Palestine 1.0 MULTIPOLYGON (((34.4812 31.58314, 34.52013 31....
Brunei 2.0 MULTIPOLYGON (((115.14617 4.90852, 115.1468 4....
Andorra 3.0 POLYGON ((1.70701 42.50278, 1.6975 42.49446, 1...
Belize 2.0 MULTIPOLYGON (((-89.19314 16.39263, -89.18435 ...
Bahamas 3.0 MULTIPOLYGON (((-72.99957 21.45171, -73.03366 ...
Kiribati 2.0 MULTIPOLYGON (((173.03826 1.34105, 173.02442 1...
Marshall Islands 1.0 MULTIPOLYGON (((168.10108 5.59931, 168.10133 5...
Grenada 3.0 MULTIPOLYGON (((-61.61294 12.21442, -61.60456 ...
Saint Vincent and the Grenadines 3.0 MULTIPOLYGON (((-61.194 13.03685, -61.20832 13...
Saint Lucia 3.0 POLYGON ((-60.88679 14.01008, -60.88296 13.980...
Dominica 3.0 POLYGON ((-61.36286 15.20181, -61.37409 15.204...
Antigua and Barbuda 3.0 MULTIPOLYGON (((-61.88362 17.04902, -61.87922 ...
Saint Kitts and Nevis 3.0 MULTIPOLYGON (((-62.59923 17.20295, -62.57844 ...
Tonga 3.0 MULTIPOLYGON (((-173.95637 -18.56732, -173.941...
Samoa 3.0 MULTIPOLYGON (((-172.20104 -13.59254, -172.197...
Tuvalu 3.0 MULTIPOLYGON (((179.19125 -8.54209, 179.20004 ...
Nauru 3.0 POLYGON ((166.93881 -0.49041, 166.95558 -0.497...
Micronesia (country) 2.0 MULTIPOLYGON (((163.02605 5.34089, 163.03045 5...
Palau 3.0 MULTIPOLYGON (((134.2715 7.07453, 134.27931 7....

BONUS: probabilités¶

In [291]:
y_proba = clf.predict_proba(X_test)
y_proba
Out[291]:
array([[0.12, 0.26, 0.5 , 0.12],
       [0.02, 0.05, 0.09, 0.84],
       [0.02, 0.65, 0.33, 0.  ],
       [0.01, 0.02, 0.09, 0.88],
       [0.  , 0.18, 0.1 , 0.72],
       [0.1 , 0.71, 0.19, 0.  ],
       [0.  , 0.08, 0.07, 0.85],
       [0.01, 0.02, 0.09, 0.88],
       [0.03, 0.29, 0.68, 0.  ],
       [0.14, 0.23, 0.63, 0.  ],
       [0.05, 0.34, 0.61, 0.  ],
       [0.06, 0.03, 0.41, 0.5 ],
       [0.01, 0.07, 0.86, 0.06],
       [0.01, 0.04, 0.72, 0.23],
       [0.21, 0.14, 0.63, 0.02],
       [0.31, 0.22, 0.37, 0.1 ],
       [0.  , 0.1 , 0.03, 0.87],
       [0.01, 0.17, 0.74, 0.08],
       [0.01, 0.24, 0.67, 0.08],
       [0.08, 0.25, 0.67, 0.  ],
       [0.07, 0.44, 0.47, 0.02],
       [0.48, 0.07, 0.23, 0.22],
       [0.39, 0.38, 0.18, 0.05],
       [0.03, 0.16, 0.71, 0.1 ],
       [0.53, 0.3 , 0.17, 0.  ],
       [0.02, 0.56, 0.39, 0.03],
       [0.6 , 0.31, 0.09, 0.  ],
       [0.  , 0.42, 0.44, 0.14],
       [0.42, 0.42, 0.16, 0.  ],
       [0.02, 0.75, 0.23, 0.  ],
       [0.01, 0.21, 0.56, 0.22],
       [0.4 , 0.26, 0.34, 0.  ],
       [0.12, 0.52, 0.36, 0.  ],
       [0.04, 0.44, 0.5 , 0.02],
       [0.08, 0.34, 0.48, 0.1 ],
       [0.03, 0.78, 0.19, 0.  ],
       [0.  , 0.08, 0.05, 0.87],
       [0.01, 0.  , 0.06, 0.93],
       [0.14, 0.59, 0.27, 0.  ],
       [0.  , 0.05, 0.11, 0.84],
       [0.37, 0.27, 0.35, 0.01],
       [0.06, 0.01, 0.15, 0.78],
       [0.01, 0.26, 0.72, 0.01],
       [0.04, 0.75, 0.21, 0.  ],
       [0.23, 0.61, 0.16, 0.  ],
       [0.01, 0.23, 0.75, 0.01],
       [0.26, 0.52, 0.21, 0.01],
       [0.03, 0.63, 0.34, 0.  ],
       [0.12, 0.08, 0.42, 0.38],
       [0.02, 0.57, 0.41, 0.  ],
       [0.06, 0.59, 0.35, 0.  ]])

Chaque ligne correspond aux probabilités données par le modèle pour les 4 classes.

In [293]:
results[["proba_0", "proba_1", "proba_2", "proba_3"]] = y_proba
results
Out[293]:
y_pred y_true geometry proba_0 proba_1 proba_2 proba_3
name
Fiji 2.0 1.0 MULTIPOLYGON (((-180 -16.16961, -180 -16.14911... 0.12 0.26 0.50 0.12
Paraguay 2.0 2.0 POLYGON ((-62.65036 -22.23446, -62.62752 -22.1... 0.02 0.05 0.09 0.84
Spain 3.0 3.0 MULTIPOLYGON (((-5.34073 35.84736, -5.3629 35.... 0.02 0.65 0.33 0.00
Switzerland 3.0 3.0 POLYGON ((10.45381 46.86443, 10.44854 46.83223... 0.01 0.02 0.09 0.88
Senegal 1.0 2.0 POLYGON ((-12.26413 14.77494, -12.25651 14.745... 0.00 0.18 0.10 0.72
Portugal 2.0 3.0 MULTIPOLYGON (((-7.10486 38.82719, -7.15101 38... 0.10 0.71 0.19 0.00
Peru 2.0 2.0 MULTIPOLYGON (((-69.51009 -17.50659, -69.63832... 0.00 0.08 0.07 0.85
Mali 0.0 0.0 POLYGON ((-12.26413 14.77494, -12.24679 14.767... 0.01 0.02 0.09 0.88
Tajikistan 2.0 1.0 MULTIPOLYGON (((70.5659 41.01838, 70.59345 41.... 0.03 0.29 0.68 0.00
Nicaragua 2.0 1.0 MULTIPOLYGON (((-85.70174 11.08088, -85.70242 ... 0.14 0.23 0.63 0.00
Latvia 2.0 3.0 POLYGON ((27.35294 57.5276, 27.52817 57.52848,... 0.05 0.34 0.61 0.00
Germany 3.0 3.0 MULTIPOLYGON (((13.81572 48.76643, 13.78586 48... 0.06 0.03 0.41 0.50
Turkmenistan 1.0 0.0 MULTIPOLYGON (((61.26968 35.6185, 61.24539 35.... 0.01 0.07 0.86 0.06
Comoros 1.0 1.0 MULTIPOLYGON (((43.78875 -12.30804, 43.79705 -... 0.01 0.04 0.72 0.23
Slovenia 3.0 3.0 POLYGON ((13.64292 45.45943, 13.64282 45.45945... 0.21 0.14 0.63 0.02
Sweden 3.0 3.0 MULTIPOLYGON (((20.62316 69.03636, 20.67546 69... 0.31 0.22 0.37 0.10
France 3.0 3.0 MULTIPOLYGON (((-54.11153 2.11427, -54.13491 2... 0.00 0.10 0.03 0.87
Libya 1.0 0.0 POLYGON ((11.50511 33.18122, 11.52589 33.17695... 0.01 0.17 0.74 0.08
Saudi Arabia 2.0 0.0 MULTIPOLYGON (((50.80787 24.74665, 50.88101 24... 0.01 0.24 0.67 0.08
East Timor 2.0 2.0 MULTIPOLYGON (((124.45053 -9.18019, 124.4515 -... 0.08 0.25 0.67 0.00
Philippines 1.0 1.0 MULTIPOLYGON (((120.86069 5.57437, 120.87306 5... 0.07 0.44 0.47 0.02
Rwanda 1.0 1.0 POLYGON ((29.01536 -2.72071, 29.00012 -2.70366... 0.48 0.07 0.23 0.22
Venezuela 1.0 1.0 MULTIPOLYGON (((-60.02098 8.55801, -59.9597 8.... 0.39 0.38 0.18 0.05
Congo 2.0 1.0 POLYGON ((18.62639 3.47687, 18.63455 3.44922, ... 0.03 0.16 0.71 0.10
Bahrain 2.0 0.0 MULTIPOLYGON (((50.55161 26.19424, 50.59474 26... 0.53 0.30 0.17 0.00
Malaysia 1.0 1.0 MULTIPOLYGON (((117.70361 4.16342, 117.69711 4... 0.02 0.56 0.39 0.03
Sudan 1.0 0.0 MULTIPOLYGON (((22.86106 10.91915, 22.90024 11... 0.60 0.31 0.09 0.00
Argentina 2.0 2.0 MULTIPOLYGON (((-67.1939 -22.82222, -67.14269 ... 0.00 0.42 0.44 0.14
China 2.0 0.0 MULTIPOLYGON (((78.91769 33.38626, 78.91595 33... 0.42 0.42 0.16 0.00
Tanzania 1.0 1.0 MULTIPOLYGON (((32.92086 -9.4079, 32.90546 -9.... 0.02 0.75 0.23 0.00
Belgium 3.0 3.0 POLYGON ((2.5218 51.08754, 2.542 51.09687, 2.5... 0.01 0.21 0.56 0.22
Albania 2.0 2.0 POLYGON ((20.56715 41.87318, 20.54172 41.86158... 0.40 0.26 0.34 0.00
Guinea-Bissau 1.0 1.0 MULTIPOLYGON (((-13.72828 12.67339, -13.73631 ... 0.12 0.52 0.36 0.00
Zambia 1.0 2.0 POLYGON ((32.92086 -9.4079, 32.92303 -9.46629,... 0.04 0.44 0.50 0.02
Moldova 2.0 2.0 POLYGON ((26.61789 48.25897, 26.61861 48.26718... 0.08 0.34 0.48 0.10
Jordan 2.0 0.0 POLYGON ((35.61176 32.6819, 35.61233 32.68154,... 0.03 0.78 0.19 0.00
New Zealand 3.0 3.0 MULTIPOLYGON (((166.13697 -50.86435, 166.20525... 0.00 0.08 0.05 0.87
Morocco 2.0 0.0 POLYGON ((-8.81704 27.66146, -8.81845 27.6594,... 0.01 0.00 0.06 0.93
Finland 3.0 3.0 MULTIPOLYGON (((28.95408 69.02726, 28.83346 68... 0.14 0.59 0.27 0.00
Iran 1.0 0.0 MULTIPOLYGON (((44.80699 39.6399, 44.80965 39.... 0.00 0.05 0.11 0.84
Iceland 3.0 3.0 MULTIPOLYGON (((-14.56363 66.38451, -14.61075 ... 0.37 0.27 0.35 0.01
Lesotho 1.0 2.0 POLYGON ((28.98085 -28.90904, 28.99542 -28.908... 0.06 0.01 0.15 0.78
Iraq 1.0 1.0 POLYGON ((44.76614 37.14192, 44.75254 37.11314... 0.01 0.26 0.72 0.01
Cuba 2.0 0.0 MULTIPOLYGON (((-75.09501 19.89723, -75.09495 ... 0.04 0.75 0.21 0.00
Trinidad and Tobago 2.0 2.0 MULTIPOLYGON (((-61.00227 10.69937, -61.02086 ... 0.23 0.61 0.16 0.00
Qatar 2.0 0.0 POLYGON ((50.80787 24.74665, 50.82667 24.74868... 0.01 0.23 0.75 0.01
Botswana 2.0 2.0 POLYGON ((25.25978 -17.79411, 25.21937 -17.879... 0.26 0.52 0.21 0.01
Zimbabwe 1.0 1.0 POLYGON ((25.25978 -17.79411, 25.2667 -17.8009... 0.03 0.63 0.34 0.00
El Salvador 2.0 1.0 MULTIPOLYGON (((-90.09831 13.7314, -90.11431 1... 0.12 0.08 0.42 0.38
United Arab Emirates 2.0 0.0 MULTIPOLYGON (((56.27906 25.62745, 56.3003 25.... 0.02 0.57 0.41 0.00
Netherlands 3.0 3.0 MULTIPOLYGON (((7.19459 53.24502, 7.19747 53.2... 0.06 0.59 0.35 0.00
In [294]:
for proba in ["proba_0", "proba_1", "proba_2", "proba_3"]:
    results.plot(column=proba, figsize=(12,6), legend=True).set_title(proba)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]: