Data Visualization (Python)

Data Visualization With Python

There are a lot of good code that makes it easy to tell a story with your data.

Libraries

I list some popular libraries to deal with:

  1. matplotlib
  2. seaborn
  3. plotly ...

Example 1

I have written a sript a while ago to plot a long sequence of data. In this first example I plot data represented LIDAR scanning of a road with segments having either cleanded ditches or not, while registering a lot I am intrested here in representing three quantitative variables extracted from LIDAR pointcloud.

What I really want to focus on is the fact that it is possible to split long sequence and plot two subplots,with second representing categories.

In [1]:
import random
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from matplotlib import pyplot as plt
from math import ceil


ROAD = 16
size_slice = 100

#  set heigth ratio using gridspec_kw
width = [1]
heights = [10, 1]
gs_kw = dict(width_ratios=width, height_ratios=heights)  # {'width_ratios': [1], 'height_ratios': [5, 1]}


# Helper splits off the df
def chunk(lst, n):
    """Yield successive n-sized chunks from lst.
    calling (chunk(list(range(0,15)), 4)))
    returns
    [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14]]

    """
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

def roundup(x):
    #  round up ticks to next hundred
    return int(ceil(x / 100.0)) * 100

# parameters
chainage = [9800, 9770, 9700, 9670, 9590, 9480, 9460, 9450, 9440, 9430, 9420, 9410, 9400, 9390, 9380, 9370, 9360, 9350, 9340, 9330, 9320, 9310, 9300, 9290, 9280, 9270, 9260, 9250, 9240, 9230, 9220, 9210, 9200, 9190, 9180, 9170, 9160, 9150, 9140, 9130, 9120, 9110, 9100, 9090, 9080, 9070, 9060, 9050, 9040, 9030, 9020, 9010, 9000, 8990, 8980, 8970, 8960, 8950, 8940, 8930, 8920, 8910, 8900, 8890, 8880, 8870, 8860, 8850, 8840, 8830, 8820, 8810, 8800, 8790, 8780, 8770, 8760, 8750, 8740, 8730, 8720, 8710, 8700, 8690, 8680, 8670, 8660, 8650, 8640, 8630, 8620, 8610, 8600, 8590, 8580, 8570, 8560, 8550, 8540, 8530, 8520, 8510, 8500, 8480, 8470, 8460, 8450, 8440, 8430, 8420, 8410, 8400, 8390, 8380, 8370, 8360, 8350, 8340, 8330, 8320, 8310, 8300, 8290, 8280, 8270, 8260, 8250, 8240, 8230, 8220, 8210, 8200, 8190, 8180, 8170, 8160, 8150, 8140, 8130, 8120, 8110, 8100, 8090, 8080, 8070, 8060, 8050, 8040, 8030, 8020, 8010, 8000, 7990, 7980, 7970, 7960, 7950, 7940, 7930, 7920, 7910, 7900, 7890, 7880, 7870, 7820, 7810, 7800, 7790, 7780, 7770, 7760, 7750, 7740, 7730, 7720, 7710, 7700, 7690, 7680, 7670, 7660, 7650, 7640, 7630, 7620, 7610, 7600, 7590, 7580, 7570, 7560, 7550, 7540, 7530, 7520, 7510, 7500, 7490, 7480, 7470, 7460, 7450, 7440, 7430, 7420, 7410, 7400, 7390, 7360, 7350, 7340, 7330, 7320, 7310, 7300, 7290, 7280, 7270, 7260, 7250, 7240, 7230, 7220, 7210, 7200, 7190, 7180, 7170, 7160, 7150, 7140, 7130, 7120, 7110, 7100, 7090, 7080, 7070, 7060, 7050, 7040, 7030, 7020, 7010, 7000, 6990, 6980, 6970, 6960, 6950, 6940, 6930, 6920, 6910, 6900, 6890, 6330, 6320, 6310, 6300, 6290, 6280, 6270, 6260, 6250, 6240, 6230, 6220, 6210, 6200, 6190, 6180, 6170, 6160, 6150, 6140, 6130, 6120, 6110, 6100, 6090, 6080, 6070, 6060, 6050, 6040, 6030, 6020, 6010, 6000, 5990, 5980, 5970]
sci = [81, 42, 160, 113, 71, 83, 65, 110, 136, 109, 133, 189, 196, 189, 179, 168, 174, 180, 181, 202, 140, 143, 105, 136, 194, 137, 157, 205, 295, 287, 249, 224, 269, 234, 141, 164, 189, 147, 146, 133, 173, 163, 168, 188, 178, 159, 184, 191, 157, 144, 181, 152, 164, 161, 145, 193, 217, 211, 189, 196, 187, 171, 173, 162, 157, 143, 148, 158, 187, 152, 170, 200, 204, 155, 157, 179, 202, 202, 206, 216, 212, 206, 167, 139, 194, 152, 158, 151, 174, 181, 158, 166, 161, 175, 188, 174, 194, 249, 201, 180, 163, 152, 133, 77, 99, 128, 144, 134, 121, 153, 180, 196, 177, 168, 177, 194, 185, 269, 247, 255, 263, 285, 282, 211, 190, 201, 205, 217, 202, 180, 178, 232, 201, 190, 227, 178, 139, 172, 208, 210, 190, 179, 156, 169, 182, 196, 173, 154, 180, 184, 206, 182, 239, 237, 246, 207, 192, 213, 227, 200, 201, 187, 212, 255, 231, 218, 221, 203, 242, 271, 213, 186, 185, 177, 214, 222, 205, 182, 161, 210, 303, 248, 196, 236, 279, 222, 264, 187, 243, 209, 167, 177, 196, 191, 209, 166, 240, 178, 234, 222, 222, 207, 279, 291, 302, 225, 210, 289, 313, 317, 270, 232, 188, 174, 198, 214, 187, 196, 157, 201, 227, 253, 233, 229, 299, 269, 233, 235, 261, 276, 269, 236, 265, 301, 235, 221, 239, 206, 182, 164, 207, 224, 206, 182, 214, 183, 185, 244, 242, 267, 299, 293, 275, 248, 219, 259, 210, 175, 246, 286, 254, 225, 201, 169, 207, 228, 184, 185, 190, 189, 161, 124, 133, 139, 155, 154, 136, 141, 132, 103, 88, 91, 94, 120, 140, 164, 122, 144, 136, 126, 133, 165, 135, 145]
bci = [67, 57, 113, 71, 74, 75, 61, 95, 106, 104, 113, 125, 121, 124, 136, 126, 138, 136, 127, 133, 104, 112, 91, 98, 162, 108, 131, 144, 146, 145, 133, 138, 157, 156, 106, 122, 129, 103, 123, 99, 133, 119, 122, 129, 122, 111, 116, 120, 113, 103, 122, 117, 121, 110, 100, 116, 116, 120, 112, 100, 101, 97, 94, 91, 97, 75, 88, 99, 125, 102, 114, 111, 114, 91, 92, 125, 123, 140, 141, 153, 138, 130, 114, 114, 129, 96, 108, 117, 117, 125, 124, 101, 105, 118, 100, 104, 119, 139, 154, 130, 129, 123, 126, 80, 97, 116, 103, 95, 97, 111, 124, 124, 146, 125, 121, 138, 122, 167, 180, 176, 190, 176, 184, 147, 147, 153, 161, 157, 148, 135, 127, 146, 138, 126, 165, 141, 123, 135, 149, 144, 125, 131, 105, 102, 124, 139, 121, 134, 137, 122, 127, 130, 144, 152, 146, 130, 130, 126, 144, 137, 127, 124, 132, 143, 142, 155, 138, 147, 151, 148, 152, 149, 137, 136, 158, 154, 144, 142, 140, 149, 174, 180, 158, 176, 181, 164, 135, 142, 166, 167, 146, 145, 161, 162, 166, 125, 155, 135, 145, 152, 163, 156, 173, 198, 176, 154, 150, 167, 191, 192, 168, 148, 137, 117, 126, 143, 139, 130, 125, 146, 149, 162, 188, 172, 178, 171, 144, 159, 161, 163, 176, 152, 157, 167, 154, 143, 154, 136, 120, 100, 113, 139, 140, 128, 136, 107, 129, 157, 139, 151, 165, 167, 161, 167, 143, 140, 121, 164, 185, 190, 179, 160, 144, 131, 160, 176, 144, 150, 139, 145, 99, 85, 92, 109, 113, 110, 108, 107, 105, 92, 63, 100, 76, 81, 99, 120, 93, 109, 98, 87, 129, 107, 86, 100]
bdi = [164, 154, 255, 143, 181, 116, 148, 147, 157, 212, 94, 191, 127, 177, 216, 185, 140, 144, 169, 147, 124, 195, 141, 147, 158, 145, 138, 146, 75, 108, 112, 108, 120, 156, 118, 151, 160, 115, 74, 123, 167, 137, 128, 114, 133, 114, 85, 96, 147, 136, 128, 142, 176, 103, 105, 96, 45, 88, 47, 32, 105, 108, 69, 100, 113, 65, 38, 95, 129, 87, 116, 40, 103, 33, 94, 156, 44, 150, 182, 210, 134, 112, 109, 199, 165, 34, 111, 152, 135, 162, 154, 40, 114, 107, 86, 39, 115, 80, 205, 193, 117, 189, 225, 186, 165, 171, 143, 145, 157, 120, 163, 135, 187, 151, 126, 188, 116, 208, 185, 187, 244, 158, 191, 123, 161, 154, 178, 174, 160, 126, 111, 159, 109, 169, 209, 158, 158, 177, 161, 148, 122, 166, 48, 103, 157, 151, 109, 181, 94, 57, 94, 101, 147, 137, 135, 58, 134, 96, 125, 161, 144, 126, 157, 141, 129, 210, 144, 173, 132, 135, 183, 221, 167, 157, 216, 170, 150, 149, 188, 185, 153, 227, 149, 173, 169, 185, 169, 176, 173, 182, 206, 200, 253, 192, 93, 205, 185, 164, 121, 145, 205, 194, 204, 264, 208, 136, 173, 198, 184, 186, 162, 152, 160, 145, 156, 189, 181, 154, 174, 202, 162, 182, 97, 228, 174, 192, 172, 205, 182, 154, 182, 158, 191, 167, 165, 166, 179, 145, 59, 74, 85, 153, 172, 153, 139, 122, 130, 221, 142, 173, 171, 141, 158, 154, 147, 133, 124, 302, 260, 220, 237, 216, 196, 168, 184, 257, 235, 233, 191, 211, 138, 116, 145, 98, 177, 167, 169, 157, 184, 96, 91, 105, 159, 101, 148, 190, 152, 162, 159, 119, 99, 132, 120, 131]
"""
sci = [random.randint(40,320) for i in range(0,294)]
bci = [random.randint(50,200) for i in range(0,294)]
bdi = [random.randint(30,310) for i in range(0,294)]
"""

# height is the value for  control (not influenced) in the lowest subplot 
height = [1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), 1.0, 1.0, float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, float('nan'), 1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, float('nan'), 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), 1.0, float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), 1.0, float('nan'), float('nan'), 1.0, 1.0, float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, float('nan'), float('nan')]

# height_acm = list(df2_ac_mixed.height)
height_acm = [3.0 
              if i!=1.0  else float('nan') 
              for i in height]
height_ac = [random.choice([2.0, float('nan')]) 
             if i==3.0  else float('nan') 
             for i in height_acm]

#  notice here start and end computed based on chainage
start_of_chain, end_of_chain = min(chainage), max(chainage)
chainage_range = range(roundup(start_of_chain), roundup(end_of_chain), 100)
In [2]:
#  parameters decorated
_args = [chainage,sci,bci, bdi, height, height_ac, height_acm]
# divide all iterables into x-sized slices 
_par = [(chunk(i,size_slice)) for i in _args]
In [3]:
zipped_slices2 = zip(*_par)
def the_plotter(num_figures, value_collection):     
    # chainage, sci, bci, bdi, marker_chain, height, 
    # marker_chain_ac, height_ac, marker_chain_ac_mixed, height_acm
    chainage = value_collection[0]
    sci = value_collection[1]
    bci = value_collection[2]
    bdi = value_collection[3]
    height = value_collection[4]
    height_ac = value_collection[5]
    height_acm =  value_collection[6]
    
    f, (ax1, ax2) = plt.subplots(2, 1, constrained_layout=True, 
                                 sharey=False, figsize=(40, 15), 
                                 gridspec_kw=gs_kw)

    ax1.plot(chainage, sci, color="black", linewidth= 2.0)
    ax1.plot(chainage, bci, color="magenta")
    ax1.plot(chainage, bdi, color="dodgerblue")  # indigo
    ax1.set_ylim(top=500)
    ax1.legend(('sci', 'bci', 'bdi'), loc=2, prop={'size': 20})  #  legend size set
    # it is a bit countre intuitive
    # you do not split chainage_range into
    # smaller sequences since it is longer than 
    # visualized sequence it is cropped outside of it
    ax1.set_xticks(list(chainage_range))
    ax1.set_xticklabels(list(chainage_range), fontsize=14)  # list(xticks_computation(chainage_range)
    
    # ===================================================
    # PLOT 2
    # control 
    ax2.plot(chainage, height, linestyle='None', color="limegreen", marker='s', markersize=6)
    # 2x factor match
    ax2.plot(chainage, height_ac, linestyle='None', color="red", marker='s', markersize=6)
    # is invent & any pms
    ax2.plot(chainage, height_acm, linestyle='None', color="gold", marker='s', markersize=6)
    ax2.yaxis.set_ticks([1,2,3])
    ax2.set_yticklabels(['control','2x factor match','is invent & any/none pms'], fontsize=14)
    ax2.set_ylabel('factor levels', fontsize = 16)    
    
    #  =========================================================
    # BARS
    # add a bar chart to the upper subplot
    width = 10    #  the width of the bars, approx(end_of_chain - start_of_chain)/300
    x = chainage
    y = [i*500 for i in height if i]
    y2 = [i*250 for i in height_ac if i]
    y3 = [i*166 for i in height_acm]
    # control 
    rects1 = ax1.bar(x, y, width, color='limegreen', alpha = 0.6)    
    #  is invent & any/none pms
    rects3 = ax1.bar(x, y3, width, color='gold', alpha = 0.6)
    # 2x factor match
    rects2 = ax1.bar(x, y2, width, color='red', alpha = 0.3)
    
    # plt.savefig(f"roadnumber{ROAD}part{num_figures}_slice2000_v20200507.png")
    

# TO DO refactor as main
counter1=0
for chunk_element in reversed(list(zipped_slices2)):
    counter1 += 1
    the_plotter(counter1, chunk_element)

Example 2 The stacked barplot

This one is about representing distribution of frequencies (in %) of categorical variables in a correspondence analysis. This one is pulled from my favorite tuto about CA.

To create a cumulative barplot for percentages of a cross table I follow these steps:

Calculate the percentage for each category Calculate the cumulative percentage for each category Create a barplot with the categories on the x-axis and the cumulative percentages on the y-axis. you can add the percentage for each category as labels on the bars or as a separate table next to the plot

https://www.geeksforgeeks.org/create-a-stacked-bar-plot-in-matplotlib/

In [67]:
import pandas as pd
import numpy as np
data_ca = r'C:\thisAKcode.github.io\Pelican\content\other\CSV.csv'

df = pd.read_csv(data_ca, encoding='utf-8')
crosstab_orig = df.iloc[1:9, 1:-1].to_numpy()
countries = df.iloc[1:-1, 0].tolist()
prize_categories = df.columns[1:-1].tolist()
df
Out[67]:
col0 Chemistry Economic sciences Literature Medicin Peace Physics Total
0 Country NaN NaN NaN NaN NaN NaN NaN
1 Germany 24.0 1.0 8.0 18.0 5.0 24.0 80.0
2 Canada 4.0 3.0 2.0 4.0 1.0 4.0 18.0
3 France 8.0 3.0 11.0 12.0 10.0 9.0 53.0
4 UK 23.0 6.0 7.0 26.0 11.0 20.0 93.0
5 Italy 1.0 1.0 6.0 5.0 1.0 5.0 19.0
6 Japan 6.0 0.0 2.0 3.0 1.0 11.0 23.0
7 Russia 4.0 3.0 5.0 2.0 3.0 10.0 27.0
8 US 51.0 43.0 8.0 70.0 19.0 66.0 257.0
9 Total 121.0 60.0 49.0 140.0 51.0 149.0 570.0
In [68]:
# x.j
row_totals = np.sum(crosstab_orig, axis=1)
# xi.
column_totals = np.sum(crosstab_orig, axis=0)
# x..
grand_total = np.sum(crosstab_orig)

the code below will generate a stacked bar plot showing the percentages across rows for each category.

In [69]:
from matplotlib import pyplot as plt

data = {
    'col0': ['Country', 'Germany', 'Canada', 'France', 'UK', 'Italy', 'Japan', 'Russia', 'US', 'Total'],
    'Chemistry': [None, 24.0, 4.0, 8.0, 23.0, 1.0, 6.0, 4.0, 51.0, 121.0],
    'Economic sciences': [None, 1.0, 3.0, 3.0, 6.0, 1.0, 0.0, 3.0, 43.0, 60.0],
    'Literature': [None, 8.0, 2.0, 11.0, 7.0, 6.0, 2.0, 5.0, 8.0, 49.0],
    'Medicin': [None, 18.0, 4.0, 12.0, 26.0, 5.0, 3.0, 2.0, 70.0, 140.0],
    'Peace': [None, 5.0, 1.0, 10.0, 11.0, 1.0, 1.0, 3.0, 19.0, 51.0],
    'Physics': [None, 24.0, 4.0, 9.0, 20.0, 5.0, 11.0, 10.0, 66.0, 149.0]
}

df = pd.DataFrame(data)
df_percent = df.iloc[1:9, 1:8].div(df.iloc[1:9, 1:8].sum(axis=1), axis=0) * 100

ax = df_percent.plot(kind='bar', stacked=True)

# Set the x-axis labels
ax.set_xticklabels(df['col0'][1:9])

# Set the y-axis label
ax.set_ylabel('Percentage')

# Set the chart title
ax.set_title('Stacked Bar Plot of Percentages Across Rows')

# Display the legend
ax.legend(loc=(1.1, 0.5))

# Show the plot
plt.show()
In [70]:
df
Out[70]:
col0 Chemistry Economic sciences Literature Medicin Peace Physics
0 Country NaN NaN NaN NaN NaN NaN
1 Germany 24.0 1.0 8.0 18.0 5.0 24.0
2 Canada 4.0 3.0 2.0 4.0 1.0 4.0
3 France 8.0 3.0 11.0 12.0 10.0 9.0
4 UK 23.0 6.0 7.0 26.0 11.0 20.0
5 Italy 1.0 1.0 6.0 5.0 1.0 5.0
6 Japan 6.0 0.0 2.0 3.0 1.0 11.0
7 Russia 4.0 3.0 5.0 2.0 3.0 10.0
8 US 51.0 43.0 8.0 70.0 19.0 66.0
9 Total 121.0 60.0 49.0 140.0 51.0 149.0
In [71]:
df_percent
Out[71]:
Chemistry Economic sciences Literature Medicin Peace Physics
1 30.000000 1.250000 10.000000 22.500000 6.250000 30.000000
2 22.222222 16.666667 11.111111 22.222222 5.555556 22.222222
3 15.094340 5.660377 20.754717 22.641509 18.867925 16.981132
4 24.731183 6.451613 7.526882 27.956989 11.827957 21.505376
5 5.263158 5.263158 31.578947 26.315789 5.263158 26.315789
6 26.086957 0.000000 8.695652 13.043478 4.347826 47.826087
7 14.814815 11.111111 18.518519 7.407407 11.111111 37.037037
8 19.844358 16.731518 3.112840 27.237354 7.392996 25.680934

To create a stacked bar plot of percentages across columns with prize categories on the x-axis and country percentages on the y-axis, you can follow these steps:

In [72]:
import pandas as pd
# Transpose the DataFrame
df_transposed = df.set_index('col0').transpose()

# Recompute sum at the last line (Total) grouped by prize categories
df_transposed['Total'] = df_transposed.sum(axis=1)
In [73]:
df_transposed # .drop(columns = 'Country', inplace=True)
df_transposed2 = df_transposed.drop(columns = 'Country', inplace = False)
df_transposed2
Out[73]:
col0 Germany Canada France UK Italy Japan Russia US Total
Chemistry 24.0 4.0 8.0 23.0 1.0 6.0 4.0 51.0 242.0
Economic sciences 1.0 3.0 3.0 6.0 1.0 0.0 3.0 43.0 120.0
Literature 8.0 2.0 11.0 7.0 6.0 2.0 5.0 8.0 98.0
Medicin 18.0 4.0 12.0 26.0 5.0 3.0 2.0 70.0 280.0
Peace 5.0 1.0 10.0 11.0 1.0 1.0 3.0 19.0 102.0
Physics 24.0 4.0 9.0 20.0 5.0 11.0 10.0 66.0 298.0
In [74]:
df_transposed2.drop(columns=df_transposed2.columns[-1], inplace=True)
df = df_transposed2
df.loc['Total'] = df.sum()

# df = df.drop([0, 9])
df_percent = df.iloc[0:6, 0:9].div(df.iloc[0:6, 0:9].sum(axis=1), axis=0) * 100
  # 19 18
ax = df_percent.plot(kind='bar', stacked=True)

# Set the x-axis label
ax.set_xlabel('Prize Categories')

# Set the y-axis label
ax.set_ylabel('Percentage')

# Set the chart title
ax.set_title('Stacked Bar Plot of Percentages Across Columns')

# Display the legend
ax.legend(loc=(1.1, 0.5))


# Show the plot
plt.show()
In [75]:
df
Out[75]:
col0 Germany Canada France UK Italy Japan Russia US
Chemistry 24.0 4.0 8.0 23.0 1.0 6.0 4.0 51.0
Economic sciences 1.0 3.0 3.0 6.0 1.0 0.0 3.0 43.0
Literature 8.0 2.0 11.0 7.0 6.0 2.0 5.0 8.0
Medicin 18.0 4.0 12.0 26.0 5.0 3.0 2.0 70.0
Peace 5.0 1.0 10.0 11.0 1.0 1.0 3.0 19.0
Physics 24.0 4.0 9.0 20.0 5.0 11.0 10.0 66.0
Total 80.0 18.0 53.0 93.0 19.0 23.0 27.0 257.0
In [76]:
df_transposed
Out[76]:
col0 Country Germany Canada France UK Italy Japan Russia US Total
Chemistry NaN 24.0 4.0 8.0 23.0 1.0 6.0 4.0 51.0 242.0
Economic sciences NaN 1.0 3.0 3.0 6.0 1.0 0.0 3.0 43.0 120.0
Literature NaN 8.0 2.0 11.0 7.0 6.0 2.0 5.0 8.0 98.0
Medicin NaN 18.0 4.0 12.0 26.0 5.0 3.0 2.0 70.0 280.0
Peace NaN 5.0 1.0 10.0 11.0 1.0 1.0 3.0 19.0 102.0
Physics NaN 24.0 4.0 9.0 20.0 5.0 11.0 10.0 66.0 298.0
In [77]:
df_percent
Out[77]:
col0 Germany Canada France UK Italy Japan Russia US
Chemistry 19.834711 3.305785 6.611570 19.008264 0.826446 4.958678 3.305785 42.148760
Economic sciences 1.666667 5.000000 5.000000 10.000000 1.666667 0.000000 5.000000 71.666667
Literature 16.326531 4.081633 22.448980 14.285714 12.244898 4.081633 10.204082 16.326531
Medicin 12.857143 2.857143 8.571429 18.571429 3.571429 2.142857 1.428571 50.000000
Peace 9.803922 1.960784 19.607843 21.568627 1.960784 1.960784 5.882353 37.254902
Physics 16.107383 2.684564 6.040268 13.422819 3.355705 7.382550 6.711409 44.295302
In [ ]:
 

links

social