Example 1¶
I wrote a script a while ago to plot a long sequence of data. In this first example I plot data from a LIDAR scan of a road whose segments either have cleaned ditches or not. Although the scan registers a lot more, here I am interested in representing three quantitative variables extracted from the LIDAR point cloud.
What I really want to focus on is that it is possible to split a long sequence into slices and, for each slice, plot two subplots, with the second one representing categories.
import random
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from matplotlib import pyplot as plt
from math import ceil
# Road number; only referenced by the commented-out savefig file name.
ROAD = 16
# Number of data points drawn per figure.
size_slice = 100
# Relative subplot sizes handed to plt.subplots through gridspec_kw:
# a single column, with the upper row ten times as tall as the lower one.
width = [1]
heights = [10, 1]
# -> {'width_ratios': [1], 'height_ratios': [10, 1]}
gs_kw = {'width_ratios': width, 'height_ratios': heights}
# Helper splits off the df
def chunk(lst, n):
    """Lazily split *lst* into consecutive pieces of at most *n* items.

    The last piece is shorter when ``len(lst)`` is not a multiple of *n*.

    Example:
        list(chunk(list(range(0, 15)), 4))
        gives
        [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14]]
    """
    starts = range(0, len(lst), n)
    yield from (lst[begin:begin + n] for begin in starts)
def roundup(x):
    """Return *x* rounded up to the next multiple of 100, as an int.

    Values that already are a multiple of 100 are returned unchanged.
    """
    hundreds = ceil(x / 100.0)
    return 100 * int(hundreds)
# parameters
chainage = [9800, 9770, 9700, 9670, 9590, 9480, 9460, 9450, 9440, 9430, 9420, 9410, 9400, 9390, 9380, 9370, 9360, 9350, 9340, 9330, 9320, 9310, 9300, 9290, 9280, 9270, 9260, 9250, 9240, 9230, 9220, 9210, 9200, 9190, 9180, 9170, 9160, 9150, 9140, 9130, 9120, 9110, 9100, 9090, 9080, 9070, 9060, 9050, 9040, 9030, 9020, 9010, 9000, 8990, 8980, 8970, 8960, 8950, 8940, 8930, 8920, 8910, 8900, 8890, 8880, 8870, 8860, 8850, 8840, 8830, 8820, 8810, 8800, 8790, 8780, 8770, 8760, 8750, 8740, 8730, 8720, 8710, 8700, 8690, 8680, 8670, 8660, 8650, 8640, 8630, 8620, 8610, 8600, 8590, 8580, 8570, 8560, 8550, 8540, 8530, 8520, 8510, 8500, 8480, 8470, 8460, 8450, 8440, 8430, 8420, 8410, 8400, 8390, 8380, 8370, 8360, 8350, 8340, 8330, 8320, 8310, 8300, 8290, 8280, 8270, 8260, 8250, 8240, 8230, 8220, 8210, 8200, 8190, 8180, 8170, 8160, 8150, 8140, 8130, 8120, 8110, 8100, 8090, 8080, 8070, 8060, 8050, 8040, 8030, 8020, 8010, 8000, 7990, 7980, 7970, 7960, 7950, 7940, 7930, 7920, 7910, 7900, 7890, 7880, 7870, 7820, 7810, 7800, 7790, 7780, 7770, 7760, 7750, 7740, 7730, 7720, 7710, 7700, 7690, 7680, 7670, 7660, 7650, 7640, 7630, 7620, 7610, 7600, 7590, 7580, 7570, 7560, 7550, 7540, 7530, 7520, 7510, 7500, 7490, 7480, 7470, 7460, 7450, 7440, 7430, 7420, 7410, 7400, 7390, 7360, 7350, 7340, 7330, 7320, 7310, 7300, 7290, 7280, 7270, 7260, 7250, 7240, 7230, 7220, 7210, 7200, 7190, 7180, 7170, 7160, 7150, 7140, 7130, 7120, 7110, 7100, 7090, 7080, 7070, 7060, 7050, 7040, 7030, 7020, 7010, 7000, 6990, 6980, 6970, 6960, 6950, 6940, 6930, 6920, 6910, 6900, 6890, 6330, 6320, 6310, 6300, 6290, 6280, 6270, 6260, 6250, 6240, 6230, 6220, 6210, 6200, 6190, 6180, 6170, 6160, 6150, 6140, 6130, 6120, 6110, 6100, 6090, 6080, 6070, 6060, 6050, 6040, 6030, 6020, 6010, 6000, 5990, 5980, 5970]
sci = [81, 42, 160, 113, 71, 83, 65, 110, 136, 109, 133, 189, 196, 189, 179, 168, 174, 180, 181, 202, 140, 143, 105, 136, 194, 137, 157, 205, 295, 287, 249, 224, 269, 234, 141, 164, 189, 147, 146, 133, 173, 163, 168, 188, 178, 159, 184, 191, 157, 144, 181, 152, 164, 161, 145, 193, 217, 211, 189, 196, 187, 171, 173, 162, 157, 143, 148, 158, 187, 152, 170, 200, 204, 155, 157, 179, 202, 202, 206, 216, 212, 206, 167, 139, 194, 152, 158, 151, 174, 181, 158, 166, 161, 175, 188, 174, 194, 249, 201, 180, 163, 152, 133, 77, 99, 128, 144, 134, 121, 153, 180, 196, 177, 168, 177, 194, 185, 269, 247, 255, 263, 285, 282, 211, 190, 201, 205, 217, 202, 180, 178, 232, 201, 190, 227, 178, 139, 172, 208, 210, 190, 179, 156, 169, 182, 196, 173, 154, 180, 184, 206, 182, 239, 237, 246, 207, 192, 213, 227, 200, 201, 187, 212, 255, 231, 218, 221, 203, 242, 271, 213, 186, 185, 177, 214, 222, 205, 182, 161, 210, 303, 248, 196, 236, 279, 222, 264, 187, 243, 209, 167, 177, 196, 191, 209, 166, 240, 178, 234, 222, 222, 207, 279, 291, 302, 225, 210, 289, 313, 317, 270, 232, 188, 174, 198, 214, 187, 196, 157, 201, 227, 253, 233, 229, 299, 269, 233, 235, 261, 276, 269, 236, 265, 301, 235, 221, 239, 206, 182, 164, 207, 224, 206, 182, 214, 183, 185, 244, 242, 267, 299, 293, 275, 248, 219, 259, 210, 175, 246, 286, 254, 225, 201, 169, 207, 228, 184, 185, 190, 189, 161, 124, 133, 139, 155, 154, 136, 141, 132, 103, 88, 91, 94, 120, 140, 164, 122, 144, 136, 126, 133, 165, 135, 145]
bci = [67, 57, 113, 71, 74, 75, 61, 95, 106, 104, 113, 125, 121, 124, 136, 126, 138, 136, 127, 133, 104, 112, 91, 98, 162, 108, 131, 144, 146, 145, 133, 138, 157, 156, 106, 122, 129, 103, 123, 99, 133, 119, 122, 129, 122, 111, 116, 120, 113, 103, 122, 117, 121, 110, 100, 116, 116, 120, 112, 100, 101, 97, 94, 91, 97, 75, 88, 99, 125, 102, 114, 111, 114, 91, 92, 125, 123, 140, 141, 153, 138, 130, 114, 114, 129, 96, 108, 117, 117, 125, 124, 101, 105, 118, 100, 104, 119, 139, 154, 130, 129, 123, 126, 80, 97, 116, 103, 95, 97, 111, 124, 124, 146, 125, 121, 138, 122, 167, 180, 176, 190, 176, 184, 147, 147, 153, 161, 157, 148, 135, 127, 146, 138, 126, 165, 141, 123, 135, 149, 144, 125, 131, 105, 102, 124, 139, 121, 134, 137, 122, 127, 130, 144, 152, 146, 130, 130, 126, 144, 137, 127, 124, 132, 143, 142, 155, 138, 147, 151, 148, 152, 149, 137, 136, 158, 154, 144, 142, 140, 149, 174, 180, 158, 176, 181, 164, 135, 142, 166, 167, 146, 145, 161, 162, 166, 125, 155, 135, 145, 152, 163, 156, 173, 198, 176, 154, 150, 167, 191, 192, 168, 148, 137, 117, 126, 143, 139, 130, 125, 146, 149, 162, 188, 172, 178, 171, 144, 159, 161, 163, 176, 152, 157, 167, 154, 143, 154, 136, 120, 100, 113, 139, 140, 128, 136, 107, 129, 157, 139, 151, 165, 167, 161, 167, 143, 140, 121, 164, 185, 190, 179, 160, 144, 131, 160, 176, 144, 150, 139, 145, 99, 85, 92, 109, 113, 110, 108, 107, 105, 92, 63, 100, 76, 81, 99, 120, 93, 109, 98, 87, 129, 107, 86, 100]
bdi = [164, 154, 255, 143, 181, 116, 148, 147, 157, 212, 94, 191, 127, 177, 216, 185, 140, 144, 169, 147, 124, 195, 141, 147, 158, 145, 138, 146, 75, 108, 112, 108, 120, 156, 118, 151, 160, 115, 74, 123, 167, 137, 128, 114, 133, 114, 85, 96, 147, 136, 128, 142, 176, 103, 105, 96, 45, 88, 47, 32, 105, 108, 69, 100, 113, 65, 38, 95, 129, 87, 116, 40, 103, 33, 94, 156, 44, 150, 182, 210, 134, 112, 109, 199, 165, 34, 111, 152, 135, 162, 154, 40, 114, 107, 86, 39, 115, 80, 205, 193, 117, 189, 225, 186, 165, 171, 143, 145, 157, 120, 163, 135, 187, 151, 126, 188, 116, 208, 185, 187, 244, 158, 191, 123, 161, 154, 178, 174, 160, 126, 111, 159, 109, 169, 209, 158, 158, 177, 161, 148, 122, 166, 48, 103, 157, 151, 109, 181, 94, 57, 94, 101, 147, 137, 135, 58, 134, 96, 125, 161, 144, 126, 157, 141, 129, 210, 144, 173, 132, 135, 183, 221, 167, 157, 216, 170, 150, 149, 188, 185, 153, 227, 149, 173, 169, 185, 169, 176, 173, 182, 206, 200, 253, 192, 93, 205, 185, 164, 121, 145, 205, 194, 204, 264, 208, 136, 173, 198, 184, 186, 162, 152, 160, 145, 156, 189, 181, 154, 174, 202, 162, 182, 97, 228, 174, 192, 172, 205, 182, 154, 182, 158, 191, 167, 165, 166, 179, 145, 59, 74, 85, 153, 172, 153, 139, 122, 130, 221, 142, 173, 171, 141, 158, 154, 147, 133, 124, 302, 260, 220, 237, 216, 196, 168, 184, 257, 235, 233, 191, 211, 138, 116, 145, 98, 177, 167, 169, 157, 184, 96, 91, 105, 159, 101, 148, 190, 152, 162, 159, 119, 99, 132, 120, 131]
"""
sci = [random.randint(40,320) for i in range(0,294)]
bci = [random.randint(50,200) for i in range(0,294)]
bdi = [random.randint(30,310) for i in range(0,294)]
"""
# height is the value for control (not influenced) in the lowest subplot
height = [1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), 1.0, 1.0, float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, float('nan'), 1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, float('nan'), 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), 1.0, float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), 1.0, float('nan'), float('nan'), 1.0, 1.0, float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, float('nan'), float('nan'), float('nan'), float('nan'), float('nan'), 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, float('nan'), float('nan')]
# height_acm = list(df2_ac_mixed.height)
# Level 3 marks every point that is NOT a control point (height != 1.0,
# which includes the NaN entries); control points become NaN here.
height_acm = [
    float('nan') if level == 1.0 else 3.0
    for level in height
]
# Level 2 is drawn at random for the level-3 points: each of them gets
# either 2.0 or NaN; all other points stay NaN.
height_ac = [
    float('nan') if level != 3.0 else random.choice([2.0, float('nan')])
    for level in height_acm
]
# Tick positions for the x-axis, derived from the chainage values: every 100
# units from the rounded-up minimum through the rounded-up maximum.
# range() excludes its stop value, so one extra step is added; without it the
# last hundred (where the largest chainage lies) would get no tick.
start_of_chain, end_of_chain = min(chainage), max(chainage)
chainage_range = range(roundup(start_of_chain), roundup(end_of_chain) + 100, 100)
# All series that are plotted against each other, in the order in which
# the_plotter reads them from its value_collection argument.
_args = [chainage, sci, bci, bdi, height, height_ac, height_acm]
# One lazy generator of size_slice-long pieces per series ...
_par = [chunk(series, size_slice) for series in _args]
# ... zipped so that each item holds the matching piece of every series.
# zip() is a one-shot iterator: it can be consumed only once.
zipped_slices2 = zip(*_par)
def the_plotter(num_figures, value_collection):
    """Draw one two-panel figure for a single slice of the road data.

    The upper panel shows the three line series (sci, bci, bdi) together with
    translucent bars marking the factor level at every chainage point. The
    lower panel shows the same factor levels as square markers on a
    categorical y-axis.

    Reads the module-level ``gs_kw`` (subplot size ratios) and
    ``chainage_range`` (x tick positions).

    Args:
        num_figures: Running number of the figure; only referenced by the
            commented-out ``savefig`` call at the end.
        value_collection: Sequence whose first seven items are the slices to
            plot, in this order: chainage, sci, bci, bdi, height, height_ac,
            height_acm. All slices are plotted against chainage and therefore
            need the same length.

    Returns:
        None. The figure is created through pyplot and is neither shown,
        saved nor closed here.
    """
    chainage, sci, bci, bdi, height, height_ac, height_acm = value_collection[:7]

    fig, (ax1, ax2) = plt.subplots(
        2, 1,
        constrained_layout=True,
        sharey=False,
        figsize=(40, 15),
        gridspec_kw=gs_kw,
    )

    # ===================================================
    # PLOT 1: the three quantitative series
    ax1.plot(chainage, sci, color="black", linewidth=2.0)
    ax1.plot(chainage, bci, color="magenta")
    ax1.plot(chainage, bdi, color="dodgerblue")  # indigo
    ax1.set_ylim(top=500)
    # Called before the bars are added, so the labels go to the three lines.
    ax1.legend(('sci', 'bci', 'bdi'), loc=2, prop={'size': 20})

    # A bit counter-intuitive: the full chainage_range is used for every
    # slice instead of a per-slice subset of it.
    # NOTE(review): Matplotlib documents that set_xticks expands the view
    # limits so that all given ticks are visible, so each figure may span the
    # whole road rather than only its slice -- confirm this is intended.
    ax1.set_xticks(list(chainage_range))
    ax1.set_xticklabels(list(chainage_range), fontsize=14)

    # ===================================================
    # PLOT 2: factor levels as markers (NaN entries draw nothing)
    # control
    ax2.plot(chainage, height, linestyle='None', color="limegreen", marker='s', markersize=6)
    # 2x factor match
    ax2.plot(chainage, height_ac, linestyle='None', color="red", marker='s', markersize=6)
    # is invent & any pms
    ax2.plot(chainage, height_acm, linestyle='None', color="gold", marker='s', markersize=6)
    ax2.yaxis.set_ticks([1, 2, 3])
    ax2.set_yticklabels(['control', '2x factor match', 'is invent & any/none pms'], fontsize=14)
    ax2.set_ylabel('factor levels', fontsize=16)

    # =========================================================
    # BARS: repeat the factor levels as background bars in the upper panel.
    # The factors scale levels 1, 2 and 3 to about the y-limit of 500
    # (1 * 500, 2 * 250, 3 * 166).
    # No element is filtered out: every bar list must stay as long as
    # chainage, otherwise ax1.bar cannot pair heights with x positions.
    bar_width = 10  # approx. (end_of_chain - start_of_chain) / 300
    control_bars = [level * 500 for level in height]
    match_bars = [level * 250 for level in height_ac]
    mixed_bars = [level * 166 for level in height_acm]
    # control
    ax1.bar(chainage, control_bars, bar_width, color='limegreen', alpha=0.6)
    # is invent & any/none pms
    ax1.bar(chainage, mixed_bars, bar_width, color='gold', alpha=0.6)
    # 2x factor match
    ax1.bar(chainage, match_bars, bar_width, color='red', alpha=0.3)
    # plt.savefig(f"roadnumber{ROAD}part{num_figures}_slice2000_v20200507.png")
# TO DO refactor as main
# Draw one figure per slice, walking the slices in reverse order and
# numbering the figures from 1. counter1 stays 0 when there are no slices.
counter1 = 0
for counter1, chunk_element in enumerate(reversed(list(zipped_slices2)), start=1):
    the_plotter(counter1, chunk_element)
Example 2 The stacked barplot¶
This example is about representing the distribution of frequencies (in %) of categorical variables in a correspondence analysis (CA). The data is pulled from my favorite tutorial about CA.
To create a cumulative barplot for percentages of a cross table I follow these steps:
1. Calculate the percentage for each category. 2. Calculate the cumulative percentage for each category. 3. Create a barplot with the categories on the x-axis and the cumulative percentages on the y-axis. Optionally, you can add the percentage for each category as labels on the bars or as a separate table next to the plot.
https://www.geeksforgeeks.org/create-a-stacked-bar-plot-in-matplotlib/
import pandas as pd
import numpy as np
# Load the cross table from disk.
# NOTE(review): the slices below assume that row 0 is a header row and that
# the last row and last column hold totals -- confirm against the CSV.
data_ca = r'C:\thisAKcode.github.io\Pelican\content\other\CSV.csv'
df = pd.read_csv(data_ca, encoding='utf-8')
# Counts only: rows 1-8, without the first and the last column.
crosstab_orig = df.iloc[1:9, 1:-1].to_numpy()
# Row labels (first column) and column labels of the table.
countries = df.iloc[1:-1, 0].tolist()
prize_categories = df.columns[1:-1].tolist()
df
# x_i. : one total per row (summed over the columns)
row_totals = crosstab_orig.sum(axis=1)
# x_.j : one total per column (summed over the rows)
column_totals = crosstab_orig.sum(axis=0)
# x_.. : grand total of all cells
grand_total = crosstab_orig.sum()
The code below generates a stacked bar plot showing the percentages across rows for each category.
from matplotlib import pyplot as plt
# Cross table typed in by hand: the first entry of every column is a header
# placeholder ('Country' / None) and the last entry is the 'Total' row.
data = {
    'col0': ['Country', 'Germany', 'Canada', 'France', 'UK', 'Italy', 'Japan', 'Russia', 'US', 'Total'],
    'Chemistry': [None, 24.0, 4.0, 8.0, 23.0, 1.0, 6.0, 4.0, 51.0, 121.0],
    'Economic sciences': [None, 1.0, 3.0, 3.0, 6.0, 1.0, 0.0, 3.0, 43.0, 60.0],
    'Literature': [None, 8.0, 2.0, 11.0, 7.0, 6.0, 2.0, 5.0, 8.0, 49.0],
    'Medicin': [None, 18.0, 4.0, 12.0, 26.0, 5.0, 3.0, 2.0, 70.0, 140.0],
    'Peace': [None, 5.0, 1.0, 10.0, 11.0, 1.0, 1.0, 3.0, 19.0, 51.0],
    'Physics': [None, 24.0, 4.0, 9.0, 20.0, 5.0, 11.0, 10.0, 66.0, 149.0]
}
df = pd.DataFrame(data)

# Rows 1-8 are the eight countries; columns 1 onward are the prize counts.
# Dividing every row by its own sum gives the share (in %) of each prize
# category within a country.
country_counts = df.iloc[1:9, 1:8]
df_percent = country_counts.div(country_counts.sum(axis=1), axis=0) * 100

ax = df_percent.plot(kind='bar', stacked=True)
# Label the bars with the country names instead of the row index
ax.set_xticklabels(df['col0'][1:9])
ax.set_ylabel('Percentage')
ax.set_title('Stacked Bar Plot of Percentages Across Rows')
# Place the legend to the right of the axes
ax.legend(loc=(1.1, 0.5))
plt.show()
df
df_percent
To create a stacked bar plot of percentages across columns with prize categories on the x-axis and country percentages on the y-axis, you can follow these steps:
import pandas as pd
# Transpose the DataFrame: prize categories become the rows, and the
# 'Country' placeholder, the eight countries and 'Total' become the columns.
df_transposed = df.set_index('col0').transpose()
# Recompute the 'Total' column per prize category from the country columns
# only. Summing the whole frame would also add the existing 'Total' column
# and double every total.
country_columns = [
    name for name in df_transposed.columns if name not in ('Country', 'Total')
]
df_transposed['Total'] = df_transposed[country_columns].sum(axis=1)
df_transposed
# Drop the all-empty 'Country' placeholder column ...
df_transposed2 = df_transposed.drop(columns='Country', inplace=False)
df_transposed2
# ... and the last column ('Total'), leaving one column per country.
df_transposed2.drop(columns=df_transposed2.columns[-1], inplace=True)
# From here on `df` is the transposed table (same object as df_transposed2).
df = df_transposed2
# Append a 'Total' row holding the sum per country.
df.loc['Total'] = df.sum()
# The first six rows are the prize categories (the 'Total' row is left out).
# Dividing every row by its own sum gives the share (in %) of each country
# within a prize category.
category_counts = df.iloc[0:6, 0:9]
df_percent = category_counts.div(category_counts.sum(axis=1), axis=0) * 100
ax = df_percent.plot(kind='bar', stacked=True)
# Set the x-axis label
ax.set_xlabel('Prize Categories')
# Set the y-axis label
ax.set_ylabel('Percentage')
# Set the chart title
ax.set_title('Stacked Bar Plot of Percentages Across Columns')
# Place the legend to the right of the axes
ax.legend(loc=(1.1, 0.5))
# Show the plot
plt.show()
df
df_transposed
df_percent