animated_chart.py

For what it’s worth, here is the full listing, as referenced in Animating Charts: Part 3.

Do note that a lot of my commented-out development code, data and such is still in the listing below.

"""
file: r:/learn/py_play/population/animated_chart.py

Saw an article where the author produced an animated chart using matplotlib.
  Anurag Gupta # email:- 999.anuraggupta@gmail.com
    https://opensource.com/article/20/4/python-data-covid-19
Thought I'd give it a try.

My code below based on:
  "Animations", by Ryan Wingate
    https://ryanwingate.com/visualization/matplotlib/animations/
    
Functions
---

  - animate(cnt): animation callback function, called for each frame by matplotlib's FuncAnimation

"""

import argparse
import json
import numpy as np
import pandas as pd
import pathlib
import matplotlib.pyplot as plt
import matplotlib.animation as animation
#from time import sleep

# Work out where saved animations are written, based on the directory the
# script was started from. Only the two expected starting directories are
# supported; the locations are relative pathlib paths so they behave the same
# on any operating system. Any other starting directory is treated as a usage
# error: print a message and quit with a non-zero status.
p = pathlib.Path.cwd()
if p.name == 'py_play':
  I_PATH = pathlib.Path('./population/play/img')
elif p.name == 'population':
  I_PATH = pathlib.Path('./play/img')
else:
  print(f"\nCurrent working directory is an unexpected location, {p}!")
  print("Please run the application from the root directory. Quitting application")
  # Raise SystemExit directly: the bare exit() helper is injected by the site
  # module for interactive use and is not guaranteed to exist in every run mode.
  raise SystemExit(1)


def animate(cnt):
  """Draw one frame of the animated histogram.

  Called by the matplotlib animation with the frame counter. Redraws the
  current axes from scratch as a histogram of the first ``cnt + 1`` values in
  the module-level ``data`` list (never more than ``max_rpts`` of them), and
  overlays a vertical line at the most recently computed mean, if any.

  Reads the module-level names data, s_size, max_rpts, bins, axis, fig, ani
  and do_mean; rebinds the module-level mean_plotted.

  Args:
    cnt: zero-based frame counter supplied by the animation.
  """
  # only mean_plotted is rebound here; everything else is read-only
  global mean_plotted

  # once the counter reaches the number of values available, halt the timer
  if cnt == max_rpts:
    ani.event_source.stop()

  # start each frame from an empty set of axes
  ax = plt.gca()
  ax.cla()

  # cnt is zero-based, so one more value than cnt is shown, capped at max_rpts
  rpts = min(cnt + 1, max_rpts)
  shown = data[:rpts]

  # fixed axis limits and bins keep the picture steady from frame to frame
  ax.axis(axis)
  ax.hist(shown, bins=bins)

  fig.suptitle(f"Estimated World Average Age for Repeated Samples of Size {s_size}")
  ax.set_title(f"Sample Repetitions: {rpts}")
  ax.set_ylabel('Frequency')
  ax.set_xlabel('Age')

  # at the listed repetition counts, refresh the mean from the values shown so far
  if rpts in do_mean:
    mean_plotted = sum(shown) / len(shown)

  # once a mean has been computed, keep drawing it on every later frame
  if mean_plotted:
    ax.axvline(mean_plotted, 0, 1, color='r', label=f'Est World Avg Age: {mean_plotted:.2f}')
    ax.legend()

# Directory holding the JSON data files; the file named on the command line is
# looked up inside this directory.
fl_dir = "r:/learn/py_play/population/play/json"
fl_in = ""

parser = argparse.ArgumentParser()
# Each option has a long name (preceded by --) and a short name (preceded by -).
# Both values arrive as plain strings.
parser.add_argument('--file_name', '-f', help='Name of file to process: ')
parser.add_argument('--save_ani', '-s', help='Save animation to file (y or n or a number)', default='n')

args = parser.parse_args()
# A data file name is mandatory: without one, show how to call the script and quit.
if args.file_name:
  fl_in = args.file_name
else:
  print(f"File name required!\n\tUsage: {__file__} -f <data file name>\n")
  # SystemExit rather than exit(): the latter is a site-module convenience
  # intended for the interactive interpreter.
  raise SystemExit(1)
# We have a name; make sure it refers to a real file (not a directory) before
# anything tries to open it.
fl_pth = pathlib.Path(f"{fl_dir}/{fl_in}")
if not fl_pth.is_file():
  print(f"File name given, {fl_pth}, could not be found.\n")
  raise SystemExit(1)

# Module-level state that animate() reads on every frame. The values below are
# placeholders only; each is replaced with real values for every dataset in the
# loop further down.
data = []
s_size = 0
do_mean = []
max_rpts = 0
bins = []
axis = [0, 0, 0, 0]
fig = None
ani = None
mean_plotted = 0
# histogram bin width; also encoded in the saved file name when it is not 1
bin_wd = 0.5
# "" = do not save (show on screen instead), "mp4" = save as mp4
# (an HTML5 video export was tried during development but is not wired up)
save_ani = ""
save_fl = "ani_no_name.mp4"

# The save decision depends only on the command line, so make it once up front.
# 'y' and a number both mean "save as mp4"; they differ only in the file name.
save_pfx = ""
if args.save_ani == 'y':
  save_ani = "mp4"
  save_pfx = "hist_x9"
elif args.save_ani.isdecimal():
  save_ani = "mp4"
  save_pfx = f"ani_hist_{args.save_ani}"
bw_tag = "" if bin_wd == 1 else f"_bw{bin_wd}"
if save_ani == 'mp4':
  # make sure the output directory is there before spending time rendering
  I_PATH.mkdir(parents=True, exist_ok=True)

# The input file maps a key (used below as the dataset's seed value) to an
# object with 'means' (the values to plot), 'size' (sample size shown in the
# title) and 'rpts' (repetition counts at which the mean line is refreshed).
with open(fl_pth, 'r', encoding='utf-8') as fin:
  samples = json.load(fin)

for s_seed, s_data in samples.items():
  data = s_data['means']
  s_size = s_data['size']
  do_mean = s_data['rpts']
  max_rpts = len(data)

  # nothing to plot, and min()/max() below would fail on an empty list
  if max_rpts == 0:
    print(f"Skipping: dataset with seed {s_seed} has no sample means.\n")
    continue

  # x limits: pad the data range and snap to multiples of 5
  min_x = int(round((min(data) - 5) / 5) * 5)
  max_x = int(round((max(data) + 4) / 5) * 5)
  bins = np.arange(min_x, max_x, bin_wd)
  # histogram of the full dataset gives the tallest bar, which fixes the
  # y limit (padded and snapped to a multiple of 5) for every frame
  counts, bins = np.histogram(data, bins)
  max_y = int(round((max(counts) + 3) / 5) * 5)
  axis = [min_x, max_x, 0, max_y]
  mean_plotted = 0
  ani = None

  # since multiple datasets are possible in each file, the dataset key (seed
  # value) goes into the file name
  if save_ani == 'mp4':
    save_fl = f"{save_pfx}{bw_tag}_{s_seed}.{save_ani}"

  print(f"Processing: dataset with seed {s_seed}...\n")
  fig = plt.figure()
  # Give the animation an explicit frame count (one frame per sample mean) and
  # do not loop. Without `frames` the frame source is endless, so save() has no
  # way of knowing how many frames this dataset actually needs.
  ani = animation.FuncAnimation(fig,
                                animate,
                                frames=max_rpts,
                                interval=100,
                                repeat=False)

  if save_ani == 'mp4':
    # Set up formatting for the movie files
    # NOTE(review): the writer is built without an explicit fps, so the saved
    # frame rate presumably comes from the writer's default rather than from
    # `interval` above -- confirm that is the intended playback speed.
    Writer = animation.writers['ffmpeg']
    writer = Writer(metadata=dict(artist='Me'))
    ani.save(I_PATH / save_fl, writer=writer)
    # release the figure so they do not pile up across datasets
    plt.close(fig)
  else:
    plt.show()

# SystemExit rather than exit(): the latter is a site-module convenience
# intended for the interactive interpreter.
raise SystemExit(0)