diff --git a/python-versions-pct.png b/python-versions-pct.png index 31e77a4..7c2e299 100644 Binary files a/python-versions-pct.png and b/python-versions-pct.png differ diff --git a/python-versions.png b/python-versions.png index 8d4c946..8cbae03 100644 Binary files a/python-versions.png and b/python-versions.png differ diff --git a/python-versions.py b/python-versions.py index c54342a..52b8f98 100644 --- a/python-versions.py +++ b/python-versions.py @@ -14,6 +14,7 @@ from pypinfo.core import build_query, create_client, create_config, parse_query_ from pypinfo.db import get_credentials import matplotlib.pyplot as plt from matplotlib.dates import date2num +import matplotlib.ticker as mtick from scipy.interpolate import make_interp_spline import numpy as np @@ -116,11 +117,12 @@ def plot_main(): versions = db.fetch_python_version() biggest_value = max(version["download_count"] for version in versions) for row in versions: - if row["download_count"] > biggest_value / 20: - by_version[row["python_version"]][0].append( - mean_date(row["start_date"], row["end_date"]) - ) - by_version[row["python_version"]][1].append(row["download_count"]) + if row["download_count"] <= biggest_value / 20: + continue + mid_of_month = mean_date(row["start_date"], row["end_date"]) + _, number_of_days = calendar.monthrange(mid_of_month.year, mid_of_month.month) + by_version[row["python_version"]][0].append(mid_of_month) + by_version[row["python_version"]][1].append(row["download_count"] / number_of_days) plt.style.use("tableau-colorblind10") plt.figure(figsize=(10, 10 * 2 / 3)) fmt = iter(cycle(["-", "--", ":", "-."])) @@ -135,7 +137,7 @@ def plot_main(): smooth_y = spline(smooth_x) plt.plot_date(smooth_x, smooth_y, label=version, fmt=next(fmt)) plt.xlabel("Date") - plt.ylabel("PyPI downloads") + plt.ylabel("PyPI daily downloads") plt.legend() plt.savefig("python-versions.png") @@ -166,25 +168,27 @@ def plot_pct(): versions.groupby("start_date").agg(monthly_downloads=("download_count", "sum")), on="start_date", ) - versions["pct"] = 100 * versions.download_count / versions.monthly_downloads - versions["date"] = pd.to_datetime(versions.start_date) + timedelta(days=14) + versions["pct"] = versions.download_count / versions.monthly_downloads + versions["date"] = pd.to_datetime(versions.start_date).dt.to_period('M') versions.set_index(["Python version", "date"], inplace=True) to_plot = versions.pct.unstack(0, fill_value=0) to_plot.sort_values( by="Python version", ascending=False, axis=1, inplace=True, key=by_versions ) + pd.options.display.float_format = '{:.2%}'.format print(to_plot.tail(24)) for version in HIDE: del to_plot[version] del to_plot["Other"] plt.style.use("tableau-colorblind10") - to_plot.plot.area( + ax = to_plot.plot.area( stacked=True, figsize=(10, 10 * 2 / 3), title="% of PyPI download by Python version", legend="reverse", ylabel="%", ) + ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1)) plt.savefig("python-versions-pct.png") diff --git a/python-versions.sqlite b/python-versions.sqlite index b2922b5..f487bac 100644 Binary files a/python-versions.sqlite and b/python-versions.sqlite differ