# Scratch demo: plot four phase-shifted sine curves, with the line style
# (rather than anything else) supplied by the axes property cycle.
from cycler import cycler
import numpy as np
import matplotlib.pyplot as plt

# 50 sample points spanning 0..2*pi inclusive.
x = np.linspace(0, 2 * np.pi, 50)
# Four phase shifts evenly spaced over [0, 2*pi); the endpoint is excluded.
offsets = np.linspace(0, 2 * np.pi, 4, endpoint=False)
# One column per phase shift, so a single plot() call draws one line each.
yy = np.column_stack([np.sin(x + shift) for shift in offsets])

# Install a property cycle that only carries line styles.
plt.rc("axes", prop_cycle=cycler("linestyle", ["-", "--", ":", "-."]))

plt.plot(yy)
plt.show()
def plot_pct(minor_versions=("2.6", "3.3", "3.4")):
    """Save a stacked area chart of each Python version's share of downloads.

    Rows returned by ``DB().fetch_python_version()`` are loaded into a
    DataFrame, each row's download count is expressed as a percentage of
    the total for its ``start_date``, and the chart is written to
    ``python-versions-pct.png``.

    Args:
        minor_versions: Version labels whose share is added to the
            "Other" band instead of being drawn as their own band.
            Labels that do not occur in the data are ignored.
    """
    db = DB()
    versions = pd.DataFrame(
        db.fetch_python_version(),
        columns=["start_date", "end_date", "python_version", "download_count"],
        dtype="str",
    )
    versions["download_count"] = pd.to_numeric(versions["download_count"])
    # Assign the result back: fillna(inplace=True) on a column selection is
    # a chained assignment, which newer pandas warns about and which does
    # not update the frame when copy-on-write is enabled.
    versions["python_version"] = versions["python_version"].fillna("Other")

    # Total downloads of the period each row belongs to, aligned row by row.
    period_totals = versions.groupby("start_date")["download_count"].transform("sum")
    versions["pct"] = 100 * versions["download_count"] / period_totals
    # Place each point 14 days after the period start (presumably the
    # middle of a monthly period -- confirm against the fetch code).
    versions["date"] = pd.to_datetime(versions["start_date"]) + timedelta(days=14)

    # Rows become dates, columns become versions; a version with no row
    # for a given date contributes 0 %.
    to_plot = versions.set_index(["python_version", "date"])["pct"].unstack(
        "python_version", fill_value=0
    )

    # Fold only the versions that are actually present, so a data set
    # without one of them (or without any "Other" rows) does not raise
    # KeyError.
    folded = [
        version
        for version in minor_versions
        if version != "Other" and version in to_plot.columns
    ]
    if folded:
        if "Other" not in to_plot.columns:
            to_plot["Other"] = 0.0
        to_plot["Other"] += to_plot[folded].sum(axis=1)
        to_plot = to_plot.drop(columns=folded)

    plt.style.use("tableau-colorblind10")
    to_plot.plot.area(stacked=True, figsize=(10, 10 * 2 / 3))
    plt.savefig("python-versions-pct.png")


if __name__ == "__main__":
    if "--fetch" in sys.argv:
        fetch_main()
    plot_pct()
    plot_main()