Working on python-perfs.

This commit is contained in:
Julien Palard 2023-09-22 15:33:14 +02:00
parent 8d923687e2
commit 188e4bd870
Signed by: mdk
GPG Key ID: 0EFC1AC1006886F8
22 changed files with 933 additions and 115 deletions

View File

@ -1,17 +1,37 @@
#!/bin/sh
#!/usr/bin/env python
ARGS="$*"
import sys
import shlex
from subprocess import run, STDOUT
from time import perf_counter
from textwrap import wrap
args="--ttl 1y --cache-dir .cache"
args = sys.argv[1:]
while [ "$1" != "--" ]
do
args="$args $1"
shift
done
bkt_args = []
cmd_args = args
before="$(date +"%s.%N")"
bkt $args "$@" 2>&1
after="$(date +"%s.%N")"
if '--' in args:
at = args.index('--')
bkt_args, cmd_args = args[:at], args[at+1:]
printf "%s: %.2fs\n\n" "$ARGS" "$(echo "$after - $before"|bc)" >&2
bkt_args = ["--ttl", "1y", "--cache-dir", ".cache"] + bkt_args
def wrap_bash_command(cmd):
    """Render *cmd* (a list of argument strings) as a wrapped bash command line.

    Spaces inside individual arguments are temporarily replaced by
    no-break spaces (U+00A0) so that textwrap.wrap only breaks *between*
    arguments, never inside one; they are restored after wrapping.
    (The rendered diff shows both replace() calls with two plain spaces,
    which is a no-op — the no-break space was lost in rendering.)
    Continuation lines are joined with a trailing backslash and a ``> ``
    prompt, mimicking an interactive shell session.
    """
    protected = [arg.replace(" ", "\N{NO-BREAK SPACE}") for arg in cmd]
    lines = wrap(shlex.join(protected), width=60)
    lines = [line.replace("\N{NO-BREAK SPACE}", " ") for line in lines]
    return " \\\n> ".join(lines)
before = perf_counter()
print(f"$ {shlex.join(cmd_args)}", file=sys.stderr)
print(f"$ {wrap_bash_command(cmd_args)}", flush=True)
run(["bkt"] + bkt_args + ["--"] + cmd_args, stderr=STDOUT)
after = perf_counter()
print(f"$ {shlex.join(cmd_args)} # took {after-before:.2f}s\n", file=sys.stderr)

View File

@ -0,0 +1,2 @@
# Cython .pxd declaration: pull the C implementation in from
# "my_collatz_length.c" and re-export it via cpdef so it is callable
# both from C-speed Cython code and as a regular Python function.
cdef extern from "my_collatz_length.c":
    cpdef int collatz_length(long n)

View File

@ -0,0 +1,7 @@
def collatz_length(n):
    """Return the number of Collatz steps needed to reach 1 from *n*.

    *n* must be a positive integer.  The original recursive version
    recurses once per step and can hit Python's recursion limit for
    numbers with long Collatz chains; this iterative version returns the
    same values for any positive integer without that limit.
    """
    steps = 0
    while n != 1:
        # Halve even numbers; map odd numbers to 3n + 1.
        n = n // 2 if n % 2 == 0 else n * 3 + 1
        steps += 1
    return steps

View File

@ -0,0 +1,705 @@
{
"alloc_samples": 0,
"elapsed_time_sec": 35.077271699905396,
"files": {
"phi5.py": {
"functions": [
{
"line": "fib",
"lineno": 5,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 4.2576382717302454,
"n_core_utilization": 0.062425365799925484,
"n_cpu_percent_c": 0.0034262080640758584,
"n_cpu_percent_python": 0.7126821564209772,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0009376888160889245,
"n_usage_fraction": 0
},
{
"line": "approx_phi",
"lineno": 12,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.6998240221201137,
"n_core_utilization": 0.06237139811505196,
"n_cpu_percent_c": 96.09209325974999,
"n_cpu_percent_python": 0.3418254092394039,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.20029089301487063,
"n_usage_fraction": 0
},
{
"line": "approx_phi_up_to",
"lineno": 16,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.06219424868052924,
"n_cpu_percent_c": 2.351736421918283,
"n_cpu_percent_python": 0.2556447055821568,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.012818642838908139,
"n_usage_fraction": 0
}
],
"imports": [
"from decimal import Decimal, localcontext",
"from itertools import count",
"from functools import cache",
"import sys"
],
"leaks": {},
"lines": [
{
"end_region_line": 1,
"line": "from decimal import Decimal, localcontext\n",
"lineno": 1,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 1
},
{
"end_region_line": 2,
"line": "from itertools import count\n",
"lineno": 2,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 2
},
{
"end_region_line": 3,
"line": "from functools import cache\n",
"lineno": 3,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 3
},
{
"end_region_line": 4,
"line": "\n",
"lineno": 4,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 4
},
{
"end_region_line": 5,
"line": "@cache\n",
"lineno": 5,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 5
},
{
"end_region_line": 10,
"line": "def fib(n):\n",
"lineno": 6,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 6
},
{
"end_region_line": 10,
"line": " if n in (0, 1):\n",
"lineno": 7,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 6
},
{
"end_region_line": 10,
"line": " return 1\n",
"lineno": 8,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 6
},
{
"end_region_line": 10,
"line": " x = n // 2\n",
"lineno": 9,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 6
},
{
"end_region_line": 10,
"line": " return fib(x - 1) * fib(n - x - 1) + fib(x) * fib(n - x)\n",
"lineno": 10,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 4.2576382717302454,
"n_core_utilization": 0.062425365799925484,
"n_cpu_percent_c": 0.0034262080640758584,
"n_cpu_percent_python": 0.7126821564209772,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0009376888160889245,
"n_usage_fraction": 0,
"start_region_line": 6
},
{
"end_region_line": 11,
"line": "\n",
"lineno": 11,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 11
},
{
"end_region_line": 12,
"line": "@cache\n",
"lineno": 12,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 12
},
{
"end_region_line": 14,
"line": "def approx_phi(n):\n",
"lineno": 13,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 13
},
{
"end_region_line": 14,
"line": " return Decimal(fib(n + 1)) / Decimal(fib(n))\n",
"lineno": 14,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.6998240221201137,
"n_core_utilization": 0.06237139811505196,
"n_cpu_percent_c": 96.09209325974997,
"n_cpu_percent_python": 0.3418254092394039,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.20029089301488137,
"n_usage_fraction": 0,
"start_region_line": 13
},
{
"end_region_line": 15,
"line": "\n",
"lineno": 15,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 15
},
{
"end_region_line": 23,
"line": "def approx_phi_up_to(n_digits):\n",
"lineno": 16,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 16
},
{
"end_region_line": 23,
"line": " with localcontext() as ctx:\n",
"lineno": 17,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 16
},
{
"end_region_line": 23,
"line": " ctx.prec = n_digits + 1\n",
"lineno": 18,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 16
},
{
"end_region_line": 23,
"line": " for n in count():\n",
"lineno": 19,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 19
},
{
"end_region_line": 23,
"line": " step1 = approx_phi(2 ** n)\n",
"lineno": 20,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 19
},
{
"end_region_line": 23,
"line": " step2 = approx_phi(2 ** (n+1))\n",
"lineno": 21,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.06219424868052924,
"n_cpu_percent_c": 2.351736421918283,
"n_cpu_percent_python": 0.2556447055821568,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.012818642838908139,
"n_usage_fraction": 0,
"start_region_line": 19
},
{
"end_region_line": 23,
"line": " if step1 == step2:\n",
"lineno": 22,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 19
},
{
"end_region_line": 23,
"line": " return step1\n",
"lineno": 23,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 19
},
{
"end_region_line": 24,
"line": "\n",
"lineno": 24,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 24
},
{
"end_region_line": 25,
"line": "import sys\n",
"lineno": 25,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 25
},
{
"end_region_line": 26,
"line": "\n",
"lineno": 26,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 26
},
{
"end_region_line": 27,
"line": "if __name__ == \"__main__\":\n",
"lineno": 27,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.0,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.0,
"n_usage_fraction": 0,
"start_region_line": 27
},
{
"end_region_line": 28,
"line": " print(approx_phi_up_to(int(sys.argv[1])))\n",
"lineno": 28,
"memory_samples": [],
"n_avg_mb": 0.0,
"n_copy_mb_s": 0.0,
"n_core_utilization": 0.005697373432005744,
"n_cpu_percent_c": 0.0,
"n_cpu_percent_python": 0.0026020692392709254,
"n_gpu_avg_memory_mb": 0.0,
"n_gpu_peak_memory_mb": 0.0,
"n_gpu_percent": 0,
"n_growth_mb": 0.0,
"n_malloc_mb": 0.0,
"n_mallocs": 0,
"n_peak_mb": 0.0,
"n_python_fraction": 0,
"n_sys_percent": 0.025942545115975865,
"n_usage_fraction": 0,
"start_region_line": 28
}
],
"percent_cpu_time": 100.0
}
},
"gpu": false,
"growth_rate": 0.0,
"max_footprint_fname": null,
"max_footprint_lineno": null,
"max_footprint_mb": 0,
"memory": true,
"program": "phi5.py",
"samples": [],
"stacks": []
}

View File

@ -0,0 +1,39 @@
import sys
def should_apply_gravity(terrain):
    """Return True while some cell of the (square) terrain holds >= 4 grains."""
    size = len(terrain)
    for x in range(size):
        for y in range(size):
            if terrain[x][y] >= 4:
                return True
    return False
def show_terrain(terrain):
    """Print the square terrain, one symbol per cell: ' ', '·', '●', '⬤' for 0-3 grains."""
    size = len(terrain)
    symbols = " ·●⬤"
    for x in range(size):
        print("".join(symbols[terrain[x][y]] for y in range(size)))
def apply_gravity(terrain):
    """Topple every cell holding >= 4 grains, scanning rows top to bottom.

    A toppling cell keeps ``grains - 4`` and gives one grain to each of
    its four neighbours.  The scan mutates the terrain in place, so cells
    not yet visited may receive grains during the same pass.

    NOTE(review): neighbour indices are not bounds-checked — ``x - 1`` /
    ``y - 1`` wrap to the opposite edge via Python's negative indexing,
    and ``x + 1`` / ``y + 1`` would raise IndexError on the last
    row/column; callers apparently size the terrain so the pile stays
    away from the border — confirm.
    """
    size = len(terrain)
    for x in range(size):
        for y in range(size):
            grains = terrain[x][y]
            if grains >= 4:
                terrain[x][y] = grains - 4
                terrain[x - 1][y] += 1
                terrain[x + 1][y] += 1
                terrain[x][y + 1] += 1
                terrain[x][y - 1] += 1
def main(height, show=True):
    """Drop *height* grains in the middle of a square terrain and relax it.

    The terrain side is ``int(sqrt(height)) + 1`` so the pile has room to
    spread.  Toppling is repeated until no cell holds 4 grains or more;
    the final state is printed unless *show* is False.
    """
    side = int(height ** .5) + 1
    terrain = [[0] * side for _ in range(side)]
    center = side // 2
    terrain[center][center] = height
    while should_apply_gravity(terrain):
        apply_gravity(terrain)
    if show:
        show_terrain(terrain)


if __name__ == "__main__":
    main(int(sys.argv[1]))

View File

@ -1,4 +1,4 @@
# Les Performances en Python
# Les performances en Python
par
@ -31,7 +31,7 @@ O(n!) Factorielle
notes:
Il faut les grapher pour s'en rendre compte : cf. include/big.o.py
Il faut les grapher pour s'en rendre compte : cf. examples/big.o.py
## Comparaison asymptotique
@ -73,13 +73,13 @@ de temps mais peut-être une opération de plus.
## O(log n)
```python
#!sed -n '/def index/,/raise ValueError/p' include/find_in_list.py
#!sed -n '/def index/,/raise ValueError/p' examples/find_in_list.py
```
## O(n)
```python
#!sed -n '/def dumb_index/,/raise ValueError/p' include/find_in_list.py
#!sed -n '/def dumb_index/,/raise ValueError/p' examples/find_in_list.py
```
## O(n log n)
@ -92,7 +92,7 @@ Typique d'algorithmes de tris.
## Les mesures de complexité
- De temps (CPU consommé).
- D'espace (Mémoire consommée).
- D'espace (mémoire consommée).
- Dans le meilleur des cas.
- Dans le pire des cas.
- Dans le cas moyen.
@ -130,15 +130,9 @@ Mais retenir par cœur la complexité de quelques structures
## Le cas typique
```shell
$ python -m pyperf timeit \
> --setup 'container = list(range(10_000_000))' \
> '10_000_001 in container'
#!cache -- python -m pyperf timeit --fast -s 'container = list(range(10_000_000))' '10_000_001 in container'
#!cache python -m pyperf timeit --setup 'container = list(range(10_000_000))' '10_000_001 in container'
$ python -m pyperf timeit \
> --setup 'container = set(range(10_000_000))' \
> '10_000_001 in container'
#!cache -- python -m pyperf timeit --fast -s 'container = set(range(10_000_000))' '10_000_001 in container'
#!cache python -m pyperf timeit --setup 'container = set(range(10_000_000))' '10_000_001 in container'
```
Pourquoi une si grande différence !?
@ -149,6 +143,19 @@ notes:
C'est l'heure du live coding !
# À vous !
Simulons un tas de sable.
Ne vous souciez pas des perfs, on s'en occupera.
Vous avez 10mn.
notes:
voir sandpile.py
# Les outils
## Les outils en ligne de commande
@ -156,8 +163,7 @@ C'est l'heure du live coding !
`time`, un outil POSIX, mais aussi une fonction native de bash :
```shell
$ time python -c 'container = set(range(10_000_000))'
#!cache -- time -p python -c 'container = set(range(10_000_000))'
#!cache time -p python examples/sandpile.py 16
```
Mais `time` ne teste qu'une fois.
@ -173,10 +179,8 @@ sys 0m0.195s # Temps CPU passé dans le kernel
`hyperfine` teste plusieurs fois :
```text
$ hyperfine "python -c 'container = set(range(10_000_000))'"
Benchmark 1: python -c 'container = set(range(10_000_000))'
Time (mean ± σ): 735.6 ms ± 11.2 ms
```shell
#!cache hyperfine "python examples/sandpile.py 1000"
```
@ -184,10 +188,8 @@ Benchmark 1: python -c 'container = set(range(10_000_000))'
Mais attention, démarrer un processus Python n'est pas gratuit :
```python
$ hyperfine "python -c pass"
Benchmark 1: python -c pass
Time (mean ± σ): 19.4 ms ± 0.6 ms
```shell
#!cache hyperfine "python -c pass"
```
notes:
@ -199,7 +201,7 @@ N'essayez pas de retenir les chiffres, retenez les faits.
Et puis il peut dépendre de la version de Python, des options de compilation, ... :
```text
```shell
$ hyperfine "~/.local/bin/python3.10 -c pass" # Avec pydebug
Benchmark 1: ~/.local/bin/python3.10 -c pass
Time (mean ± σ): 37.6 ms ± 0.6 ms
@ -218,19 +220,19 @@ Leur parler de `--enable-optimizations` (PGO).
Timeit c'est dans la stdlib de Python, ça s'utilise en ligne de commande ou depuis Python.
```shell
#!cache python -m timeit -s 'import sandpile' 'sandpile.main(1000, show=False)'
```
Mais il ne vous donne que le meilleur temps, ce qui n'est pas toujours représentatif.
## pyperf
C'est l'équivalent d'hyperfine mais exécutant du Python plutôt qu'un programme :
Il n'est pas dans la stdlib, mais il remplace avantageusement `timeit` :
```shell
$ ~/.local/bin/python3.10 -m pyperf timeit pass
.....................
Mean +- std dev: 7.33 ns +- 0.18 ns
$ /usr/bin/python3.10 -m pyperf timeit pass
.....................
Mean +- std dev: 6.10 ns +- 0.11 ns
#!cache python -m pyperf timeit --setup 'import sandpile' 'sandpile.main(1000, show=False)'
```
notes:
@ -240,17 +242,39 @@ démarrer** puis d'exécuter `pass`, ici on teste combien de temps ça
prend d'exécuter `pass`.
# cProfile
## pyperf
time, timeit, hyperfine, pyperf c'est bien pour mesurer, comparer.
Digression, `pyperf` peut aussi remplacer `hyperfine` :
cProfile nous aider à trouver la fonction coupable.
```shell
#!cache python -m pyperf command python examples/sandpile.py 1000
```
notes:
Avec hyperfine on teste combien de temps ça prend à Python **de
démarrer** puis d'exécuter `pass`, ici on teste combien de temps ça
prend d'exécuter `pass`.
## Les outils — À vous !
Effectuez quelques mesures sur votre implémentation.
Tentez d'en déterminer la complexité en fonction du nombre de grains.
# Profilage
`time`, `timeit`, `hyperfine`, `pyperf` c'est bien pour mesurer, comparer.
`cProfile` peut nous aider à trouver la fonction coupable.
## cProfile, exemple
```python
#!sed -n '/def fib/,/return approx/p' include/phi1.py
#!sed -n '/def fib/,/return approx/p' examples/phi1.py
```
@ -259,12 +283,11 @@ cProfile nous aider à trouver la fonction coupable.
Testons :
```python
#!sed -n '/import sys/,$p' include/phi1.py
#!sed -n '/import sys/,$p' examples/phi1.py
```
```text
$ time python phi1.py 10
#!cache -- time -p python include/phi1.py 10
#!cache time -p python examples/phi1.py 10
```
@ -275,10 +298,10 @@ C'est déjà lent, et pour `20` c'est interminable...
Sortons cProfile :
```text
```shell
$ python -m cProfile --sort cumulative phi1.py 10
...
#!cache -- python -m cProfile --sort cumulative include/phi1.py 10 | sed -n '/fib\|function calls/{s/ \+/ /g;s/^ *//;p}'
#!cache python -m cProfile --sort cumulative examples/phi1.py 10 | sed -n '/fib\|function calls/{s/ \+/ /g;s/^ *//;p}'
...
```
@ -291,16 +314,18 @@ C'est donc `fib` la coupable :
Cachons les résultats de `fib` :
```python
#!sed -n '/import cache/,/return fib/p' include/phi2.py
#!sed -n '/import cache/,/return fib/p' examples/phi2.py
```
## cProfile, exemple
Et on repasse dans cProfile !
```text
```shell
$ python -m cProfile --sort cumulative phi2.py 10
#!cache -- python -m cProfile --sort cumulative include/phi2.py 10 | sed -n '/fib\|function calls/{s/ \+/ /g;s/^ *//;p}'
...
#!cache python -m cProfile --sort cumulative examples/phi2.py 10 | sed -n '/fib\|function calls/{s/ \+/ /g;s/^ *//;p}'
...
```
C'est mieux !
@ -309,9 +334,8 @@ C'est mieux !
On essaye d'aller plus loin ?
```text
$ python -m cProfile --sort cumulative phi2.py 2000
#!cache -- python -m cProfile --sort cumulative include/phi2.py 2000 | head -n 2 | sed 's/^ *//g;s/seconds/s/g'
```shell
#!cache python -m cProfile --sort cumulative examples/phi2.py 2000 | head -n 3 | sed 's/^ *//g;s/seconds/s/g'
```
Ça tient, mais peut-on faire mieux ?
@ -323,22 +347,22 @@ Divisons par 10 le nombre d'appels, on réduira mécaniquement par 10 le
temps d'exécution ?
```python
#!sed -n '/def approx_phi_up_to/,/return step1/p' include/phi3.py
#!sed -n '/def approx_phi_up_to/,/return step1/p' examples/phi3.py
```
## cProfile, exemple
```text
$ python -m cProfile --sort cumulative phi3.py 2000
#!cache -- python -m cProfile --sort cumulative include/phi3.py 2000 | head -n 2 | sed 's/^ *//g;s/seconds/s/g'
```shell
#!cache python -m cProfile --sort cumulative examples/phi3.py 2000 | head -n 3 | sed 's/^ *//g;s/seconds/s/g'
```
## cProfile, exemple
En cachant `approx_phi` ?
```python
#!sed -n '10,/return step1/p' include/phi4.py
#!sed -n '10,/return step1/p' examples/phi4.py
```
notes:
@ -346,10 +370,11 @@ notes:
Notez l'astuce pour que le `step2` d'un
tour soit le `step1` du suivant...
## cProfile, exemple
```
$ python -m cProfile --sort cumulative phi4.py 2000
```shell
$ python -m cProfile --sort cumulative examples/phi4.py 2000
```
`RecursionError` !? En effet, en avançant par si grands pas, le cache
@ -363,15 +388,14 @@ Il est temps de sortir une implémentation de `fib` plus robuste, basée
sur l'algorithme « matrix exponentiation » :
```python
#!sed -n '/def fib/,/return fib/p' include/phi5.py
#!sed -n '/def fib/,/return fib/p' examples/phi5.py
```
## cProfile, exemple
```text
$ python -m cProfile --sort cumulative phi5.py 2000
#!cache -- python -m cProfile --sort cumulative include/phi5.py 2000 | head -n 2 | sed 's/^ *//g;s/seconds/s/g'
#!cache python -m cProfile --sort cumulative examples/phi5.py 2000 | head -n 3 | sed 's/^ *//g;s/seconds/s/g'
```
notes:
@ -381,14 +405,15 @@ Mieux.
## Snakeviz
```text
python -m pip install snakeviz
python -m cProfile -o phi5.prof phi5.py 2000
#!if [ ! -f .cache/phi5.prof ]; then python -m cProfile -o .cache/phi5.prof include/phi5.py 2000 >/dev/null 2>&1; fi
python -m snakeviz phi5.prof
#!if [ ! -f output/phi5-snakeviz.png ]; then python -m snakeviz -s .cache/phi5.prof & TOKILL=$!; sleep 1; cutycapt --min-width=1024 --delay=500 --url=http://127.0.0.1:8080/snakeviz/%2Ftmp%2Fphi5.prof --out=output/phi5-snakeviz.png ; kill $TOKILL; fi
```shell
$ python -m pip install snakeviz
$ python -m cProfile -o phi5.prof phi5.py 2000
$ python -m snakeviz phi5.prof
```
#!if [ ! -f .cache/phi5.prof ]; then python -m cProfile -o .cache/phi5.prof examples/phi5.py 2000 >/dev/null 2>&1; fi
#!if [ ! -f output/phi5-snakeviz.png ]; then python -m snakeviz -s .cache/phi5.prof & TOKILL=$!; sleep 1; cutycapt --min-width=1024 --delay=500 --url=http://127.0.0.1:8080/snakeviz/%2Ftmp%2Fphi5.prof --out=output/phi5-snakeviz.png ; kill $TOKILL; fi
## Snakeviz
@ -400,56 +425,64 @@ python -m snakeviz phi5.prof
```shell
$ python -m pip install scalene
$ scalene phi5.py 100000
#!if [ ! -f output/phi5.html ]; then ( cd include; scalene phi5.py 100000 --html --outfile ../output/phi5.html --cli >&2 ); fi
#!if [ ! -f output/phi5-scalene.png ]; then cutycapt --min-width=1024 --delay=100 --url=file://$(pwd)/output/phi5.html --out=output/phi5-scalene.png; fi
```
#!if [ ! -f output/phi5.html ]; then ( cd examples; scalene phi5.py 100000 --html --outfile ../output/phi5.html --cli >&2 ); fi
#!if [ ! -f output/phi5-scalene.png ]; then cutycapt --min-width=1024 --delay=100 --url=file://$(pwd)/output/phi5.html --out=output/phi5-scalene.png; fi
## Scalene
![](phi5-scalene.png)
## Atelier
## Aussi
Générateur de prénoms français.
Notes: voir examples/prenom-*.py
- https://github.com/gaogaotiantian/viztracer
- https://github.com/joerick/pyinstrument
- https://github.com/benfred/py-spy
- https://github.com/pyutils/line_profiler
- https://github.com/sumerc/yappi
- https://github.com/vmprof/vmprof-python
- https://github.com/bloomberg/memray
## TODO
- vprof
- https://pypi.org/project/pp3yflame/
## Profilage — À vous !
Profilez votre implémentation et tentez quelques améliorations.
# Cython
Cython est un dialecte de Python transpilable en C.
## Cython démo
```python
#!cat include/collatz_length.py
#!cat examples/collatz_length.py
```
## Cython démo
```shell
$ python -m pyperf timeit \
> -s 'from collatz_length import collatz_length'
> 'collatz_length(837799)'
#!cache -- python -m pyperf timeit --fast --setup 'from include.collatz_length import collatz_length' 'collatz_length(837799)'
#!cache python -m pyperf timeit --fast --setup 'from examples.collatz_length import collatz_length' 'collatz_length(837799)'
```
```shell
$ cythonize --inplace collatz_length_cython.py
#!if ! [ -f include/collatz_length_cython.*.so ] ; then cythonize --inplace include/collatz_length_cython.py; fi
#!if ! [ -f examples/collatz_length_cython.*.so ] ; then cythonize --inplace examples/collatz_length_cython.py; fi
```
```shell
$ python -m pyperf timeit \
> -s 'from collatz_length import collatz_length'
> 'collatz_length(837799)'
#!cache -- python -m pyperf timeit --fast -s 'from include.collatz_length import collatz_length' 'collatz_length(837799)' # faster
#!cache python -m pyperf timeit --fast -s 'from examples.collatz_length import collatz_length' 'collatz_length(837799)' # faster
#!# Beware, the cythonized use `-s` while the non cythonized uses `--setup` just to have two cache buckets :D
```
@ -458,8 +491,8 @@ $ python -m pyperf timeit \
```shell
$ cython -a collatz_length.py
#!if ! [ -f include/collatz_length.html ] ; then cython -a include/collatz_length.py; fi
#!if ! [ -f output/collatz_length.png ] ; then cutycapt --min-width=1024 --delay=500 --url=file://$(pwd)/include/collatz_length.html --out=output/collatz_length.png; fi
#!if ! [ -f examples/collatz_length.html ] ; then cython -a examples/collatz_length.py; fi
#!if ! [ -f output/collatz_length.png ] ; then cutycapt --min-width=1024 --delay=500 --url=file://$(pwd)/examples/collatz_length.html --out=output/collatz_length.png; fi
```
![](collatz_length.png)
## Cython annotated
```python
#!cat examples/collatz_length_annotated.py
```
```shell
$ cythonize --inplace collatz_length_annotated.py
```
#!if ! [ -f examples/collatz_length_annotated.*.so ] ; then cythonize --inplace examples/collatz_length_annotated.py; fi
```shell
$ python -m pyperf timeit \
> -s 'from collatz_length_annotated import collatz_length' \
> 'collatz_length(837799)'
#!cache python -m pyperf timeit --fast -s 'from examples.collatz_length_annotated import collatz_length' 'collatz_length(837799)'
```
## Cython annotate again
```shell
$ cython -a examples/collatz_length_annotated.py
#!if ! [ -f examples/collatz_length_annotated.html ] ; then cython -a examples/collatz_length_annotated.py; fi
#!if ! [ -f output/collatz_length_annotated.png ] ; then cutycapt --min-width=1024 --delay=500 --url=file://$(pwd)/examples/collatz_length_annotated.html --out=output/collatz_length_annotated.png; fi
```
![](collatz_length_annotated.png)
## Cython — À vous !
# Numba
Numba est un `JIT` : « Just In Time compiler ».
```python
#!cat examples/collatz_length_numba.py
```
## Numba démo
```shell
$ python -m pyperf timeit \
> -s 'from examples.collatz_length_numba import collatz_length' \
> 'collatz_length(837799)'
#!cache python -m pyperf timeit --fast -s 'from examples.collatz_length_numba import collatz_length' 'collatz_length(837799)'
```
## numba — À vous !
# mypyc
mypyc est un compilateur qui s'appuie sur les annotations de type mypy :
```python
#!cat examples/collatz_length_mypy.py
```
## mypyc demo
```shell
$ mypyc examples/collatz_length_mypy.py
#!if ! [ -f collatz_length_mypy.*.so ] ; then mypyc examples/collatz_length_mypy.py; fi
```
```shell
$ python -m pyperf timeit \
> -s 'from collatz_length_mypy import collatz_length' \
> 'collatz_length(837799)'
#!cache python -m pyperf timeit --fast -s 'from collatz_length_mypy import collatz_length' 'collatz_length(837799)'
```
## mypyc — À vous !
# Pythran
pythran est un compilateur pour du code scientifique :
```python
#!cat examples/collatz_length_pythran.py
```
## Pythran demo
```shell
$ pythran examples/collatz_length_pythran.py
#!if ! [ -f collatz_length_pythran.*.so ]; then pythran examples/collatz_length_pythran.py; fi
```
```shell
$ python -m pyperf timeit \
> -s 'from collatz_length_pythran import collatz_length' \
> 'collatz_length(837799)'
#!cache python -m pyperf timeit --fast -s 'from collatz_length_pythran import collatz_length' 'collatz_length(837799)'
```
## pythran — À vous !
# Nuitka
Aussi un compilateur, aussi utilisable pour distribuer une application.
```shell
$ python -m nuitka --module collatz_length_nuitka.py
#!if ! [ -f collatz_length_nuitka.*.so ]; then python -m nuitka --module examples/collatz_length_nuitka.py >/dev/null; fi
```
```shell
$ python -m pyperf timeit \
> -s 'from collatz_length_nuitka import collatz_length' \
> 'collatz_length(837799)'
#!cache python -m pyperf timeit --fast -s 'from collatz_length_nuitka import collatz_length' 'collatz_length(837799)'
```
# Et d'autres
https://github.com/pfalcon/awesome-python-compilers
# Hand crafted C
```c
#!sed -n '/int collatz_length/,/^$/p' examples/my_collatz_length.c
```
Mais comment l'utiliser ?
## Hand crafted C
### Avec Cython
```cpython
#!cat examples/collatz_length_cython_to_c.pyx
```
```shell
$ cythonize -i examples/collatz_length_cython_to_c.pyx
#!if ! [ -f examples/collatz_length_cython_to_c.*.so ] ; then cythonize -i examples/collatz_length_cython_to_c.pyx; fi
```
```shell
$ python -m pyperf timeit \
> -s 'from collatz_length_cython_to_c import collatz_length' \
> 'collatz_length(837799)'
#!cache python -m pyperf timeit --fast -s 'from examples.collatz_length_cython_to_c import collatz_length' 'collatz_length(837799)'
```