Working on python-perfs.
This commit is contained in:
parent
8d923687e2
commit
188e4bd870
|
@ -1,17 +1,37 @@
|
|||
#!/bin/sh
|
||||
#!/usr/bin/env python
|
||||
|
||||
ARGS="$*"
|
||||
import sys
|
||||
import shlex
|
||||
from subprocess import run, STDOUT
|
||||
from time import perf_counter
|
||||
from textwrap import wrap
|
||||
|
||||
args="--ttl 1y --cache-dir .cache"
|
||||
args = sys.argv[1:]
|
||||
|
||||
while [ "$1" != "--" ]
|
||||
do
|
||||
args="$args $1"
|
||||
shift
|
||||
done
|
||||
bkt_args = []
|
||||
cmd_args = args
|
||||
|
||||
before="$(date +"%s.%N")"
|
||||
bkt $args "$@" 2>&1
|
||||
after="$(date +"%s.%N")"
|
||||
if '--' in args:
|
||||
at = args.index('--')
|
||||
bkt_args, cmd_args = args[:at], args[at+1:]
|
||||
|
||||
printf "%s: %.2fs\n\n" "$ARGS" "$(echo "$after - $before"|bc)" >&2
|
||||
bkt_args = ["--ttl", "1y", "--cache-dir", ".cache"] + bkt_args
|
||||
|
||||
def wrap_bash_command(cmd):
    """Render an argument list as a shell command folded over several lines.

    Args:
        cmd: The command as a list of strings, one string per argument.

    Returns:
        The shell-quoted command (as produced by ``shlex.join``), wrapped
        at 60 columns, where every line but the last ends with a
        backslash and every continuation line starts with ``"> "``.
    """
    # Spelled as an escape on purpose: a literal no-break space in the
    # source is indistinguishable from a regular space and does not survive
    # copy/paste or whitespace normalisation.
    no_break_space = "\N{NO-BREAK SPACE}"

    # textwrap only breaks on ASCII whitespace, so temporarily swapping the
    # spaces found inside an argument keeps each argument on a single line.
    protected = [arg.replace(" ", no_break_space) for arg in cmd]
    lines = wrap(shlex.join(protected), width=60)

    # Restore the regular spaces once the line breaks have been chosen.
    return " \\\n> ".join(line.replace(no_break_space, " ") for line in lines)
|
||||
|
||||
|
||||
|
||||
before = perf_counter()
|
||||
print(f"$ {shlex.join(cmd_args)}", file=sys.stderr)
|
||||
print(f"$ {wrap_bash_command(cmd_args)}", flush=True)
|
||||
run(["bkt"] + bkt_args + ["--"] + cmd_args, stderr=STDOUT)
|
||||
after = perf_counter()
|
||||
|
||||
print(f"$ {shlex.join(cmd_args)} # took {after-before:.2f}s\n", file=sys.stderr)
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
cdef extern from "my_collatz_length.c":
|
||||
cpdef int collatz_length(long n)
|
|
@ -0,0 +1,7 @@
|
|||
def collatz_length(n):
    """Return the number of Collatz steps needed to go from *n* down to 1.

    A step halves an even value and maps any other value to ``3 * n + 1``.
    The function recurses once per step and returns 0 when *n* is 1.
    """
    if n == 1:
        return 0
    # Pick the next term of the sequence, then count it as one step.
    following = n // 2 if n % 2 == 0 else n * 3 + 1
    return 1 + collatz_length(following)
|
|
@ -0,0 +1,705 @@
|
|||
{
|
||||
"alloc_samples": 0,
|
||||
"elapsed_time_sec": 35.077271699905396,
|
||||
"files": {
|
||||
"phi5.py": {
|
||||
"functions": [
|
||||
{
|
||||
"line": "fib",
|
||||
"lineno": 5,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 4.2576382717302454,
|
||||
"n_core_utilization": 0.062425365799925484,
|
||||
"n_cpu_percent_c": 0.0034262080640758584,
|
||||
"n_cpu_percent_python": 0.7126821564209772,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0009376888160889245,
|
||||
"n_usage_fraction": 0
|
||||
},
|
||||
{
|
||||
"line": "approx_phi",
|
||||
"lineno": 12,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.6998240221201137,
|
||||
"n_core_utilization": 0.06237139811505196,
|
||||
"n_cpu_percent_c": 96.09209325974999,
|
||||
"n_cpu_percent_python": 0.3418254092394039,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.20029089301487063,
|
||||
"n_usage_fraction": 0
|
||||
},
|
||||
{
|
||||
"line": "approx_phi_up_to",
|
||||
"lineno": 16,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.06219424868052924,
|
||||
"n_cpu_percent_c": 2.351736421918283,
|
||||
"n_cpu_percent_python": 0.2556447055821568,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.012818642838908139,
|
||||
"n_usage_fraction": 0
|
||||
}
|
||||
],
|
||||
"imports": [
|
||||
"from decimal import Decimal, localcontext",
|
||||
"from itertools import count",
|
||||
"from functools import cache",
|
||||
"import sys"
|
||||
],
|
||||
"leaks": {},
|
||||
"lines": [
|
||||
{
|
||||
"end_region_line": 1,
|
||||
"line": "from decimal import Decimal, localcontext\n",
|
||||
"lineno": 1,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 1
|
||||
},
|
||||
{
|
||||
"end_region_line": 2,
|
||||
"line": "from itertools import count\n",
|
||||
"lineno": 2,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 2
|
||||
},
|
||||
{
|
||||
"end_region_line": 3,
|
||||
"line": "from functools import cache\n",
|
||||
"lineno": 3,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 3
|
||||
},
|
||||
{
|
||||
"end_region_line": 4,
|
||||
"line": "\n",
|
||||
"lineno": 4,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 4
|
||||
},
|
||||
{
|
||||
"end_region_line": 5,
|
||||
"line": "@cache\n",
|
||||
"lineno": 5,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 5
|
||||
},
|
||||
{
|
||||
"end_region_line": 10,
|
||||
"line": "def fib(n):\n",
|
||||
"lineno": 6,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 6
|
||||
},
|
||||
{
|
||||
"end_region_line": 10,
|
||||
"line": " if n in (0, 1):\n",
|
||||
"lineno": 7,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 6
|
||||
},
|
||||
{
|
||||
"end_region_line": 10,
|
||||
"line": " return 1\n",
|
||||
"lineno": 8,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 6
|
||||
},
|
||||
{
|
||||
"end_region_line": 10,
|
||||
"line": " x = n // 2\n",
|
||||
"lineno": 9,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 6
|
||||
},
|
||||
{
|
||||
"end_region_line": 10,
|
||||
"line": " return fib(x - 1) * fib(n - x - 1) + fib(x) * fib(n - x)\n",
|
||||
"lineno": 10,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 4.2576382717302454,
|
||||
"n_core_utilization": 0.062425365799925484,
|
||||
"n_cpu_percent_c": 0.0034262080640758584,
|
||||
"n_cpu_percent_python": 0.7126821564209772,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0009376888160889245,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 6
|
||||
},
|
||||
{
|
||||
"end_region_line": 11,
|
||||
"line": "\n",
|
||||
"lineno": 11,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 11
|
||||
},
|
||||
{
|
||||
"end_region_line": 12,
|
||||
"line": "@cache\n",
|
||||
"lineno": 12,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 12
|
||||
},
|
||||
{
|
||||
"end_region_line": 14,
|
||||
"line": "def approx_phi(n):\n",
|
||||
"lineno": 13,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 13
|
||||
},
|
||||
{
|
||||
"end_region_line": 14,
|
||||
"line": " return Decimal(fib(n + 1)) / Decimal(fib(n))\n",
|
||||
"lineno": 14,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.6998240221201137,
|
||||
"n_core_utilization": 0.06237139811505196,
|
||||
"n_cpu_percent_c": 96.09209325974997,
|
||||
"n_cpu_percent_python": 0.3418254092394039,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.20029089301488137,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 13
|
||||
},
|
||||
{
|
||||
"end_region_line": 15,
|
||||
"line": "\n",
|
||||
"lineno": 15,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 15
|
||||
},
|
||||
{
|
||||
"end_region_line": 23,
|
||||
"line": "def approx_phi_up_to(n_digits):\n",
|
||||
"lineno": 16,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 16
|
||||
},
|
||||
{
|
||||
"end_region_line": 23,
|
||||
"line": " with localcontext() as ctx:\n",
|
||||
"lineno": 17,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 16
|
||||
},
|
||||
{
|
||||
"end_region_line": 23,
|
||||
"line": " ctx.prec = n_digits + 1\n",
|
||||
"lineno": 18,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 16
|
||||
},
|
||||
{
|
||||
"end_region_line": 23,
|
||||
"line": " for n in count():\n",
|
||||
"lineno": 19,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 19
|
||||
},
|
||||
{
|
||||
"end_region_line": 23,
|
||||
"line": " step1 = approx_phi(2 ** n)\n",
|
||||
"lineno": 20,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 19
|
||||
},
|
||||
{
|
||||
"end_region_line": 23,
|
||||
"line": " step2 = approx_phi(2 ** (n+1))\n",
|
||||
"lineno": 21,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.06219424868052924,
|
||||
"n_cpu_percent_c": 2.351736421918283,
|
||||
"n_cpu_percent_python": 0.2556447055821568,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.012818642838908139,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 19
|
||||
},
|
||||
{
|
||||
"end_region_line": 23,
|
||||
"line": " if step1 == step2:\n",
|
||||
"lineno": 22,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 19
|
||||
},
|
||||
{
|
||||
"end_region_line": 23,
|
||||
"line": " return step1\n",
|
||||
"lineno": 23,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 19
|
||||
},
|
||||
{
|
||||
"end_region_line": 24,
|
||||
"line": "\n",
|
||||
"lineno": 24,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 24
|
||||
},
|
||||
{
|
||||
"end_region_line": 25,
|
||||
"line": "import sys\n",
|
||||
"lineno": 25,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 25
|
||||
},
|
||||
{
|
||||
"end_region_line": 26,
|
||||
"line": "\n",
|
||||
"lineno": 26,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 26
|
||||
},
|
||||
{
|
||||
"end_region_line": 27,
|
||||
"line": "if __name__ == \"__main__\":\n",
|
||||
"lineno": 27,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.0,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.0,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 27
|
||||
},
|
||||
{
|
||||
"end_region_line": 28,
|
||||
"line": " print(approx_phi_up_to(int(sys.argv[1])))\n",
|
||||
"lineno": 28,
|
||||
"memory_samples": [],
|
||||
"n_avg_mb": 0.0,
|
||||
"n_copy_mb_s": 0.0,
|
||||
"n_core_utilization": 0.005697373432005744,
|
||||
"n_cpu_percent_c": 0.0,
|
||||
"n_cpu_percent_python": 0.0026020692392709254,
|
||||
"n_gpu_avg_memory_mb": 0.0,
|
||||
"n_gpu_peak_memory_mb": 0.0,
|
||||
"n_gpu_percent": 0,
|
||||
"n_growth_mb": 0.0,
|
||||
"n_malloc_mb": 0.0,
|
||||
"n_mallocs": 0,
|
||||
"n_peak_mb": 0.0,
|
||||
"n_python_fraction": 0,
|
||||
"n_sys_percent": 0.025942545115975865,
|
||||
"n_usage_fraction": 0,
|
||||
"start_region_line": 28
|
||||
}
|
||||
],
|
||||
"percent_cpu_time": 100.0
|
||||
}
|
||||
},
|
||||
"gpu": false,
|
||||
"growth_rate": 0.0,
|
||||
"max_footprint_fname": null,
|
||||
"max_footprint_lineno": null,
|
||||
"max_footprint_mb": 0,
|
||||
"memory": true,
|
||||
"program": "phi5.py",
|
||||
"samples": [],
|
||||
"stacks": []
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
import sys
|
||||
|
||||
def should_apply_gravity(terrain):
    """Tell whether at least one cell of *terrain* holds four grains or more.

    The grid is scanned as a square whose side is ``len(terrain)``; the
    scan stops at the first cell found with a value of 4 or above.
    """
    side = len(terrain)
    for row in range(side):
        for col in range(side):
            if terrain[row][col] >= 4:
                return True
    return False
|
||||
|
||||
|
||||
def show_terrain(terrain):
    """Print *terrain* to standard output, one text line per row.

    Each cell value is used as an index into a four-glyph table, so the
    values 0 to 3 are drawn with increasingly large symbols.
    """
    glyphs = " ·●⬤"
    side = len(terrain)
    for row in range(side):
        for col in range(side):
            print(glyphs[terrain[row][col]], end="")
        # End of the row: emit the line break.
        print()
|
||||
|
||||
|
||||
def apply_gravity(terrain):
    """Run one in-place toppling pass over the square grid *terrain*.

    Cells are visited row by row. A cell holding four grains or more loses
    four of them and gives one to each of its four orthogonal neighbours.
    Updates are applied immediately, so a cell visited later in the same
    pass already sees the grains it has just received. A cell topples at
    most once per pass.

    Grains pushed past the edge of the grid are dropped.

    Args:
        terrain: A list of ``len(terrain)`` rows, each indexable up to
            ``len(terrain) - 1``, holding grain counts. Modified in place.
    """
    width = len(terrain)
    for x in range(width):
        for y in range(width):
            if terrain[x][y] < 4:
                continue
            terrain[x][y] -= 4
            for nx, ny in ((x - 1, y), (x + 1, y), (x, y + 1), (x, y - 1)):
                # Explicit bounds check: without it, index -1 silently
                # wraps around to the opposite edge while index `width`
                # raises IndexError halfway through the update.
                if 0 <= nx < width and 0 <= ny < width:
                    terrain[nx][ny] += 1
|
||||
|
||||
|
||||
def main(height, show=True):
    """Drop *height* grains on the middle of an empty grid and let it settle.

    The grid is a square whose side is ``int(height ** .5) + 1``. Toppling
    passes are repeated until no cell needs to topple any more.

    Args:
        height: Number of grains initially placed on the middle cell.
        show: When true, print the settled grid.
    """
    side = int(height ** .5) + 1
    middle = side // 2
    grid = [[0] * side for _ in range(side)]
    grid[middle][middle] = height

    while should_apply_gravity(grid):
        apply_gravity(grid)

    if not show:
        return
    show_terrain(grid)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(int(sys.argv[1]))
|
|
@ -1,4 +1,4 @@
|
|||
# Les Performances en Python
|
||||
# Les performances en Python
|
||||
|
||||
par
|
||||
|
||||
|
@ -31,7 +31,7 @@ O(n!) Factorielle
|
|||
|
||||
notes:
|
||||
|
||||
Il faut les grapher pour s'en rendre compte : cf. include/big.o.py
|
||||
Il faut les grapher pour s'en rendre compte : cf. examples/big.o.py
|
||||
|
||||
|
||||
## Comparaison asymptotique
|
||||
|
@ -73,13 +73,13 @@ de temps mais peut-être une opération de plus.
|
|||
## O(log n)
|
||||
|
||||
```python
|
||||
#!sed -n '/def index/,/raise ValueError/p' include/find_in_list.py
|
||||
#!sed -n '/def index/,/raise ValueError/p' examples/find_in_list.py
|
||||
```
|
||||
|
||||
## O(n)
|
||||
|
||||
```python
|
||||
#!sed -n '/def dumb_index/,/raise ValueError/p' include/find_in_list.py
|
||||
#!sed -n '/def dumb_index/,/raise ValueError/p' examples/find_in_list.py
|
||||
```
|
||||
|
||||
## O(n log n)
|
||||
|
@ -92,7 +92,7 @@ Typique d'algorithmes de tris.
|
|||
## Les mesures de complexité
|
||||
|
||||
- De temps (CPU consommé).
|
||||
- D'espace (Mémoire consommée).
|
||||
- D'espace (mémoire consommée).
|
||||
- Dans le meilleur des cas.
|
||||
- Dans le pire des cas.
|
||||
- Dans le cas moyen.
|
||||
|
@ -130,15 +130,9 @@ Mais retenir par cœur la complexité de quelques structures
|
|||
## Le cas typique
|
||||
|
||||
```shell
|
||||
$ python -m pyperf timeit \
|
||||
> --setup 'container = list(range(10_000_000))' \
|
||||
> '10_000_001 in container'
|
||||
#!cache -- python -m pyperf timeit --fast -s 'container = list(range(10_000_000))' '10_000_001 in container'
|
||||
#!cache python -m pyperf timeit --setup 'container = list(range(10_000_000))' '10_000_001 in container'
|
||||
|
||||
$ python -m pyperf timeit \
|
||||
> --setup 'container = set(range(10_000_000))' \
|
||||
> '10_000_001 in container'
|
||||
#!cache -- python -m pyperf timeit --fast -s 'container = set(range(10_000_000))' '10_000_001 in container'
|
||||
#!cache python -m pyperf timeit --setup 'container = set(range(10_000_000))' '10_000_001 in container'
|
||||
```
|
||||
|
||||
Pourquoi une si grande différence !?
|
||||
|
@ -149,6 +143,19 @@ notes:
|
|||
C'est l'heure du live coding !
|
||||
|
||||
|
||||
# À vous !
|
||||
|
||||
Simulons un tas de sable.
|
||||
|
||||
Ne vous souciez pas des perfs, on s'en occupera.
|
||||
|
||||
Vous avez 10 min.
|
||||
|
||||
notes:
|
||||
|
||||
voir sandpile.py
|
||||
|
||||
|
||||
# Les outils
|
||||
|
||||
## Les outils en ligne de commande
|
||||
|
@ -156,8 +163,7 @@ C'est l'heure du live coding !
|
|||
`time`, un outil POSIX, mais aussi une fonction native de bash :
|
||||
|
||||
```shell
|
||||
$ time python -c 'container = set(range(10_000_000))'
|
||||
#!cache -- time -p python -c 'container = set(range(10_000_000))'
|
||||
#!cache time -p python examples/sandpile.py 16
|
||||
```
|
||||
|
||||
Mais `time` ne teste qu'une fois.
|
||||
|
@ -173,10 +179,8 @@ sys 0m0.195s # Temps CPU passé dans le kernel
|
|||
|
||||
`hyperfine` teste plusieurs fois :
|
||||
|
||||
```text
|
||||
$ hyperfine "python -c 'container = set(range(10_000_000))'"
|
||||
Benchmark 1: python -c 'container = set(range(10_000_000))'
|
||||
Time (mean ± σ): 735.6 ms ± 11.2 ms
|
||||
```shell
|
||||
#!cache hyperfine "python examples/sandpile.py 1000"
|
||||
```
|
||||
|
||||
|
||||
|
@ -184,10 +188,8 @@ Benchmark 1: python -c 'container = set(range(10_000_000))'
|
|||
|
||||
Mais attention, démarrer un processus Python n'est pas gratuit :
|
||||
|
||||
```python
|
||||
$ hyperfine "python -c pass"
|
||||
Benchmark 1: python -c pass
|
||||
Time (mean ± σ): 19.4 ms ± 0.6 ms
|
||||
```shell
|
||||
#!cache hyperfine "python -c pass"
|
||||
```
|
||||
|
||||
notes:
|
||||
|
@ -199,7 +201,7 @@ N'essayez pas de retenir les chiffres, retenez les faits.
|
|||
|
||||
Et puis il peut dépendre de la version de Python, des options de compilation, ... :
|
||||
|
||||
```text
|
||||
```shell
|
||||
$ hyperfine "~/.local/bin/python3.10 -c pass" # Avec pydebug
|
||||
Benchmark 1: ~/.local/bin/python3.10 -c pass
|
||||
Time (mean ± σ): 37.6 ms ± 0.6 ms
|
||||
|
@ -218,19 +220,19 @@ Leur parler de `--enable-optimizations` (PGO).
|
|||
|
||||
Timeit c'est dans la stdlib de Python, ça s'utilise en ligne de commande ou depuis Python.
|
||||
|
||||
```shell
|
||||
#!cache python -m timeit -s 'import sandpile' 'sandpile.main(1000, show=False)'
|
||||
```
|
||||
|
||||
Mais il ne vous donne que le meilleur temps, ce qui n'est pas toujours représentatif.
|
||||
|
||||
|
||||
## pyperf
|
||||
|
||||
C'est l'équivalent d'hyperfine mais exécutant du Python plutôt qu'un programme :
|
||||
Il n'est pas dans la stdlib, mais il remplace avantageusement `timeit` :
|
||||
|
||||
```shell
|
||||
$ ~/.local/bin/python3.10 -m pyperf timeit pass
|
||||
.....................
|
||||
Mean +- std dev: 7.33 ns +- 0.18 ns
|
||||
|
||||
$ /usr/bin/python3.10 -m pyperf timeit pass
|
||||
.....................
|
||||
Mean +- std dev: 6.10 ns +- 0.11 ns
|
||||
#!cache python -m pyperf timeit --setup 'import sandpile' 'sandpile.main(1000, show=False)'
|
||||
```
|
||||
|
||||
notes:
|
||||
|
@ -240,17 +242,39 @@ démarrer** puis d'exécuter `pass`, ici on teste combien de temps ça
|
|||
prend d'exécuter `pass`.
|
||||
|
||||
|
||||
# cProfile
|
||||
## pyperf
|
||||
|
||||
time, timeit, hyperfine, pyperf c'est bien pour mesurer, comparer.
|
||||
Digression, `pyperf` peut aussi remplacer `hyperfine` :
|
||||
|
||||
cProfile nous aider à trouver la fonction coupable.
|
||||
```shell
|
||||
#!cache python -m pyperf command python examples/sandpile.py 1000
|
||||
```
|
||||
|
||||
notes:
|
||||
|
||||
Avec hyperfine on teste combien de temps ça prend à Python **de
|
||||
démarrer** puis d'exécuter `pass`, ici on teste combien de temps ça
|
||||
prend d'exécuter `pass`.
|
||||
|
||||
|
||||
## Les outils — À vous !
|
||||
|
||||
Effectuez quelques mesures sur votre implémentation.
|
||||
|
||||
Tentez d'en déterminer la complexité en fonction du nombre de grains.
|
||||
|
||||
|
||||
# Profilage
|
||||
|
||||
`time`, `timeit`, `hyperfine`, `pyperf` c'est bien pour mesurer, comparer.
|
||||
|
||||
`cProfile` peut nous aider à trouver la fonction coupable.
|
||||
|
||||
|
||||
## cProfile, exemple
|
||||
|
||||
```python
|
||||
#!sed -n '/def fib/,/return approx/p' include/phi1.py
|
||||
#!sed -n '/def fib/,/return approx/p' examples/phi1.py
|
||||
```
|
||||
|
||||
|
||||
|
@ -259,12 +283,11 @@ cProfile nous aider à trouver la fonction coupable.
|
|||
Testons :
|
||||
|
||||
```python
|
||||
#!sed -n '/import sys/,$p' include/phi1.py
|
||||
#!sed -n '/import sys/,$p' examples/phi1.py
|
||||
```
|
||||
|
||||
```text
|
||||
$ time python phi1.py 10
|
||||
#!cache -- time -p python include/phi1.py 10
|
||||
#!cache time -p python examples/phi1.py 10
|
||||
```
|
||||
|
||||
|
||||
|
@ -275,10 +298,10 @@ C'est déjà lent, et pour `20` c'est interminable...
|
|||
|
||||
Sortons cProfile :
|
||||
|
||||
```text
|
||||
```shell
|
||||
$ python -m cProfile --sort cumulative phi1.py 10
|
||||
...
|
||||
#!cache -- python -m cProfile --sort cumulative include/phi1.py 10 | sed -n '/fib\|function calls/{s/ \+/ /g;s/^ *//;p}'
|
||||
#!cache python -m cProfile --sort cumulative examples/phi1.py 10 | sed -n '/fib\|function calls/{s/ \+/ /g;s/^ *//;p}'
|
||||
...
|
||||
```
|
||||
|
||||
|
@ -291,16 +314,18 @@ C'est donc `fib` la coupable :
|
|||
|
||||
Cachons les résultats de `fib` :
|
||||
```python
|
||||
#!sed -n '/import cache/,/return fib/p' include/phi2.py
|
||||
#!sed -n '/import cache/,/return fib/p' examples/phi2.py
|
||||
```
|
||||
|
||||
## cProfile, exemple
|
||||
|
||||
Et on repasse dans cProfile !
|
||||
|
||||
```text
|
||||
```shell
|
||||
$ python -m cProfile --sort cumulative phi2.py 10
|
||||
#!cache -- python -m cProfile --sort cumulative include/phi2.py 10 | sed -n '/fib\|function calls/{s/ \+/ /g;s/^ *//;p}'
|
||||
...
|
||||
#!cache python -m cProfile --sort cumulative examples/phi2.py 10 | sed -n '/fib\|function calls/{s/ \+/ /g;s/^ *//;p}'
|
||||
...
|
||||
```
|
||||
|
||||
C'est mieux !
|
||||
|
@ -309,9 +334,8 @@ C'est mieux !
|
|||
|
||||
On essaye d'aller plus loin ?
|
||||
|
||||
```text
|
||||
$ python -m cProfile --sort cumulative phi2.py 2000
|
||||
#!cache -- python -m cProfile --sort cumulative include/phi2.py 2000 | head -n 2 | sed 's/^ *//g;s/seconds/s/g'
|
||||
```shell
|
||||
#!cache python -m cProfile --sort cumulative examples/phi2.py 2000 | head -n 3 | sed 's/^ *//g;s/seconds/s/g'
|
||||
```
|
||||
|
||||
Ça tient, mais peut-on faire mieux ?
|
||||
|
@ -323,22 +347,22 @@ Divisons par 10 le nombre d'appels, on réduira mécaniquement par 10 le
|
|||
temps d'exécution ?
|
||||
|
||||
```python
|
||||
#!sed -n '/def approx_phi_up_to/,/return step1/p' include/phi3.py
|
||||
#!sed -n '/def approx_phi_up_to/,/return step1/p' examples/phi3.py
|
||||
```
|
||||
|
||||
## cProfile, exemple
|
||||
|
||||
```text
|
||||
$ python -m cProfile --sort cumulative phi3.py 2000
|
||||
#!cache -- python -m cProfile --sort cumulative include/phi3.py 2000 | head -n 2 | sed 's/^ *//g;s/seconds/s/g'
|
||||
```shell
|
||||
#!cache python -m cProfile --sort cumulative examples/phi3.py 2000 | head -n 3 | sed 's/^ *//g;s/seconds/s/g'
|
||||
```
|
||||
|
||||
|
||||
## cProfile, exemple
|
||||
|
||||
En cachant `approx_phi` ?
|
||||
|
||||
```python
|
||||
#!sed -n '10,/return step1/p' include/phi4.py
|
||||
#!sed -n '10,/return step1/p' examples/phi4.py
|
||||
```
|
||||
|
||||
notes:
|
||||
|
@ -346,10 +370,11 @@ notes:
|
|||
Notez l'astuce pour que le `step2` d'un
|
||||
tour soit le `step1` du suivant...
|
||||
|
||||
|
||||
## cProfile, exemple
|
||||
|
||||
```
|
||||
$ python -m cProfile --sort cumulative phi4.py 2000
|
||||
```shell
|
||||
$ python -m cProfile --sort cumulative examples/phi4.py 2000
|
||||
```
|
||||
|
||||
`RecursionError` !? En effet, en avançant par si grands pas, le cache
|
||||
|
@ -363,15 +388,14 @@ Il est temps de sortir une implémentation de `fib` plus robuste, basée
|
|||
sur l'algorithme « matrix exponentiation » :
|
||||
|
||||
```python
|
||||
#!sed -n '/def fib/,/return fib/p' include/phi5.py
|
||||
#!sed -n '/def fib/,/return fib/p' examples/phi5.py
|
||||
```
|
||||
|
||||
|
||||
## cProfile, exemple
|
||||
|
||||
```text
|
||||
$ python -m cProfile --sort cumulative phi5.py 2000
|
||||
#!cache -- python -m cProfile --sort cumulative include/phi5.py 2000 | head -n 2 | sed 's/^ *//g;s/seconds/s/g'
|
||||
#!cache python -m cProfile --sort cumulative examples/phi5.py 2000 | head -n 3 | sed 's/^ *//g;s/seconds/s/g'
|
||||
```
|
||||
|
||||
notes:
|
||||
|
@ -381,14 +405,15 @@ Mieux.
|
|||
|
||||
## Snakeviz
|
||||
|
||||
```text
|
||||
python -m pip install snakeviz
|
||||
python -m cProfile -o phi5.prof phi5.py 2000
|
||||
#!if [ ! -f .cache/phi5.prof ]; then python -m cProfile -o .cache/phi5.prof include/phi5.py 2000 >/dev/null 2>&1; fi
|
||||
python -m snakeviz phi5.prof
|
||||
#!if [ ! -f output/phi5-snakeviz.png ]; then python -m snakeviz -s .cache/phi5.prof & TOKILL=$!; sleep 1; cutycapt --min-width=1024 --delay=500 --url=http://127.0.0.1:8080/snakeviz/%2Ftmp%2Fphi5.prof --out=output/phi5-snakeviz.png ; kill $TOKILL; fi
|
||||
```shell
|
||||
$ python -m pip install snakeviz
|
||||
$ python -m cProfile -o phi5.prof phi5.py 2000
|
||||
$ python -m snakeviz phi5.prof
|
||||
```
|
||||
|
||||
#!if [ ! -f .cache/phi5.prof ]; then python -m cProfile -o .cache/phi5.prof examples/phi5.py 2000 >/dev/null 2>&1; fi
|
||||
#!if [ ! -f output/phi5-snakeviz.png ]; then python -m snakeviz -s .cache/phi5.prof & TOKILL=$!; sleep 1; cutycapt --min-width=1024 --delay=500 --url=http://127.0.0.1:8080/snakeviz/%2Ftmp%2Fphi5.prof --out=output/phi5-snakeviz.png ; kill $TOKILL; fi
|
||||
|
||||
|
||||
## Snakeviz
|
||||
|
||||
|
@ -400,56 +425,64 @@ python -m snakeviz phi5.prof
|
|||
```shell
|
||||
$ python -m pip install scalene
|
||||
$ scalene phi5.py 100000
|
||||
#!if [ ! -f output/phi5.html ]; then ( cd include; scalene phi5.py 100000 --html --outfile ../output/phi5.html --cli >&2 ); fi
|
||||
#!if [ ! -f output/phi5-scalene.png ]; then cutycapt --min-width=1024 --delay=100 --url=file://$(pwd)/output/phi5.html --out=output/phi5-scalene.png; fi
|
||||
```
|
||||
|
||||
#!if [ ! -f output/phi5.html ]; then ( cd examples; scalene phi5.py 100000 --html --outfile ../output/phi5.html --cli >&2 ); fi
|
||||
#!if [ ! -f output/phi5-scalene.png ]; then cutycapt --min-width=1024 --delay=100 --url=file://$(pwd)/output/phi5.html --out=output/phi5-scalene.png; fi
|
||||
|
||||
|
||||
## Scalene
|
||||
|
||||
![](phi5-scalene.png)
|
||||
|
||||
|
||||
## Atelier
|
||||
## Aussi
|
||||
|
||||
Générateur de prénoms français.
|
||||
|
||||
Notes: voir includes/prenom-*.py
|
||||
- https://github.com/gaogaotiantian/viztracer
|
||||
- https://github.com/joerick/pyinstrument
|
||||
- https://github.com/benfred/py-spy
|
||||
- https://github.com/pyutils/line_profiler
|
||||
- https://github.com/sumerc/yappi
|
||||
- https://github.com/vmprof/vmprof-python
|
||||
- https://github.com/bloomberg/memray
|
||||
|
||||
|
||||
## TODO
|
||||
- vprof
|
||||
- https://pypi.org/project/pp3yflame/
|
||||
## Profilage — À vous !
|
||||
|
||||
Profilez votre implémentation et tentez quelques améliorations.
|
||||
|
||||
|
||||
# Cython
|
||||
|
||||
Cython est un dialecte de Python transpilable en C.
|
||||
|
||||
|
||||
## Cython démo
|
||||
|
||||
```python
|
||||
#!cat include/collatz_length.py
|
||||
#!cat examples/collatz_length.py
|
||||
```
|
||||
|
||||
|
||||
## Cython démo
|
||||
|
||||
```shell
|
||||
$ python -m pyperf timeit \
|
||||
> -s 'from collatz_length import collatz_length' \
|
||||
> 'collatz_length(837799)'
|
||||
#!cache -- python -m pyperf timeit --fast --setup 'from include.collatz_length import collatz_length' 'collatz_length(837799)'
|
||||
#!cache python -m pyperf timeit --fast --setup 'from examples.collatz_length import collatz_length' 'collatz_length(837799)'
|
||||
```
|
||||
|
||||
```shell
|
||||
$ cythonize --inplace collatz_length_cython.py
|
||||
#!if ! [ -f include/collatz_length_cython.*.so ] ; then cythonize --inplace include/collatz_length_cython.py; fi
|
||||
#!if ! [ -f examples/collatz_length_cython.*.so ] ; then cythonize --inplace examples/collatz_length_cython.py; fi
|
||||
```
|
||||
|
||||
```shell
|
||||
$ python -m pyperf timeit \
|
||||
> -s 'from collatz_length import collatz_length' \
|
||||
> 'collatz_length(837799)'
|
||||
#!cache -- python -m pyperf timeit --fast -s 'from include.collatz_length import collatz_length' 'collatz_length(837799)' # faster
|
||||
#!cache python -m pyperf timeit --fast -s 'from examples.collatz_length import collatz_length' 'collatz_length(837799)' # faster
|
||||
#!# Beware: the cythonized run uses `-s` while the non-cythonized one uses `--setup`, just to get two distinct cache buckets :D
|
||||
```
|
||||
|
||||
|
@ -458,8 +491,8 @@ $ python -m pyperf timeit \
|
|||
|
||||
```shell
|
||||
$ cython -a collatz_length.py
|
||||
#!if ! [ -f include/collatz_length.html ] ; then cython -a include/collatz_length.py; fi
|
||||
#!if ! [ -f output/collatz_length.png ] ; then cutycapt --min-width=1024 --delay=500 --url=file://$(pwd)/include/collatz_length.html --out=output/collatz_length.png; fi
|
||||
#!if ! [ -f examples/collatz_length.html ] ; then cython -a examples/collatz_length.py; fi
|
||||
#!if ! [ -f output/collatz_length.png ] ; then cutycapt --min-width=1024 --delay=500 --url=file://$(pwd)/examples/collatz_length.html --out=output/collatz_length.png; fi
|
||||
```
|
||||
|
||||
![](collatz_length.png)
|
||||
|
@ -468,96 +501,106 @@ $ cython -a collatz_length.py
|
|||
## Cython annotated
|
||||
|
||||
```python
|
||||
#!cat include/collatz_length_annotated.py
|
||||
#!cat examples/collatz_length_annotated.py
|
||||
```
|
||||
|
||||
```shell
|
||||
$ cythonize --inplace collatz_length_annotated.py
|
||||
```
|
||||
|
||||
#!if ! [ -f include/collatz_length_annotated.*.so ] ; then cythonize --inplace include/collatz_length_annotated.py; fi
|
||||
#!if ! [ -f examples/collatz_length_annotated.*.so ] ; then cythonize --inplace examples/collatz_length_annotated.py; fi
|
||||
|
||||
```shell
|
||||
$ python -m pyperf timeit \
|
||||
> -s 'from collatz_length_annotated import collatz_length' \
|
||||
> 'collatz_length(837799)'
|
||||
#!cache -- python -m pyperf timeit --fast -s 'from include.collatz_length_annotated import collatz_length' 'collatz_length(837799)'
|
||||
#!cache python -m pyperf timeit --fast -s 'from examples.collatz_length_annotated import collatz_length' 'collatz_length(837799)'
|
||||
```
|
||||
|
||||
|
||||
## Cython annotate again
|
||||
|
||||
```shell
|
||||
$ cython -a include/collatz_length_annotated.py
|
||||
#!if ! [ -f include/collatz_length_annotated.html ] ; then cython -a include/collatz_length_annotated.py; fi
|
||||
#!if ! [ -f output/collatz_length_annotated.png ] ; then cutycapt --min-width=1024 --delay=500 --url=file://$(pwd)/include/collatz_length_annotated.html --out=output/collatz_length_annotated.png; fi
|
||||
$ cython -a examples/collatz_length_annotated.py
|
||||
#!if ! [ -f examples/collatz_length_annotated.html ] ; then cython -a examples/collatz_length_annotated.py; fi
|
||||
#!if ! [ -f output/collatz_length_annotated.png ] ; then cutycapt --min-width=1024 --delay=500 --url=file://$(pwd)/examples/collatz_length_annotated.html --out=output/collatz_length_annotated.png; fi
|
||||
```
|
||||
|
||||
![](collatz_length_annotated.png)
|
||||
|
||||
|
||||
## Cython — À vous !
|
||||
|
||||
|
||||
# Numba
|
||||
|
||||
Numba est un `JIT` : « Just In Time compiler ».
|
||||
|
||||
```python
|
||||
#!cat include/collatz_length_numba.py
|
||||
#!cat examples/collatz_length_numba.py
|
||||
```
|
||||
|
||||
## Numba démo
|
||||
|
||||
```shell
|
||||
$ python -m pyperf timeit \
|
||||
> -s 'from include.collatz_length_numba import collatz_length' \
|
||||
> -s 'from examples.collatz_length_numba import collatz_length' \
|
||||
> 'collatz_length(837799)'
|
||||
#!cache -- python -m pyperf timeit --fast -s 'from include.collatz_length_numba import collatz_length' 'collatz_length(837799)'
|
||||
#!cache python -m pyperf timeit --fast -s 'from examples.collatz_length_numba import collatz_length' 'collatz_length(837799)'
|
||||
```
|
||||
|
||||
## numba — À vous !
|
||||
|
||||
|
||||
# mypyc
|
||||
|
||||
mypyc est un compilateur qui s'appuie sur les annotations de type mypy :
|
||||
|
||||
```python
|
||||
#!cat include/collatz_length_mypy.py
|
||||
#!cat examples/collatz_length_mypy.py
|
||||
```
|
||||
|
||||
## mypyc démo
|
||||
|
||||
```shell
|
||||
$ mypyc include/collatz_length_mypy.py
|
||||
#!if ! [ -f collatz_length_mypy.*.so ] ; then mypyc include/collatz_length_mypy.py; fi
|
||||
$ mypyc examples/collatz_length_mypy.py
|
||||
#!if ! [ -f collatz_length_mypy.*.so ] ; then mypyc examples/collatz_length_mypy.py; fi
|
||||
```
|
||||
|
||||
```shell
|
||||
$ python -m pyperf timeit \
|
||||
> -s 'from collatz_length_mypy import collatz_length' \
|
||||
> 'collatz_length(837799)'
|
||||
#!cache -- python -m pyperf timeit --fast -s 'from collatz_length_mypy import collatz_length' 'collatz_length(837799)'
|
||||
#!cache python -m pyperf timeit --fast -s 'from collatz_length_mypy import collatz_length' 'collatz_length(837799)'
|
||||
```
|
||||
|
||||
## mypyc — À vous !
|
||||
|
||||
|
||||
# Pythran
|
||||
|
||||
pythran est un compilateur pour du code scientifique :
|
||||
|
||||
```python
|
||||
#!cat include/collatz_length_pythran.py
|
||||
#!cat examples/collatz_length_pythran.py
|
||||
```
|
||||
|
||||
## Pythran démo
|
||||
|
||||
```shell
|
||||
$ pythran include/collatz_length_pythran.py
|
||||
#!if ! [ -f collatz_length_pythran.*.so ]; then pythran include/collatz_length_pythran.py; fi
|
||||
$ pythran examples/collatz_length_pythran.py
|
||||
#!if ! [ -f collatz_length_pythran.*.so ]; then pythran examples/collatz_length_pythran.py; fi
|
||||
```
|
||||
|
||||
```shell
|
||||
$ python -m pyperf timeit \
|
||||
> -s 'from collatz_length_pythran import collatz_length' \
|
||||
> 'collatz_length(837799)'
|
||||
#!cache -- python -m pyperf timeit --fast -s 'from collatz_length_pythran import collatz_length' 'collatz_length(837799)'
|
||||
#!cache python -m pyperf timeit --fast -s 'from collatz_length_pythran import collatz_length' 'collatz_length(837799)'
|
||||
```
|
||||
|
||||
## pythran — À vous !
|
||||
|
||||
|
||||
# Nuitka
|
||||
|
||||
|
@ -565,44 +608,46 @@ Aussi un compilateur, aussi utilisable pour distribuer une application.
|
|||
|
||||
```shell
|
||||
$ python -m nuitka --module collatz_length_nuitka.py
|
||||
#!if ! [ -f collatz_length_nuitka.*.so ]; then python -m nuitka --module include/collatz_length_nuitka.py >/dev/null; fi
|
||||
#!if ! [ -f collatz_length_nuitka.*.so ]; then python -m nuitka --module examples/collatz_length_nuitka.py >/dev/null; fi
|
||||
```
|
||||
|
||||
```shell
|
||||
$ python -m pyperf timeit \
|
||||
> -s 'from collatz_length_nuitka import collatz_length' \
|
||||
> 'collatz_length(837799)'
|
||||
#!cache -- python -m pyperf timeit --fast -s 'from collatz_length_nuitka import collatz_length' 'collatz_length(837799)'
|
||||
#!cache python -m pyperf timeit --fast -s 'from collatz_length_nuitka import collatz_length' 'collatz_length(837799)'
|
||||
```
|
||||
|
||||
|
||||
# Et d'autres
|
||||
# Et d'autres…
|
||||
|
||||
https://github.com/pfalcon/awesome-python-compilers
|
||||
|
||||
|
||||
# Hand crafted C
|
||||
|
||||
```c
|
||||
#!sed -n '/int collatz_length/,/^$/p' include/my_collatz_length.c
|
||||
#!sed -n '/int collatz_length/,/^$/p' examples/my_collatz_length.c
|
||||
```
|
||||
|
||||
Mais comment l'utiliser ?
|
||||
|
||||
## Hand crafted C
|
||||
|
||||
### Avec Cython
|
||||
|
||||
```cython
|
||||
#!cat include/collatz_length_cython_to_c.pyx
|
||||
#!cat examples/collatz_length_cython_to_c.pyx
|
||||
```
|
||||
|
||||
```shell
|
||||
$ cythonize -i include/collatz_length_cython_to_c.pyx
|
||||
#!if ! [ -f include/collatz_length_cython_to_c.*.so ] ; then cythonize -i include/collatz_length_cython_to_c.pyx; fi
|
||||
$ cythonize -i examples/collatz_length_cython_to_c.pyx
|
||||
#!if ! [ -f examples/collatz_length_cython_to_c.*.so ] ; then cythonize -i examples/collatz_length_cython_to_c.pyx; fi
|
||||
```
|
||||
|
||||
```shell
|
||||
$ python -m pyperf timeit \
|
||||
> -s 'from collatz_length_cython_to_c import collatz_length' \
|
||||
> 'collatz_length(837799)'
|
||||
#!cache -- python -m pyperf timeit --fast -s 'from include.collatz_length_cython_to_c import collatz_length' 'collatz_length(837799)'
|
||||
#!cache python -m pyperf timeit --fast -s 'from examples.collatz_length_cython_to_c import collatz_length' 'collatz_length(837799)'
|
||||
```
|
||||
|
|
Loading…
Reference in New Issue