python-perfs: proofreading

This commit is contained in:
Julien Palard 2023-10-09 14:09:02 +02:00
parent c1ce350c7c
commit 1b5208e07c
Signed by: mdk
GPG Key ID: 0EFC1AC1006886F8
9 changed files with 393 additions and 211 deletions

View File

@ -1,9 +1,13 @@
.cache/
.hypothesis/
*.so
*.c
*.html
*.bin
*.build/
build/
*.pyi
examples/collatz_length_annotated.c
examples/collatz_length_cython.c
examples/collatz_length_cython_to_c.c
examples/phi5cython.c
*.lprof

View File

@ -0,0 +1,13 @@
// Compile with:
// cc -shared -fPIC my_collatz_length.c -o libcollatz.so
// (-shared, not -c: -c would only emit a relocatable object file.)

// Return the number of Collatz steps needed to go from n down to 1:
// halve n when it is even, replace it by 3n + 1 when it is odd.
// Values of n below 1 have no such sequence ending at 1; they yield 0
// instead of recursing without end.
int collatz_length(long n)
{
    if (n <= 1)
        return 0;
    if (n % 2 == 0)
        return 1 + collatz_length(n / 2);
    else
        return 1 + collatz_length(n * 3 + 1);
}

View File

@ -0,0 +1,28 @@
from decimal import Decimal, localcontext
from itertools import count
from functools import cache
@cache
def fib(n):
    """Return the n-th Fibonacci number, counting from fib(0) == fib(1) == 1.

    The index is split in two parts (``half`` and ``rest``) and the result
    is rebuilt from the values around those two smaller indices, so only a
    handful of distinct indices are ever computed; ``@cache`` memoises them.
    """
    if n == 0 or n == 1:
        return 1
    half = n // 2
    rest = n - half
    return fib(half - 1) * fib(rest - 1) + fib(half) * fib(rest)
@cache
def _approx_phi_at(n, prec):
    """Return fib(n + 1) / fib(n) as rounded by the active decimal context.

    ``prec`` does not take part in the computation: it is only there so
    that the cache keeps one entry per (n, precision) pair instead of
    handing back a quotient that was rounded for another precision.
    """
    return Decimal(fib(n + 1)) / Decimal(fib(n))


def approx_phi(n):
    """Approximate the golden ratio as the quotient fib(n + 1) / fib(n).

    The division is rounded to the precision of the current decimal
    context. Results are memoised per precision, so calling this again
    under a context with a different ``prec`` recomputes the quotient
    rather than returning a value rounded for the earlier precision.
    """
    # Local import: the module only imports Decimal and localcontext.
    from decimal import getcontext

    return _approx_phi_at(n, getcontext().prec)
def approx_phi_up_to(n_digits):
    """Return an approximation of the golden ratio that stopped changing.

    Works in a local decimal context whose precision is ``n_digits + 1``
    and compares approx_phi at successive powers of two (1 and 2, then
    2 and 4, ...). The first value that equals its successor is returned.
    """
    with localcontext() as ctx:
        ctx.prec = n_digits + 1
        power = 1
        while True:
            current = approx_phi(power)
            following = approx_phi(2 * power)
            if current == following:
                return current
            power *= 2
import sys
if __name__ == "__main__":
    # The number of digits is taken from the first command-line argument.
    requested_digits = int(sys.argv[1])
    print(approx_phi_up_to(requested_digits))

View File

@ -0,0 +1,29 @@
from decimal import Decimal, localcontext
from itertools import count
from functools import cache
@cache
def fib(n: int) -> int:
    """Return the n-th Fibonacci number, counting from fib(0) == fib(1) == 1.

    The index is split in two parts (``half`` and ``rest``) and the result
    is rebuilt from the values around those two smaller indices; ``@cache``
    memoises every index that gets computed.
    """
    if n == 0 or n == 1:
        return 1
    half = n // 2
    rest = n - half
    return fib(half - 1) * fib(rest - 1) + fib(half) * fib(rest)
@cache
def _approx_phi_at(n: int, prec: int) -> Decimal:
    """Return fib(n + 1) / fib(n) as rounded by the active decimal context.

    ``prec`` does not take part in the computation: it is only there so
    that the cache keeps one entry per (n, precision) pair instead of
    handing back a quotient that was rounded for another precision.
    """
    return Decimal(fib(n + 1)) / Decimal(fib(n))


def approx_phi(n: int) -> Decimal:
    """Approximate the golden ratio as the quotient fib(n + 1) / fib(n).

    The division is rounded to the precision of the current decimal
    context. Results are memoised per precision, so calling this again
    under a context with a different ``prec`` recomputes the quotient
    rather than returning a value rounded for the earlier precision.
    """
    # Local import: the module only imports Decimal and localcontext.
    from decimal import getcontext

    return _approx_phi_at(n, getcontext().prec)
def approx_phi_up_to(n_digits: int) -> Decimal:
    """Return an approximation of the golden ratio that stopped changing.

    Works in a local decimal context whose precision is ``n_digits + 1``
    and compares approx_phi at successive powers of two. The first value
    that equals its successor is returned.
    """
    with localcontext() as ctx:
        ctx.prec = n_digits + 1
        for exponent in count():
            lower = approx_phi(2 ** exponent)
            upper = approx_phi(2 ** (exponent + 1))
            if lower != upper:
                continue
            return lower
    # count() never ends, so the loop can only be left through the return.
    assert 0
import sys
if __name__ == "__main__":
    # The number of digits is taken from the first command-line argument.
    wanted_digits = int(sys.argv[1])
    print(approx_phi_up_to(wanted_digits))

View File

@ -0,0 +1,32 @@
from decimal import Decimal, localcontext
from itertools import count
from functools import cache
from numba import jit
@cache
@jit
def fib(n):
    """Return the n-th Fibonacci number, counting from fib(0) == fib(1) == 1.

    The index is split in two parts (``half`` and ``rest``) and the result
    is rebuilt from the values around those two smaller indices. The
    function is wrapped by numba's ``jit`` and then memoised by ``cache``.
    """
    if n == 0 or n == 1:
        return 1
    half = n // 2
    rest = n - half
    return fib(half - 1) * fib(rest - 1) + fib(half) * fib(rest)
@cache
@jit
def approx_phi(n):
    """Approximate the golden ratio as the quotient fib(n + 1) / fib(n).

    The division is carried out on Decimal values, hence rounded by the
    active decimal context.
    """
    # NOTE(review): the cache is keyed on n only while the quotient depends
    # on the context precision -- confirm callers never change precision
    # between calls within one process.
    numerator = Decimal(fib(n + 1))
    denominator = Decimal(fib(n))
    return numerator / denominator
def approx_phi_up_to(n_digits):
    """Return an approximation of the golden ratio that stopped changing.

    Works in a local decimal context whose precision is ``n_digits + 1``
    and compares approx_phi at successive powers of two. The first value
    that equals its successor is returned.
    """
    with localcontext() as ctx:
        ctx.prec = n_digits + 1
        for exponent in count():
            lower = approx_phi(2 ** exponent)
            upper = approx_phi(2 ** (exponent + 1))
            if lower != upper:
                continue
            return lower
import sys
if __name__ == "__main__":
    # The number of digits is taken from the first command-line argument.
    digits_requested = int(sys.argv[1])
    print(approx_phi_up_to(digits_requested))

View File

@ -0,0 +1,149 @@
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <inttypes.h>
#include <stdio.h>
#include <math.h>
#include <png.h>
/*
 * Print the width x width terrain on stdout, one text line per first
 * index, framed above and below by a line of `width` dashes.
 */
void show_terrain(int width, int **terrain)
{
    /*
     * Glyph printed for a cell holding 0, 1, 2 or 3; any other value is
     * printed as "+".
     * NOTE(review): the glyphs for 2 and 3 are empty strings here, which
     * shortens the printed row -- possibly characters lost in transit,
     * confirm against the original file.
     */
    static const char *const glyphs[] = {" ", "·", "", ""};

    for (int i = 0; i < width; i++)
        fputs("-", stdout);
    fputs("\n", stdout);

    for (int row = 0; row < width; row++) {
        for (int col = 0; col < width; col++) {
            int grains = terrain[row][col];
            int known = grains >= 0 && grains <= 3;

            fputs(known ? glyphs[grains] : "+", stdout);
        }
        fputs("\n", stdout);
    }

    for (int i = 0; i < width; i++)
        fputs("-", stdout);
    fputs("\n", stdout);
}
int save_terrain(int width, int **terrain, char *filename)
{
png_image image;
png_byte row_pointers[width][width];
memset(&image, 0, (sizeof image));
image.version = PNG_IMAGE_VERSION;
image.format = PNG_FORMAT_FLAG_COLORMAP|PNG_FORMAT_FLAG_COLOR;
image.width = width;
image.height = width;
image.colormap_entries = 4;
for (int x = 0; x < width; x++)
for (int y = 0; y < width; y++)
row_pointers[x][y] = terrain[x][y];
if (png_image_write_to_file(&image, filename, 0,
row_pointers, 0,
"\xFF\xFF\xFF\xe9\xff\x70\x70\xd6\xff\xff\x70\xa6")){
return 0;
}
fprintf(stderr, "pngtopng: error: %s\n", image.message);
return 1;
}
#ifdef DEBUG
/*
 * Clamp a into [low, high]. Every argument is parenthesised so that
 * expressions such as `p | q` expand correctly; arguments are still
 * evaluated more than once, so avoid side effects in them.
 */
#define BOUND(a, low, high) \
    ((a) < (low) ? (low) : ((a) > (high) ? (high) : (a)))
/*
 * Count one toppling of a pile of x grains in the `by_height` histogram
 * that must be in scope at the point of use; x is clamped to 0..19.
 */
#define STORE_HEIGHT(x) (by_height[BOUND((x), 0, 19)] += 1)
#endif
/*
 * Topple the sandpile in place until every cell holds fewer than 4 grains.
 *
 * The width x width terrain is swept repeatedly. A cell holding 4 grains
 * or more keeps `grains % 4` and gives `grains / 4` to each of its four
 * neighbours. A neighbour that would lie outside the terrain is skipped,
 * so grains pushed over the border are lost instead of being written
 * outside the arrays.
 *
 * With DEBUG defined, the number of sweeps and a histogram of the pile
 * heights that were toppled are reported on stderr.
 */
void apply_gravity(int width, int **terrain)
{
    bool did_something;
    int div;
#ifdef DEBUG
    int loops = 0;
    uint64_t by_height[20] = {0};
    const int last_bucket = (int)(sizeof by_height / sizeof by_height[0]) - 1;
#endif

    do {
        did_something = false;
#ifdef DEBUG
        loops += 1;
#endif
        for (int x = 0; x < width; x++) {
            for (int y = 0; y < width; y++) {
                if (terrain[x][y] < 4)
                    continue;
#ifdef DEBUG
                STORE_HEIGHT(terrain[x][y]);
#endif
                did_something = true;
                div = terrain[x][y] / 4;
                terrain[x][y] = terrain[x][y] % 4;
                /* Only touch neighbours that exist. */
                if (x > 0)
                    terrain[x - 1][y] += div;
                if (x + 1 < width)
                    terrain[x + 1][y] += div;
                if (y + 1 < width)
                    terrain[x][y + 1] += div;
                if (y > 0)
                    terrain[x][y - 1] += div;
            }
        }
    } while (did_something);

#ifdef DEBUG
    fprintf(stderr, "looped %i times:\n", loops);
    /* Piles below 4 grains never topple, so their buckets stay empty. */
    for (int i = 4; i < last_bucket; i++) {
        fprintf(stderr, "- %"PRIu64" times to break a pile of %i grains\n", by_height[i], i);
    }
    /* The last bucket collects every height the clamp folded into it. */
    fprintf(stderr, "- %"PRIu64" times to break a pile of %i grains or more\n", by_height[last_bucket], last_bucket);
#endif
}
/*
 * Usage: PROGRAM HEIGHT [FILE]
 *
 * Drops HEIGHT grains on the centre of a square terrain, lets them
 * topple, then either prints the terrain (no FILE given) or saves it as
 * a PNG in FILE. Passing "/dev/null" as FILE skips the output entirely.
 *
 * Exits with EXIT_FAILURE on a missing or invalid HEIGHT, on allocation
 * failure, or when the PNG cannot be written.
 */
int main(int argc, char **argv)
{
    int width;
    int height;
    int status = EXIT_SUCCESS;

    if (argc < 2) {
        fprintf(stderr, "Please provide height.\n");
        return EXIT_FAILURE;
    }
    /* sscanf returns 0 (not EOF) when the text is not a number. */
    if (sscanf(argv[1], "%i", &height) != 1) {
        fprintf(stderr, "Can't read height as an integer.\n");
        return EXIT_FAILURE;
    }
    /* A height below 1 would give a zero-sized (or undefined) terrain. */
    if (height < 1) {
        fprintf(stderr, "Height must be a positive integer.\n");
        return EXIT_FAILURE;
    }
    width = pow(height, 0.48);
    printf("Spawning a terrain of %ix%i\n", width, width);

    /* One contiguous block of cells plus a table of row pointers into it. */
    int **terrain = calloc(width, sizeof *terrain);
    int *data = calloc((size_t)width * width, sizeof *data);
    if (terrain == NULL || data == NULL) {
        fprintf(stderr, "Can't allocate a terrain of %ix%i.\n", width, width);
        free(data);
        free(terrain);
        return EXIT_FAILURE;
    }
    for (int i = 0; i < width; i++) {
        terrain[i] = data + i * width;
    }

    terrain[width / 2][width / 2] = height;
    apply_gravity(width, terrain);

    if (argc > 2) {
        if (strcmp(argv[2], "/dev/null") != 0) {
            printf("Saving terrain to file...\n");
            if (save_terrain(width, terrain, argv[2]) == 0) {
                printf("Terrain saved\n");
            } else {
                printf("Can't save terrain\n");
                status = EXIT_FAILURE;
            }
        }
    }
    else {
        show_terrain(width, terrain);
    }

    free(data);
    free(terrain);
    return status;
}

View File

@ -66,59 +66,58 @@ def apply_gravity(terrain):
if not did_someting:
return
@numba.njit(numba.void(numba.int64[:,:]))
def apply_gravity(terrain):
"""
$ python -m pyperf timeit --fast -s 'from examples.sandpile_numpy import main' 'main(10_000, False)'
...........
Mean +- std dev: 100 ms +- 5 ms
"""
shape = np.shape(terrain)
while True:
did_someting = False
for x, y in np.ndindex(shape):
if terrain[x, y] >= 4:
terrain[x,y] -= 4
terrain[x - 1][y] += 1
terrain[x + 1][y] += 1
terrain[x][y + 1] += 1
terrain[x][y - 1] += 1
did_someting = True
# @numba.njit(numba.void(numba.int64[:,:]))
# def apply_gravity(terrain):
# """
# $ python -m pyperf timeit --fast -s 'from examples.sandpile_numpy import main' 'main(10_000, False)'
# ...........
# Mean +- std dev: 100 ms +- 5 ms
# """
# shape = np.shape(terrain)
# while True:
# did_someting = False
# for x, y in np.ndindex(shape):
# if terrain[x, y] >= 4:
# terrain[x,y] -= 4
# terrain[x - 1][y] += 1
# terrain[x + 1][y] += 1
# terrain[x][y + 1] += 1
# terrain[x][y - 1] += 1
# did_someting = True
#
# if not did_someting:
# return
if not did_someting:
return
@numba.njit(numba.void(numba.int64[:,:]))
def apply_gravity(terrain):
    """Topple the sandpile in place until every cell holds fewer than 4 grains.

    Cells holding at least 4000 grains shed a quarter of their content
    to each neighbour in one go; cells holding 4 or more shed one grain
    per neighbour. Sweeps repeat until one full pass changes nothing.

    Timing noted by the author: 10k sand grains in about 1.5s.
    """
    shape = np.shape(terrain)
    while True:
        toppled = False
        for x, y in np.ndindex(shape):
            grains = terrain[x, y]
            if grains >= 4000:
                # Big pile: hand out a quarter to each neighbour at once.
                share = grains // 4
                terrain[x, y] = grains % 4
                terrain[x - 1, y] += share
                terrain[x + 1, y] += share
                terrain[x, y + 1] += share
                terrain[x, y - 1] += share
                toppled = True
            elif grains >= 4:
                # Small pile: one grain to each neighbour.
                terrain[x, y] -= 4
                terrain[x - 1, y] += 1
                terrain[x + 1, y] += 1
                terrain[x, y + 1] += 1
                terrain[x, y - 1] += 1
                toppled = True
        if not toppled:
            return
# @numba.njit(numba.void(numba.int64[:,:]))
# def apply_gravity(terrain):
# """Can handle 10k sand grains in 1.5s."""
# shape = np.shape(terrain)
# while True:
# did_someting = False
# for x, y in np.ndindex(shape):
# if terrain[x, y] >= 4000:
# div, terrain[x, y] = divmod(terrain[x][y], 4)
# terrain[x - 1][y] += div
# terrain[x + 1][y] += div
# terrain[x][y + 1] += div
# terrain[x][y - 1] += div
# did_someting = True
# elif terrain[x, y] >= 4:
# terrain[x,y] -= 4
# terrain[x - 1][y] += 1
# terrain[x + 1][y] += 1
# terrain[x][y + 1] += 1
# terrain[x][y - 1] += 1
# did_someting = True
#
# if not did_someting:
# return
def main(height, show=True):
width = int(height**0.5) + 1
terrain = np.zeros((width, width), dtype=np.int64)
terrain[width // 2, width // 2] = height
begin = perf_counter()
apply_gravity(terrain)
if show:

View File

@ -145,51 +145,40 @@ C'est l'heure du live coding !
# À vous !
Simulons un tas de sable.
Simulez un tas de sable, moi je calcule le nombre d'or.
Ne vous souciez pas des perfs, on s'en occupera.
Vous avez 10mn.
notes:
Leur laisser ~15mn.
voir sandpile.py
# Les outils
## Les outils en ligne de commande
`time`, un outil POSIX, mais aussi une fonction native de bash :
```shell
#!cache time -p python examples/sandpile.py 16
```
Mais `time` ne teste qu'une fois.
notes:
real 0m0.719s # C'est le temps « sur le mur »
user 0m0.521s # Temps CPU passé « dans Python »
sys 0m0.195s # Temps CPU passé dans le kernel
Je mesure mes perfs, puis ils mesurent leurs perfs.
## Hyperfine
`hyperfine` teste plusieurs fois :
## `pyperf command`
```shell
#!cache hyperfine "python examples/sandpile.py 1000"
```
#!cache pyperf command python examples/phi1.py 3
#!cache pyperf command python examples/phi1.py 6
#!cache pyperf command python examples/phi1.py 9
```
## Petite parenthèse
Mais attention, démarrer un processus Python n'est pas gratuit :
```shell
#!cache hyperfine "python -c pass"
#!cache pyperf command python -c pass
```
notes:
@ -202,13 +191,13 @@ N'essayez pas de retenir les chiffres, retenez les faits.
Et puis il peut dépendre de la version de Python, des options de compilation, ... :
```shell
$ hyperfine "~/.local/bin/python3.10 -c pass" # Avec pydebug
Benchmark 1: ~/.local/bin/python3.10 -c pass
Time (mean ± σ): 37.6 ms ± 0.6 ms
$ pyperf command ~/.local/bin/python3.10 -c pass
.....................
command: Mean +- std dev: 37.6 ms +- 0.6 ms
$ hyperfine "/usr/bin/python3.10 -c pass" # Sans pydebug
Benchmark 1: /usr/bin/python3.10 -c pass
Time (mean ± σ): 19.1 ms ± 0.8 ms
$ pyperf command /usr/bin/python3.10 -c pass
.....................
command: Mean +- std dev: 14.4 ms +- 0.4 ms
```
notes:
@ -216,46 +205,14 @@ notes:
Leur parler de `--enable-optimizations` (PGO).
## timeit
## `pyperf timeit`
Timeit c'est dans la stdlib de Python, ça s'utilise en ligne de commande ou depuis Python.
Il existe aussi `timeit` dans la stdlib, mais je préfère `pyperf timeit` :
```shell
#!cache python -m timeit -s 'import sandpile' 'sandpile.main(1000, show=False)'
#!cache pyperf timeit --setup 'from examples.phi1 import approx_phi_up_to' 'approx_phi_up_to(3)'
```
Mais il ne vous donne que le meilleur temps, ce qui n'est pas toujours représentatif.
## pyperf
Il n'est pas dans la stdlib, mais il remplace avantageusement `timeit` :
```shell
#!cache python -m pyperf timeit --setup 'import sandpile' 'sandpile.main(1000, show=False)'
```
notes:
Avec hyperfine on teste combien de temps ça prend à Python **de
démarrer** puis d'exécuter `pass`, ici on teste combien de temps ça
prend d'exécuter `pass`.
## pyperf
Digression, `pyperf` peut aussi remplacer `hyperfine` :
```shell
#!cache python -m pyperf command python examples/sandpile.py 1000
```
notes:
Avec hyperfine on teste combien de temps ça prend à Python **de
démarrer** puis d'exécuter `pass`, ici on teste combien de temps ça
prend d'exécuter `pass`.
## Les outils — À vous !
@ -263,12 +220,14 @@ Effectuez quelques mesures sur votre implémentation.
Tentez d'en déterminer la complexité en fonction du nombre de grains.
Explorez les limites de vos implémentations.
# Profilage
`time`, `timeit`, `hyperfine`, `pyperf` c'est bien pour mesurer, comparer.
`pyperf` c'est bien pour mesurer, comparer.
`cProfile` peut nous aider à trouver la fonction coupable.
Le profilage peut nous aider à trouver la fonction coupable.
## cProfile, exemple
@ -278,22 +237,6 @@ Tentez d'en déterminer la complexité en fonction du nombre de grains.
```
## cProfile, exemple
Testons :
```python
#!sed -n '/import sys/,$p' examples/phi1.py
```
```text
#!cache time -p python examples/phi1.py 10
```
C'est déjà lent, et pour `20` c'est interminable...
## cProfile, exemple
Sortons cProfile :
@ -412,7 +355,7 @@ $ python -m snakeviz phi5.prof
```
#!if [ ! -f .cache/phi5.prof ]; then python -m cProfile -o .cache/phi5.prof examples/phi5.py 2000 >/dev/null 2>&1; fi
#!if [ ! -f output/phi5-snakeviz.png ]; then python -m snakeviz -s .cache/phi5.prof & TOKILL=$!; sleep 1; cutycapt --min-width=1024 --delay=500 --url=http://127.0.0.1:8080/snakeviz/%2Ftmp%2Fphi5.prof --out=output/phi5-snakeviz.png ; kill $TOKILL; fi
#!if [ ! -f output/phi5-snakeviz.png ]; then python -m snakeviz -s .cache/phi5.prof & TOKILL=$!; sleep 1; cutycapt --min-width=1024 --delay=500 --url=file://$(pwd)/.cache/phi5.prof --out=output/phi5-snakeviz.png ; kill $TOKILL; fi
## Snakeviz
@ -436,15 +379,23 @@ $ scalene phi5.py 100000
![](phi5-scalene.png)
## line_profiler
```shell
$ python -m pip install line_profiler
#!cache python -m kernprof --view --prof-mod examples/phi5.py --line-by-line examples/phi5.py 100000
```
## Aussi
- https://github.com/gaogaotiantian/viztracer
- https://github.com/joerick/pyinstrument
- https://github.com/benfred/py-spy
- https://github.com/pyutils/line_profiler
- https://github.com/sumerc/yappi
- https://github.com/vmprof/vmprof-python
- https://github.com/bloomberg/memray
- https://github.com/pythonprofilers/memory_profiler
## Profilage — À vous !
@ -459,74 +410,38 @@ Cython est un dialecte de Python transpilable en C.
## Cython démo
Sans modifier le code :
```python
#!cat examples/collatz_length.py
$ pip install cython
$ cythonize --inplace examples/phi5cython.py
#!cythonize --inplace examples/phi5cython.py
```
## Cython démo
```
#!cache pyperf timeit --setup 'from examples.phi5 import approx_phi_up_to' 'approx_phi_up_to(100_000)'
#!cache pyperf timeit --setup 'from examples.phi5cython import approx_phi_up_to' 'approx_phi_up_to(100_000)'
```
## Cython démo
```shell
$ python -m pyperf timeit \
> -s 'from collatz_length import collatz_length'
> 'collatz_length(837799)'
#!cache python -m pyperf timeit --fast --setup 'from examples.collatz_length import collatz_length' 'collatz_length(837799)'
```
En annotant le fichier on permet à cython d'utiliser des types natifs.
```shell
$ cythonize --inplace collatz_length_cython.py
#!if ! [ -f examples/collatz_length_cython.*.so ] ; then cythonize --inplace examples/collatz_length_cython.py; fi
```
```shell
$ python -m pyperf timeit \
> -s 'from collatz_length import collatz_length'
> 'collatz_length(837799)'
#!cache python -m pyperf timeit --fast -s 'from examples.collatz_length import collatz_length' 'collatz_length(837799)' # faster
#!# Beware: the cythonized run uses `-s` while the non-cythonized one uses `--setup`, just to get two distinct cache buckets :D
```
Et ainsi réduire les allers-retours coûteux entre le C et Python.
## Cython annotate
```shell
$ cython -a collatz_length.py
#!if ! [ -f examples/collatz_length.html ] ; then cython -a examples/collatz_length.py; fi
#!if ! [ -f output/collatz_length.png ] ; then cutycapt --min-width=1024 --delay=500 --url=file://$(pwd)/examples/collatz_length.html --out=output/collatz_length.png; fi
#!cache cython --annotate examples/phi5cython.py
#!if ! [ -f output/phi5cython.png ] ; then cutycapt --min-width=1024 --delay=500 --url=file://$(pwd)/examples/phi5cython.html --out=output/phi5cython.png; fi
```
![](collatz_length.png)
## Cython annotated
```python
#!cat examples/collatz_length_annotated.py
```
```shell
$ cythonize --inplace collatz_length_annotated.py
```
#!if ! [ -f examples/collatz_length_annotated.*.so ] ; then cythonize --inplace examples/collatz_length_annotated.py; fi
```shell
$ python -m pyperf timeit \
> -s 'from collatz_length_annotated import collatz_length'
> 'collatz_length(837799)'
#!cache python -m pyperf timeit --fast -s 'from examples.collatz_length_annotated import collatz_length' 'collatz_length(837799)'
```
## Cython annotate again
```shell
$ cython -a examples/collatz_length_annotated.py
#!if ! [ -f examples/collatz_length_annotated.html ] ; then cython -a examples/collatz_length_annotated.py; fi
#!if ! [ -f output/collatz_length_annotated.png ] ; then cutycapt --min-width=1024 --delay=500 --url=file://$(pwd)/examples/collatz_length_annotated.html --out=output/collatz_length_annotated.png; fi
```
![](collatz_length_annotated.png)
![](phi5cython.png)
## Cython — À vous !
@ -537,16 +452,16 @@ $ cython -a examples/collatz_length_annotated.py
Numba est un `JIT` : « Just In Time compiler ».
```python
#!
#!cat examples/collatz_length_numba.py
```
## Numba démo
```shell
$ python -m pyperf timeit \
> -s 'from examples.collatz_length_numba import collatz_length' \
> 'collatz_length(837799)'
#!cache python -m pyperf timeit --fast -s 'from examples.collatz_length_numba import collatz_length' 'collatz_length(837799)'
#!cache pyperf timeit --setup 'from examples.collatz_length import collatz_length' 'collatz_length(837799)'
#!cache pyperf timeit --setup 'from examples.collatz_length_numba import collatz_length' 'collatz_length(837799)'
```
## numba — À vous !
@ -563,15 +478,18 @@ mypyc est un compilateur qui s'appuie sur les annotationes de type mypy :
## mypyc demo
```shell
$ mypyc examples/collatz_length_mypy.py
#!if ! [ -f collatz_length_mypy.*.so ] ; then mypyc examples/collatz_length_mypy.py; fi
$ pip install mypy
#!cd examples; mypyc collatz_length_mypy.py
$ mypyc collatz_length_mypy.py
```
## mypyc demo
```shell
$ python -m pyperf timeit \
> -s 'from collatz_length_mypy import collatz_length' \
> 'collatz_length(837799)'
#!cache python -m pyperf timeit --fast -s 'from collatz_length_mypy import collatz_length' 'collatz_length(837799)'
#!cache pyperf timeit --setup 'from examples.collatz_length import collatz_length' 'collatz_length(837799)'
#!cache pyperf timeit --setup 'from examples.collatz_length_mypy import collatz_length' 'collatz_length(837799)' 2>/dev/null
```
## mypyc — À vous !
@ -588,17 +506,21 @@ pythran est un compilateur pour du code scientifique :
## Pythran demo
```shell
$ pip install pythran
$ pythran examples/collatz_length_pythran.py
#!if ! [ -f collatz_length_pythran.*.so ]; then pythran examples/collatz_length_pythran.py; fi
#!if ! [ -f examples/collatz_length_pythran.*.so ]; then cd examples; pythran collatz_length_pythran.py; fi
```
## Pythran demo
```shell
$ python -m pyperf timeit \
> -s 'from collatz_length_pythran import collatz_length' \
> 'collatz_length(837799)'
#!cache python -m pyperf timeit --fast -s 'from collatz_length_pythran import collatz_length' 'collatz_length(837799)'
#!cache pyperf timeit --setup 'from examples.collatz_length import collatz_length' 'collatz_length(837799)'
#!cache pyperf timeit --setup 'from examples.collatz_length_pythran import collatz_length' 'collatz_length(837799)'
```
## pythran — À vous !
@ -607,15 +529,15 @@ $ python -m pyperf timeit \
Aussi un compilateur, aussi utilisable pour distribuer une application.
```shell
$ pip install nuitka
$ python -m nuitka --module collatz_length_nuitka.py
#!if ! [ -f collatz_length_nuitka.*.so ]; then python -m nuitka --module examples/collatz_length_nuitka.py >/dev/null; fi
#!if ! [ -f examples/collatz_length_nuitka.*.so ]; then (cd examples/; python -m nuitka --module collatz_length_nuitka.py >/dev/null); fi
```
```shell
$ python -m pyperf timeit \
> -s 'from collatz_length_nuitka import collatz_length' \
> 'collatz_length(837799)'
#!cache python -m pyperf timeit --fast -s 'from collatz_length_nuitka import collatz_length' 'collatz_length(837799)'
#!cache pyperf timeit --setup 'from examples.collatz_length import collatz_length' 'collatz_length(837799)'
#!cache --force -- pyperf timeit --setup 'from examples.collatz_length_nuitka import collatz_length' 'collatz_length(837799)'
```
@ -627,14 +549,13 @@ https://github.com/pfalcon/awesome-python-compilers
# Hand crafted C
```c
#!sed -n '/int collatz_length/,/^$/p' examples/my_collatz_length.c
#!sed -n '/int collatz_length/,$p' examples/my_collatz_length.c
```
Mais comment l'utiliser ?
## Hand crafted C
### Avec Cython
## Avec Cython
```cpython
#!cat examples/collatz_length_cython_to_c.pyx
@ -645,9 +566,15 @@ $ cythonize -i examples/collatz_length_cython_to_c.pyx
#!if ! [ -f examples/collatz_length_cython_to_c.*.so ] ; then cythonize -i examples/collatz_length_cython_to_c.pyx; fi
```
## Avec Cython
```shell
$ python -m pyperf timeit \
> -s 'from collatz_length_cython_to_c import collatz_length' \
> 'collatz_length(837799)'
#!cache python -m pyperf timeit --fast -s 'from examples.collatz_length_cython_to_c import collatz_length' 'collatz_length(837799)'
#!cache pyperf timeit --setup 'from examples.collatz_length import collatz_length' 'collatz_length(837799)'
#!cache pyperf timeit --setup 'from examples.collatz_length_cython_to_c import collatz_length' 'collatz_length(837799)'
```
## TODO
https://docs.python.org/3.12/howto/perf_profiling.html#perf-profiling

View File

@ -6,3 +6,4 @@ numba
mypy
pythran
nuitka
line_profiler