From b438485ea7d489a0004a9701d6cf9215b52f91ad Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 18 Nov 2017 01:12:36 -0300 Subject: [PATCH 1/3] gcc vectorization With -O3 -march=native and using strlen, GCC decided to vectorize the loop. See: https://godbolt.org/g/EEwvam --- pyext-myclib/myclib.c | 4 +++- pyext-myclib/setup.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pyext-myclib/myclib.c b/pyext-myclib/myclib.c index bb22b25..22b7760 100644 --- a/pyext-myclib/myclib.c +++ b/pyext-myclib/myclib.c @@ -1,8 +1,10 @@ #include "myclib.h" +#include <string.h> uint64_t count_byte_doubles(char * str) { uint64_t count = 0; - while (str[0] && str[1]) { + int len = strlen(str); + while (len--) { if (str[0] == str[1]) count++; str++; } diff --git a/pyext-myclib/setup.py b/pyext-myclib/setup.py index cb5ca9d..ff885bc 100644 --- a/pyext-myclib/setup.py +++ b/pyext-myclib/setup.py @@ -6,6 +6,7 @@ Extension( '_myclib', sources=['myclib.i', 'myclib.c'], + extra_compile_args=["-march=native","-O3"], depends=['setup.py', 'mylib.h'] ) ] From d412b70afb97a018d068ab25b5031b80cda24f23 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 18 Nov 2017 01:36:54 -0300 Subject: [PATCH 2/3] add a numba example --- doubles_all.py | 16 ++++++++++++++++ requirements.txt | 1 + 2 files changed, 17 insertions(+) diff --git a/doubles_all.py b/doubles_all.py index 742fd4c..3a6a199 100644 --- a/doubles_all.py +++ b/doubles_all.py @@ -9,6 +9,18 @@ sys.path.append('./pyext-myclib') import myclib # <-- Importing C Implemented Library +from numba import jit + +@jit(nopython=True) +def count_doubles_once_numba(val): + total = 0 + chars = iter(val) + c1 = next(chars) + for c2 in chars: + if c1 == c2: + total += 1 + c1 = c2 + return total def count_doubles(val): @@ -67,6 +79,10 @@ def test_pure_python_once(benchmark): print(benchmark(count_doubles_once, val)) +def test_pure_python_once_numba(benchmark): + print(benchmark(count_doubles_once_numba, val.encode())) + + def 
test_itertools(benchmark): print(benchmark(count_doubles_itertools, val)) diff --git a/requirements.txt b/requirements.txt index 86c9ef6..a04ca32 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ pytest pytest-benchmark numpy +numba From a9c6b969155ab2f3ad41d61ee674cb25140edf6c Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Sat, 18 Nov 2017 02:33:23 -0300 Subject: [PATCH 3/3] add numpy+numba and jit compilation cache --- doubles_all.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/doubles_all.py b/doubles_all.py index 3a6a199..be76788 100644 --- a/doubles_all.py +++ b/doubles_all.py @@ -11,7 +11,7 @@ import myclib # <-- Importing C Implemented Library from numba import jit -@jit(nopython=True) +@jit(nopython=True, cache=True) def count_doubles_once_numba(val): total = 0 chars = iter(val) @@ -64,6 +64,11 @@ def count_double_numpy(val): return np.sum(ng[:-1] == ng[1:]) +@jit(nopython=True, cache=True) +def count_double_numpy_numba(ng): + return np.sum(ng[:-1] == ng[1:]) + + def count_doubles_comprehension(val): return sum(1 for c1, c2 in zip(val, val[1:]) if c1 == c2) @@ -95,6 +100,11 @@ def test_numpy(benchmark): print(benchmark(count_double_numpy, val)) +def test_numpy_numba(benchmark): + ng = np.fromstring(val, dtype=np.byte) + print(benchmark(count_double_numpy_numba, ng)) + + def test_python_comprehension(benchmark): print(benchmark(count_doubles_comprehension, val))