From 5776b0f3d5fe1346198aa9bc89765cad1fe547b8 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 1 Jan 2023 17:29:16 -0600 Subject: [PATCH 01/42] Stub function. --- Modules/clinic/mathmodule.c.h | 40 ++++++++++++++++++++++++++++++++++- Modules/mathmodule.c | 27 +++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/Modules/clinic/mathmodule.c.h b/Modules/clinic/mathmodule.c.h index 9fac1037e52528..2072f530de0b8d 100644 --- a/Modules/clinic/mathmodule.c.h +++ b/Modules/clinic/mathmodule.c.h @@ -333,6 +333,44 @@ math_dist(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(math_sumprod__doc__, +"sumprod($module, p, q, /)\n" +"--\n" +"\n" +"Return the sum of products of value from two iterables p and q.\n" +"\n" +"Roughly equivalent to:\n" +"\n" +" sum(itertools.starmap(operator.mul, zip(vec1, vec2, strict=True)))\n" +"\n" +"For float and mixed int/float inputs, the products and an running\n" +"sum are computed in quad precison and the result is rounded back\n" +"to double precision."); + +#define MATH_SUMPROD_METHODDEF \ + {"sumprod", _PyCFunction_CAST(math_sumprod), METH_FASTCALL, math_sumprod__doc__}, + +static PyObject * +math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q); + +static PyObject * +math_sumprod(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *p; + PyObject *q; + + if (!_PyArg_CheckPositional("sumprod", nargs, 2, 2)) { + goto exit; + } + p = args[0]; + q = args[1]; + return_value = math_sumprod_impl(module, p, q); + +exit: + return return_value; +} + PyDoc_STRVAR(math_pow__doc__, "pow($module, x, y, /)\n" "--\n" @@ -917,4 +955,4 @@ math_ulp(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=c2c2f42452d63734 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ac2a3f407dadd9ab input=a9049054013a1b77]*/ diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 49c0293d4f5ce3..e86db64f86cdcb 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2819,6 +2819,32 @@ For example, the hypotenuse of a 3/4/5 right triangle is:\n\ 5.0\n\ "); +/*[clinic input] +math.sumprod + + p: object + q: object + / + +Return the sum of products of value from two iterables p and q. + +Roughly equivalent to: + + sum(itertools.starmap(operator.mul, zip(vec1, vec2, strict=True))) + +For float and mixed int/float inputs, the products and an running +sum are computed in quad precison and the result is rounded back +to double precision. +[clinic start generated code]*/ + +static PyObject * +math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) +/*[clinic end generated code: output=6722dbfe60664554 input=43acbb5dc736a1f5]*/ +{ + return PyFloat_FromDouble(2.71828); +} + + /* pow can't use math_2, but needs its own wrapper: the problem is that an infinite result can arise either as a result of overflow (in which case OverflowError should be raised) or as a result of @@ -3933,6 +3959,7 @@ static PyMethodDef math_methods[] = { {"sqrt", math_sqrt, METH_O, math_sqrt_doc}, {"tan", math_tan, METH_O, math_tan_doc}, {"tanh", math_tanh, METH_O, math_tanh_doc}, + MATH_SUMPROD_METHODDEF MATH_TRUNC_METHODDEF MATH_PROD_METHODDEF MATH_PERM_METHODDEF From 8ffd49e00799c7b5c170a9c3384c8a2aa3b0ab2d Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 1 Jan 2023 17:47:12 -0600 Subject: [PATCH 02/42] Core fuctionality without error handling --- Modules/mathmodule.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index e86db64f86cdcb..f83f844515dc26 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2841,7 +2841,27 @@ static PyObject * math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) /*[clinic end generated code: output=6722dbfe60664554 input=43acbb5dc736a1f5]*/ { - return PyFloat_FromDouble(2.71828); + PyObject *p_it, *q_it, *total; + PyObject *p_i, *q_i, *term_i, *new_total; + + p_it = PyObject_GetIter(p); + q_it = PyObject_GetIter(q); + total = PyLong_FromLong(0); + while (1) { + p_i = PyIter_Next(p_it); + q_i = PyIter_Next(q_it); + if (q_i == NULL) + break; + term_i = PyNumber_Multiply(p_i, q_i); + new_total = PyNumber_Add(total, term_i); + Py_SETREF(total, new_total); + Py_DECREF(p_i); + Py_DECREF(q_i); + Py_DECREF(term_i); + } + Py_DECREF(p_it); + Py_DECREF(q_it); + return total; } From f5a3ecb7b81c79eb190a0a874ce1ba18e1c3fc52 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 1 Jan 2023 18:25:24 -0600 Subject: [PATCH 03/42] Test core functionality. --- Lib/test/test_math.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index bf0d0a56e6ac8b..305739064f8053 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -4,6 +4,7 @@ from test.support import verbose, requires_IEEE_754 from test import support import unittest +import fractions import itertools import decimal import math @@ -1202,6 +1203,40 @@ def testLog10(self): self.assertEqual(math.log(INF), INF) self.assertTrue(math.isnan(math.log10(NAN))) + def testSumProd(self): + sumprod = math.sumprod + Decimal = decimal.Decimal + Fraction = fractions.Fraction + + # Core functionality + self.assertEqual(sumprod(iter([10, 20, 30]), (1, 2, 3)), 140) + self.assertEqual(sumprod([1.5, 2.5], [3.5, 4.5]), 16.5) + self.assertEqual(sumprod([], []), 0) + + # Type preservation and coercion + for v in [ + (10, 20, 30), + (1.5, -2.5), + (Fraction(3, 5), Fraction(4, 5)), + (Decimal(3.5), Decimal(4.5)), + (2.5, 10), # float/int + (2.5, Fraction(3, 5)), # float/fraction + (25, Fraction(3, 5)), # int/fraction + (25, Decimal(4.5)), # int/decimal + ]: + for p, q in [(v, v), (v, v[::-1])]: + with self.subTest(p=p, q=q): + expected = sum(p_i * q_i for p_i, q_i in zip(p, q, strict=True)) + actual = sumprod(p, q) + self.assertEqual(expected, actual) + self.assertEqual(type(expected), type(actual)) + + # Bad arguments + self.assertRaises(TypeError, sumprod) # No args + self.assertRaises(TypeError, sumprod, []) # One arg + self.assertRaises(TypeError, sumprod, [], [], []) # Three args + + def testModf(self): self.assertRaises(TypeError, math.modf) From 37fcb0a6954f499c6928a637f1e2db392fc8fca8 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 1 Jan 2023 18:36:58 -0600 Subject: [PATCH 04/42] Add docs --- Doc/library/math.rst | 17 +++++++++++++++++ Modules/mathmodule.c | 4 ++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/Doc/library/math.rst b/Doc/library/math.rst index aeebcaf6ab0864..2c38c44af260ff 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -291,6 +291,23 @@ Number-theoretic and representation functions .. versionadded:: 3.7 +.. function:: sumprod(p, q) + + Return the sum of products of values from two iterables *p* and *q*. + + raises :exc:`valueerror` if the inputs do not have the same length. + + roughly equivalent to:: + + sum(itertools.starmap(operator.mul, zip(p, q, strict=true))) + + For float and mixed int/float inputs, the products and and running + sum are computed in quad precison and the result is rounded back + to double precision. + + .. versionadded:: 3.12 + + .. function:: trunc(x) Return *x* with the fractional part diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index f83f844515dc26..3ecbb956f465a9 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2826,13 +2826,13 @@ math.sumprod q: object / -Return the sum of products of value from two iterables p and q. +Return the sum of products of values from two iterables p and q. Roughly equivalent to: sum(itertools.starmap(operator.mul, zip(vec1, vec2, strict=True))) -For float and mixed int/float inputs, the products and an running +For float and mixed int/float inputs, the products and and running sum are computed in quad precison and the result is rounded back to double precision. [clinic start generated code]*/ From 6cfc5ccefedba7a3efd21dae180af0131b5ae251 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 1 Jan 2023 18:43:06 -0600 Subject: [PATCH 05/42] Check for non-iterable inputs --- Lib/test/test_math.py | 3 ++- Modules/mathmodule.c | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index 305739064f8053..3fa9fbc1b65e62 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1235,7 +1235,8 @@ def testSumProd(self): self.assertRaises(TypeError, sumprod) # No args self.assertRaises(TypeError, sumprod, []) # One arg self.assertRaises(TypeError, sumprod, [], [], []) # Three args - + self.assertRaises(TypeError, sumprod, None, [10]) # Non-iterable + self.assertRaises(TypeError, sumprod, [10], None) # Non-iterable def testModf(self): self.assertRaises(TypeError, math.modf) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 3ecbb956f465a9..d3fee046f3e005 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2845,8 +2845,20 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) PyObject *p_i, *q_i, *term_i, *new_total; p_it = PyObject_GetIter(p); + if (p_it == NULL) { + return NULL; + } q_it = PyObject_GetIter(q); + if (q_it == NULL) { + Py_DECREF(p_it); + return NULL; + } total = PyLong_FromLong(0); + if (total == NULL) { + Py_DECREF(p_it); + Py_DECREF(q_it); + return NULL; + } while (1) { p_i = PyIter_Next(p_it); q_i = PyIter_Next(q_it); From fbeca6b24caf1cabfd197100f253123120627e27 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 1 Jan 2023 21:22:47 -0600 Subject: [PATCH 06/42] Handle uneven lengths and error returns from PyIter_Next(). --- Lib/test/test_math.py | 14 +++++++++++ Modules/mathmodule.c | 54 ++++++++++++++++++++++++++++++++++++++----- 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index 3fa9fbc1b65e62..33695a932de770 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1238,6 +1238,20 @@ def testSumProd(self): self.assertRaises(TypeError, sumprod, None, [10]) # Non-iterable self.assertRaises(TypeError, sumprod, [10], None) # Non-iterable + # Uneven lengths + self.assertRaises(ValueError, sumprod, [10, 20], [30]) + # self.assertRaises(ValueError, sumprod, [10], [20, 30]) + + # Error in iterator + def raise_after(n): + for i in range(n): + yield i + raise RuntimeError + with self.assertRaises(RuntimeError): + sumprod(range(10), raise_after(5)) + with self.assertRaises(RuntimeError): + sumprod(raise_after(5), range(10)) + def testModf(self): self.assertRaises(TypeError, math.modf) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index d3fee046f3e005..ab4b94dff82715 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2842,7 +2842,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) /*[clinic end generated code: output=6722dbfe60664554 input=43acbb5dc736a1f5]*/ { PyObject *p_it, *q_it, *total; - PyObject *p_i, *q_i, *term_i, *new_total; + PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL; p_it = PyObject_GetIter(p); if (p_it == NULL) { @@ -2860,20 +2860,62 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) return NULL; } while (1) { + int p_stopped = 0; + int q_stopped = 0; + + assert (p_i == NULL); + assert (q_i == NULL); + assert (term_i == NULL); + assert (new_total == NULL); + + assert (p_it != NULL); + assert (q_it != NULL); + assert (total != NULL); + p_i = PyIter_Next(p_it); + if (p_i == NULL) { + if (PyErr_Occurred()) { + goto err_exit; + } + p_stopped = 1; + } q_i = PyIter_Next(q_it); - if (q_i == NULL) - break; + if (q_i == NULL) { + if (PyErr_Occurred()) { + goto err_exit; + } + q_stopped = 1; + } + if (p_stopped && q_stopped) { + goto normal_exit; + } + if (p_stopped != q_stopped) { + PyErr_Format(PyExc_ValueError, "Inputs are not the same length"); + goto err_exit; + } term_i = PyNumber_Multiply(p_i, q_i); new_total = PyNumber_Add(total, term_i); Py_SETREF(total, new_total); - Py_DECREF(p_i); - Py_DECREF(q_i); - Py_DECREF(term_i); + new_total = NULL; + Py_CLEAR(p_i); + Py_CLEAR(q_i); + Py_CLEAR(term_i); } + + normal_exit: Py_DECREF(p_it); Py_DECREF(q_it); return total; + + err_exit: + Py_DECREF(p_it); + Py_DECREF(q_it); + Py_DECREF(total); + Py_XDECREF(p_i); + Py_XDECREF(q_i); + Py_XDECREF(term_i); + Py_XDECREF(new_total); + return NULL; } From 40cfc8ffb70bccb211e2292c6329ea1e585f1f20 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 1 Jan 2023 21:43:48 -0600 Subject: [PATCH 07/42] Handle errors arising during the multiplications or additions. --- Lib/test/test_math.py | 19 ++++++++++++++++++- Modules/mathmodule.c | 8 +++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index 33695a932de770..7667049879551a 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1240,7 +1240,7 @@ def testSumProd(self): # Uneven lengths self.assertRaises(ValueError, sumprod, [10, 20], [30]) - # self.assertRaises(ValueError, sumprod, [10], [20, 30]) + self.assertRaises(ValueError, sumprod, [10], [20, 30]) # Error in iterator def raise_after(n): @@ -1252,6 +1252,23 @@ def raise_after(n): with self.assertRaises(RuntimeError): sumprod(raise_after(5), range(10)) + # Error in multiplication + class BadMultiply: + def __mul__(self, other): + raise RuntimeError + def __rmul__(self, other): + raise RuntimeError + with self.assertRaises(RuntimeError): + sumprod([10, BadMultiply(), 30], [1, 2, 3]) + with self.assertRaises(RuntimeError): + sumprod([1, 2, 3], [10, BadMultiply(), 30]) + + # Error in addition + with self.assertRaises(TypeError): + sumprod(['abc', 3], [5, 10]) + with self.assertRaises(TypeError): + sumprod([5, 10], ['abc', 3]) + def testModf(self): self.assertRaises(TypeError, math.modf) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index ab4b94dff82715..d16c11be4565b5 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2830,7 +2830,7 @@ Return the sum of products of values from two iterables p and q. Roughly equivalent to: - sum(itertools.starmap(operator.mul, zip(vec1, vec2, strict=True))) + sum(itertools.starmap(operator.mul, zip(p, q, strict=True))) For float and mixed int/float inputs, the products and and running sum are computed in quad precison and the result is rounded back @@ -2894,7 +2894,13 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) goto err_exit; } term_i = PyNumber_Multiply(p_i, q_i); + if (term_i == NULL) { + goto err_exit; + } new_total = PyNumber_Add(total, term_i); + if (new_total == NULL) { + goto err_exit; + } Py_SETREF(total, new_total); new_total = NULL; Py_CLEAR(p_i); From 2ac24f533ca3bfeb394f1cd43a0bf3adf672e4f3 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 1 Jan 2023 21:51:37 -0600 Subject: [PATCH 08/42] Test special values --- Lib/test/test_math.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index 7667049879551a..06f8d019647b5a 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1269,6 +1269,17 @@ def __rmul__(self, other): with self.assertRaises(TypeError): sumprod([5, 10], ['abc', 3]) + # Special values should give the same as the pure python recipe + self.assertEqual(sumprod([10.1, math.inf], [20.2, 30.3]), math.inf) + self.assertEqual(sumprod([10.1, math.inf], [math.inf, 30.3]), math.inf) + self.assertEqual(sumprod([10.1, math.inf], [math.inf, math.inf]), math.inf) + self.assertEqual(sumprod([10.1, -math.inf], [20.2, 30.3]), -math.inf) + self.assertTrue(math.isnan(sumprod([10.1, math.inf], [-math.inf, math.inf]))) + self.assertTrue(math.isnan(sumprod([10.1, math.nan], [20.2, 30.3]))) + self.assertTrue(math.isnan(sumprod([10.1, math.inf], [math.nan, 30.3]))) + self.assertTrue(math.isnan(sumprod([10.1, math.inf], [20.3, math.nan]))) + + def testModf(self): self.assertRaises(TypeError, math.modf) From 1a64fd841ff4b80dfde2a3e1b2cca0b3447e8ad0 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 1 Jan 2023 21:54:55 -0600 Subject: [PATCH 09/42] Add blurb --- .../next/Library/2023-01-01-21-54-46.gh-issue-100485.geNrHS.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2023-01-01-21-54-46.gh-issue-100485.geNrHS.rst diff --git a/Misc/NEWS.d/next/Library/2023-01-01-21-54-46.gh-issue-100485.geNrHS.rst b/Misc/NEWS.d/next/Library/2023-01-01-21-54-46.gh-issue-100485.geNrHS.rst new file mode 100644 index 00000000000000..9f6e593113bb54 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-01-01-21-54-46.gh-issue-100485.geNrHS.rst @@ -0,0 +1 @@ +Add math.sumprod() to compute the sum of products. From 5f4e493cc6245ac527cd50e8e07efc1b3a81eb1d Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 1 Jan 2023 22:06:05 -0600 Subject: [PATCH 10/42] Regenerate clinic files --- Modules/clinic/mathmodule.c.h | 8 ++++---- Modules/mathmodule.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Modules/clinic/mathmodule.c.h b/Modules/clinic/mathmodule.c.h index 2072f530de0b8d..98bcd32ede70b4 100644 --- a/Modules/clinic/mathmodule.c.h +++ b/Modules/clinic/mathmodule.c.h @@ -337,13 +337,13 @@ PyDoc_STRVAR(math_sumprod__doc__, "sumprod($module, p, q, /)\n" "--\n" "\n" -"Return the sum of products of value from two iterables p and q.\n" +"Return the sum of products of values from two iterables p and q.\n" "\n" "Roughly equivalent to:\n" "\n" -" sum(itertools.starmap(operator.mul, zip(vec1, vec2, strict=True)))\n" +" sum(itertools.starmap(operator.mul, zip(p, q, strict=True)))\n" "\n" -"For float and mixed int/float inputs, the products and an running\n" +"For float and mixed int/float inputs, the products and and running\n" "sum are computed in quad precison and the result is rounded back\n" "to double precision."); @@ -955,4 +955,4 @@ math_ulp(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=ac2a3f407dadd9ab input=a9049054013a1b77]*/ +/*[clinic end generated code: output=267c6eb95ea9a8d8 input=a9049054013a1b77]*/ diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index d16c11be4565b5..30a0d7b7989daa 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2839,7 +2839,7 @@ to double precision. static PyObject * math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) -/*[clinic end generated code: output=6722dbfe60664554 input=43acbb5dc736a1f5]*/ +/*[clinic end generated code: output=6722dbfe60664554 input=6fc5c5e00f7b79d1]*/ { PyObject *p_it, *q_it, *total; PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL; From 31fa258d527b4b792cd564fe50f0ae37dc1c41c2 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 3 Jan 2023 11:32:06 -0600 Subject: [PATCH 11/42] Beautify with booleans. --- Modules/mathmodule.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 30a0d7b7989daa..ad3b0130c9646a 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -68,6 +68,7 @@ raised for division by zero and mod by zero. #include /* For _Py_log1p with workarounds for buggy handling of zeros. */ #include "_math.h" +#include #include "clinic/mathmodule.c.h" @@ -2843,6 +2844,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) { PyObject *p_it, *q_it, *total; PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL; + bool p_stopped = false, q_stopped = false; p_it = PyObject_GetIter(p); if (p_it == NULL) { @@ -2860,9 +2862,6 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) return NULL; } while (1) { - int p_stopped = 0; - int q_stopped = 0; - assert (p_i == NULL); assert (q_i == NULL); assert (term_i == NULL); @@ -2877,22 +2876,22 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) if (PyErr_Occurred()) { goto err_exit; } - p_stopped = 1; + p_stopped = true; } q_i = PyIter_Next(q_it); if (q_i == NULL) { if (PyErr_Occurred()) { goto err_exit; } - q_stopped = 1; - } - if (p_stopped && q_stopped) { - goto normal_exit; + q_stopped = true; } if (p_stopped != q_stopped) { PyErr_Format(PyExc_ValueError, "Inputs are not the same length"); goto err_exit; } + if (p_stopped && q_stopped) { + goto normal_exit; + } term_i = PyNumber_Multiply(p_i, q_i); if (term_i == NULL) { goto err_exit; From d19a38bf022e961b6a464b006adb372147425672 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 3 Jan 2023 12:14:18 -0600 Subject: [PATCH 12/42] Add int_path. Needs work on refcount and addition overflow. --- Modules/mathmodule.c | 57 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index ad3b0130c9646a..74e481327a4355 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2820,6 +2820,9 @@ For example, the hypotenuse of a 3/4/5 right triangle is:\n\ 5.0\n\ "); +/* Forward declaration */ +static inline int _check_long_mult_overflow(long a, long b); + /*[clinic input] math.sumprod @@ -2845,6 +2848,8 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) PyObject *p_it, *q_it, *total; PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL; bool p_stopped = false, q_stopped = false; + bool int_path_enabled = true, int_total_in_use = false; + long int_total = 0; p_it = PyObject_GetIter(p); if (p_it == NULL) { @@ -2862,6 +2867,8 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) return NULL; } while (1) { + bool finished, p_is_int, q_is_int; + assert (p_i == NULL); assert (q_i == NULL); assert (term_i == NULL); @@ -2889,7 +2896,55 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) PyErr_Format(PyExc_ValueError, "Inputs are not the same length"); goto err_exit; } - if (p_stopped && q_stopped) { + + finished = p_stopped && q_stopped; + p_is_int = PyLong_CheckExact(p_i); + q_is_int = PyLong_CheckExact(q_i); + + if (int_path_enabled) { + + if (!finished && p_is_int & q_is_int) { + int overflow; + long int_p, int_q; + + int_p = PyLong_AsLongAndOverflow(p_i, &overflow); + if (overflow) { + goto finalize_int_path; + } + int_q = PyLong_AsLongAndOverflow(q_i, &overflow); + if (overflow) { + goto finalize_int_path; + } + if (_check_long_mult_overflow(int_p, int_q)) { + goto finalize_int_path; + } + int_total += int_p * int_q; // XXX Check for addition overflow + int_total_in_use = true; + // Py_CLEAR(p_i); + // Py_CLEAR(q_i); + continue; + } + + finalize_int_path: + // # We're finished, overflowed, or have a non-int + int_path_enabled = false; + if (int_total_in_use) { + term_i = PyLong_FromLong(int_total); + if (term_i == NULL) { + goto err_exit; + } + new_total = PyNumber_Add(total, term_i); + if (new_total == NULL) { + goto err_exit; + } + Py_SETREF(total, new_total); + new_total = NULL; + //Py_CLEAR(term_i); + int_total_in_use = false; + } + } + + if (finished) { goto normal_exit; } term_i = PyNumber_Multiply(p_i, q_i); From a28fd716b415dbaf198b6f1ffb502483a1298312 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 3 Jan 2023 12:46:04 -0600 Subject: [PATCH 13/42] Add more assertions and refcount fixes --- Modules/mathmodule.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 74e481327a4355..1fbad4dd01f961 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2920,8 +2920,8 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) } int_total += int_p * int_q; // XXX Check for addition overflow int_total_in_use = true; - // Py_CLEAR(p_i); - // Py_CLEAR(q_i); + Py_CLEAR(p_i); + Py_CLEAR(q_i); continue; } @@ -2934,16 +2934,17 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) goto err_exit; } new_total = PyNumber_Add(total, term_i); + Py_CLEAR(term_i); if (new_total == NULL) { goto err_exit; } Py_SETREF(total, new_total); new_total = NULL; - //Py_CLEAR(term_i); int_total_in_use = false; } } + assert(!int_total_in_use); if (finished) { goto normal_exit; } From f998ee46a85d03854a04eab0dfd236f408b8f67d Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 3 Jan 2023 13:23:27 -0600 Subject: [PATCH 14/42] Only test for ints when iterators are not stopped. --- Modules/mathmodule.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 1fbad4dd01f961..8558a5aa7ec0a4 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2867,8 +2867,6 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) return NULL; } while (1) { - bool finished, p_is_int, q_is_int; - assert (p_i == NULL); assert (q_i == NULL); assert (term_i == NULL); @@ -2897,13 +2895,9 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) goto err_exit; } - finished = p_stopped && q_stopped; - p_is_int = PyLong_CheckExact(p_i); - q_is_int = PyLong_CheckExact(q_i); - if (int_path_enabled) { - if (!finished && p_is_int & q_is_int) { + if (!p_stopped && !q_stopped && PyLong_CheckExact(p_i) & PyLong_CheckExact(q_i)) { int overflow; long int_p, int_q; @@ -2934,18 +2928,18 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) goto err_exit; } new_total = PyNumber_Add(total, term_i); - Py_CLEAR(term_i); if (new_total == NULL) { goto err_exit; } Py_SETREF(total, new_total); new_total = NULL; + Py_CLEAR(term_i); int_total_in_use = false; } } assert(!int_total_in_use); - if (finished) { + if (p_stopped && q_stopped) { goto normal_exit; } term_i = PyNumber_Multiply(p_i, q_i); From 94a9cbd91254de87005d5ad9d178ce4a9d919fcc Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 3 Jan 2023 14:03:02 -0600 Subject: [PATCH 15/42] Test long addition for overflow --- Modules/mathmodule.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 8558a5aa7ec0a4..ec7921bdb2a78f 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2823,6 +2823,12 @@ For example, the hypotenuse of a 3/4/5 right triangle is:\n\ /* Forward declaration */ static inline int _check_long_mult_overflow(long a, long b); +static inline bool +long_add_will_overflow(long a, long b) +{ + return (a > 0) ? (b > LONG_MAX - a) : (b < LONG_MIN - a); +} + /*[clinic input] math.sumprod @@ -2899,7 +2905,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) if (!p_stopped && !q_stopped && PyLong_CheckExact(p_i) & PyLong_CheckExact(q_i)) { int overflow; - long int_p, int_q; + long int_p, int_q, int_prod; int_p = PyLong_AsLongAndOverflow(p_i, &overflow); if (overflow) { @@ -2912,7 +2918,11 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) if (_check_long_mult_overflow(int_p, int_q)) { goto finalize_int_path; } - int_total += int_p * int_q; // XXX Check for addition overflow + int_prod = int_p * int_q; + if (long_add_will_overflow(int_total, int_prod)) { + goto finalize_int_path; + } + int_total += int_prod; int_total_in_use = true; Py_CLEAR(p_i); Py_CLEAR(q_i); From c7788657d1258381d669a41a500198f5626f0ddb Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 3 Jan 2023 14:13:17 -0600 Subject: [PATCH 16/42] Improve wording regarding extended precision. --- Doc/library/math.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Doc/library/math.rst b/Doc/library/math.rst index 2c38c44af260ff..12ed5806870a7b 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -301,9 +301,8 @@ Number-theoretic and representation functions sum(itertools.starmap(operator.mul, zip(p, q, strict=true))) - For float and mixed int/float inputs, the products and and running - sum are computed in quad precison and the result is rounded back - to double precision. + For float and mixed int/float inputs, the intermediate products and + and sums are computed in extended precision. .. versionadded:: 3.12 From 6d619dd0899e81cdb54718930980add65a4f1dca Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 3 Jan 2023 14:18:15 -0600 Subject: [PATCH 17/42] Add the "finished" summary variable for readability. --- Modules/mathmodule.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index ec7921bdb2a78f..db0138d831ca9b 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2873,6 +2873,8 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) return NULL; } while (1) { + bool finished; + assert (p_i == NULL); assert (q_i == NULL); assert (term_i == NULL); @@ -2900,10 +2902,11 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) PyErr_Format(PyExc_ValueError, "Inputs are not the same length"); goto err_exit; } + finished = p_stopped & q_stopped; if (int_path_enabled) { - if (!p_stopped && !q_stopped && PyLong_CheckExact(p_i) & PyLong_CheckExact(q_i)) { + if (!finished && PyLong_CheckExact(p_i) & PyLong_CheckExact(q_i)) { int overflow; long int_p, int_q, int_prod; @@ -2949,7 +2952,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) } assert(!int_total_in_use); - if (p_stopped && q_stopped) { + if (finished) { goto normal_exit; } term_i = PyNumber_Multiply(p_i, q_i); From 83dbf96c800a8c1aadc37ed8c3caab8b8a79d3cd Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 3 Jan 2023 18:07:05 -0600 Subject: [PATCH 18/42] Add flt path --- Modules/mathmodule.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index db0138d831ca9b..ee3f7f009ac5b9 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2856,6 +2856,8 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) bool p_stopped = false, q_stopped = false; bool int_path_enabled = true, int_total_in_use = false; long int_total = 0; + bool flt_path_enabled = true, flt_total_in_use = false; + double flt_total = 0.0; p_it = PyObject_GetIter(p); if (p_it == NULL) { @@ -2951,6 +2953,38 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) } } + if (flt_path_enabled) { + if (!finished && PyFloat_CheckExact(p_i) & PyFloat_CheckExact(q_i)) { + double flt_p = PyFloat_AS_DOUBLE(p_i); + double flt_q = PyFloat_AS_DOUBLE(q_i); + double new_flt_total = flt_total + flt_p * flt_q; + if (isfinite(new_flt_total)) { + flt_total = new_flt_total; + flt_total_in_use = true; + Py_CLEAR(p_i); + Py_CLEAR(q_i); + continue; + } + // For non-finite values arises, fallback to the slow path + } + // We're finished, overflowed, have a non-float, or had a non-finite value + flt_path_enabled = false; + if (flt_total_in_use) { + term_i = PyFloat_FromDouble(flt_total); + if (term_i == NULL) { + goto err_exit; + } + new_total = PyNumber_Add(total, term_i); + if (new_total == NULL) { + goto err_exit; + } + Py_SETREF(total, new_total); + new_total = NULL; + Py_CLEAR(term_i); + flt_total_in_use = false; + } + } + assert(!int_total_in_use); if (finished) { goto normal_exit; From c2e5f3db8a3347f6eedc29f0f84940422f8f544f Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 3 Jan 2023 20:40:57 -0600 Subject: [PATCH 19/42] add flt/int path --- Modules/mathmodule.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index ee3f7f009ac5b9..54cbf3ce48460a 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2954,9 +2954,31 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) } if (flt_path_enabled) { - if (!finished && PyFloat_CheckExact(p_i) & PyFloat_CheckExact(q_i)) { - double flt_p = PyFloat_AS_DOUBLE(p_i); - double flt_q = PyFloat_AS_DOUBLE(q_i); + + if (!finished) { + double flt_p, flt_q; + bool p_type_float = PyFloat_CheckExact(p_i); + bool q_type_float = PyFloat_CheckExact(q_i); + if (p_type_float && q_type_float) { + flt_p = PyFloat_AS_DOUBLE(p_i); + flt_q = PyFloat_AS_DOUBLE(q_i); + } else if (p_type_float && PyLong_CheckExact(q_i)) { + flt_p = PyFloat_AS_DOUBLE(p_i); + flt_q = PyLong_AsDouble(q_i); + if (flt_q == -1.0 && PyErr_Occurred()) { + PyErr_Clear(); + goto finalize_flt_path; + } + } else if (q_type_float && PyLong_CheckExact(p_i)) { + flt_q = PyFloat_AS_DOUBLE(q_i); + flt_p = PyLong_AsDouble(p_i); + if (flt_p == -1.0 && PyErr_Occurred()) { + PyErr_Clear(); + goto finalize_flt_path; + } + } else { + goto finalize_flt_path; + } double new_flt_total = flt_total + flt_p * flt_q; if (isfinite(new_flt_total)) { flt_total = new_flt_total; @@ -2965,8 +2987,10 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) Py_CLEAR(q_i); continue; } - // For non-finite values arises, fallback to the slow path + // For non-finite values arises, fallback to pure Python arithmetic } + + finalize_flt_path: // We're finished, overflowed, have a non-float, or had a non-finite value flt_path_enabled = false; if (flt_total_in_use) { From b8c9127c59f73ecd4ccde7381ea29ae319bd8d03 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 3 Jan 2023 20:56:16 -0600 Subject: [PATCH 20/42] . --- Modules/mathmodule.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 54cbf3ce48460a..996544031f6b41 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2987,11 +2987,10 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) Py_CLEAR(q_i); continue; } - // For non-finite values arises, fallback to pure Python arithmetic } finalize_flt_path: - // We're finished, overflowed, have a non-float, or had a non-finite value + // We're finished, overflowed, have a non-float, or got a non-finite value flt_path_enabled = false; if (flt_total_in_use) { term_i = PyFloat_FromDouble(flt_total); From 4f22a7fb76cd1f7115535360a3ed88ddf8724a55 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 4 Jan 2023 11:22:13 -0600 Subject: [PATCH 21/42] Update docs and docstrings. --- Doc/library/math.rst | 4 ++-- Modules/clinic/mathmodule.c.h | 7 +++---- Modules/mathmodule.c | 7 +++---- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/Doc/library/math.rst b/Doc/library/math.rst index 12ed5806870a7b..91bdf38ea9b79d 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -295,9 +295,9 @@ Number-theoretic and representation functions Return the sum of products of values from two iterables *p* and *q*. - raises :exc:`valueerror` if the inputs do not have the same length. + Raises :exc:`ValueError` if the inputs do not have the same length. - roughly equivalent to:: + Roughly equivalent to:: sum(itertools.starmap(operator.mul, zip(p, q, strict=true))) diff --git a/Modules/clinic/mathmodule.c.h b/Modules/clinic/mathmodule.c.h index 98bcd32ede70b4..1f9725883b9820 100644 --- a/Modules/clinic/mathmodule.c.h +++ b/Modules/clinic/mathmodule.c.h @@ -343,9 +343,8 @@ PyDoc_STRVAR(math_sumprod__doc__, "\n" " sum(itertools.starmap(operator.mul, zip(p, q, strict=True)))\n" "\n" -"For float and mixed int/float inputs, the products and and running\n" -"sum are computed in quad precison and the result is rounded back\n" -"to double precision."); +"For float and mixed int/float inputs, the intermediate products\n" +"and sums are computed with extended precision."); #define MATH_SUMPROD_METHODDEF \ {"sumprod", _PyCFunction_CAST(math_sumprod), METH_FASTCALL, math_sumprod__doc__}, @@ -955,4 +954,4 @@ math_ulp(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=267c6eb95ea9a8d8 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=899211ec70e4506c input=a9049054013a1b77]*/ diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 996544031f6b41..b832f64fe4d985 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2842,14 +2842,13 @@ Roughly equivalent to: sum(itertools.starmap(operator.mul, zip(p, q, strict=True))) -For float and mixed int/float inputs, the products and and running -sum are computed in quad precison and the result is rounded back -to double precision. +For float and mixed int/float inputs, the intermediate products +and sums are computed with extended precision. [clinic start generated code]*/ static PyObject * math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) -/*[clinic end generated code: output=6722dbfe60664554 input=6fc5c5e00f7b79d1]*/ +/*[clinic end generated code: output=6722dbfe60664554 input=82be54fe26f87e30]*/ { PyObject *p_it, *q_it, *total; PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL; From 7d7fd875395fb7303d1b943d7b558cda37560600 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 4 Jan 2023 12:10:38 -0600 Subject: [PATCH 22/42] Fix typo --- Doc/library/math.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/math.rst b/Doc/library/math.rst index 91bdf38ea9b79d..0e888c4d4e423f 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -301,8 +301,8 @@ Number-theoretic and representation functions sum(itertools.starmap(operator.mul, zip(p, q, strict=true))) - For float and mixed int/float inputs, the intermediate products and - and sums are computed in extended precision. + For float and mixed int/float inputs, the intermediate products + and sums are computed with extended precision. .. versionadded:: 3.12 From c16f90a96db6c9c4984646f249e0d347f11cce32 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 4 Jan 2023 16:36:31 -0600 Subject: [PATCH 23/42] Use extended precision for float/float and int/float cases. --- Lib/test/test_math.py | 10 ++++++ Modules/mathmodule.c | 71 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index 06f8d019647b5a..b9ed677ce9f461 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1280,6 +1280,16 @@ def __rmul__(self, other): self.assertTrue(math.isnan(sumprod([10.1, math.inf], [20.3, math.nan]))) + @requires_IEEE_754 + @unittest.skipIf(HAVE_DOUBLE_ROUNDING, + "sumprod() accuracy not guaranteed on machines with double rounding") + @support.cpython_only # Other implementations may choose a different algorithm + def test_sumprod_accuracy(self): + sumprod = math.sumprod + self.assertEqual(sumprod([0.1] * 10, [1]*10), 1.0) + self.assertEqual(sumprod([1.0, 10E100, 1.0, -10E100], [1.0]*4), 2.0) + + def testModf(self): self.assertRaises(TypeError, math.modf) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index b832f64fe4d985..b7a3c259fc3067 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2820,6 +2820,8 @@ For example, the hypotenuse of a 3/4/5 right triangle is:\n\ 5.0\n\ "); +/** sumprod() ***************************************************************/ + /* Forward declaration */ static inline int _check_long_mult_overflow(long a, long b); @@ -2829,6 +2831,65 @@ long_add_will_overflow(long a, long b) return (a > 0) ? (b > LONG_MAX - a) : (b < LONG_MIN - a); } +/* +Double length extended precision floating point arithmetic +based on ideas from three sources: + + Improved Kahan–Babuška algorithm by Arnold Neumaier + https://www.mat.univie.ac.at/~neum/scan/01.pdf + + A Floating-Point Technique for Extending the Available Precision + by T. J. Dekker + https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf + + Ultimately Fast Accurate Summation by Siegfried M. Rump + https://www.tuhh.de/ti3/paper/rump/Ru08b.pdf + +dl_split() exactly splits a double into two half precision components. +dl_neumaier() performs compensated summation to keep a running total. +dl_fma() implements an extended precision fused-multiply-add. + + */ + +struct DoubleLengthFloat { double hi; double lo; }; +typedef struct DoubleLengthFloat DoubleLength; + +static inline DoubleLength +dl_split(double x) { + const double VELTKAMP_CONSTANT = 134217729.0; /* float(0x8000001) */ + double t = x * VELTKAMP_CONSTANT, hi = t - (t - x), lo = x - hi; + DoubleLength result = {hi, lo}; + return result; +} + +static inline DoubleLength +dl_neumaier(DoubleLength total, double x) +{ + double t = total.hi + x; + if (fabs(total.hi) >= fabs(x)) { + total.lo += (total.hi - t) + x; + } else { + total.lo += (x - t) + total.hi; + } + total.hi = t; + return total; +} + +static inline DoubleLength +dl_fma(DoubleLength total, double p, double q) +{ + DoubleLength pp = dl_split(p); + DoubleLength qq = dl_split(q); + total = dl_neumaier(total, pp.hi * qq.hi); + total = dl_neumaier(total, pp.hi * qq.lo); + total = dl_neumaier(total, pp.lo * qq.hi); + total = dl_neumaier(total, pp.lo * qq.lo); + return total; + // XXX Possibly leverage instruction level parallelization by + // keeping separate accumulators and only combined them after + // all of the dl_fma() calls. +} + /*[clinic input] math.sumprod @@ -2856,7 +2917,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) bool int_path_enabled = true, int_total_in_use = false; long int_total = 0; bool flt_path_enabled = true, flt_total_in_use = false; - double flt_total = 0.0; + DoubleLength flt_total = {0.0, 0.0}; p_it = PyObject_GetIter(p); if (p_it == NULL) { @@ -2948,6 +3009,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) Py_SETREF(total, new_total); new_total = NULL; Py_CLEAR(term_i); + int_total = 0; // An ounce of prevention, ... int_total_in_use = false; } } @@ -2978,8 +3040,8 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) } else { goto finalize_flt_path; } - double new_flt_total = flt_total + flt_p * flt_q; - if (isfinite(new_flt_total)) { + DoubleLength new_flt_total = dl_fma(flt_total, flt_p, flt_q); + if (isfinite(new_flt_total.hi)) { flt_total = new_flt_total; flt_total_in_use = true; Py_CLEAR(p_i); @@ -2992,7 +3054,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) // We're finished, overflowed, have a non-float, or got a non-finite value flt_path_enabled = false; if (flt_total_in_use) { - term_i = PyFloat_FromDouble(flt_total); + term_i = PyFloat_FromDouble(flt_total.hi + flt_total.lo); if (term_i == NULL) { goto err_exit; } @@ -3003,6 +3065,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) Py_SETREF(total, new_total); new_total = NULL; Py_CLEAR(term_i); + flt_total = (DoubleLength) {0.0, 0.0}; flt_total_in_use = false; } } From c52bb8426789989cfc6aac51b211c130be6c245f Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 4 Jan 2023 16:47:25 -0600 Subject: [PATCH 24/42] Brevity is the soul of wit. --- Modules/mathmodule.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index b7a3c259fc3067..4f0186d547f8e0 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2858,8 +2858,7 @@ static inline DoubleLength dl_split(double x) { const double VELTKAMP_CONSTANT = 134217729.0; /* float(0x8000001) */ double t = x * VELTKAMP_CONSTANT, hi = t - (t - x), lo = x - hi; - DoubleLength result = {hi, lo}; - return result; + return (DoubleLength) {hi, lo}; } static inline DoubleLength @@ -2883,8 +2882,7 @@ dl_fma(DoubleLength total, double p, double q) total = dl_neumaier(total, pp.hi * qq.hi); total = dl_neumaier(total, pp.hi * qq.lo); total = dl_neumaier(total, pp.lo * qq.hi); - total = dl_neumaier(total, pp.lo * qq.lo); - return total; + return dl_neumaier(total, pp.lo * qq.lo); // XXX Possibly leverage instruction level parallelization by // keeping separate accumulators and only combined them after // all of the dl_fma() calls. From bf8059c217fda458b29497c2e42f867016c087a2 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 4 Jan 2023 17:04:51 -0600 Subject: [PATCH 25/42] Beautification. --- Modules/mathmodule.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 4f0186d547f8e0..91974b5ddfb7f4 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2857,7 +2857,9 @@ typedef struct DoubleLengthFloat DoubleLength; static inline DoubleLength dl_split(double x) { const double VELTKAMP_CONSTANT = 134217729.0; /* float(0x8000001) */ - double t = x * VELTKAMP_CONSTANT, hi = t - (t - x), lo = x - hi; + double t = x * VELTKAMP_CONSTANT; + double hi = t - (t - x); + double lo = x - hi; return (DoubleLength) {hi, lo}; } @@ -2883,9 +2885,6 @@ dl_fma(DoubleLength total, double p, double q) total = dl_neumaier(total, pp.hi * qq.lo); total = dl_neumaier(total, pp.lo * qq.hi); return dl_neumaier(total, pp.lo * qq.lo); - // XXX Possibly leverage instruction level parallelization by - // keeping separate accumulators and only combined them after - // all of the dl_fma() calls. } /*[clinic input] From 1326e83764344aca04d496b9d96338e54c0888a9 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 4 Jan 2023 22:05:28 -0600 Subject: [PATCH 26/42] Various minor improvments --- Modules/mathmodule.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 91974b5ddfb7f4..f5f598a3730aa8 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2826,7 +2826,7 @@ For example, the hypotenuse of a 3/4/5 right triangle is:\n\ static inline int _check_long_mult_overflow(long a, long b); static inline bool -long_add_will_overflow(long a, long b) +long_add_would_overflow(long a, long b) { return (a > 0) ? (b > LONG_MAX - a) : (b < LONG_MIN - a); } @@ -2846,7 +2846,7 @@ based on ideas from three sources: https://www.tuhh.de/ti3/paper/rump/Ru08b.pdf dl_split() exactly splits a double into two half precision components. -dl_neumaier() performs compensated summation to keep a running total. +dl_add() performs compensated summation to keep a running total. dl_fma() implements an extended precision fused-multiply-add. */ @@ -2864,7 +2864,7 @@ dl_split(double x) { } static inline DoubleLength -dl_neumaier(DoubleLength total, double x) +dl_add(DoubleLength total, double x) { double t = total.hi + x; if (fabs(total.hi) >= fabs(x)) { @@ -2881,10 +2881,10 @@ dl_fma(DoubleLength total, double p, double q) { DoubleLength pp = dl_split(p); DoubleLength qq = dl_split(q); - total = dl_neumaier(total, pp.hi * qq.hi); - total = dl_neumaier(total, pp.hi * qq.lo); - total = dl_neumaier(total, pp.lo * qq.hi); - return dl_neumaier(total, pp.lo * qq.lo); + total = dl_add(total, pp.hi * qq.hi); + total = dl_add(total, pp.hi * qq.lo); + total = dl_add(total, pp.lo * qq.hi); + return dl_add(total, pp.lo * qq.lo); } /*[clinic input] @@ -2981,7 +2981,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) goto finalize_int_path; } int_prod = int_p * int_q; - if (long_add_will_overflow(int_total, int_prod)) { + if (long_add_would_overflow(int_total, int_prod)) { goto finalize_int_path; } int_total += int_prod; @@ -3021,6 +3021,10 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) flt_p = PyFloat_AS_DOUBLE(p_i); flt_q = PyFloat_AS_DOUBLE(q_i); } else if (p_type_float && PyLong_CheckExact(q_i)) { + /* We care about float/int pairs and int/float pairs because + they arise naturally in several use cases such as price + vs quantity, data multiplied by integer weights, or data + selected by a vector of bools. */ flt_p = PyFloat_AS_DOUBLE(p_i); flt_q = PyLong_AsDouble(q_i); if (flt_q == -1.0 && PyErr_Occurred()) { @@ -3034,6 +3038,20 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) PyErr_Clear(); goto finalize_flt_path; } + } else if (flt_total_in_use && PyLong_CheckExact(p_i) && PyLong_CheckExact(q_i)) { + /* This path makes sure that we don't throw-away a double + length float accumulation if a subsequent int/int pair is + encountered. */ + flt_p = PyLong_AsDouble(p_i); + if (flt_p == -1.0 && PyErr_Occurred()) { + PyErr_Clear(); + goto finalize_flt_path; + } + flt_q = PyLong_AsDouble(q_i); + if (flt_q == -1.0 && PyErr_Occurred()) { + PyErr_Clear(); + goto finalize_flt_path; + } } else { goto finalize_flt_path; } From 9b034686b685a00ec9adc0632ee19400523c6743 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 4 Jan 2023 22:26:16 -0600 Subject: [PATCH 27/42] Fully encapsulate the dl (double length) logic. --- Modules/mathmodule.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index f5f598a3730aa8..4d5f9bb2db6cea 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2845,15 +2845,23 @@ based on ideas from three sources: Ultimately Fast Accurate Summation by Siegfried M. Rump https://www.tuhh.de/ti3/paper/rump/Ru08b.pdf +dl_zero() returns an extended precision zero dl_split() exactly splits a double into two half precision components. dl_add() performs compensated summation to keep a running total. dl_fma() implements an extended precision fused-multiply-add. +dl_to_d() converts from extended precision to double precision. - */ +*/ struct DoubleLengthFloat { double hi; double lo; }; typedef struct DoubleLengthFloat DoubleLength; +static inline DoubleLength +dl_zero() +{ + return (DoubleLength) {0.0, 0.0}; +} + static inline DoubleLength dl_split(double x) { const double VELTKAMP_CONSTANT = 134217729.0; /* float(0x8000001) */ @@ -2887,6 +2895,12 @@ dl_fma(DoubleLength total, double p, double q) return dl_add(total, pp.lo * qq.lo); } +static inline double +dl_to_d(DoubleLength total) +{ + return total.hi + total.lo; +} + /*[clinic input] math.sumprod @@ -2914,7 +2928,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) bool int_path_enabled = true, int_total_in_use = false; long int_total = 0; bool flt_path_enabled = true, flt_total_in_use = false; - DoubleLength flt_total = {0.0, 0.0}; + DoubleLength flt_total = dl_zero(); p_it = PyObject_GetIter(p); if (p_it == NULL) { @@ -3069,7 +3083,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) // We're finished, overflowed, have a non-float, or got a non-finite value flt_path_enabled = false; if (flt_total_in_use) { - term_i = PyFloat_FromDouble(flt_total.hi + flt_total.lo); + term_i = PyFloat_FromDouble(dl_to_d(flt_total)); if (term_i == NULL) { goto err_exit; } @@ -3080,7 +3094,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) Py_SETREF(total, new_total); new_total = NULL; Py_CLEAR(term_i); - flt_total = (DoubleLength) {0.0, 0.0}; + flt_total = dl_zero(); flt_total_in_use = false; } } From 8b88f4d21bacca74289256bc88994ee75dfa6fdb Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 4 Jan 2023 23:21:54 -0600 Subject: [PATCH 28/42] Add performance comment. --- Modules/mathmodule.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 4d5f9bb2db6cea..ba565e2a56035c 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2851,6 +2851,18 @@ dl_add() performs compensated summation to keep a running total. dl_fma() implements an extended precision fused-multiply-add. dl_to_d() converts from extended precision to double precision. +The double length routines allow for quite a bit of instruction +level parallelism. On a 3.22 Ghz Apple M1 Max, the incremental +cost of increasing the input vector sizes by one adds 8.75ns +which is about 28 clock cycles. This includes the time for +fetching floats from the iterator, decoding them into C doubles, +and managing their reference counts. + +The total function running time for input vectors of size 100 +is 946 nsec. This includes 71 nsec fixed costs for call overhead, +creating the iterators, building the float object result, +and managing the reference counts. + */ struct DoubleLengthFloat { double hi; double lo; }; From 83ca9299424d3f2c5e9ce6e15c464da02dcf00ab Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 5 Jan 2023 00:17:55 -0600 Subject: [PATCH 29/42] Write tight. --- Modules/mathmodule.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index ba565e2a56035c..05b53b42a646ac 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2845,24 +2845,17 @@ based on ideas from three sources: Ultimately Fast Accurate Summation by Siegfried M. Rump https://www.tuhh.de/ti3/paper/rump/Ru08b.pdf +The double length routines allow for quite a bit of instruction +level parallelism. On a 3.22 Ghz Apple M1 Max, the incremental +cost of increasing the input vector size by one is 8.75ns which +is about 28 clock cycles for everything including + dl_zero() returns an extended precision zero dl_split() exactly splits a double into two half precision components. dl_add() performs compensated summation to keep a running total. dl_fma() implements an extended precision fused-multiply-add. dl_to_d() converts from extended precision to double precision. -The double length routines allow for quite a bit of instruction -level parallelism. On a 3.22 Ghz Apple M1 Max, the incremental -cost of increasing the input vector sizes by one adds 8.75ns -which is about 28 clock cycles. This includes the time for -fetching floats from the iterator, decoding them into C doubles, -and managing their reference counts. - -The total function running time for input vectors of size 100 -is 946 nsec. This includes 71 nsec fixed costs for call overhead, -creating the iterators, building the float object result, -and managing the reference counts. - */ struct DoubleLengthFloat { double hi; double lo; }; From 708f3149d6acdc90cd059fa6d3f73c035f9cf00a Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 5 Jan 2023 00:36:48 -0600 Subject: [PATCH 30/42] Abbreviate comment. --- Modules/mathmodule.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 05b53b42a646ac..8651481c2de7cc 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2847,8 +2847,7 @@ based on ideas from three sources: The double length routines allow for quite a bit of instruction level parallelism. On a 3.22 Ghz Apple M1 Max, the incremental -cost of increasing the input vector size by one is 8.75ns which -is about 28 clock cycles for everything including +cost of increasing the input vector size by one is 8.75ns. dl_zero() returns an extended precision zero dl_split() exactly splits a double into two half precision components. From 90f1ee4b972956a090e270fcdf0b64a7f485fc2b Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 5 Jan 2023 02:54:10 -0600 Subject: [PATCH 31/42] Remove case for int/int while float in use. --- Lib/test/test_math.py | 4 ++++ Modules/mathmodule.c | 14 -------------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index b9ed677ce9f461..2b723efe532708 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1279,6 +1279,10 @@ def __rmul__(self, other): self.assertTrue(math.isnan(sumprod([10.1, math.inf], [math.nan, 30.3]))) self.assertTrue(math.isnan(sumprod([10.1, math.inf], [20.3, math.nan]))) + # Error cases that arose during development + args = ((-5, -5, 10), (1.5, 4611686018427387904, 2305843009213693952)) + self.assertEqual(sumprod(*args), 0.0) + @requires_IEEE_754 @unittest.skipIf(HAVE_DOUBLE_ROUNDING, diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 8651481c2de7cc..dfe8ffbf266309 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -3056,20 +3056,6 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) PyErr_Clear(); goto finalize_flt_path; } - } else if (flt_total_in_use && PyLong_CheckExact(p_i) && PyLong_CheckExact(q_i)) { - /* This path makes sure that we don't throw-away a double - length float accumulation if a subsequent int/int pair is - encountered. */ - flt_p = PyLong_AsDouble(p_i); - if (flt_p == -1.0 && PyErr_Occurred()) { - PyErr_Clear(); - goto finalize_flt_path; - } - flt_q = PyLong_AsDouble(q_i); - if (flt_q == -1.0 && PyErr_Occurred()) { - PyErr_Clear(); - goto finalize_flt_path; - } } else { goto finalize_flt_path; } From 02035dcac720c7e887ea72e1613f1dd751f6f6cc Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 5 Jan 2023 12:28:18 -0600 Subject: [PATCH 32/42] Beautify dl_add() --- Modules/mathmodule.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index dfe8ffbf266309..9d19dde5cdf96e 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2878,14 +2878,14 @@ dl_split(double x) { static inline DoubleLength dl_add(DoubleLength total, double x) { - double t = total.hi + x; + double s = total.hi + x; + double c = total.lo; if (fabs(total.hi) >= fabs(x)) { - total.lo += (total.hi - t) + x; + c += (total.hi - s) + x; } else { - total.lo += (x - t) + total.hi; + c += (x - s) + total.hi; } - total.hi = t; - return total; + return (DoubleLength) {s, c}; } static inline DoubleLength From 9b1edcc008f26e440e33c3868066b4b66e09c5e7 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 5 Jan 2023 16:13:40 -0600 Subject: [PATCH 33/42] Add stress tests. --- Lib/test/test_math.py | 72 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index 2b723efe532708..3e8765b0694185 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1293,6 +1293,78 @@ def test_sumprod_accuracy(self): self.assertEqual(sumprod([0.1] * 10, [1]*10), 1.0) self.assertEqual(sumprod([1.0, 10E100, 1.0, -10E100], [1.0]*4), 2.0) + def test_sumprod_stress(self): + sumprod = math.sumprod + product = itertools.product + Decimal = decimal.Decimal + Fraction = fractions.Fraction + + class Int(int): + def __add__(self, other): + return Int(int(self) + int(other)) + def __mul__(self, other): + return Int(int(self) * int(other)) + __radd__ = __add__ + __rmul__ = __mul__ + def __repr__(self): + return f'Int({int(self)})' + + class Flt(float): + def __add__(self, other): + return Int(int(self) + int(other)) + def __mul__(self, other): + return Int(int(self) * int(other)) + __radd__ = __add__ + __rmul__ = __mul__ + def __repr__(self): + return f'Flt({int(self)})' + + def baseline_sumprod(p, q): + """This defines the target behavior including expections and special values. + However, it is subject to rounding errors, so float inputs should be exactly + representable with only a few bits. + """ + total = 0 + for p_i, q_i in zip(p, q, strict=True): + total += p_i * q_i + return total + + def run(func, *args): + "Make comparing functions easier. Returns error status, type, and result." + try: + result = func(*args) + except (AssertionError, NameError): + raise + except Exception as e: + return type(e), None, 'None' + return None, type(result), repr(result) + + pools = [ + (-5, 10, -2**20, 2**31, 2**40, 2**61, 2**62, 2**80, 1.5, Int(7)), + (5.25, -3.5, 4.75, 11.25, 400.5, 0.046875, 0.25, -1.0, -0.078125), + (-19.0*2**500, 11*2**1000, -3*2**1500, 17*2*333, + 5.25, -3.25, -3.0*2**(-333), 3, 2**513), + (3.75, 2.5, -1.5, float('inf'), -float('inf'), float('NaN'), 14, + 9, 3+4j, Flt(13), 0.0), + (13.25, -4.25, Decimal('10.5'), Decimal('-2.25'), Fraction(13, 8), + Fraction(-11, 16), 4.75 + 0.125j, 97, -41, Int(3)), + (Decimal('6.125'), Decimal('12.375'), Decimal('-2.75'), Decimal(0), + Decimal('Inf'), -Decimal('Inf'), Decimal('NaN'), 12, 13.5), + (-2.0 ** -1000, 11*2**1000, 3, 7, -37*2**32, -2*2**-537, -2*2**-538, + 2*2**-513), + (-7 * 2.0 ** -510, 5 * 2.0 ** -520, 17, -19.0, -6.25), + ] + + for pool in pools: + for size in range(4): + for args1 in product(pool, repeat=size): + for args2 in product(pool, repeat=size): + args = (args1, args2) + self.assertEqual( + run(baseline_sumprod, *args), + run(sumprod, *args), + args, + ) def testModf(self): self.assertRaises(TypeError, math.modf) From 22249d67bc3914c63dc3aa09c296592f586e4628 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 5 Jan 2023 16:17:55 -0600 Subject: [PATCH 34/42] Update itertools recipe to reflect the promotion. --- Doc/library/itertools.rst | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 4ad679dfccdbb4..5c55087792b191 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -33,7 +33,7 @@ by combining :func:`map` and :func:`count` to form ``map(f, count())``. These tools and their built-in counterparts also work well with the high-speed functions in the :mod:`operator` module. For example, the multiplication operator can be mapped across two vectors to form an efficient dot-product: -``sum(map(operator.mul, vector1, vector2))``. +``sum(starmap(operator.mul, zip(vec1, vec2, strict=True)))``. **Infinite iterators:** @@ -838,10 +838,6 @@ which incur interpreter overhead. "Returns the sequence elements n times" return chain.from_iterable(repeat(tuple(iterable), n)) - def dotproduct(vec1, vec2): - "Compute a sum of products." - return sum(starmap(operator.mul, zip(vec1, vec2, strict=True))) - def convolve(signal, kernel): # See: https://betterexplained.com/articles/intuitive-convolution/ # convolve(data, [0.25, 0.25, 0.25, 0.25]) --> Moving average (blur) @@ -852,7 +848,7 @@ which incur interpreter overhead. window = collections.deque([0], maxlen=n) * n for x in chain(signal, repeat(0, n-1)): window.append(x) - yield dotproduct(kernel, window) + yield math.sumprod(kernel, window) def polynomial_from_roots(roots): """Compute a polynomial's coefficients from its roots. From ec50404a689e08181450e4a74fcd0dc6b3f87e7e Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 5 Jan 2023 16:39:58 -0600 Subject: [PATCH 35/42] Remove doctest for dotproduct(). --- Doc/library/itertools.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 5c55087792b191..e7d2e13626fa52 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -1207,9 +1207,6 @@ which incur interpreter overhead. >>> list(ncycles('abc', 3)) ['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c'] - >>> dotproduct([1,2,3], [4,5,6]) - 32 - >>> data = [20, 40, 24, 32, 20, 28, 16] >>> list(convolve(data, [0.25, 0.25, 0.25, 0.25])) [5.0, 15.0, 21.0, 29.0, 29.0, 26.0, 24.0, 16.0, 11.0, 4.0] From 19dfd08fcce4a519d79a76d71ef6dab598c1077e Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 6 Jan 2023 09:43:37 -0600 Subject: [PATCH 36/42] Speed-up iterator access. Let float be selected by bools. --- Lib/test/test_math.py | 2 ++ Modules/mathmodule.c | 37 ++++++++++++++++++++++--------------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index 3e8765b0694185..65fe169671eae2 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1291,6 +1291,7 @@ def __rmul__(self, other): def test_sumprod_accuracy(self): sumprod = math.sumprod self.assertEqual(sumprod([0.1] * 10, [1]*10), 1.0) + self.assertEqual(sumprod([0.1] * 20, [True, False] * 10), 1.0) self.assertEqual(sumprod([1.0, 10E100, 1.0, -10E100], [1.0]*4), 2.0) def test_sumprod_stress(self): @@ -1353,6 +1354,7 @@ def run(func, *args): (-2.0 ** -1000, 11*2**1000, 3, 7, -37*2**32, -2*2**-537, -2*2**-538, 2*2**-513), (-7 * 2.0 ** -510, 5 * 2.0 ** -520, 17, -19.0, -6.25), + (11.25, -3.75, -0.625, 23.375, True, False, 7, Int(5)), ] for pool in pools: diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 9d19dde5cdf96e..2170864c032f88 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2847,7 +2847,7 @@ based on ideas from three sources: The double length routines allow for quite a bit of instruction level parallelism. On a 3.22 Ghz Apple M1 Max, the incremental -cost of increasing the input vector size by one is 8.75ns. +cost of increasing the input vector size by one is 7.4ns. dl_zero() returns an extended precision zero dl_split() exactly splits a double into two half precision components. @@ -2857,8 +2857,7 @@ dl_to_d() converts from extended precision to double precision. */ -struct DoubleLengthFloat { double hi; double lo; }; -typedef struct DoubleLengthFloat DoubleLength; +typedef struct{ double hi; double lo; } DoubleLength; static inline DoubleLength dl_zero() @@ -2868,8 +2867,7 @@ dl_zero() static inline DoubleLength dl_split(double x) { - const double VELTKAMP_CONSTANT = 134217729.0; /* float(0x8000001) */ - double t = x * VELTKAMP_CONSTANT; + double t = x * 134217729.0; /* Veltkamp constant = float(0x8000001) */ double hi = t - (t - x); double lo = x - hi; return (DoubleLength) {hi, lo}; @@ -2926,12 +2924,13 @@ static PyObject * math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) /*[clinic end generated code: output=6722dbfe60664554 input=82be54fe26f87e30]*/ { - PyObject *p_it, *q_it, *total; PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL; + PyObject *p_it, *q_it, *total; + iternextfunc p_next, q_next; bool p_stopped = false, q_stopped = false; bool int_path_enabled = true, int_total_in_use = false; - long int_total = 0; bool flt_path_enabled = true, flt_total_in_use = false; + long int_total = 0; DoubleLength flt_total = dl_zero(); p_it = PyObject_GetIter(p); @@ -2949,6 +2948,8 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) Py_DECREF(q_it); return NULL; } + p_next = *Py_TYPE(p_it)->tp_iternext; + q_next = *Py_TYPE(q_it)->tp_iternext; while (1) { bool finished; @@ -2961,17 +2962,23 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) assert (q_it != NULL); assert (total != NULL); - p_i = PyIter_Next(p_it); + p_i = p_next(p_it); if (p_i == NULL) { if (PyErr_Occurred()) { - goto err_exit; + if (!PyErr_ExceptionMatches(PyExc_StopIteration)) { + goto err_exit; + } + PyErr_Clear(); } p_stopped = true; } - q_i = PyIter_Next(q_it); + q_i = q_next(q_it); if (q_i == NULL) { if (PyErr_Occurred()) { - goto err_exit; + if (!PyErr_ExceptionMatches(PyExc_StopIteration)) { + goto err_exit; + } + PyErr_Clear(); } q_stopped = true; } @@ -3038,18 +3045,18 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) if (p_type_float && q_type_float) { flt_p = PyFloat_AS_DOUBLE(p_i); flt_q = PyFloat_AS_DOUBLE(q_i); - } else if (p_type_float && PyLong_CheckExact(q_i)) { + } else if (p_type_float && (PyLong_CheckExact(q_i) || PyBool_Check(q_i))) { /* We care about float/int pairs and int/float pairs because they arise naturally in several use cases such as price - vs quantity, data multiplied by integer weights, or data - selected by a vector of bools. */ + times quantity, measurements with integer weights, or + data selected by a vector of bools. */ flt_p = PyFloat_AS_DOUBLE(p_i); flt_q = PyLong_AsDouble(q_i); if (flt_q == -1.0 && PyErr_Occurred()) { PyErr_Clear(); goto finalize_flt_path; } - } else if (q_type_float && PyLong_CheckExact(p_i)) { + } else if (q_type_float && (PyLong_CheckExact(p_i) || PyBool_Check(q_i))) { flt_q = PyFloat_AS_DOUBLE(q_i); flt_p = PyLong_AsDouble(p_i); if (flt_p == -1.0 && PyErr_Occurred()) { From 13f1d4fbb8f7589545cd810e2b03d94d83aaf7be Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 6 Jan 2023 10:00:55 -0600 Subject: [PATCH 37/42] Add assertion for a key checkpoint --- Modules/mathmodule.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 2170864c032f88..f5c9fb1fd3a30a 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -3097,6 +3097,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) } assert(!int_total_in_use); + assert(!flt_total_in_use); if (finished) { goto normal_exit; } From a07e03de669134abe16870c264d0eac1d9e0aa03 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 6 Jan 2023 12:58:07 -0600 Subject: [PATCH 38/42] Move dl_mul() out to a separate function. Dekker (5.8). --- Modules/mathmodule.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index f5c9fb1fd3a30a..939de9f9086e64 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2852,6 +2852,7 @@ cost of increasing the input vector size by one is 7.4ns. dl_zero() returns an extended precision zero dl_split() exactly splits a double into two half precision components. dl_add() performs compensated summation to keep a running total. +dl_mul() implements lossless multiplication of doubles. dl_fma() implements an extended precision fused-multiply-add. dl_to_d() converts from extended precision to double precision. @@ -2887,14 +2888,20 @@ dl_add(DoubleLength total, double x) } static inline DoubleLength -dl_fma(DoubleLength total, double p, double q) +dl_mul(double p, double q) { DoubleLength pp = dl_split(p); DoubleLength qq = dl_split(q); - total = dl_add(total, pp.hi * qq.hi); - total = dl_add(total, pp.hi * qq.lo); - total = dl_add(total, pp.lo * qq.hi); - return dl_add(total, pp.lo * qq.lo); + DoubleLength product = {pp.hi * qq.hi, pp.lo * qq.lo}; + return dl_add(product, pp.hi * qq.lo + pp.lo * qq.hi); +} + +static inline DoubleLength +dl_fma(DoubleLength total, double p, double q) +{ + DoubleLength product = dl_mul(p, q); + total = dl_add(total, product.hi); + return dl_add(total, product.lo); } static inline double From d274325737e3a0396c4e2aeb1dfa65b1ba46eeb9 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 6 Jan 2023 13:24:16 -0600 Subject: [PATCH 39/42] Add the smaller magnitude component first. --- Modules/mathmodule.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 939de9f9086e64..d71d885b5ac004 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2900,8 +2900,8 @@ static inline DoubleLength dl_fma(DoubleLength total, double p, double q) { DoubleLength product = dl_mul(p, q); - total = dl_add(total, product.hi); - return dl_add(total, product.lo); + total = dl_add(total, product.lo); + return dl_add(total, product.hi); } static inline double From 67f82ca3afef740be6f0bd3eb69a6bf92cea5d9f Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 6 Jan 2023 22:24:16 -0600 Subject: [PATCH 40/42] Use Dekker (5.12) directly. Avoids calling dl_add() and saves a comparison. --- Modules/mathmodule.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index d71d885b5ac004..ccd8a6a9fbae08 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2865,15 +2865,6 @@ dl_zero() { return (DoubleLength) {0.0, 0.0}; } - -static inline DoubleLength -dl_split(double x) { - double t = x * 134217729.0; /* Veltkamp constant = float(0x8000001) */ - double hi = t - (t - x); - double lo = x - hi; - return (DoubleLength) {hi, lo}; -} - static inline DoubleLength dl_add(DoubleLength total, double x) { @@ -2888,12 +2879,24 @@ dl_add(DoubleLength total, double x) } static inline DoubleLength -dl_mul(double p, double q) +dl_split(double x) { + double t = x * 134217729.0; /* Veltkamp constant = float(0x8000001) */ + double hi = t - (t - x); + double lo = x - hi; + return (DoubleLength) {hi, lo}; +} + +static inline DoubleLength +dl_mul(double x, double y) { - DoubleLength pp = dl_split(p); - DoubleLength qq = dl_split(q); - DoubleLength product = {pp.hi * qq.hi, pp.lo * qq.lo}; - return dl_add(product, pp.hi * qq.lo + pp.lo * qq.hi); + /* Dekker mul12(). Section (5.12) */ + DoubleLength xx = dl_split(x); + DoubleLength yy = dl_split(y); + double p = xx.hi * yy.hi; + double q = xx.hi * yy.lo + xx.lo * yy.hi; + double z = p + q; + double zz = p - z + q + xx.lo * yy.lo; + return (DoubleLength) {z, zz}; } static inline DoubleLength From 5d165be65920e13790ee79f70a6c634b42d676df Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 6 Jan 2023 22:57:54 -0600 Subject: [PATCH 41/42] Update timing in comment. --- Modules/mathmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index ccd8a6a9fbae08..b2588b478fdff3 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2847,7 +2847,7 @@ based on ideas from three sources: The double length routines allow for quite a bit of instruction level parallelism. On a 3.22 Ghz Apple M1 Max, the incremental -cost of increasing the input vector size by one is 7.4ns. +cost of increasing the input vector size by one is 6.25 nsec. dl_zero() returns an extended precision zero dl_split() exactly splits a double into two half precision components. From 2e772cb94d225ad5663cedb34300f0799b4fa884 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 7 Jan 2023 00:24:30 -0600 Subject: [PATCH 42/42] Add in the large component first. --- Modules/mathmodule.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index b2588b478fdff3..0bcb336efbb03a 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2903,8 +2903,8 @@ static inline DoubleLength dl_fma(DoubleLength total, double p, double q) { DoubleLength product = dl_mul(p, q); - total = dl_add(total, product.lo); - return dl_add(total, product.hi); + total = dl_add(total, product.hi); + return dl_add(total, product.lo); } static inline double