From 44a9e6d3156bad252b15574e31306b2b4a47ad5b Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sat, 4 Feb 2023 13:01:35 -0600 Subject: [PATCH 01/14] Add `from_dense` and `to_dense` methods --- graphblas/core/automethods.py | 5 + graphblas/core/infix.py | 2 + graphblas/core/matrix.py | 217 ++++++++++++++++++++------------- graphblas/core/utils.py | 16 +++ graphblas/core/vector.py | 88 ++++++++++--- graphblas/io.py | 16 +-- graphblas/tests/test_matrix.py | 42 ++++++- graphblas/tests/test_vector.py | 46 +++++++ 8 files changed, 316 insertions(+), 116 deletions(-) diff --git a/graphblas/core/automethods.py b/graphblas/core/automethods.py index 5b8fb5726..d370469e7 100644 --- a/graphblas/core/automethods.py +++ b/graphblas/core/automethods.py @@ -261,6 +261,10 @@ def to_dcsr(self): return self._get_value("to_dcsr") +def to_dense(self): + return self._get_value("to_dense") + + def to_dict(self): return self._get_value("to_dict") @@ -389,6 +393,7 @@ def _main(): "reposition", "ss", "to_coo", + "to_dense", "to_values", } vector = { diff --git a/graphblas/core/infix.py b/graphblas/core/infix.py index 22b1c5dca..1fc7caa95 100644 --- a/graphblas/core/infix.py +++ b/graphblas/core/infix.py @@ -208,6 +208,7 @@ def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts): else: ss = Vector.__dict__["ss"] # raise if used to_coo = wrapdoc(Vector.to_coo)(property(automethods.to_coo)) + to_dense = wrapdoc(Vector.to_dense)(property(automethods.to_dense)) to_dict = wrapdoc(Vector.to_dict)(property(automethods.to_dict)) to_values = wrapdoc(Vector.to_values)(property(automethods.to_values)) vxm = wrapdoc(Vector.vxm)(property(automethods.vxm)) @@ -343,6 +344,7 @@ def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts): to_csr = wrapdoc(Matrix.to_csr)(property(automethods.to_csr)) to_dcsc = wrapdoc(Matrix.to_dcsc)(property(automethods.to_dcsc)) to_dcsr = wrapdoc(Matrix.to_dcsr)(property(automethods.to_dcsr)) + to_dense = wrapdoc(Matrix.to_dense)(property(automethods.to_dense)) to_dicts = wrapdoc(Matrix.to_dicts)(property(automethods.to_dicts)) to_edgelist = wrapdoc(Matrix.to_edgelist)(property(automethods.to_edgelist)) to_values = wrapdoc(Matrix.to_values)(property(automethods.to_values)) diff --git a/graphblas/core/matrix.py b/graphblas/core/matrix.py index a1e18152b..30f133916 100644 --- a/graphblas/core/matrix.py +++ b/graphblas/core/matrix.py @@ -28,6 +28,7 @@ class_property, get_order, ints_to_numpy_buffer, + normalize_values, output_type, values_to_numpy_buffer, wrapdoc, @@ -528,11 +529,7 @@ def to_coo(self, dtype=None, *, rows=True, columns=True, values=True, sort=True) [c_rows, c_columns, c_values, _Pointer(scalar), self], ) if values: - c_values = c_values.array - if dtype is not None: - dtype = lookup_dtype(dtype) - if dtype != self.dtype: - c_values = c_values.astype(dtype.np_type) # copies + c_values = normalize_values(self, c_values.array, dtype) if sort and backend != "suitesparse": col = c_columns.array row = c_rows.array @@ -1335,30 +1332,81 @@ def from_dcsc( return cls.from_coo(row_indices, cols, values, dtype, nrows=nrows, ncols=ncols, name=name) @classmethod - def _from_dense(cls, values, dtype=None, *, name=None): - """Create a new Matrix from a dense numpy array.""" - # TODO: GraphBLAS needs a way to import or assign dense - # We could also handle F-contiguous data w/o a copy - # TODO: handle `Matrix._from_dense(np.arange(3*4*5).reshape(3, 4, 5))` as 3x4 Matrix - if backend == "suitesparse": # pragma: no cover (unused) - return Matrix.ss.import_fullr(values, dtype=dtype, name=name) - values, dtype = values_to_numpy_buffer(values, dtype) - if values.ndim < 2: - raise ValueError("A 2d array is required to create a dense Matrix") - if dtype.np_type.subdtype is not None and values.ndim < 3: + def from_dense(self, values, dtype=None, *, nrows=None, ncols=None, name=None): + values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=2) + if values.ndim == 0: + if nrows is None or ncols is None: + raise TypeError( + "nrows and ncols must be given when creating a dense Matrix from a scalar" + ) + if backend == "suitesparse": + # Should we try to handle F-contiguous data w/o a copy? + return Matrix.ss.import_fullr( + values, dtype=dtype, nrows=nrows, ncols=ncols, is_iso=True, name=name + ) + rv = Matrix(dtype, nrows=nrows, ncols=ncols, name=name) + rv << values + return rv + if values.ndim == 1: + raise ValueError("A 2d array or scalar is required to create a dense Matrix") + if values.ndim == 2 and dtype.np_type.subdtype is not None: raise ValueError("A >2d array is required to create a dense Matrix with subdtype") - nrows, ncols, *rest = values.shape - cols, rows = np.meshgrid( - np.arange(ncols, dtype=np.uint64), - np.arange(nrows, dtype=np.uint64), - ) - rows = rows.ravel() - cols = cols.ravel() - if values.ndim > 2: - values = values.reshape([nrows * ncols, *rest]) + if values.ndim > 2 and dtype.np_type.subdtype is None: + raise ValueError(f"values array must be 2d to create dense Matrix with dtype {dtype}") + if backend == "suitesparse": + rv = Matrix.ss.import_fullr(values, dtype=dtype, name=name) + nrows2, ncols2 = rv.shape else: - values = values.ravel() - return cls.from_coo(rows, cols, values, dtype, nrows=nrows, ncols=ncols, name=name) + nrows2, ncols2, *rest = values.shape + indptr = np.arange(0, nrows2 * ncols2 + 1, ncols2, dtype=np.uint64) + cols = np.repeat(np.arange(ncols2, dtype=np.uint64)[None, :], nrows2, 0).ravel() + if rest: # sub-array dtype + values = values.reshape(nrows2 * ncols2, *rest) + else: + values = values.ravel() + rv = Matrix.from_csr( + indptr, + cols, + values, + dtype, + ncols=ncols2, + name=name, + ) + if nrows is not None and nrows != nrows2 or ncols is not None and ncols != ncols2: + rv.resize(nrows2 if nrows is None else nrows, ncols2 if ncols is None else ncols) + return rv + + def to_dense(self, fill_value=None, dtype=None): + max_nvals = self._nrows * self._ncols + if fill_value is None or self._nvals == max_nvals: + if self._nvals != max_nvals: + raise TypeError( + "fill_value must be given in `to_dense` when there are missing values" + ) + if backend == "suitesparse": + info = self.ss.export("fullr") + return normalize_values(self, info["values"], dtype, self.shape, info["is_iso"]) + values = self.to_csr(dtype, sort=True)[2] + return values.reshape(self._nrows, self._ncols, *values.shape[1:]) + + if dtype is None and not self.dtype._is_udt: + # dtype of fill_value can upcast the dtype + if type(fill_value) is not Scalar: + try: + fill_value = Scalar.from_value(fill_value, is_cscalar=None, name="") + except TypeError: + fill_value = self._expect_type( + fill_value, + Scalar, + within="to_dense", + keyword_name="fill_value", + extra_message="Literal scalars also accepted.", + ) + dtype = unify(fill_value.dtype, self.dtype, is_left_scalar=True) + + rv = self.dup(dtype, name="to_dense") + rv(~rv.S) << fill_value + return rv.to_dense() @classmethod def from_dicts( @@ -1442,7 +1490,7 @@ def from_dicts( *args, indptr, col_indices, values, dtype, nrows=nrows, ncols=ncols, name=name ) - def _to_csx(self, fmt, dtype=None): + def _to_csx(self, fmt, dtype, sort): Ap_len = _scalar_index("Ap_len") Ai_len = _scalar_index("Ai_len") Ax_len = _scalar_index("Ax_len") @@ -1471,12 +1519,24 @@ def _to_csx(self, fmt, dtype=None): Ax = Ax[:Ax_len] if dtype is not None: - dtype = lookup_dtype(dtype) - if dtype != self.dtype: - Ax = Ax.astype(dtype.np_type) + Ax = normalize_values(self, Ax, dtype) + if sort: + # indices may not be sorted within each Ai (i.e., row), so sort them + num = self._ncols if fmt is _CSR_FORMAT else self._nrows + if Ap[-1] == self._ncols * self._nrows: + # Fully dense matrix + indices = np.argsort(Ai + np.repeat(Ap[:-1], num)) + else: + offsets = np.repeat( + np.arange(0, (Ap.size - 1) * num, num, dtype=np.uint64), + np.diff(Ap.astype(np.int64)), + ) + indices = np.argsort(Ai + offsets) + Ai = Ai[indices] + Ax = Ax[indices] return Ap, Ai, Ax - def to_csr(self, dtype=None): + def to_csr(self, dtype=None, *, sort=True): """Returns three arrays of the standard CSR representation: indptr, col_indices, values. In CSR, the column indices for row i are stored in ``col_indices[indptr[i]:indptr[i+1]]`` @@ -1500,9 +1560,14 @@ def to_csr(self, dtype=None): Matrix.ss.export io.to_scipy_sparse """ - return self._to_csx(_CSR_FORMAT, dtype) + if backend == "suitesparse": + info = self.ss.export("csr", sort=sort) + cols = info["col_indices"] + values = normalize_values(self, info["values"], dtype, (cols.size,), info["is_iso"]) + return info["indptr"], cols, values + return self._to_csx(_CSR_FORMAT, dtype, sort) - def to_csc(self, dtype=None): + def to_csc(self, dtype=None, *, sort=True): """Returns three arrays of the standard CSC representation: indptr, row_indices, values. In CSC, the row indices for column i are stored in ``row_indices[indptr[i]:indptr[i+1]]`` @@ -1526,9 +1591,14 @@ def to_csc(self, dtype=None): Matrix.ss.export io.to_scipy_sparse """ - return self._to_csx(_CSC_FORMAT, dtype) + if backend == "suitesparse": + info = self.ss.export("csc", sort=sort) + rows = info["row_indices"] + values = normalize_values(self, info["values"], dtype, (rows.size,), info["is_iso"]) + return info["indptr"], rows, values + return self._to_csx(_CSC_FORMAT, dtype, sort) - def to_dcsr(self, dtype=None): + def to_dcsr(self, dtype=None, *, sort=True): """Returns four arrays of DCSR representation: compressed_rows, indptr, col_indices, values. In DCSR, we store the index of each non-empty row in ``compressed_rows``. @@ -1556,26 +1626,21 @@ def to_dcsr(self, dtype=None): io.to_scipy_sparse """ if backend == "suitesparse": - info = self.ss.export("hypercsr", sort=True) + info = self.ss.export("hypercsr", sort=sort) compressed_rows = info["rows"] indptr = info["indptr"] cols = info["col_indices"] - values = info["values"] - if info["is_iso"]: - values = np.broadcast_to(values, cols.size) + values = normalize_values(self, info["values"], dtype, (cols.size,), info["is_iso"]) else: - rows, cols, values = self.to_coo() # sorted by row then col + rows, cols, values = self.to_coo(sort=True) # sorted by row then col compressed_rows, indices = np.unique(rows, return_index=True) indptr = np.empty(indices.size + 1, np.uint64) indptr[:-1] = indices indptr[-1] = rows.size - if dtype is not None: - dtype = lookup_dtype(dtype) - if dtype != self.dtype: - values = values.astype(dtype.np_type) + values = normalize_values(self, values, dtype) return compressed_rows, indptr, cols, values - def to_dcsc(self, dtype=None): + def to_dcsc(self, dtype=None, *, sort=True): """Returns four arrays of DCSC representation: compressed_cols, indptr, row_indices, values. In DCSC, we store the index of each non-empty column in ``compressed_cols``. @@ -1603,13 +1668,11 @@ def to_dcsc(self, dtype=None): io.to_scipy_sparse """ if backend == "suitesparse": - info = self.ss.export("hypercsc", sort=True) + info = self.ss.export("hypercsc", sort=sort) compressed_cols = info["cols"] indptr = info["indptr"] rows = info["row_indices"] - values = info["values"] - if info["is_iso"]: - values = np.broadcast_to(values, rows.size) + values = normalize_values(self, info["values"], dtype, (rows.size,), info["is_iso"]) else: rows, cols, values = self.to_coo(sort=False) ind = np.lexsort((rows, cols)) # sort by columns, then rows @@ -1620,10 +1683,7 @@ def to_dcsc(self, dtype=None): indptr = np.empty(indices.size + 1, np.uint64) indptr[:-1] = indices indptr[-1] = cols.size - if dtype is not None: - dtype = lookup_dtype(dtype) - if dtype != self.dtype: - values = values.astype(dtype.np_type) + values = normalize_values(self, values, dtype) return compressed_cols, indptr, rows, values def to_dicts(self, order="rowwise"): @@ -2281,8 +2341,8 @@ def select(self, op, thunk=None): None, [self, mask, _select_mask, (self, mask)], # [*expr_args, func, args] expr_repr="{0.name}.select({1.name})", - nrows=self.nrows, - ncols=self.ncols, + nrows=self._nrows, + ncols=self._ncols, dtype=self.dtype, ) @@ -2826,10 +2886,7 @@ def _prep_for_assign(self, resolved_indexes, value, mask, is_submask, replace, o except (TypeError, ValueError): if rowsize is not None or colsize is not None: try: - # Do a copy for suitesparse so we can give ownership to suitesparse - values, dtype = values_to_numpy_buffer( - value, dtype, copy=backend == "suitesparse" - ) + values, dtype = values_to_numpy_buffer(value, dtype) except Exception: pass else: @@ -2843,15 +2900,7 @@ def _prep_for_assign(self, resolved_indexes, value, mask, is_submask, replace, o # C[i, J](m) << [1, 2, 3] expected_shape = (rowsize or colsize,) try: - if backend == "suitesparse": - vals = Vector.ss.import_full( - values, dtype=dtype, take_ownership=True - ) - else: - # TODO: GraphBLAS needs a way to import or assign dense - vals = Vector.from_coo( - np.arange(shape[0]), values, dtype, size=shape[0] - ) + vals = Vector.from_dense(values, dtype) except Exception: # pragma: no cover (safety) vals = None else: @@ -2863,12 +2912,7 @@ def _prep_for_assign(self, resolved_indexes, value, mask, is_submask, replace, o # C[I, J](M) << [[1, 2, 3], [4, 5, 6]] expected_shape = (rowsize, colsize) try: - if backend == "suitesparse": - vals = Matrix.ss.import_fullr( - values, dtype=dtype, take_ownership=True - ) - else: - vals = Matrix._from_dense(values, dtype) + vals = Matrix.from_dense(values, dtype) except Exception: vals = None else: @@ -3226,6 +3270,7 @@ def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts): to_csr = wrapdoc(Matrix.to_csr)(property(automethods.to_csr)) to_dcsc = wrapdoc(Matrix.to_dcsc)(property(automethods.to_dcsc)) to_dcsr = wrapdoc(Matrix.to_dcsr)(property(automethods.to_dcsr)) + to_dense = wrapdoc(Matrix.to_dense)(property(automethods.to_dense)) to_dicts = wrapdoc(Matrix.to_dicts)(property(automethods.to_dicts)) to_edgelist = wrapdoc(Matrix.to_edgelist)(property(automethods.to_edgelist)) to_values = wrapdoc(Matrix.to_values)(property(automethods.to_values)) @@ -3325,6 +3370,7 @@ def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts): to_csr = wrapdoc(Matrix.to_csr)(property(automethods.to_csr)) to_dcsc = wrapdoc(Matrix.to_dcsc)(property(automethods.to_dcsc)) to_dcsr = wrapdoc(Matrix.to_dcsr)(property(automethods.to_dcsr)) + to_dense = wrapdoc(Matrix.to_dense)(property(automethods.to_dense)) to_dicts = wrapdoc(Matrix.to_dicts)(property(automethods.to_dicts)) to_edgelist = wrapdoc(Matrix.to_edgelist)(property(automethods.to_edgelist)) to_values = wrapdoc(Matrix.to_values)(property(automethods.to_values)) @@ -3426,20 +3472,25 @@ def _name_html(self): return f"{self._matrix._name_html}.T" @wrapdoc(Matrix.to_csr) - def to_csr(self, dtype=None): - return self._matrix.to_csc(dtype) + def to_csr(self, dtype=None, *, sort=True): + return self._matrix.to_csc(dtype, sort=sort) @wrapdoc(Matrix.to_csc) - def to_csc(self, dtype=None): - return self._matrix.to_csr(dtype) + def to_csc(self, dtype=None, *, sort=True): + return self._matrix.to_csr(dtype, sort=sort) @wrapdoc(Matrix.to_dcsr) - def to_dcsr(self, dtype=None): - return self._matrix.to_dcsc(dtype) + def to_dcsr(self, dtype=None, *, sort=True): + return self._matrix.to_dcsc(dtype, sort=sort) @wrapdoc(Matrix.to_dcsc) - def to_dcsc(self, dtype=None): - return self._matrix.to_dcsr(dtype) + def to_dcsc(self, dtype=None, *, sort=True): + return self._matrix.to_dcsr(dtype, sort=sort) + + @wrapdoc(Matrix.to_dense) + def to_dense(self, fill_value=None, dtype=None): + rv = self._matrix.to_dense(fill_value, dtype) + return rv.swapaxes(0, 1) @wrapdoc(Matrix.to_dicts) def to_dicts(self, order="rowwise"): diff --git a/graphblas/core/utils.py b/graphblas/core/utils.py index b09f71713..b97ff5136 100644 --- a/graphblas/core/utils.py +++ b/graphblas/core/utils.py @@ -106,6 +106,22 @@ def values_to_numpy_buffer( return array, dtype +def normalize_values(self, values, dtype, shape=None, is_iso=False): + """Expand and/or update dtype of values array.""" + if dtype is not None: + dtype = lookup_dtype(dtype) + if dtype != self.dtype: + values = values.astype(dtype.np_type) # copies + else: + dtype = self.dtype + if is_iso: + if dtype.np_type.subdtype is None: + values = np.broadcast_to(values, shape) + else: + values = np.broadcast_to(values, shape + values.shape) + return values + + def get_shape(nrows, ncols, dtype=None, **arrays): if nrows is None or ncols is None: # Get nrows and ncols from the first 2d array diff --git a/graphblas/core/vector.py b/graphblas/core/vector.py index a39a92f40..139714aed 100644 --- a/graphblas/core/vector.py +++ b/graphblas/core/vector.py @@ -26,6 +26,7 @@ _Pointer, class_property, ints_to_numpy_buffer, + normalize_values, output_type, values_to_numpy_buffer, wrapdoc, @@ -478,11 +479,7 @@ def to_coo(self, dtype=None, *, indices=True, values=True, sort=True): f"GrB_Vector_extractTuples_{dtype_name}", [c_indices, c_values, _Pointer(scalar), self] ) if values: - c_values = c_values.array - if dtype is not None: - dtype = lookup_dtype(dtype) - if dtype != self.dtype: - c_values = c_values.astype(dtype.np_type) # copies + c_values = normalize_values(self, c_values.array, dtype) if sort and backend != "suitesparse": c_indices = c_indices.array ind = np.argsort(c_indices) @@ -795,6 +792,66 @@ def from_pairs(cls, pairs, dtype=None, *, size=None, dup_op=None, name=None): ) return cls.from_coo(indices, values, dtype, size=size, dup_op=dup_op, name=name) + @classmethod + def from_dense(cls, values, dtype=None, *, size=None, name=None): + values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=1) + if values.ndim == 0: + if size is None: + raise TypeError("size must be given when creating a dense Vector from a scalar") + if backend == "suitesparse": + return Vector.ss.import_full(values, dtype=dtype, size=size, is_iso=True, name=name) + rv = Vector(dtype, size=size, name=name) + rv << values + return rv + if values.ndim == 1 and dtype.np_type.subdtype is not None: + raise ValueError("A >1d array is required to create a dense Vector with subdtype") + if values.ndim > 1 and dtype.np_type.subdtype is None: + raise ValueError(f"values array must be 1d to create dense Vector with dtype {dtype}") + if backend == "suitesparse": + rv = Vector.ss.import_full(values, dtype=dtype, name=name) + else: + # TODO: GraphBLAS needs a better way to import or assign dense + rv = Vector.from_coo( + np.arange(values.shape[0], dtype=np.uint64), + values, + dtype, + size=values.shape[0], + name=name, + ) + if size is not None and size != rv._size: + rv.resize(size) + return rv + + def to_dense(self, fill_value=None, dtype=None): + if fill_value is None or self._nvals == self._size: + if self._nvals != self._size: + raise TypeError( + "fill_value must be given in `to_dense` when there are missing values" + ) + if backend == "suitesparse": + info = self.ss.export("full") + return normalize_values(self, info["values"], dtype, self._size, info["is_iso"]) + return self.to_coo(dtype, indices=False)[1] + + if dtype is None and not self.dtype._is_udt: + # dtype of fill_value can upcast the dtype + if type(fill_value) is not Scalar: + try: + fill_value = Scalar.from_value(fill_value, is_cscalar=None, name="") + except TypeError: + fill_value = self._expect_type( + fill_value, + Scalar, + within="to_dense", + keyword_name="fill_value", + extra_message="Literal scalars also accepted.", + ) + dtype = unify(fill_value.dtype, self.dtype, is_left_scalar=True) + + rv = self.dup(dtype, name="to_dense") + rv(~rv.S) << fill_value + return rv.to_dense() + @property def _carg(self): return self.gb_obj[0] @@ -1314,7 +1371,7 @@ def select(self, op, thunk=None): None, [self, mask, _select_mask, (self, mask)], # [*expr_args, func, args] expr_repr="{0.name}.select({1.name})", - size=self.size, + size=self._size, dtype=self.dtype, ) @@ -1649,24 +1706,13 @@ def _prep_for_assign(self, resolved_indexes, value, mask, is_submask, replace, o # v(m)[I] << [1, 2, 3] # v[I](m) << [1, 2, 3] try: - # Do a copy for suitesparse so we can give ownership to suitesparse - values, dtype = values_to_numpy_buffer( - value, dtype, copy=backend == "suitesparse" - ) + values, dtype = values_to_numpy_buffer(value, dtype) except Exception: extra_message = "Literal scalars and lists also accepted." else: shape = values.shape try: - if backend == "suitesparse": - vals = Vector.ss.import_full( - values, dtype=dtype, take_ownership=True - ) - else: - # TODO: GraphBLAS needs a way to import or assign dense - vals = Vector.from_coo( - np.arange(shape[0]), values, dtype, size=shape[0] - ) + vals = Vector.from_dense(values, dtype) except Exception: # pragma: no cover (safety) vals = None else: @@ -1897,7 +1943,7 @@ def shape(self): @wrapdoc(Vector.dup) def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts): if clear: - return Vector(self.dtype if dtype is None else dtype, self.size, name=name) + return Vector(self.dtype if dtype is None else dtype, self._size, name=name) return self._new(dtype, mask, name) # Begin auto-generated code: Vector @@ -1938,6 +1984,7 @@ def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts): else: ss = Vector.__dict__["ss"] # raise if used to_coo = wrapdoc(Vector.to_coo)(property(automethods.to_coo)) + to_dense = wrapdoc(Vector.to_dense)(property(automethods.to_dense)) to_dict = wrapdoc(Vector.to_dict)(property(automethods.to_dict)) to_values = wrapdoc(Vector.to_values)(property(automethods.to_values)) vxm = wrapdoc(Vector.vxm)(property(automethods.vxm)) @@ -2022,6 +2069,7 @@ def dup(self, dtype=None, *, clear=False, mask=None, name=None, **opts): else: ss = Vector.__dict__["ss"] # raise if used to_coo = wrapdoc(Vector.to_coo)(property(automethods.to_coo)) + to_dense = wrapdoc(Vector.to_dense)(property(automethods.to_dense)) to_dict = wrapdoc(Vector.to_dict)(property(automethods.to_dict)) to_values = wrapdoc(Vector.to_values)(property(automethods.to_values)) vxm = wrapdoc(Vector.vxm)(property(automethods.vxm)) diff --git a/graphblas/io.py b/graphblas/io.py index 5111371ea..97fa3ed34 100644 --- a/graphblas/io.py +++ b/graphblas/io.py @@ -4,6 +4,7 @@ from . import backend as _backend from .core.matrix import Matrix as _Matrix +from .core.utils import normalize_values as _normalize_values from .core.utils import output_type as _output_type from .core.vector import Vector as _Vector from .dtypes import lookup_dtype as _lookup_dtype @@ -391,13 +392,10 @@ def to_scipy_sparse(A, format="csr"): info["col_indices"] = info["row_indices"] else: info = A.ss.export(format, sort=True) - if info["is_iso"]: - info["values"] = _np.broadcast_to(info["values"], A._nvals) + values = _normalize_values(A, info["values"], None, (A._nvals,), info["is_iso"]) if format == "csr": - return ss.csr_array( - (info["values"], info["col_indices"], info["indptr"]), shape=A.shape - ) - return ss.csc_array((info["values"], info["row_indices"], info["indptr"]), shape=A.shape) + return ss.csr_array((values, info["col_indices"], info["indptr"]), shape=A.shape) + return ss.csc_array((values, info["row_indices"], info["indptr"]), shape=A.shape) elif format == "csr": indptr, cols, vals = A.to_csr() return ss.csr_array((vals, cols, indptr), shape=A.shape) @@ -603,11 +601,7 @@ def mmread(source, *, dup_op=None, name=None): return _Matrix.from_coo( array.row, array.col, array.data, nrows=nrows, ncols=ncols, dup_op=dup_op, name=name ) - if _backend == "suitesparse": - return _Matrix.ss.import_fullr(values=array, take_ownership=True, name=name) - rv = _Matrix(array.dtype, *array.shape, name=name) - rv[...] = array - return rv + return _Matrix.from_dense(array, name=name) def mmwrite(target, matrix, *, comment="", field=None, precision=None, symmetry=None): diff --git a/graphblas/tests/test_matrix.py b/graphblas/tests/test_matrix.py index 351e44e64..971c4080d 100644 --- a/graphblas/tests/test_matrix.py +++ b/graphblas/tests/test_matrix.py @@ -2881,7 +2881,6 @@ def test_expr_is_like_matrix(A): "_deserialize", "_extract_element", "_from_csx", - "_from_dense", "_from_obj", "_name_counter", "_parent", @@ -2896,6 +2895,7 @@ def test_expr_is_like_matrix(A): "from_csr", "from_dcsc", "from_dcsr", + "from_dense", "from_dicts", "from_edgelist", "from_values", @@ -2944,7 +2944,6 @@ def test_index_expr_is_like_matrix(A): "_deserialize", "_extract_element", "_from_csx", - "_from_dense", "_from_obj", "_name_counter", "_parent", @@ -2959,6 +2958,7 @@ def test_index_expr_is_like_matrix(A): "from_csr", "from_dcsc", "from_dcsr", + "from_dense", "from_dicts", "from_edgelist", "from_values", @@ -4113,6 +4113,40 @@ def test_to_from_edgelist(A): Matrix.from_edgelist([[0, 1, 10], [2, 3, 20]], values=0) +def test_to_dense_from_dense(): + A = Matrix.from_dense(1, nrows=2, ncols=3) + B = Matrix(int, nrows=2, ncols=3) + B << 1 + assert A.isequal(B, check_dtype=True) + assert_array_equal(A.to_dense(dtype=float), [[1.0, 1, 1], [1, 1, 1]]) + A = Matrix.from_dense(np.arange(6).reshape(2, 3)) + B = Matrix.from_coo([0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2], np.arange(6)) + assert A.isequal(B, check_dtype=True) + assert_array_equal(A.to_dense(dtype=int), [[0, 1, 2], [3, 4, 5]]) + assert_array_equal(A.T.to_dense(dtype=int), [[0, 3], [1, 4], [2, 5]]) + del A[0, 0] + assert_array_equal(A.to_dense(6.5), [[6.5, 1, 2], [3, 4, 5]]) + assert_array_equal(A.to_dense(6.5, int), [[6, 1, 2], [3, 4, 5]]) + assert_array_equal(A.to_dense(Scalar.from_value(6.5)), [[6.5, 1, 2], [3, 4, 5]]) + + A = Matrix.from_dense(np.arange(6).reshape(2, 3), nrows=3, ncols=4) + B.resize(3, 4) + assert A.isequal(B, check_dtype=True) + assert_array_equal(A.to_dense(10), [[0, 1, 2, 10], [3, 4, 5, 10], [10, 10, 10, 10]]) + with pytest.raises(TypeError, match="must be given"): + Matrix.from_dense(1, nrows=2) + with pytest.raises(ValueError, match="is required to create a dense"): + Matrix.from_dense([1, 2, 3]) + with pytest.raises(TypeError, match="fill_value must be given"): + A.to_dense() + with pytest.raises(TypeError, match="Bad type for keyword argument `fill_value"): + A.to_dense(object()) + with pytest.raises(ValueError, match="must be 2d"): + Matrix.from_dense(np.arange(24).reshape(2, 3, 4), int) + with pytest.raises(ValueError, match=">2d array"): + Matrix.from_dense(np.arange(6).reshape(2, 3), "INT64[2]") + + @pytest.mark.skipif("not suitesparse") def test_ss_sort(A): A[3, 0] = 9 @@ -4263,6 +4297,10 @@ def test_subarray_dtypes(): Full2 = Matrix("INT64[4]", nrows=2, ncols=3) Full2[:, :] = b2 assert Full1.isequal(Full2, check_dtype=True) + Full2 = Matrix.from_dense(b2) + assert Full1.isequal(Full2, check_dtype=True) + Full2 = Matrix.from_dense(Full1.to_dense()) + assert Full2.isequal(Full2, check_dtype=True) if suitesparse: Full2 = Matrix.ss.import_fullr(b2) assert Full1.isequal(Full2, check_dtype=True) diff --git a/graphblas/tests/test_vector.py b/graphblas/tests/test_vector.py index 90d936a6c..066aa52b3 100644 --- a/graphblas/tests/test_vector.py +++ b/graphblas/tests/test_vector.py @@ -1616,6 +1616,7 @@ def test_expr_is_like_vector(v): "build", "clear", "from_coo", + "from_dense", "from_dict", "from_pairs", "from_values", @@ -1663,6 +1664,7 @@ def test_index_expr_is_like_vector(v): "build", "clear", "from_coo", + "from_dense", "from_dict", "from_pairs", "from_values", @@ -2486,6 +2488,37 @@ def test_from_pairs(): Vector.from_pairs([[1, 2, 3], [4, 5, 6]]) +def test_to_dense_from_dense(): + v = Vector.from_dense(1, size=3) + w = Vector.from_coo([0, 1, 2], 1) + assert v.isequal(w, check_dtype=True) + assert_array_equal(v.to_dense(), [1, 1, 1]) + v = Vector.from_dense([1, 2, 3]) + w = Vector.from_coo([0, 1, 2], [1, 2, 3]) + assert v.isequal(w, check_dtype=True) + assert_array_equal(v.to_dense(dtype=int), [1, 2, 3]) + v = Vector.from_dense([1, 2, 3], size=4) + w = Vector.from_coo([0, 1, 2], [1, 2, 3], size=4) + assert v.isequal(w, check_dtype=True) + assert_array_equal(v.to_dense(4.5, dtype=float), [1, 2, 3, 4.5]) + assert_array_equal(v.to_dense(4.5), [1, 2, 3, 4.5]) # Scalar type can upcast + assert_array_equal(v.to_dense(Scalar.from_value(4)), [1, 2, 3, 4]) + with pytest.raises(TypeError, match="fill_value must be given"): + v.to_dense() + with pytest.raises(TypeError, match="Bad type for keyword argument `fill_value"): + v.to_dense(object()) + v = Vector.from_dense([1, 2], size=2) + w = Vector.from_coo([0, 1], [1, 2], size=2) + assert v.isequal(w, check_dtype=True) + assert_array_equal(v.to_dense(dtype=float), [1.0, 2]) + with pytest.raises(TypeError, match="size must be given"): + Vector.from_dense(1) + with pytest.raises(ValueError, match="must be 1d"): + Vector.from_dense(np.arange(6).reshape(2, 3), int) + with pytest.raises(ValueError, match=">1d array"): + Vector.from_dense(np.arange(6), "INT64[2]") + + @pytest.mark.skipif("not suitesparse") def test_ss_sort(v): # For equal values, indices are guaranteed to be sorted @@ -2535,6 +2568,15 @@ def test_subarray_dtypes(): w = Vector.from_pairs([[1, [0, 1, 2, 3]], [3, [4, 5, 6, 7]], [5, [8, 9, 10, 11]]]) assert v.isequal(w, check_dtype=True) + filled1 = Vector.from_dense(v.to_dense(0)) + filled2 = v.dup() + filled2[[0, 2, 4]] = 0 + assert filled1.isequal(filled2, check_dtype=True) + filled1 = Vector.from_dense(v.to_dense([6, 5, 4, 3])) + filled2 = v.dup() + filled2[[0, 2, 4]] = [6, 5, 4, 3] + assert filled1.isequal(filled2, check_dtype=True) + full1 = Vector.from_coo([0, 1, 2], a) full2 = Vector("INT64[4]", size=3) full2[0] = [0, 1, 2, 3] @@ -2544,6 +2586,10 @@ def test_subarray_dtypes(): full2 = Vector("INT64[4]", size=3) full2[:] = a assert full1.isequal(full2, check_dtype=True) + full2 = Vector.from_dense(a) + assert full1.isequal(full2, check_dtype=True) + full2 = Vector.from_dense(full1.to_dense()) + assert full1.isequal(full2, check_dtype=True) if suitesparse: w = Vector.ss.import_sparse(indices=[1, 3, 5], values=a, size=6) assert v.isequal(w, check_dtype=True) From 3483ff5ca5b4738cb9c073c083d7d54523f0dd65 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sat, 4 Feb 2023 15:27:56 -0600 Subject: [PATCH 02/14] oops; use `cls`, not `self` --- graphblas/core/matrix.py | 12 ++++++------ graphblas/core/utils.py | 5 ++++- graphblas/core/vector.py | 8 ++++---- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/graphblas/core/matrix.py b/graphblas/core/matrix.py index 30f133916..e193e8001 100644 --- a/graphblas/core/matrix.py +++ b/graphblas/core/matrix.py @@ -1046,7 +1046,7 @@ def _from_csx(cls, fmt, indptr, indices, values, dtype, num, check_num, name): ) values = np.broadcast_to(values, indices.size) new_mat = ffi_new("GrB_Matrix*") - rv = Matrix._from_obj(new_mat, dtype, nrows, ncols, name=name) + rv = cls._from_obj(new_mat, dtype, nrows, ncols, name=name) if dtype._is_udt: dtype_name = "UDT" else: @@ -1332,7 +1332,7 @@ def from_dcsc( return cls.from_coo(row_indices, cols, values, dtype, nrows=nrows, ncols=ncols, name=name) @classmethod - def from_dense(self, values, dtype=None, *, nrows=None, ncols=None, name=None): + def from_dense(cls, values, dtype=None, *, nrows=None, ncols=None, name=None): values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=2) if values.ndim == 0: if nrows is None or ncols is None: @@ -1341,10 +1341,10 @@ def from_dense(self, values, dtype=None, *, nrows=None, ncols=None, name=None): ) if backend == "suitesparse": # Should we try to handle F-contiguous data w/o a copy? - return Matrix.ss.import_fullr( + return cls.ss.import_fullr( values, dtype=dtype, nrows=nrows, ncols=ncols, is_iso=True, name=name ) - rv = Matrix(dtype, nrows=nrows, ncols=ncols, name=name) + rv = cls(dtype, nrows=nrows, ncols=ncols, name=name) rv << values return rv if values.ndim == 1: @@ -1354,7 +1354,7 @@ def from_dense(self, values, dtype=None, *, nrows=None, ncols=None, name=None): if values.ndim > 2 and dtype.np_type.subdtype is None: raise ValueError(f"values array must be 2d to create dense Matrix with dtype {dtype}") if backend == "suitesparse": - rv = Matrix.ss.import_fullr(values, dtype=dtype, name=name) + rv = cls.ss.import_fullr(values, dtype=dtype, name=name) nrows2, ncols2 = rv.shape else: nrows2, ncols2, *rest = values.shape @@ -1364,7 +1364,7 @@ def from_dense(self, values, dtype=None, *, nrows=None, ncols=None, name=None): values = values.reshape(nrows2 * ncols2, *rest) else: values = values.ravel() - rv = Matrix.from_csr( + rv = cls.from_csr( indptr, cols, values, diff --git a/graphblas/core/utils.py b/graphblas/core/utils.py index b97ff5136..83fa15cd5 100644 --- a/graphblas/core/utils.py +++ b/graphblas/core/utils.py @@ -383,4 +383,7 @@ def _autogenerate_code( f.write(new_text) import subprocess - subprocess.check_call(["black", filename]) + try: + subprocess.check_call(["black", filename]) + except FileNotFoundError: # pragma: no cover (safety) + pass # It's okay if `black` isn't installed; pre-commit hooks will do linting diff --git a/graphblas/core/vector.py b/graphblas/core/vector.py index 139714aed..013d2bd9e 100644 --- a/graphblas/core/vector.py +++ b/graphblas/core/vector.py @@ -799,8 +799,8 @@ def from_dense(cls, values, dtype=None, *, size=None, name=None): if size is None: raise TypeError("size must be given when creating a dense Vector from a scalar") if backend == "suitesparse": - return Vector.ss.import_full(values, dtype=dtype, size=size, is_iso=True, name=name) - rv = Vector(dtype, size=size, name=name) + return cls.ss.import_full(values, dtype=dtype, size=size, is_iso=True, name=name) + rv = cls(dtype, size=size, name=name) rv << values return rv if values.ndim == 1 and dtype.np_type.subdtype is not None: @@ -808,10 +808,10 @@ def from_dense(cls, values, dtype=None, *, size=None, name=None): if values.ndim > 1 and dtype.np_type.subdtype is None: raise ValueError(f"values array must be 1d to create dense Vector with dtype {dtype}") if backend == "suitesparse": - rv = Vector.ss.import_full(values, dtype=dtype, name=name) + rv = cls.ss.import_full(values, dtype=dtype, name=name) else: # TODO: GraphBLAS needs a better way to import or assign dense - rv = Vector.from_coo( + rv = cls.from_coo( np.arange(values.shape[0], dtype=np.uint64), values, dtype, From c42e9abf37fc91997f0573eaf738aeb1308bfd33 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sun, 5 Feb 2023 11:55:58 -0600 Subject: [PATCH 03/14] Add documentation (first draft) --- graphblas/core/matrix.py | 54 +++++++++++++++++++++++++++++++++++++ graphblas/core/ss/matrix.py | 2 +- graphblas/core/ss/vector.py | 2 +- graphblas/core/vector.py | 52 +++++++++++++++++++++++++++++++++++ graphblas/io.py | 11 ++++++++ 5 files changed, 119 insertions(+), 2 deletions(-) diff --git a/graphblas/core/matrix.py b/graphblas/core/matrix.py index e193e8001..9dda3171d 100644 --- a/graphblas/core/matrix.py +++ b/graphblas/core/matrix.py @@ -497,6 +497,7 @@ def to_coo(self, dtype=None, *, rows=True, columns=True, values=True, sort=True) See Also -------- + to_dense to_edgelist from_coo @@ -564,6 +565,7 @@ def to_edgelist(self, dtype=None, *, values=True, sort=True): See Also -------- to_coo + to_dense from_edgelist Returns @@ -860,6 +862,7 @@ def from_coo( See Also -------- + from_dense from_edgelist to_coo @@ -939,6 +942,7 @@ def from_edgelist( See Also -------- from_coo + from_dense to_edgelist Returns @@ -1333,6 +1337,35 @@ def from_dcsc( @classmethod def from_dense(cls, values, dtype=None, *, nrows=None, ncols=None, name=None): + """Create a fully dense Matrix from a NumPy array or scalar. + + Parameters + ---------- + values : list or np.ndarray or scalar + List of values. If a scalar is provided, all values will be set to this single value. + dtype : + Data type of the Matrix. If not provided, the values will be inspected + to choose an appropriate dtype. + nrows : int, optional + Number of rows of the Matrix. By default, nrows is determined from + the shape of the input array. Nrows is required for scalar inputs. + ncols : int, optional + Number of cols of the Matrix. By default, ncols is determined from + the shape of the input array. Ncols is required for scalar inputs. + name : str, optional + Name to give the Matrix. + + See Also + -------- + from_coo + from_edgelist + to_dense + io.from_numpy + + Returns + ------- + Matrix + """ values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=2) if values.ndim == 0: if nrows is None or ncols is None: @@ -1377,6 +1410,27 @@ def from_dense(cls, values, dtype=None, *, nrows=None, ncols=None, name=None): return rv def to_dense(self, fill_value=None, dtype=None): + """Convert Matrix to NumPy array of the same shape with missing values filled. + + Parameters + ---------- + fill_value : scalar, optional + Value used to fill missing values. This is required if there are missing values. + dtype : + Requested dtype for the output values array. + + See Also + -------- + to_coo + to_dicts + to_edgelist + from_dense + io.to_numpy + + Returns + ------- + np.ndarray + """ max_nvals = self._nrows * self._ncols if fill_value is None or self._nvals == max_nvals: if self._nvals != max_nvals: diff --git a/graphblas/core/ss/matrix.py b/graphblas/core/ss/matrix.py index 6c4809a83..7550f9a3b 100644 --- a/graphblas/core/ss/matrix.py +++ b/graphblas/core/ss/matrix.py @@ -559,7 +559,7 @@ def export(self, format=None, *, sort=False, give_ownership=False, raw=False, ** "coo", "coor", or "cooc". give_ownership : bool, default False Perform a zero-copy data transfer to Python if possible. This gives ownership of - the underlying memory buffers to Numpy. + the underlying memory buffers to NumPy. ** If True, this nullifies the current object, which should no longer be used! ** raw : bool, default False If True, always return 1d arrays the same size as returned by SuiteSparse. diff --git a/graphblas/core/ss/vector.py b/graphblas/core/ss/vector.py index 9635e8fb9..d13d78ac3 100644 --- a/graphblas/core/ss/vector.py +++ b/graphblas/core/ss/vector.py @@ -425,7 +425,7 @@ def export(self, format=None, *, sort=False, give_ownership=False, raw=False, ** Whether to sort indices if the format is "sparse" give_ownership : bool, default False Perform a zero-copy data transfer to Python if possible. This gives ownership of - the underlying memory buffers to Numpy. + the underlying memory buffers to NumPy. ** If True, this nullifies the current object, which should no longer be used! ** raw : bool, default False If True, always return array the same size as returned by SuiteSparse. diff --git a/graphblas/core/vector.py b/graphblas/core/vector.py index 013d2bd9e..39e79f53a 100644 --- a/graphblas/core/vector.py +++ b/graphblas/core/vector.py @@ -453,6 +453,7 @@ def to_coo(self, dtype=None, *, indices=True, values=True, sort=True): See Also -------- + to_dense to_dict from_coo @@ -713,6 +714,7 @@ def from_coo(cls, indices, values=1.0, dtype=None, *, size=None, dup_op=None, na See Also -------- + from_dense from_dict from_pairs to_coo @@ -770,6 +772,7 @@ def from_pairs(cls, pairs, dtype=None, *, size=None, dup_op=None, name=None): See Also -------- from_coo + from_dense from_dict to_coo @@ -794,6 +797,33 @@ def from_pairs(cls, pairs, dtype=None, *, size=None, dup_op=None, name=None): @classmethod def from_dense(cls, values, dtype=None, *, size=None, name=None): + """Create a fully dense Vector from a NumPy array or scalar. + + Parameters + ---------- + values : list or np.ndarray or scalar + List of values. If a scalar is provided, all values will be set to this single value. + dtype : + Data type of the Vector. If not provided, the values will be inspected + to choose an appropriate dtype. + size : int, optional + Size of the Vector. By default, the size of the Vector is the size of the input array. + Size is required for scalar inputs. + name : str, optional + Name to give the Vector. + + See Also + -------- + from_coo + from_dict + from_pairs + to_dense + io.from_numpy + + Returns + ------- + Vector + """ values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=1) if values.ndim == 0: if size is None: @@ -823,6 +853,26 @@ def from_dense(cls, values, dtype=None, *, size=None, name=None): return rv def to_dense(self, fill_value=None, dtype=None): + """Convert Vector to NumPy array of the same shape with missing values filled. + + Parameters + ---------- + fill_value : scalar, optional + Value used to fill missing values. This is required if there are missing values. + dtype : + Requested dtype for the output values array. + + See Also + -------- + to_coo + to_dict + from_dense + io.to_numpy + + Returns + ------- + np.ndarray + """ if fill_value is None or self._nvals == self._size: if self._nvals != self._size: raise TypeError( @@ -1825,6 +1875,7 @@ def from_dict(cls, d, dtype=None, *, size=None, name=None): See Also -------- from_coo + from_dense from_pairs to_dict @@ -1852,6 +1903,7 @@ def to_dict(self): See Also -------- to_coo + to_dense from_dict Returns diff --git a/graphblas/io.py b/graphblas/io.py index 97fa3ed34..ab4b599e9 100644 --- a/graphblas/io.py +++ b/graphblas/io.py @@ -75,6 +75,11 @@ def from_numpy(m): m : np.ndarray Input array + See Also + -------- + Matrix.from_dense + Vector.from_dense + Returns ------- Vector or Matrix @@ -316,6 +321,12 @@ def to_numpy(m): m : Vector or Matrix GraphBLAS Vector or Matrix + See Also + -------- + to_scipy_sparse + Matrix.to_dense + Vector.to_dense + Returns ------- np.ndarray From d8902a1f048a49dab3bfff1f7315865c1b2f220e Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 6 Feb 2023 22:21:41 -0600 Subject: [PATCH 04/14] bump ruff --- .pre-commit-config.yaml | 2 +- pyproject.toml | 2 ++ scripts/check_versions.sh | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4eb2db4d0..4a8a66fb6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -71,7 +71,7 @@ repos: additional_dependencies: [tomli] files: ^(graphblas|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.241 + rev: v0.0.242 hooks: - id: ruff - repo: https://github.com/sphinx-contrib/sphinx-lint diff --git a/pyproject.toml b/pyproject.toml index d23d2079a..a3f7e57ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -244,6 +244,8 @@ ignore = [ # Intentionally ignored "COM812", # Trailing comma missing "D203", # 1 blank line required before class docstring (Note: conflicts with D211, which is preferred) + "PLR0911", # Too many return statements + "PLR0912", # Too many branches "PLR0913", # Too many arguments to function call "PLR0915", # Too many statements "PLR2004", # Magic number used in comparison, consider replacing magic with a constant variable diff --git a/scripts/check_versions.sh b/scripts/check_versions.sh index f1fb8246e..b997d4d77 100755 --- a/scripts/check_versions.sh +++ b/scripts/check_versions.sh @@ -3,7 +3,7 @@ # Use, adjust, copy/paste, etc. as necessary to answer your questions. # This may be helpful when updating dependency versions in CI. # Tip: add `--json` for more information. -conda search 'numpy[channel=conda-forge]>=1.24.1' +conda search 'numpy[channel=conda-forge]>=1.24.2' conda search 'pandas[channel=conda-forge]>=1.5.3' conda search 'scipy[channel=conda-forge]>=1.10.0' conda search 'networkx[channel=conda-forge]>=3.0' From e4fa9130dcee859d8874f649a124323aa77c8983 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 6 Feb 2023 23:03:51 -0600 Subject: [PATCH 05/14] That was fast! --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4a8a66fb6..383811c49 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -71,7 +71,7 @@ repos: additional_dependencies: [tomli] files: ^(graphblas|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.242 + rev: v0.0.243 hooks: - id: ruff - repo: https://github.com/sphinx-contrib/sphinx-lint From 0b2ac41064b536f10437c50bee2cbe772e51a8b5 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Tue, 7 Feb 2023 13:23:34 -0600 Subject: [PATCH 06/14] Add warning that `to_dense` can create very large arrays --- graphblas/core/matrix.py | 8 ++++++-- graphblas/core/vector.py | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/graphblas/core/matrix.py b/graphblas/core/matrix.py index 9dda3171d..d47b23cb7 100644 --- a/graphblas/core/matrix.py +++ b/graphblas/core/matrix.py @@ -1412,6 +1412,9 @@ def from_dense(cls, values, dtype=None, *, nrows=None, ncols=None, name=None): def to_dense(self, fill_value=None, dtype=None): """Convert Matrix to NumPy array of the same shape with missing values filled. + .. warning:: + This can create very large arrays that require a lot of memory; please use caution. + Parameters ---------- fill_value : scalar, optional @@ -1458,8 +1461,9 @@ def to_dense(self, fill_value=None, dtype=None): ) dtype = unify(fill_value.dtype, self.dtype, is_left_scalar=True) - rv = self.dup(dtype, name="to_dense") - rv(~rv.S) << fill_value + rv = self.dup(dtype, clear=True, name="to_dense") + rv << fill_value + rv(self.S) << self return rv.to_dense() @classmethod diff --git a/graphblas/core/vector.py b/graphblas/core/vector.py index 39e79f53a..59814d8db 100644 --- a/graphblas/core/vector.py +++ b/graphblas/core/vector.py @@ -855,6 +855,9 @@ def from_dense(cls, values, dtype=None, *, size=None, name=None): def to_dense(self, fill_value=None, dtype=None): """Convert Vector to NumPy array of the same shape with missing values filled. + .. warning:: + This can create very large arrays that require a lot of memory; please use caution. + Parameters ---------- fill_value : scalar, optional @@ -898,8 +901,9 @@ def to_dense(self, fill_value=None, dtype=None): ) dtype = unify(fill_value.dtype, self.dtype, is_left_scalar=True) - rv = self.dup(dtype, name="to_dense") - rv(~rv.S) << fill_value + rv = self.dup(dtype, clear=True, name="to_dense") + rv << fill_value + rv(self.S) << self return rv.to_dense() @property From 687d44330e3a4b6321763a24bc17674543297680 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Tue, 7 Feb 2023 14:19:15 -0600 Subject: [PATCH 07/14] Add a test --- graphblas/core/matrix.py | 3 ++- graphblas/core/vector.py | 3 ++- graphblas/tests/test_matrix.py | 4 ++++ graphblas/tests/test_vector.py | 4 ++++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/graphblas/core/matrix.py b/graphblas/core/matrix.py index d47b23cb7..6ae27b4ae 100644 --- a/graphblas/core/matrix.py +++ b/graphblas/core/matrix.py @@ -1372,7 +1372,8 @@ def from_dense(cls, values, dtype=None, *, nrows=None, ncols=None, name=None): raise TypeError( "nrows and ncols must be given when creating a dense Matrix from a scalar" ) - if backend == "suitesparse": + if backend == "suitesparse" and not dtype._is_udt: + # `Matrix.ss.import_fullr` does not yet handle all cases with UDTs # Should we try to handle F-contiguous data w/o a copy? return cls.ss.import_fullr( values, dtype=dtype, nrows=nrows, ncols=ncols, is_iso=True, name=name diff --git a/graphblas/core/vector.py b/graphblas/core/vector.py index 59814d8db..847a0d5dc 100644 --- a/graphblas/core/vector.py +++ b/graphblas/core/vector.py @@ -828,7 +828,8 @@ def from_dense(cls, values, dtype=None, *, size=None, name=None): if values.ndim == 0: if size is None: raise TypeError("size must be given when creating a dense Vector from a scalar") - if backend == "suitesparse": + if backend == "suitesparse" and not dtype._is_udt: + # `Vector.ss.import_full` does not yet handle all cases with UDTs return cls.ss.import_full(values, dtype=dtype, size=size, is_iso=True, name=name) rv = cls(dtype, size=size, name=name) rv << values diff --git a/graphblas/tests/test_matrix.py b/graphblas/tests/test_matrix.py index 971c4080d..bac52f1ef 100644 --- a/graphblas/tests/test_matrix.py +++ b/graphblas/tests/test_matrix.py @@ -4145,6 +4145,10 @@ def test_to_dense_from_dense(): Matrix.from_dense(np.arange(24).reshape(2, 3, 4), int) with pytest.raises(ValueError, match=">2d array"): Matrix.from_dense(np.arange(6).reshape(2, 3), "INT64[2]") + A = Matrix.from_dense(1, "INT64[2]", nrows=3, ncols=4) + B = Matrix("INT64[2]", nrows=3, ncols=4) + B << [1, 1] + assert A.isequal(B, check_dtype=True) @pytest.mark.skipif("not suitesparse") diff --git a/graphblas/tests/test_vector.py b/graphblas/tests/test_vector.py index 066aa52b3..4fe9f4b51 100644 --- a/graphblas/tests/test_vector.py +++ b/graphblas/tests/test_vector.py @@ -2517,6 +2517,10 @@ def test_to_dense_from_dense(): Vector.from_dense(np.arange(6).reshape(2, 3), int) with pytest.raises(ValueError, match=">1d array"): Vector.from_dense(np.arange(6), "INT64[2]") + v = Vector.from_dense(1, "INT64[2]", size=3) + w = Vector("INT64[2]", size=3) + w << [1, 1] + assert v.isequal(w, check_dtype=True) @pytest.mark.skipif("not suitesparse") From fe9d2c052507afd3b1d3b87898b1c9007c4ae4e4 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Wed, 8 Feb 2023 22:27:18 -0600 Subject: [PATCH 08/14] Add `from_iso_value`, `missing_value=` to `from_dense`, and deprecate `io.from_numpy` --- .pre-commit-config.yaml | 2 +- graphblas/core/matrix.py | 65 +++++++++++++---------------- graphblas/core/vector.py | 39 ++++++++--------- graphblas/io.py | 16 +++++++ graphblas/tests/test_io.py | 33 +++++++++------ graphblas/tests/test_matrix.py | 19 +++++---- graphblas/tests/test_prefix_scan.py | 15 ++----- graphblas/tests/test_vector.py | 21 ++++++---- 8 files changed, 116 insertions(+), 94 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 383811c49..f6ec054ab 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -71,7 +71,7 @@ repos: additional_dependencies: [tomli] files: ^(graphblas|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.243 + rev: v0.0.244 hooks: - id: ruff - repo: https://github.com/sphinx-contrib/sphinx-lint diff --git a/graphblas/core/matrix.py b/graphblas/core/matrix.py index 386715af0..7fd575e35 100644 --- a/graphblas/core/matrix.py +++ b/graphblas/core/matrix.py @@ -1329,22 +1329,28 @@ def from_dcsc( return cls.from_coo(row_indices, cols, values, dtype, nrows=nrows, ncols=ncols, name=name) @classmethod - def from_dense(cls, values, dtype=None, *, nrows=None, ncols=None, name=None): - """Create a fully dense Matrix from a NumPy array or scalar. + def from_iso_value(cls, value, nrows, ncols, dtype=None, *, name=None): + value, dtype = values_to_numpy_buffer(value, dtype, subarray_after=0) + if backend == "suitesparse" and not dtype._is_udt: + # `Matrix.ss.import_fullr` does not yet handle all cases with UDTs + return cls.ss.import_fullr( + value, dtype=dtype, nrows=nrows, ncols=ncols, is_iso=True, name=name + ) + rv = cls(dtype, nrows=nrows, ncols=ncols, name=name) + rv << value + return rv + + @classmethod + def from_dense(cls, values, missing_value=None, *, dtype=None, name=None): + """Create a fully dense Matrix from a NumPy array or list of lists. Parameters ---------- - values : list or np.ndarray or scalar - List of values. If a scalar is provided, all values will be set to this single value. + values : list or np.ndarray + List of values. dtype : Data type of the Matrix. If not provided, the values will be inspected to choose an appropriate dtype. - nrows : int, optional - Number of rows of the Matrix. By default, nrows is determined from - the shape of the input array. Nrows is required for scalar inputs. - ncols : int, optional - Number of cols of the Matrix. By default, ncols is determined from - the shape of the input array. Ncols is required for scalar inputs. name : str, optional Name to give the Matrix. @@ -1353,7 +1359,6 @@ def from_dense(cls, values, dtype=None, *, nrows=None, ncols=None, name=None): from_coo from_edgelist to_dense - io.from_numpy Returns ------- @@ -1361,19 +1366,10 @@ def from_dense(cls, values, dtype=None, *, nrows=None, ncols=None, name=None): """ values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=2) if values.ndim == 0: - if nrows is None or ncols is None: - raise TypeError( - "nrows and ncols must be given when creating a dense Matrix from a scalar" - ) - if backend == "suitesparse" and not dtype._is_udt: - # `Matrix.ss.import_fullr` does not yet handle all cases with UDTs - # Should we try to handle F-contiguous data w/o a copy? - return cls.ss.import_fullr( - values, dtype=dtype, nrows=nrows, ncols=ncols, is_iso=True, name=name - ) - rv = cls(dtype, nrows=nrows, ncols=ncols, name=name) - rv << values - return rv + raise TypeError( + "values must be an array or list, not a scalar. " + "To create a dense Matrix from a scalar, use `Matrix.from_iso_value`." + ) if values.ndim == 1: raise ValueError("A 2d array or scalar is required to create a dense Matrix") if values.ndim == 2 and dtype.np_type.subdtype is not None: @@ -1381,14 +1377,14 @@ def from_dense(cls, values, dtype=None, *, nrows=None, ncols=None, name=None): if values.ndim > 2 and dtype.np_type.subdtype is None: raise ValueError(f"values array must be 2d to create dense Matrix with dtype {dtype}") if backend == "suitesparse": + # Should we try to handle F-contiguous data w/o a copy? rv = cls.ss.import_fullr(values, dtype=dtype, name=name) - nrows2, ncols2 = rv.shape else: - nrows2, ncols2, *rest = values.shape - indptr = np.arange(0, nrows2 * ncols2 + 1, ncols2, dtype=np.uint64) - cols = np.repeat(np.arange(ncols2, dtype=np.uint64)[None, :], nrows2, 0).ravel() + nrows, ncols, *rest = values.shape + indptr = np.arange(0, nrows * ncols + 1, ncols, dtype=np.uint64) + cols = np.repeat(np.arange(ncols, dtype=np.uint64)[None, :], nrows, 0).ravel() if rest: # sub-array dtype - values = values.reshape(nrows2 * ncols2, *rest) + values = values.reshape(nrows * ncols, *rest) else: values = values.ravel() rv = cls.from_csr( @@ -1396,11 +1392,11 @@ def from_dense(cls, values, dtype=None, *, nrows=None, ncols=None, name=None): cols, values, dtype, - ncols=ncols2, + ncols=ncols, name=name, ) - if nrows is not None and nrows != nrows2 or ncols is not None and ncols != ncols2: - rv.resize(nrows2 if nrows is None else nrows, ncols2 if ncols is None else ncols) + if missing_value is not None: + rv << select.valuene(rv, missing_value) return rv def to_dense(self, fill_value=None, dtype=None): @@ -1422,7 +1418,6 @@ def to_dense(self, fill_value=None, dtype=None): to_dicts to_edgelist from_dense - io.to_numpy Returns ------- @@ -2952,7 +2947,7 @@ def _prep_for_assign(self, resolved_indexes, value, mask, is_submask, replace, o # C[i, J](m) << [1, 2, 3] expected_shape = (rowsize or colsize,) try: - vals = Vector.from_dense(values, dtype) + vals = Vector.from_dense(values, dtype=dtype) except Exception: # pragma: no cover (safety) vals = None else: @@ -2964,7 +2959,7 @@ def _prep_for_assign(self, resolved_indexes, value, mask, is_submask, replace, o # C[I, J](M) << [[1, 2, 3], [4, 5, 6]] expected_shape = (rowsize, colsize) try: - vals = Matrix.from_dense(values, dtype) + vals = Matrix.from_dense(values, dtype=dtype) except Exception: vals = None else: diff --git a/graphblas/core/vector.py b/graphblas/core/vector.py index 8c6d05cc1..34dfb15f3 100644 --- a/graphblas/core/vector.py +++ b/graphblas/core/vector.py @@ -789,19 +789,26 @@ def from_pairs(cls, pairs, dtype=None, *, size=None, dup_op=None, name=None): return cls.from_coo(indices, values, dtype, size=size, dup_op=dup_op, name=name) @classmethod - def from_dense(cls, values, dtype=None, *, size=None, name=None): + def from_iso_value(cls, value, size, dtype=None, *, name=None): + value, dtype = values_to_numpy_buffer(value, dtype, subarray_after=0) + if backend == "suitesparse" and not dtype._is_udt: + # `Vector.ss.import_full` does not yet handle all cases with UDTs + return cls.ss.import_full(value, dtype=dtype, size=size, is_iso=True, name=name) + rv = cls(dtype, size, name=name) + rv << value + return rv + + @classmethod + def from_dense(cls, values, missing_value=None, *, dtype=None, name=None): """Create a fully dense Vector from a NumPy array or scalar. Parameters ---------- - values : list or np.ndarray or scalar - List of values. If a scalar is provided, all values will be set to this single value. + values : list or np.ndarray + List of values. dtype : Data type of the Vector. If not provided, the values will be inspected to choose an appropriate dtype. - size : int, optional - Size of the Vector. By default, the size of the Vector is the size of the input array. - Size is required for scalar inputs. name : str, optional Name to give the Vector. @@ -811,7 +818,6 @@ def from_dense(cls, values, dtype=None, *, size=None, name=None): from_dict from_pairs to_dense - io.from_numpy Returns ------- @@ -819,14 +825,10 @@ def from_dense(cls, values, dtype=None, *, size=None, name=None): """ values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=1) if values.ndim == 0: - if size is None: - raise TypeError("size must be given when creating a dense Vector from a scalar") - if backend == "suitesparse" and not dtype._is_udt: - # `Vector.ss.import_full` does not yet handle all cases with UDTs - return cls.ss.import_full(values, dtype=dtype, size=size, is_iso=True, name=name) - rv = cls(dtype, size=size, name=name) - rv << values - return rv + raise TypeError( + "values must be an array or list, not a scalar. " + "To create a dense Vector from a scalar, use `Vector.from_iso_value`." + ) if values.ndim == 1 and dtype.np_type.subdtype is not None: raise ValueError("A >1d array is required to create a dense Vector with subdtype") if values.ndim > 1 and dtype.np_type.subdtype is None: @@ -842,8 +844,8 @@ def from_dense(cls, values, dtype=None, *, size=None, name=None): size=values.shape[0], name=name, ) - if size is not None and size != rv._size: - rv.resize(size) + if missing_value is not None: + rv << select.valuene(rv, missing_value) return rv def to_dense(self, fill_value=None, dtype=None): @@ -864,7 +866,6 @@ def to_dense(self, fill_value=None, dtype=None): to_coo to_dict from_dense - io.to_numpy Returns ------- @@ -1760,7 +1761,7 @@ def _prep_for_assign(self, resolved_indexes, value, mask, is_submask, replace, o else: shape = values.shape try: - vals = Vector.from_dense(values, dtype) + vals = Vector.from_dense(values, dtype=dtype) except Exception: # pragma: no cover (safety) vals = None else: diff --git a/graphblas/io.py b/graphblas/io.py index ab4b599e9..d1796e788 100644 --- a/graphblas/io.py +++ b/graphblas/io.py @@ -62,6 +62,9 @@ def from_networkx(G, nodelist=None, dtype=None, weight="weight", name=None): def from_numpy(m): """Create a sparse Vector or Matrix from a dense numpy array. + .. deprecated:: 2023.2.0 + TODO + A value of 0 is considered as "missing". - m.ndim == 1 returns a `Vector` @@ -84,6 +87,11 @@ def from_numpy(m): ------- Vector or Matrix """ + _warn( + "`graphblas.io.from_numpy` is deprecated; " + "use `Matrix.from_dense` and `Vector.from_dense` instead.", + DeprecationWarning, + ) if m.ndim > 2: raise _GraphblasException("m.ndim must be <= 2") @@ -312,6 +320,9 @@ def to_networkx(m, edge_attribute="weight"): def to_numpy(m): """Create a dense numpy array from a sparse Vector or Matrix. + .. deprecated:: 2023.2.0 + TODO + Missing values will become 0 in the output. numpy dtype will match the GraphBLAS dtype @@ -331,6 +342,11 @@ def to_numpy(m): ------- np.ndarray """ + _warn( + "`graphblas.io.to_numpy` is deprecated; " + "use `Matrix.to_dense` and `Vector.to_dense` instead.", + DeprecationWarning, + ) try: import scipy # noqa: F401 except ImportError: # pragma: no cover (import) diff --git a/graphblas/tests/test_io.py b/graphblas/tests/test_io.py index eb743daaa..0f78430c1 100644 --- a/graphblas/tests/test_io.py +++ b/graphblas/tests/test_io.py @@ -4,7 +4,7 @@ import pytest import graphblas as gb -from graphblas import Matrix, dtypes +from graphblas import Matrix, Vector, dtypes from graphblas.exceptions import GraphblasException try: @@ -34,12 +34,25 @@ suitesparse = gb.backend == "suitesparse" +@pytest.mark.skipif("not ss") +def test_deprecated(): + a = np.array([0.0, 2.0, 4.1]) + with pytest.warns(DeprecationWarning): + v = gb.io.from_numpy(a) + assert v.isequal(gb.Vector.from_coo([1, 2], [2.0, 4.1]), check_dtype=True) + with pytest.warns(DeprecationWarning): + a2 = gb.io.to_numpy(v) + np.testing.assert_array_equal(a, a2) + with pytest.warns(DeprecationWarning): + gb.io.to_scipy_sparse_matrix(v, "coo") + + @pytest.mark.skipif("not ss") def test_vector_to_from_numpy(): a = np.array([0.0, 2.0, 4.1]) - v = gb.io.from_numpy(a) + v = Vector.from_dense(a, 0) assert v.isequal(gb.Vector.from_coo([1, 2], [2.0, 4.1]), check_dtype=True) - a2 = gb.io.to_numpy(v) + a2 = v.to_dense(0) np.testing.assert_array_equal(a, a2) csr = gb.io.to_scipy_sparse(v, "csr") @@ -59,17 +72,14 @@ def test_vector_to_from_numpy(): assert coo.nnz == 2 np.testing.assert_array_equal(coo.toarray(), np.array([[0.0, 2.0, 4.1]])) - with pytest.warns(DeprecationWarning): - coo = gb.io.to_scipy_sparse_matrix(v, "coo") - @pytest.mark.skipif("not ss") @pytest.mark.parametrize("a", [np.array([7, 0]), np.array([0, 0]), np.array([])]) def test_vector_to_from_numpy_correct_size(a): # Make sure we use the right size - v = gb.io.from_numpy(a) + v = Vector.from_dense(a, 0) assert v.shape == a.shape - b = gb.io.to_numpy(v) + b = v.to_dense(0) np.testing.assert_array_equal(a, b) csr = gb.io.to_scipy_sparse(v, "csr") np.testing.assert_array_equal(a[None, :], csr.toarray()) @@ -80,9 +90,9 @@ def test_vector_to_from_numpy_correct_size(a): @pytest.mark.skipif("not ss") def test_matrix_to_from_numpy(): a = np.array([[1.0, 0.0], [2.0, 3.7]]) - M = gb.io.from_numpy(a) + M = Matrix.from_dense(a, 0) assert M.isequal(gb.Matrix.from_coo([0, 1, 1], [0, 0, 1], [1.0, 2.0, 3.7]), check_dtype=True) - a2 = gb.io.to_numpy(M) + a2 = M.to_dense(0) np.testing.assert_array_equal(a, a2) for format in ["csr", "csc", "coo"]: @@ -96,9 +106,6 @@ def test_matrix_to_from_numpy(): with pytest.raises(ValueError, match="Invalid format"): gb.io.to_scipy_sparse(M, "bad format") - with pytest.raises(GraphblasException, match="ndim must be"): - gb.io.from_numpy(np.array([[[1.0, 0.0], [2.0, 3.7]]])) - @pytest.mark.skipif("not nx or not ss") def test_matrix_to_from_networkx(): diff --git a/graphblas/tests/test_matrix.py b/graphblas/tests/test_matrix.py index 9f9b9921c..37e870b7d 100644 --- a/graphblas/tests/test_matrix.py +++ b/graphblas/tests/test_matrix.py @@ -2897,6 +2897,7 @@ def test_expr_is_like_matrix(A): "from_dcsr", "from_dense", "from_dicts", + "from_iso_value", "from_edgelist", "from_values", "resize", @@ -2961,6 +2962,7 @@ def test_index_expr_is_like_matrix(A): "from_dense", "from_dicts", "from_edgelist", + "from_iso_value", "from_values", "resize", } @@ -4108,7 +4110,7 @@ def test_to_from_edgelist(A): def test_to_dense_from_dense(): - A = Matrix.from_dense(1, nrows=2, ncols=3) + A = Matrix.from_iso_value(1, nrows=2, ncols=3) B = Matrix(int, nrows=2, ncols=3) B << 1 assert A.isequal(B, check_dtype=True) @@ -4123,12 +4125,13 @@ def test_to_dense_from_dense(): assert_array_equal(A.to_dense(6.5, int), [[6, 1, 2], [3, 4, 5]]) assert_array_equal(A.to_dense(Scalar.from_value(6.5)), [[6.5, 1, 2], [3, 4, 5]]) - A = Matrix.from_dense(np.arange(6).reshape(2, 3), nrows=3, ncols=4) + A = Matrix.from_dense(np.arange(6).reshape(2, 3)) + A.resize(3, 4) B.resize(3, 4) assert A.isequal(B, check_dtype=True) assert_array_equal(A.to_dense(10), [[0, 1, 2, 10], [3, 4, 5, 10], [10, 10, 10, 10]]) - with pytest.raises(TypeError, match="must be given"): - Matrix.from_dense(1, nrows=2) + with pytest.raises(TypeError, match="missing"): + Matrix.from_iso_value(1, nrows=2) with pytest.raises(ValueError, match="is required to create a dense"): Matrix.from_dense([1, 2, 3]) with pytest.raises(TypeError, match="fill_value must be given"): @@ -4136,10 +4139,12 @@ def test_to_dense_from_dense(): with pytest.raises(TypeError, match="Bad type for keyword argument `fill_value"): A.to_dense(object()) with pytest.raises(ValueError, match="must be 2d"): - Matrix.from_dense(np.arange(24).reshape(2, 3, 4), int) + Matrix.from_dense(np.arange(24).reshape(2, 3, 4), dtype=int) with pytest.raises(ValueError, match=">2d array"): - Matrix.from_dense(np.arange(6).reshape(2, 3), "INT64[2]") - A = Matrix.from_dense(1, "INT64[2]", nrows=3, ncols=4) + Matrix.from_dense(np.arange(6).reshape(2, 3), dtype="INT64[2]") + with pytest.raises(TypeError, match="from_iso_value"): + Matrix.from_dense(1) + A = Matrix.from_iso_value(1, dtype="INT64[2]", nrows=3, ncols=4) B = Matrix("INT64[2]", nrows=3, ncols=4) B << [1, 1] assert A.isequal(B, check_dtype=True) diff --git a/graphblas/tests/test_prefix_scan.py b/graphblas/tests/test_prefix_scan.py index 83742c0e0..ea169a632 100644 --- a/graphblas/tests/test_prefix_scan.py +++ b/graphblas/tests/test_prefix_scan.py @@ -1,21 +1,14 @@ import numpy as np import pytest -import graphblas as gb from graphblas import backend, binary, monoid from graphblas import Matrix, Vector # isort:skip (for dask-graphblas) -try: - # gb.io.to_numpy currently requires scipy - import scipy.sparse as ss -except ImportError: # pragma: no cover (import) - ss = None - suitesparse = backend == "suitesparse" -@pytest.mark.skipif("not ss or not suitesparse") +@pytest.mark.skipif("not suitesparse") @pytest.mark.parametrize("method", ["scan_rowwise", "scan_columnwise"]) @pytest.mark.parametrize("length", list(range(34))) @pytest.mark.parametrize("do_random", [False, True]) @@ -38,7 +31,7 @@ def test_scan_matrix(method, length, do_random): M = M.T.new(name="A") R = M.ss.scan(binary.plus, order="col").T.new() - result = gb.io.to_numpy(R) + result = R.to_dense(0) try: np.testing.assert_array_equal(result, expected) except Exception: # pragma: no cover (debug) @@ -46,7 +39,7 @@ def test_scan_matrix(method, length, do_random): raise -@pytest.mark.skipif("not ss or not suitesparse") +@pytest.mark.skipif("not suitesparse") @pytest.mark.parametrize("length", list(range(34))) @pytest.mark.parametrize("do_random", [False, True]) def test_scan_vector(length, do_random): @@ -62,7 +55,7 @@ def test_scan_vector(length, do_random): v = Vector.ss.import_full(values=a) expected = a.cumsum() r = v.ss.scan() - result = gb.io.to_numpy(r) + result = r.to_dense(0) try: np.testing.assert_array_equal(result, expected) except Exception: # pragma: no cover (debug) diff --git a/graphblas/tests/test_vector.py b/graphblas/tests/test_vector.py index 834435f33..9280e4612 100644 --- a/graphblas/tests/test_vector.py +++ b/graphblas/tests/test_vector.py @@ -1618,6 +1618,7 @@ def test_expr_is_like_vector(v): "from_coo", "from_dense", "from_dict", + "from_iso_value", "from_pairs", "from_values", "resize", @@ -1666,6 +1667,7 @@ def test_index_expr_is_like_vector(v): "from_coo", "from_dense", "from_dict", + "from_iso_value", "from_pairs", "from_values", "resize", @@ -2489,7 +2491,7 @@ def test_from_pairs(): def test_to_dense_from_dense(): - v = Vector.from_dense(1, size=3) + v = Vector.from_iso_value(1, size=3) w = Vector.from_coo([0, 1, 2], 1) assert v.isequal(w, check_dtype=True) assert_array_equal(v.to_dense(), [1, 1, 1]) @@ -2497,7 +2499,8 @@ def test_to_dense_from_dense(): w = Vector.from_coo([0, 1, 2], [1, 2, 3]) assert v.isequal(w, check_dtype=True) assert_array_equal(v.to_dense(dtype=int), [1, 2, 3]) - v = Vector.from_dense([1, 2, 3], size=4) + v = Vector.from_dense([1, 2, 3]) + v.resize(4) w = Vector.from_coo([0, 1, 2], [1, 2, 3], size=4) assert v.isequal(w, check_dtype=True) assert_array_equal(v.to_dense(4.5, dtype=float), [1, 2, 3, 4.5]) @@ -2507,17 +2510,19 @@ def test_to_dense_from_dense(): v.to_dense() with pytest.raises(TypeError, match="Bad type for keyword argument `fill_value"): v.to_dense(object()) - v = Vector.from_dense([1, 2], size=2) + v = Vector.from_dense([1, 2]) w = Vector.from_coo([0, 1], [1, 2], size=2) assert v.isequal(w, check_dtype=True) assert_array_equal(v.to_dense(dtype=float), [1.0, 2]) - with pytest.raises(TypeError, match="size must be given"): - Vector.from_dense(1) + with pytest.raises(TypeError, match="missing"): + Vector.from_iso_value(1) with pytest.raises(ValueError, match="must be 1d"): - Vector.from_dense(np.arange(6).reshape(2, 3), int) + Vector.from_dense(np.arange(6).reshape(2, 3), dtype=int) with pytest.raises(ValueError, match=">1d array"): - Vector.from_dense(np.arange(6), "INT64[2]") - v = Vector.from_dense(1, "INT64[2]", size=3) + Vector.from_dense(np.arange(6), dtype="INT64[2]") + with pytest.raises(TypeError, match="from_iso_value"): + Vector.from_dense(1) + v = Vector.from_iso_value(1, dtype="INT64[2]", size=3) w = Vector("INT64[2]", size=3) w << [1, 1] assert v.isequal(w, check_dtype=True) From 0a93f9390f6075f5befe99fb1571435f503b4ac8 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Thu, 9 Feb 2023 19:37:36 -0600 Subject: [PATCH 09/14] Update documentation --- graphblas/core/matrix.py | 58 ++++++++++++++++++++++++++++++---------- graphblas/core/vector.py | 55 ++++++++++++++++++++++++++++--------- graphblas/io.py | 14 ++++++---- 3 files changed, 96 insertions(+), 31 deletions(-) diff --git a/graphblas/core/matrix.py b/graphblas/core/matrix.py index 7fd575e35..130eceb32 100644 --- a/graphblas/core/matrix.py +++ b/graphblas/core/matrix.py @@ -1329,7 +1329,33 @@ def from_dcsc( return cls.from_coo(row_indices, cols, values, dtype, nrows=nrows, ncols=ncols, name=name) @classmethod - def from_iso_value(cls, value, nrows, ncols, dtype=None, *, name=None): + def from_iso_value(cls, value, nrows, ncols, dtype=None, *, name=None, **opts): + """Create a fully dense Matrix filled with a scalar value. + + Parameters + ---------- + value : scalar + Scalar value used to fill the Matrix. + nrows : int + Number of rows. + ncols : int + Number of columns. + dtype : DataType, optional + Data type of the Matrix. If not provided, the scalar value will be + inspected to choose an appropriate dtype. + name : str, optional + Name to give the Matrix. + + See Also + -------- + from_coo + from_dense + from_edgelist + + Returns + ------- + Matrix + """ value, dtype = values_to_numpy_buffer(value, dtype, subarray_after=0) if backend == "suitesparse" and not dtype._is_udt: # `Matrix.ss.import_fullr` does not yet handle all cases with UDTs @@ -1337,18 +1363,21 @@ def from_iso_value(cls, value, nrows, ncols, dtype=None, *, name=None): value, dtype=dtype, nrows=nrows, ncols=ncols, is_iso=True, name=name ) rv = cls(dtype, nrows=nrows, ncols=ncols, name=name) - rv << value + rv(**opts) << value return rv @classmethod - def from_dense(cls, values, missing_value=None, *, dtype=None, name=None): - """Create a fully dense Matrix from a NumPy array or list of lists. + def from_dense(cls, values, missing_value=None, *, dtype=None, name=None, **opts): + """Create a Matrix from a NumPy array or list of lists. Parameters ---------- values : list or np.ndarray List of values. - dtype : + missing_value : scalar, optional + A scalar value to consider "missing"; elements of this value will be dropped. + If None, then the resulting Matrix will be dense. + dtype : DataType, optional Data type of the Matrix. If not provided, the values will be inspected to choose an appropriate dtype. name : str, optional @@ -1358,6 +1387,7 @@ def from_dense(cls, values, missing_value=None, *, dtype=None, name=None): -------- from_coo from_edgelist + from_iso_value to_dense Returns @@ -1396,10 +1426,10 @@ def from_dense(cls, values, missing_value=None, *, dtype=None, name=None): name=name, ) if missing_value is not None: - rv << select.valuene(rv, missing_value) + rv(**opts) << select.valuene(rv, missing_value) return rv - def to_dense(self, fill_value=None, dtype=None): + def to_dense(self, fill_value=None, dtype=None, **opts): """Convert Matrix to NumPy array of the same shape with missing values filled. .. warning:: @@ -1409,7 +1439,7 @@ def to_dense(self, fill_value=None, dtype=None): ---------- fill_value : scalar, optional Value used to fill missing values. This is required if there are missing values. - dtype : + dtype : DataType, optional Requested dtype for the output values array. See Also @@ -1450,10 +1480,10 @@ def to_dense(self, fill_value=None, dtype=None): ) dtype = unify(fill_value.dtype, self.dtype, is_left_scalar=True) - rv = self.dup(dtype, clear=True, name="to_dense") - rv << fill_value - rv(self.S) << self - return rv.to_dense() + rv = self.dup(dtype, clear=True, name="to_dense", **opts) + rv(**opts) << fill_value + rv(self.S, **opts) << self + return rv.to_dense(**opts) @classmethod def from_dicts( @@ -3535,8 +3565,8 @@ def to_dcsc(self, dtype=None, *, sort=True): return self._matrix.to_dcsr(dtype, sort=sort) @wrapdoc(Matrix.to_dense) - def to_dense(self, fill_value=None, dtype=None): - rv = self._matrix.to_dense(fill_value, dtype) + def to_dense(self, fill_value=None, dtype=None, **opts): + rv = self._matrix.to_dense(fill_value, dtype, **opts) return rv.swapaxes(0, 1) @wrapdoc(Matrix.to_dicts) diff --git a/graphblas/core/vector.py b/graphblas/core/vector.py index 34dfb15f3..d6db11a97 100644 --- a/graphblas/core/vector.py +++ b/graphblas/core/vector.py @@ -789,24 +789,54 @@ def from_pairs(cls, pairs, dtype=None, *, size=None, dup_op=None, name=None): return cls.from_coo(indices, values, dtype, size=size, dup_op=dup_op, name=name) @classmethod - def from_iso_value(cls, value, size, dtype=None, *, name=None): + def from_iso_value(cls, value, size, dtype=None, *, name=None, **opts): + """Create a fully dense Vector filled with a scalar value. + + Parameters + ---------- + value : scalar + Scalar value used to fill the Vector. + nrows : int + Number of rows. + ncols : int + Number of columns. + dtype : DataType, optional + Data type of the Vector. If not provided, the scalar value will be + inspected to choose an appropriate dtype. + name : str, optional + Name to give the Vector. + + See Also + -------- + from_coo + from_dense + from_dict + from_pairs + + Returns + ------- + Vector + """ value, dtype = values_to_numpy_buffer(value, dtype, subarray_after=0) if backend == "suitesparse" and not dtype._is_udt: # `Vector.ss.import_full` does not yet handle all cases with UDTs return cls.ss.import_full(value, dtype=dtype, size=size, is_iso=True, name=name) rv = cls(dtype, size, name=name) - rv << value + rv(**opts) << value return rv @classmethod - def from_dense(cls, values, missing_value=None, *, dtype=None, name=None): - """Create a fully dense Vector from a NumPy array or scalar. + def from_dense(cls, values, missing_value=None, *, dtype=None, name=None, **opts): + """Create a Vector from a NumPy array or list. Parameters ---------- values : list or np.ndarray List of values. - dtype : + missing_value : scalar, optional + A scalar value to consider "missing"; elements of this value will be dropped. + If None, then the resulting Vector will be dense. + dtype : DataType, optional Data type of the Vector. If not provided, the values will be inspected to choose an appropriate dtype. name : str, optional @@ -816,6 +846,7 @@ def from_dense(cls, values, missing_value=None, *, dtype=None, name=None): -------- from_coo from_dict + from_iso_value from_pairs to_dense @@ -845,10 +876,10 @@ def from_dense(cls, values, missing_value=None, *, dtype=None, name=None): name=name, ) if missing_value is not None: - rv << select.valuene(rv, missing_value) + rv(**opts) << select.valuene(rv, missing_value) return rv - def to_dense(self, fill_value=None, dtype=None): + def to_dense(self, fill_value=None, dtype=None, **opts): """Convert Vector to NumPy array of the same shape with missing values filled. .. warning:: @@ -858,7 +889,7 @@ def to_dense(self, fill_value=None, dtype=None): ---------- fill_value : scalar, optional Value used to fill missing values. This is required if there are missing values. - dtype : + dtype : DataType, optional Requested dtype for the output values array. See Also @@ -896,10 +927,10 @@ def to_dense(self, fill_value=None, dtype=None): ) dtype = unify(fill_value.dtype, self.dtype, is_left_scalar=True) - rv = self.dup(dtype, clear=True, name="to_dense") - rv << fill_value - rv(self.S) << self - return rv.to_dense() + rv = self.dup(dtype, clear=True, name="to_dense", **opts) + rv(**opts) << fill_value + rv(self.S, **opts) << self + return rv.to_dense(**opts) @property def _carg(self): diff --git a/graphblas/io.py b/graphblas/io.py index d1796e788..0aeb174e8 100644 --- a/graphblas/io.py +++ b/graphblas/io.py @@ -58,12 +58,13 @@ def from_networkx(G, nodelist=None, dtype=None, weight="weight", name=None): return from_scipy_sparse(A, name=name) -# TODO: add parameter to indicate empty value (default is 0 and NaN) -def from_numpy(m): +def from_numpy(m): # pragma: no cover (deprecated) """Create a sparse Vector or Matrix from a dense numpy array. .. deprecated:: 2023.2.0 - TODO + `from_numpy` will be removed in a future release. + Use `Vector.from_dense` or `Matrix.from_dense` instead. + Will be removed in version 2023.10.0 or later A value of 0 is considered as "missing". @@ -82,6 +83,7 @@ def from_numpy(m): -------- Matrix.from_dense Vector.from_dense + from_scipy_sparse Returns ------- @@ -317,11 +319,13 @@ def to_networkx(m, edge_attribute="weight"): return G -def to_numpy(m): +def to_numpy(m): # pragma: no cover (deprecated) """Create a dense numpy array from a sparse Vector or Matrix. .. deprecated:: 2023.2.0 - TODO + `to_numpy` will be removed in a future release. + Use `Vector.to_dense` or `Matrix.to_dense` instead. + Will be removed in version 2023.10.0 or later Missing values will become 0 in the output. From 0bd936a54ac6908adfe9a35dbdd2abda1c97e2f7 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Thu, 9 Feb 2023 22:09:24 -0600 Subject: [PATCH 10/14] better (haha, what was I thinking before?) --- graphblas/core/matrix.py | 17 ++++++++++++++++- graphblas/core/vector.py | 17 ++++++++++++++++- graphblas/tests/test_matrix.py | 23 ++++++++++++++++------- graphblas/tests/test_vector.py | 22 +++++++++++++++------- 4 files changed, 63 insertions(+), 16 deletions(-) diff --git a/graphblas/core/matrix.py b/graphblas/core/matrix.py index 130eceb32..a6a7cbf36 100644 --- a/graphblas/core/matrix.py +++ b/graphblas/core/matrix.py @@ -1356,7 +1356,22 @@ def from_iso_value(cls, value, nrows, ncols, dtype=None, *, name=None, **opts): ------- Matrix """ - value, dtype = values_to_numpy_buffer(value, dtype, subarray_after=0) + if type(value) is not Scalar: + try: + value = Scalar.from_value(value, dtype, is_cscalar=None, name="") + except TypeError: + value = cls()._expect_type( + value, + Scalar, + within="from_iso_value", + keyword_name="value", + extra_message="Literal scalars also accepted.", + ) + dtype = value.dtype + elif dtype is None: + dtype = value.dtype + else: + dtype = lookup_dtype(dtype) if backend == "suitesparse" and not dtype._is_udt: # `Matrix.ss.import_fullr` does not yet handle all cases with UDTs return cls.ss.import_fullr( diff --git a/graphblas/core/vector.py b/graphblas/core/vector.py index d6db11a97..14a839452 100644 --- a/graphblas/core/vector.py +++ b/graphblas/core/vector.py @@ -817,7 +817,22 @@ def from_iso_value(cls, value, size, dtype=None, *, name=None, **opts): ------- Vector """ - value, dtype = values_to_numpy_buffer(value, dtype, subarray_after=0) + if type(value) is not Scalar: + try: + value = Scalar.from_value(value, dtype, is_cscalar=None, name="") + except TypeError: + value = cls()._expect_type( + value, + Scalar, + within="from_iso_value", + keyword_name="value", + extra_message="Literal scalars also accepted.", + ) + dtype = value.dtype + elif dtype is None: + dtype = value.dtype + else: + dtype = lookup_dtype(dtype) if backend == "suitesparse" and not dtype._is_udt: # `Vector.ss.import_full` does not yet handle all cases with UDTs return cls.ss.import_full(value, dtype=dtype, size=size, is_iso=True, name=name) diff --git a/graphblas/tests/test_matrix.py b/graphblas/tests/test_matrix.py index 37e870b7d..e13c4d974 100644 --- a/graphblas/tests/test_matrix.py +++ b/graphblas/tests/test_matrix.py @@ -4109,12 +4109,27 @@ def test_to_from_edgelist(A): Matrix.from_edgelist([[0, 1, 10], [2, 3, 20]], values=0) -def test_to_dense_from_dense(): +def test_from_iso_value(): A = Matrix.from_iso_value(1, nrows=2, ncols=3) B = Matrix(int, nrows=2, ncols=3) B << 1 assert A.isequal(B, check_dtype=True) assert_array_equal(A.to_dense(dtype=float), [[1.0, 1, 1], [1, 1, 1]]) + A = Matrix.from_iso_value(Scalar.from_value(1), nrows=2, ncols=3) + assert A.isequal(B, check_dtype=True) + A = Matrix.from_iso_value(Scalar.from_value(1.0), 2, 3, int) + assert A.isequal(B, check_dtype=True) + with pytest.raises(TypeError, match="missing"): + Matrix.from_iso_value(1, nrows=2) + with pytest.raises(TypeError, match="Literal scalars also accepted"): + Matrix.from_iso_value(A, nrows=2, ncols=3) + A = Matrix.from_iso_value(1, dtype="INT64[2]", nrows=3, ncols=4) + B = Matrix("INT64[2]", nrows=3, ncols=4) + B << [1, 1] + assert A.isequal(B, check_dtype=True) + + +def test_to_dense_from_dense(): A = Matrix.from_dense(np.arange(6).reshape(2, 3)) B = Matrix.from_coo([0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2], np.arange(6)) assert A.isequal(B, check_dtype=True) @@ -4130,8 +4145,6 @@ def test_to_dense_from_dense(): B.resize(3, 4) assert A.isequal(B, check_dtype=True) assert_array_equal(A.to_dense(10), [[0, 1, 2, 10], [3, 4, 5, 10], [10, 10, 10, 10]]) - with pytest.raises(TypeError, match="missing"): - Matrix.from_iso_value(1, nrows=2) with pytest.raises(ValueError, match="is required to create a dense"): Matrix.from_dense([1, 2, 3]) with pytest.raises(TypeError, match="fill_value must be given"): @@ -4144,10 +4157,6 @@ def test_to_dense_from_dense(): Matrix.from_dense(np.arange(6).reshape(2, 3), dtype="INT64[2]") with pytest.raises(TypeError, match="from_iso_value"): Matrix.from_dense(1) - A = Matrix.from_iso_value(1, dtype="INT64[2]", nrows=3, ncols=4) - B = Matrix("INT64[2]", nrows=3, ncols=4) - B << [1, 1] - assert A.isequal(B, check_dtype=True) @pytest.mark.skipif("not suitesparse") diff --git a/graphblas/tests/test_vector.py b/graphblas/tests/test_vector.py index 9280e4612..4bb78923c 100644 --- a/graphblas/tests/test_vector.py +++ b/graphblas/tests/test_vector.py @@ -2490,11 +2490,25 @@ def test_from_pairs(): Vector.from_pairs([[1, 2, 3], [4, 5, 6]]) -def test_to_dense_from_dense(): +def test_from_iso_value(): v = Vector.from_iso_value(1, size=3) w = Vector.from_coo([0, 1, 2], 1) assert v.isequal(w, check_dtype=True) assert_array_equal(v.to_dense(), [1, 1, 1]) + v = Vector.from_iso_value(Scalar.from_value(1), size=3) + assert v.isequal(w, check_dtype=True) + v = Vector.from_iso_value(Scalar.from_value(1.0), 3, int) + with pytest.raises(TypeError, match="missing"): + Vector.from_iso_value(1) + with pytest.raises(TypeError, match="Literal scalars also accepted"): + Vector.from_iso_value(v, size=2) + v = Vector.from_iso_value(1, dtype="INT64[2]", size=3) + w = Vector("INT64[2]", size=3) + w << [1, 1] + assert v.isequal(w, check_dtype=True) + + +def test_to_dense_from_dense(): v = Vector.from_dense([1, 2, 3]) w = Vector.from_coo([0, 1, 2], [1, 2, 3]) assert v.isequal(w, check_dtype=True) @@ -2514,18 +2528,12 @@ def test_to_dense_from_dense(): w = Vector.from_coo([0, 1], [1, 2], size=2) assert v.isequal(w, check_dtype=True) assert_array_equal(v.to_dense(dtype=float), [1.0, 2]) - with pytest.raises(TypeError, match="missing"): - Vector.from_iso_value(1) with pytest.raises(ValueError, match="must be 1d"): Vector.from_dense(np.arange(6).reshape(2, 3), dtype=int) with pytest.raises(ValueError, match=">1d array"): Vector.from_dense(np.arange(6), dtype="INT64[2]") with pytest.raises(TypeError, match="from_iso_value"): Vector.from_dense(1) - v = Vector.from_iso_value(1, dtype="INT64[2]", size=3) - w = Vector("INT64[2]", size=3) - w << [1, 1] - assert v.isequal(w, check_dtype=True) @pytest.mark.skipif("not suitesparse") From 93d533a607e9e80d7ad4caf495005c1abb1f23b5 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sun, 12 Feb 2023 12:07:44 -0600 Subject: [PATCH 11/14] bump ruff --- .pre-commit-config.yaml | 2 +- pyproject.toml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f6ec054ab..3f45482d5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -71,7 +71,7 @@ repos: additional_dependencies: [tomli] files: ^(graphblas|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.244 + rev: v0.0.245 hooks: - id: ruff - repo: https://github.com/sphinx-contrib/sphinx-lint diff --git a/pyproject.toml b/pyproject.toml index a3f7e57ce..f19afbe7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -197,6 +197,7 @@ select = [ # "INP", # flake8-no-pep420 "PIE", # flake8-pie "T20", # flake8-print + # "PYI", # flake8-pyi "PT", # flake8-pytest-style "Q", # flake8-quotes # "RET", # flake8-return @@ -253,6 +254,7 @@ ignore = [ "PT003", # `scope='function'` is implied in `@pytest.fixture()` (Note: no harm in being explicit) "PT023", # Use `@pytest.mark.slow()` over `@pytest.mark.slow` (Note: why?) "S110", # `try`-`except`-`pass` detected, consider logging the exception (Note: good advice, but we don't log) + "S112", # `try`-`except`-`continue` detected, consider logging the exception (Note: good advice, but we don't log) "SIM102", # Use a single `if` statement instead of nested `if` statements (Note: often necessary) "SIM105", # Use contextlib.suppress(...) instead of try-except-pass (Note: try-except-pass is much faster) "SIM108", # Use ternary operator ... instead of if-else-block (Note: if-else better for coverage and sometimes clearer) From cefc282200e4621bfb684e275275864e94f2009e Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Wed, 15 Feb 2023 12:27:29 -0600 Subject: [PATCH 12/14] Rename `from_iso_value` to `from_scalar` --- .pre-commit-config.yaml | 2 +- graphblas/core/matrix.py | 12 ++++++++---- graphblas/core/vector.py | 12 ++++++++---- graphblas/tests/test_matrix.py | 20 ++++++++++---------- graphblas/tests/test_vector.py | 20 ++++++++++---------- pyproject.toml | 2 ++ 6 files changed, 39 insertions(+), 29 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3f45482d5..0ef7cdea4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -71,7 +71,7 @@ repos: additional_dependencies: [tomli] files: ^(graphblas|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.245 + rev: v0.0.246 hooks: - id: ruff - repo: https://github.com/sphinx-contrib/sphinx-lint diff --git a/graphblas/core/matrix.py b/graphblas/core/matrix.py index a6a7cbf36..4262b92a3 100644 --- a/graphblas/core/matrix.py +++ b/graphblas/core/matrix.py @@ -1329,9 +1329,13 @@ def from_dcsc( return cls.from_coo(row_indices, cols, values, dtype, nrows=nrows, ncols=ncols, name=name) @classmethod - def from_iso_value(cls, value, nrows, ncols, dtype=None, *, name=None, **opts): + def from_scalar(cls, value, nrows, ncols, dtype=None, *, name=None, **opts): """Create a fully dense Matrix filled with a scalar value. + For SuiteSparse:GraphBLAS backend, this creates an iso-valued full Matrix + that stores a single value regardless of the shape of the Matrix, so large + matrices created by ``Matrix.from_scalar`` will use very low memory. + Parameters ---------- value : scalar @@ -1363,7 +1367,7 @@ def from_iso_value(cls, value, nrows, ncols, dtype=None, *, name=None, **opts): value = cls()._expect_type( value, Scalar, - within="from_iso_value", + within="from_scalar", keyword_name="value", extra_message="Literal scalars also accepted.", ) @@ -1402,7 +1406,7 @@ def from_dense(cls, values, missing_value=None, *, dtype=None, name=None, **opts -------- from_coo from_edgelist - from_iso_value + from_scalar to_dense Returns @@ -1413,7 +1417,7 @@ def from_dense(cls, values, missing_value=None, *, dtype=None, name=None, **opts if values.ndim == 0: raise TypeError( "values must be an array or list, not a scalar. " - "To create a dense Matrix from a scalar, use `Matrix.from_iso_value`." + "To create a dense Matrix from a scalar, use `Matrix.from_scalar`." ) if values.ndim == 1: raise ValueError("A 2d array or scalar is required to create a dense Matrix") diff --git a/graphblas/core/vector.py b/graphblas/core/vector.py index 14a839452..145bda76d 100644 --- a/graphblas/core/vector.py +++ b/graphblas/core/vector.py @@ -789,9 +789,13 @@ def from_pairs(cls, pairs, dtype=None, *, size=None, dup_op=None, name=None): return cls.from_coo(indices, values, dtype, size=size, dup_op=dup_op, name=name) @classmethod - def from_iso_value(cls, value, size, dtype=None, *, name=None, **opts): + def from_scalar(cls, value, size, dtype=None, *, name=None, **opts): """Create a fully dense Vector filled with a scalar value. + For SuiteSparse:GraphBLAS backend, this creates an iso-valued full Vector + that stores a single value regardless of the size of the Vector, so large + vectors created by ``Vector.from_scalar`` will use very low memory. + Parameters ---------- value : scalar @@ -824,7 +828,7 @@ def from_iso_value(cls, value, size, dtype=None, *, name=None, **opts): value = cls()._expect_type( value, Scalar, - within="from_iso_value", + within="from_scalar", keyword_name="value", extra_message="Literal scalars also accepted.", ) @@ -861,8 +865,8 @@ def from_dense(cls, values, missing_value=None, *, dtype=None, name=None, **opts -------- from_coo from_dict - from_iso_value from_pairs + from_scalar to_dense Returns @@ -873,7 +877,7 @@ def from_dense(cls, values, missing_value=None, *, dtype=None, name=None, **opts if values.ndim == 0: raise TypeError( "values must be an array or list, not a scalar. " - "To create a dense Vector from a scalar, use `Vector.from_iso_value`." + "To create a dense Vector from a scalar, use `Vector.from_scalar`." ) if values.ndim == 1 and dtype.np_type.subdtype is not None: raise ValueError("A >1d array is required to create a dense Vector with subdtype") diff --git a/graphblas/tests/test_matrix.py b/graphblas/tests/test_matrix.py index e13c4d974..92d3fad13 100644 --- a/graphblas/tests/test_matrix.py +++ b/graphblas/tests/test_matrix.py @@ -2897,8 +2897,8 @@ def test_expr_is_like_matrix(A): "from_dcsr", "from_dense", "from_dicts", - "from_iso_value", "from_edgelist", + "from_scalar", "from_values", "resize", "update", @@ -2962,8 +2962,8 @@ def test_index_expr_is_like_matrix(A): "from_dense", "from_dicts", "from_edgelist", - "from_iso_value", "from_values", + "from_scalar", "resize", } assert attrs - expr_attrs == expected, ( @@ -4109,21 +4109,21 @@ def test_to_from_edgelist(A): Matrix.from_edgelist([[0, 1, 10], [2, 3, 20]], values=0) -def test_from_iso_value(): - A = Matrix.from_iso_value(1, nrows=2, ncols=3) +def test_from_scalar(): + A = Matrix.from_scalar(1, nrows=2, ncols=3) B = Matrix(int, nrows=2, ncols=3) B << 1 assert A.isequal(B, check_dtype=True) assert_array_equal(A.to_dense(dtype=float), [[1.0, 1, 1], [1, 1, 1]]) - A = Matrix.from_iso_value(Scalar.from_value(1), nrows=2, ncols=3) + A = Matrix.from_scalar(Scalar.from_value(1), nrows=2, ncols=3) assert A.isequal(B, check_dtype=True) - A = Matrix.from_iso_value(Scalar.from_value(1.0), 2, 3, int) + A = Matrix.from_scalar(Scalar.from_value(1.0), 2, 3, int) assert A.isequal(B, check_dtype=True) with pytest.raises(TypeError, match="missing"): - Matrix.from_iso_value(1, nrows=2) + Matrix.from_scalar(1, nrows=2) with pytest.raises(TypeError, match="Literal scalars also accepted"): - Matrix.from_iso_value(A, nrows=2, ncols=3) - A = Matrix.from_iso_value(1, dtype="INT64[2]", nrows=3, ncols=4) + Matrix.from_scalar(A, nrows=2, ncols=3) + A = Matrix.from_scalar(1, dtype="INT64[2]", nrows=3, ncols=4) B = Matrix("INT64[2]", nrows=3, ncols=4) B << [1, 1] assert A.isequal(B, check_dtype=True) @@ -4155,7 +4155,7 @@ def test_to_dense_from_dense(): Matrix.from_dense(np.arange(24).reshape(2, 3, 4), dtype=int) with pytest.raises(ValueError, match=">2d array"): Matrix.from_dense(np.arange(6).reshape(2, 3), dtype="INT64[2]") - with pytest.raises(TypeError, match="from_iso_value"): + with pytest.raises(TypeError, match="from_scalar"): Matrix.from_dense(1) diff --git a/graphblas/tests/test_vector.py b/graphblas/tests/test_vector.py index 4bb78923c..f373e39a9 100644 --- a/graphblas/tests/test_vector.py +++ b/graphblas/tests/test_vector.py @@ -1618,8 +1618,8 @@ def test_expr_is_like_vector(v): "from_coo", "from_dense", "from_dict", - "from_iso_value", "from_pairs", + "from_scalar", "from_values", "resize", "update", @@ -1667,8 +1667,8 @@ def test_index_expr_is_like_vector(v): "from_coo", "from_dense", "from_dict", - "from_iso_value", "from_pairs", + "from_scalar", "from_values", "resize", } @@ -2490,19 +2490,19 @@ def test_from_pairs(): Vector.from_pairs([[1, 2, 3], [4, 5, 6]]) -def test_from_iso_value(): - v = Vector.from_iso_value(1, size=3) +def test_from_scalar(): + v = Vector.from_scalar(1, size=3) w = Vector.from_coo([0, 1, 2], 1) assert v.isequal(w, check_dtype=True) assert_array_equal(v.to_dense(), [1, 1, 1]) - v = Vector.from_iso_value(Scalar.from_value(1), size=3) + v = Vector.from_scalar(Scalar.from_value(1), size=3) assert v.isequal(w, check_dtype=True) - v = Vector.from_iso_value(Scalar.from_value(1.0), 3, int) + v = Vector.from_scalar(Scalar.from_value(1.0), 3, int) with pytest.raises(TypeError, match="missing"): - Vector.from_iso_value(1) + Vector.from_scalar(1) with pytest.raises(TypeError, match="Literal scalars also accepted"): - Vector.from_iso_value(v, size=2) - v = Vector.from_iso_value(1, dtype="INT64[2]", size=3) + Vector.from_scalar(v, size=2) + v = Vector.from_scalar(1, dtype="INT64[2]", size=3) w = Vector("INT64[2]", size=3) w << [1, 1] assert v.isequal(w, check_dtype=True) @@ -2532,7 +2532,7 @@ def test_to_dense_from_dense(): Vector.from_dense(np.arange(6).reshape(2, 3), dtype=int) with pytest.raises(ValueError, match=">1d array"): Vector.from_dense(np.arange(6), dtype="INT64[2]") - with pytest.raises(TypeError, match="from_iso_value"): + with pytest.raises(TypeError, match="from_scalar"): Vector.from_dense(1) diff --git a/pyproject.toml b/pyproject.toml index f19afbe7c..1a672ce22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -189,6 +189,7 @@ select = [ "C4", # flake8-comprehensions "DTZ", # flake8-datetimez "T10", # flake8-debugger + # "DJ", # flake8-django # "EM", # flake8-errmsg "EXE", # flake8-executable "ISC", # flake8-implicit-str-concat @@ -217,6 +218,7 @@ select = [ "TRY", # tryceratops # "RSE", # flake8-raise # "SLF", # flake8-self + # "NPY", # NumPy-specific rules "RUF", # ruff-specific rules ] external = [ From 06ee5037882d60c93398cbe39cf4ac98e6029156 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Wed, 15 Feb 2023 13:12:52 -0600 Subject: [PATCH 13/14] Update `flake8-bugbear` and make improvements --- .pre-commit-config.yaml | 2 +- graphblas/agg/__init__.py | 1 + graphblas/binary/__init__.py | 1 + graphblas/core/matrix.py | 2 ++ graphblas/core/ss/matrix.py | 6 ++++++ graphblas/core/vector.py | 2 ++ graphblas/dtypes.py | 3 ++- graphblas/op/__init__.py | 1 + graphblas/semiring/__init__.py | 1 + graphblas/unary/__init__.py | 1 + scripts/check_versions.sh | 2 +- 11 files changed, 19 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0ef7cdea4..cc780486f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -56,7 +56,7 @@ repos: # These versions need updated manually - flake8==6.0.0 - flake8-comprehensions==3.10.1 - - flake8-bugbear==23.1.20 + - flake8-bugbear==23.2.13 - flake8-simplify==0.19.3 - repo: https://github.com/asottile/yesqa rev: v1.4.0 diff --git a/graphblas/agg/__init__.py b/graphblas/agg/__init__.py index 1447dd932..f2dddb851 100644 --- a/graphblas/agg/__init__.py +++ b/graphblas/agg/__init__.py @@ -91,6 +91,7 @@ def __getattr__(key): f"`{key}` is specific to SuiteSparse:GraphBLAS. " f"`gb.agg.{key}` will be removed in version 2023.9.0 or later.", DeprecationWarning, + stacklevel=2, ) rv = _deprecated[key] globals()[key] = rv diff --git a/graphblas/binary/__init__.py b/graphblas/binary/__init__.py index 089869eb7..e59c0405e 100644 --- a/graphblas/binary/__init__.py +++ b/graphblas/binary/__init__.py @@ -24,6 +24,7 @@ def __getattr__(key): f"`{key}` is specific to SuiteSparse:GraphBLAS. " f"`gb.binary.{key}` will be removed in version 2023.9.0 or later.", DeprecationWarning, + stacklevel=2, ) rv = _deprecated[key] globals()[key] = rv diff --git a/graphblas/core/matrix.py b/graphblas/core/matrix.py index 4262b92a3..70937ec78 100644 --- a/graphblas/core/matrix.py +++ b/graphblas/core/matrix.py @@ -473,6 +473,7 @@ def to_values(self, dtype=None, *, rows=True, columns=True, values=True, sort=Tr warnings.warn( "`Matrix.to_values(...)` is deprecated; please use `Matrix.to_coo(...)` instead.", DeprecationWarning, + stacklevel=2, ) return self.to_coo(dtype, rows=rows, columns=columns, values=values, sort=sort) @@ -810,6 +811,7 @@ def from_values( warnings.warn( "`Matrix.from_values(...)` is deprecated; please use `Matrix.from_coo(...)` instead.", DeprecationWarning, + stacklevel=2, ) return cls.from_coo( rows, columns, values, dtype, nrows=nrows, ncols=ncols, dup_op=dup_op, name=name diff --git a/graphblas/core/ss/matrix.py b/graphblas/core/ss/matrix.py index 7550f9a3b..b455d760e 100644 --- a/graphblas/core/ss/matrix.py +++ b/graphblas/core/ss/matrix.py @@ -3733,6 +3733,7 @@ def scan_columnwise(self, op=monoid.plus, *, name=None, **opts): "`Matrix.ss.scan_columnwise` is deprecated; " 'please use `Matrix.ss.scan(order="columnwise")` instead.', DeprecationWarning, + stacklevel=2, ) return prefix_scan(self._parent.T, op, name=name, within="scan_columnwise", **opts) @@ -3754,6 +3755,7 @@ def scan_rowwise(self, op=monoid.plus, *, name=None, **opts): warnings.warn( "`Matrix.ss.scan_rowwise` is deprecated; please use `Matrix.ss.scan` instead.", DeprecationWarning, + stacklevel=2, ) return prefix_scan(self._parent, op, name=name, within="scan_rowwise", **opts) @@ -3920,6 +3922,7 @@ def selectk_rowwise(self, how, k, *, name=None): # pragma: no cover (deprecated warnings.warn( "`Matrix.ss.selectk_rowwise` is deprecated; please use `Matrix.ss.selectk` instead.", DeprecationWarning, + stacklevel=2, ) how = how.lower() fmt = "hypercsr" @@ -3966,6 +3969,7 @@ def selectk_columnwise(self, how, k, *, name=None): # pragma: no cover (depreca "`Matrix.ss.selectk_columnwise` is deprecated; " 'please use `Matrix.ss.selectk(order="columnwise")` instead.', DeprecationWarning, + stacklevel=2, ) how = how.lower() fmt = "hypercsc" @@ -4090,6 +4094,7 @@ def compactify_rowwise( "`Matrix.ss.compactify_rowwise` is deprecated; " "please use `Matrix.ss.compactify` instead.", DeprecationWarning, + stacklevel=2, ) return self._compactify( how, reverse, asindex, "ncols", ncols, "hypercsr", "col_indices", name @@ -4132,6 +4137,7 @@ def compactify_columnwise( "`Matrix.ss.compactify_columnwise` is deprecated; " 'please use `Matrix.ss.compactify(order="columnwise")` instead.', DeprecationWarning, + stacklevel=2, ) return self._compactify( how, reverse, asindex, "nrows", nrows, "hypercsc", "row_indices", name diff --git a/graphblas/core/vector.py b/graphblas/core/vector.py index 145bda76d..7664a5901 100644 --- a/graphblas/core/vector.py +++ b/graphblas/core/vector.py @@ -434,6 +434,7 @@ def to_values(self, dtype=None, *, indices=True, values=True, sort=True): warnings.warn( "`Vector.to_values(...)` is deprecated; please use `Vector.to_coo(...)` instead.", DeprecationWarning, + stacklevel=2, ) return self.to_coo(dtype, indices=indices, values=values, sort=sort) @@ -680,6 +681,7 @@ def from_values(cls, indices, values, dtype=None, *, size=None, dup_op=None, nam warnings.warn( "`Vector.from_values(...)` is deprecated; please use `Vector.from_coo(...)` instead.", DeprecationWarning, + stacklevel=2, ) return cls.from_coo(indices, values, dtype, size=size, dup_op=dup_op, name=name) diff --git a/graphblas/dtypes.py b/graphblas/dtypes.py index e864a412f..2f8b40e43 100644 --- a/graphblas/dtypes.py +++ b/graphblas/dtypes.py @@ -130,7 +130,8 @@ def register_anonymous(dtype, name=None): np_repr = np_repr[: _lib.GxB_MAX_NAME_LEN] _warnings.warn( f"{msg}. It will use the following name, " - f"and the dtype may need to be specified when deserializing: {np_repr}" + f"and the dtype may need to be specified when deserializing: {np_repr}", + stacklevel=2, ) status = _lib.GxB_Type_new(gb_obj, dtype.itemsize, np_repr, _NULL) else: diff --git a/graphblas/op/__init__.py b/graphblas/op/__init__.py index b02b5228c..af05cbef4 100644 --- a/graphblas/op/__init__.py +++ b/graphblas/op/__init__.py @@ -17,6 +17,7 @@ def __getattr__(key): f"`{key}` is specific to SuiteSparse:GraphBLAS. " f"`gb.op.{key}` will be removed in version 2023.9.0 or later.", DeprecationWarning, + stacklevel=2, ) rv = _deprecated[key] globals()[key] = rv diff --git a/graphblas/semiring/__init__.py b/graphblas/semiring/__init__.py index be181d405..904ae192f 100644 --- a/graphblas/semiring/__init__.py +++ b/graphblas/semiring/__init__.py @@ -17,6 +17,7 @@ def __getattr__(key): f"`{key}` is specific to SuiteSparse:GraphBLAS. " f"`gb.semiring.{key}` will be removed in version 2023.9.0 or later.", DeprecationWarning, + stacklevel=2, ) rv = _deprecated[key] globals()[key] = rv diff --git a/graphblas/unary/__init__.py b/graphblas/unary/__init__.py index 92e508df3..a503b5141 100644 --- a/graphblas/unary/__init__.py +++ b/graphblas/unary/__init__.py @@ -17,6 +17,7 @@ def __getattr__(key): f"`{key}` is specific to SuiteSparse:GraphBLAS. " f"`gb.unary.{key}` will be removed in version 2023.9.0 or later.", DeprecationWarning, + stacklevel=2, ) rv = _deprecated[key] globals()[key] = rv diff --git a/scripts/check_versions.sh b/scripts/check_versions.sh index b997d4d77..14f39f18c 100755 --- a/scripts/check_versions.sh +++ b/scripts/check_versions.sh @@ -11,5 +11,5 @@ conda search 'awkward[channel=conda-forge]>=2.0.7' conda search 'numba[channel=conda-forge]>=0.56.4' conda search 'pyyaml[channel=conda-forge]>=6.0' conda search 'flake8-comprehensions[channel=conda-forge]>=3.10.1' -conda search 'flake8-bugbear[channel=conda-forge]>=23.1.20' +conda search 'flake8-bugbear[channel=conda-forge]>=23.2.13' conda search 'flake8-simplify[channel=conda-forge]>=0.19.3' From b4a80a4520ba0c86247bdd54eedd0eaf713c663f Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Fri, 17 Feb 2023 05:07:08 +0100 Subject: [PATCH 14/14] Add comment for how to create iso-valued objects with structure and scalar A more flexible way with any mask is e.g.: ```python w = Vector(v.dtype, size=v.size) w(~v.S) << value ``` --- .pre-commit-config.yaml | 2 +- graphblas/core/matrix.py | 3 +++ graphblas/core/vector.py | 3 +++ pyproject.toml | 2 +- 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cc780486f..04a837493 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -71,7 +71,7 @@ repos: additional_dependencies: [tomli] files: ^(graphblas|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.246 + rev: v0.0.247 hooks: - id: ruff - repo: https://github.com/sphinx-contrib/sphinx-lint diff --git a/graphblas/core/matrix.py b/graphblas/core/matrix.py index 70937ec78..8b9b4b678 100644 --- a/graphblas/core/matrix.py +++ b/graphblas/core/matrix.py @@ -1338,6 +1338,9 @@ def from_scalar(cls, value, nrows, ncols, dtype=None, *, name=None, **opts): that stores a single value regardless of the shape of the Matrix, so large matrices created by ``Matrix.from_scalar`` will use very low memory. + If instead you want to create a new iso-valued Matrix with the same structure + as an existing Matrix, you may do: ``C = binary.second(A, value).new()``. + Parameters ---------- value : scalar diff --git a/graphblas/core/vector.py b/graphblas/core/vector.py index 7664a5901..dd183d856 100644 --- a/graphblas/core/vector.py +++ b/graphblas/core/vector.py @@ -798,6 +798,9 @@ def from_scalar(cls, value, size, dtype=None, *, name=None, **opts): that stores a single value regardless of the size of the Vector, so large vectors created by ``Vector.from_scalar`` will use very low memory. + If instead you want to create a new iso-valued Vector with the same structure + as an existing Vector, you may do: ``w = binary.second(v, value).new()``. + Parameters ---------- value : scalar diff --git a/pyproject.toml b/pyproject.toml index 1a672ce22..f0cef8e0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -218,7 +218,7 @@ select = [ "TRY", # tryceratops # "RSE", # flake8-raise # "SLF", # flake8-self - # "NPY", # NumPy-specific rules + "NPY", # NumPy-specific rules "RUF", # ruff-specific rules ] external = [