How to perform computations with NumPy#
Awkward Array’s integration with NumPy allows you to use NumPy’s array functions on data with complex structures, including ragged and heterogeneous arrays.
import awkward as ak
import numpy as np
Universal functions (ufuncs)#
NumPy’s universal functions (ufuncs) are functions that operate elementwise on arrays. They are broadcasting-aware, so they can naturally handle data structures like ragged arrays that are common in Awkward Arrays.
Here’s an example of applying np.sqrt, a NumPy ufunc, to an Awkward Array:
data = ak.Array([[1, 4, 9], [], [16, 25]])
np.sqrt(data)
[[1, 2, 3], [], [4, 5]] ----------- backend: cpu nbytes: 72 B type: 3 * var * float64
Notice that the ufunc applies to the numeric data, passing through all dimensions of nested lists, even if those lists have variable length. This also applies to heterogeneous data, in which the data are not all of the same type.
data = ak.Array([[1, 4, 9], [], 16, [[[25]]]])
np.sqrt(data)
[[1, 2, 3],
[],
4,
[[[5]]]]
-----------
backend: cpu
nbytes: 176 B
type: 4 * union[
var * union[
float64,
var * var * float6...
Unary and binary operations on Awkward Arrays, such as +, -, >, and ==, actually call NumPy ufuncs. For instance, +:
array1 = ak.Array([[1, 2, 3], [], [4, 5]])
array2 = ak.Array([[10, 20, 30], [], [40, 50]])
array1 + array2
[[11, 22, 33], [], [44, 55]] -------------- backend: cpu nbytes: 72 B type: 3 * var * int64
is actually np.add:
np.add(array1, array2)
[[11, 22, 33], [], [44, 55]] -------------- backend: cpu nbytes: 72 B type: 3 * var * int64
Arrays with record fields#
Ufuncs can only be applied to numerical data in lists, not records.
records = ak.Array([{"x": 4, "y": 9}, {"x": 16, "y": 25}])
np.sqrt(records)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[7], line 1
----> 1 np.sqrt(records)
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/highlevel.py:1632, in Array.__array_ufunc__(self, ufunc, method, *inputs, **kwargs)
1567 """
1568 Intercepts attempts to pass this Array to a NumPy
1569 [universal functions](https://docs.scipy.org/doc/numpy/reference/ufuncs.html)
(...) 1629 See also #__array_function__.
1630 """
1631 name = f"{type(ufunc).__module__}.{ufunc.__name__}.{method!s}"
-> 1632 with ak._errors.OperationErrorContext(name, inputs, kwargs):
1633 return ak._connect.numpy.array_ufunc(ufunc, method, inputs, kwargs)
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_errors.py:80, in ErrorContext.__exit__(self, exception_type, exception_value, traceback)
78 self._slate.__dict__.clear()
79 # Handle caught exception
---> 80 raise self.decorate_exception(exception_type, exception_value)
81 else:
82 # Step out of the way so that another ErrorContext can become primary.
83 if self.primary() is self:
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/highlevel.py:1633, in Array.__array_ufunc__(self, ufunc, method, *inputs, **kwargs)
1631 name = f"{type(ufunc).__module__}.{ufunc.__name__}.{method!s}"
1632 with ak._errors.OperationErrorContext(name, inputs, kwargs):
-> 1633 return ak._connect.numpy.array_ufunc(ufunc, method, inputs, kwargs)
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_connect/numpy.py:485, in array_ufunc(ufunc, method, inputs, kwargs)
477 raise TypeError(
478 "no {}.{} overloads for custom types: {}".format(
479 type(ufunc).__module__, ufunc.__name__, ", ".join(error_message)
480 )
481 )
483 return None
--> 485 out = ak._broadcasting.broadcast_and_apply(
486 inputs,
487 action,
488 depth_context=depth_context,
489 lateral_context=lateral_context,
490 allow_records=False,
491 function_name=ufunc.__name__,
492 )
494 out_named_axis = functools.reduce(
495 _unify_named_axis, lateral_context[NAMED_AXIS_KEY].named_axis
496 )
497 if len(out) == 1:
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:1223, in broadcast_and_apply(inputs, action, depth_context, lateral_context, allow_records, left_broadcast, right_broadcast, numpy_to_regular, regular_to_jagged, function_name, broadcast_parameters_rule)
1221 backend = backend_of(*inputs, coerce_to_common=False)
1222 isscalar = []
-> 1223 out = apply_step(
1224 backend,
1225 broadcast_pack(inputs, isscalar),
1226 action,
1227 0,
1228 depth_context,
1229 lateral_context,
1230 {
1231 "allow_records": allow_records,
1232 "left_broadcast": left_broadcast,
1233 "right_broadcast": right_broadcast,
1234 "numpy_to_regular": numpy_to_regular,
1235 "regular_to_jagged": regular_to_jagged,
1236 "function_name": function_name,
1237 "broadcast_parameters_rule": broadcast_parameters_rule,
1238 },
1239 )
1240 assert isinstance(out, tuple)
1241 return tuple(broadcast_unpack(x, isscalar) for x in out)
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:1201, in apply_step(backend, inputs, action, depth, depth_context, lateral_context, options)
1199 return result
1200 elif result is None:
-> 1201 return continuation()
1202 else:
1203 raise AssertionError(result)
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:1170, in apply_step.<locals>.continuation()
1168 # Any non-string list-types?
1169 elif any(x.is_list and not is_string_like(x) for x in contents):
-> 1170 return broadcast_any_list()
1172 # Any RecordArrays?
1173 elif any(x.is_record for x in contents):
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:663, in apply_step.<locals>.broadcast_any_list()
660 nextinputs.append(x)
661 nextparameters.append(NO_PARAMETERS)
--> 663 outcontent = apply_step(
664 backend,
665 nextinputs,
666 action,
667 depth + 1,
668 copy.copy(depth_context),
669 lateral_context,
670 options,
671 )
672 assert isinstance(outcontent, tuple)
673 parameters = parameters_factory(nextparameters, len(outcontent))
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:1201, in apply_step(backend, inputs, action, depth, depth_context, lateral_context, options)
1199 return result
1200 elif result is None:
-> 1201 return continuation()
1202 else:
1203 raise AssertionError(result)
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:1174, in apply_step.<locals>.continuation()
1172 # Any RecordArrays?
1173 elif any(x.is_record for x in contents):
-> 1174 return broadcast_any_record()
1176 else:
1177 raise ValueError(
1178 "cannot broadcast: {}{}".format(
1179 ", ".join(repr(type(x)) for x in inputs), in_function(options)
1180 )
1181 )
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:490, in apply_step.<locals>.broadcast_any_record()
488 def broadcast_any_record():
489 if not options["allow_records"]:
--> 490 raise ValueError(f"cannot broadcast records{in_function(options)}")
492 frozen_record_fields: frozenset[str] | None = UNSET
493 first_record = next(c for c in contents if c.is_record)
ValueError: cannot broadcast records in sqrt
This error occurred while calling
numpy.sqrt.__call__(
<Array [{x: 4, y: 9}, {x: 16, ...}] type='2 * {x: int64, y: int64}'>
)
However, you can pull each field out of a record and apply the ufunc to it.
np.sqrt(records.x)
[2, 4] --- backend: cpu nbytes: 16 B type: 2 * float64
np.sqrt(records.y)
[3, 5] --- backend: cpu nbytes: 16 B type: 2 * float64
If you want the result wrapped up in a new array of records, you can use ak.zip() to do that.
ak.zip({"x": np.sqrt(records.x), "y": np.sqrt(records.y)})
[{x: 2, y: 3},
{x: 4, y: 5}]
--------------
backend: cpu
nbytes: 32 B
type: 2 * {
x: float64,
y: float64
}
Here’s an idiom that applies a ufunc to every field individually, and then wraps up the result as a new record with the same fields (using ak.fields(), ak.unzip(), and ak.zip()):
ak.zip({key: np.sqrt(value) for key, value in zip(ak.fields(records), ak.unzip(records))})
[{x: 2, y: 3},
{x: 4, y: 5}]
--------------
backend: cpu
nbytes: 32 B
type: 2 * {
x: float64,
y: float64
}
The reason that Awkward Array does not do this automatically is that it prevents mistakes: it’s common for records to represent coordinates of data points, and if the coordinates are not Cartesian, the one-to-one application is not correct.
Using non-NumPy ufuncs#
NumPy-compatible ufuncs exist in other libraries, like SciPy, and can be applied in the same way. Here’s how you can apply scipy.special.gamma and scipy.special.erf:
import scipy.special
data = ak.Array([[0.1, 0.2, 0.3], [], [0.4, 0.5]])
scipy.special.gamma(data)
[[9.51, 4.59, 2.99], [], [2.22, 1.77]] -------------------- backend: cpu nbytes: 72 B type: 3 * var * float64
scipy.special.erf(data)
[[0.112, 0.223, 0.329], [], [0.428, 0.52]] ----------------------- backend: cpu nbytes: 72 B type: 3 * var * float64
You can even create your own ufuncs using Numba’s @nb.vectorize:
import numba as nb
@nb.vectorize
def gcd_euclid(x, y):
    """Return the greatest common divisor of x and y via Euclid's algorithm.

    Written as a scalar function; the ``@nb.vectorize`` decorator turns it
    into a ufunc so it can be applied elementwise to arrays.
    """
    # computation that is more complex than a formula:
    # repeatedly replace (x, y) with (y, x mod y) until the remainder is zero
    while y != 0:
        remainder = x % y
        x = y
        y = remainder
    return x
x = ak.Array([[10, 20, 30], [], [40, 50]])
y = ak.Array([[5, 40, 15], [], [24, 255]])
gcd_euclid(x, y)
[[5, 20, 15], [], [8, 5]] ------------- backend: cpu nbytes: 72 B type: 3 * var * int64
Since Numba has JIT-compiled this function, it would run much faster on large arrays than custom Python code.
Non-ufunc NumPy functions#
Some NumPy functions don’t satisfy the ufunc protocol, but have been implemented for Awkward Arrays because they are useful. You can tell that a NumPy function has an Awkward Array implementation when a function with the same name and signature exists in both libraries.
For instance, np.where works on Awkward Arrays because ak.where() exists:
np.where(y % 2 == 0, x, y)
[[5, 20, 15], [], [40, 255]] ------------- backend: cpu nbytes: 72 B type: 3 * var * int64
(The above selects elements from x when y is even and elements from y when y is odd.)
Similarly, np.concatenate works on Awkward Arrays because ak.concatenate() exists:
np.concatenate([x, y])
[[10, 20, 30], [], [40, 50], [5, 40, 15], [], [24, 255]] -------------- backend: cpu nbytes: 136 B type: 6 * var * int64
np.concatenate([x, y], axis=1)
[[10, 20, 30, 5, 40, 15], [], [40, 50, 24, 255]] ------------------------- backend: cpu nbytes: 112 B type: 3 * var * int64
Other NumPy functions, without an equivalent in the Awkward Array library, will work only if the Awkward Array can be converted into a NumPy array.
Ragged arrays can’t be converted to NumPy:
np.fft.fft(ak.Array([[1.1, 2.2, 3.3], [], [7.7, 8.8, 9.9]]))
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[21], line 1
----> 1 np.fft.fft(ak.Array([[1.1, 2.2, 3.3], [], [7.7, 8.8, 9.9]]))
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/highlevel.py:1649, in Array.__array_function__(self, func, types, args, kwargs)
1635 def __array_function__(self, func, types, args, kwargs):
1636 """
1637 Intercepts attempts to pass this Array to those NumPy functions other
1638 than universal functions that have an Awkward equivalent.
(...) 1647 See also #__array_ufunc__.
1648 """
-> 1649 return ak._connect.numpy.array_function(
1650 func, types, args, kwargs, behavior=self._behavior, attrs=self._attrs
1651 )
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_connect/numpy.py:110, in array_function(func, types, args, kwargs, behavior, attrs)
107 unique_backends = frozenset(_find_backends(all_arguments))
108 backend = common_backend(unique_backends)
--> 110 rectilinear_args = tuple(_to_rectilinear(x, backend) for x in args)
111 rectilinear_kwargs = {k: _to_rectilinear(v, backend) for k, v in kwargs.items()}
112 result = func(*rectilinear_args, **rectilinear_kwargs)
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_connect/numpy.py:110, in <genexpr>(.0)
107 unique_backends = frozenset(_find_backends(all_arguments))
108 backend = common_backend(unique_backends)
--> 110 rectilinear_args = tuple(_to_rectilinear(x, backend) for x in args)
111 rectilinear_kwargs = {k: _to_rectilinear(v, backend) for k, v in kwargs.items()}
112 result = func(*rectilinear_args, **rectilinear_kwargs)
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_connect/numpy.py:79, in _to_rectilinear(arg, backend)
70 # Otherwise, cast to layout and convert
71 else:
72 layout = ak.to_layout(
73 arg,
74 allow_record=False,
(...) 77 string_policy="error",
78 )
---> 79 return layout.to_backend(backend).to_backend_array(allow_missing=True)
80 elif isinstance(arg, tuple):
81 return tuple(_to_rectilinear(x, backend) for x in arg)
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/contents/content.py:1136, in Content.to_backend_array(self, allow_missing, backend)
1134 else:
1135 backend = regularize_backend(backend)
-> 1136 return self._to_backend_array(allow_missing, backend)
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/contents/listoffsetarray.py:2191, in ListOffsetArray._to_backend_array(self, allow_missing, backend)
2189 return buffer.view(np.dtype(("S", max_count)))
2190 else:
-> 2191 return self.to_RegularArray()._to_backend_array(allow_missing, backend)
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/contents/listoffsetarray.py:292, in ListOffsetArray.to_RegularArray(self)
287 _size = Index64.empty(1, self._backend.nplike)
288 assert (
289 _size.nplike is self._backend.nplike
290 and self._offsets.nplike is self._backend.nplike
291 )
--> 292 self._backend.maybe_kernel_error(
293 self._backend[
294 "awkward_ListOffsetArray_toRegularArray",
295 _size.dtype.type,
296 self._offsets.dtype.type,
297 ](
298 _size.data,
299 self._offsets.data,
300 self._offsets.length,
301 )
302 )
303 size = self._backend.nplike.index_as_shape_item(_size[0])
304 length = self._offsets.length - 1
File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_backends/backend.py:62, in Backend.maybe_kernel_error(self, error)
60 return
61 else:
---> 62 raise ValueError(self.format_kernel_error(error))
ValueError: cannot convert to RegularArray because subarray lengths are not regular (in compiled code: https://github.com/scikit-hep/awkward/blob/awkward-cpp-53/awkward-cpp/src/cpu-kernels/awkward_ListOffsetArray_toRegularArray.cpp#L22)
But arrays with equal-sized lists can:
np.fft.fft(ak.Array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6], [7.7, 8.8, 9.9]]))
[[6.6+0j, -1.65+0.953j, -1.65+-0.953j], [16.5+0j, -1.65+0.953j, -1.65+-0.953j], [26.4+0j, -1.65+0.953j, -1.65+-0.953j]] ---------------------------------------- backend: cpu nbytes: 144 B type: 3 * 3 * complex128