diff --git a/doc/_templates/autosummary/cython_class.rst b/doc/_templates/autosummary/cython_class.rst new file mode 100644 index 000000000000..d39cceca002e --- /dev/null +++ b/doc/_templates/autosummary/cython_class.rst @@ -0,0 +1,29 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ name }} + {% block methods %} + + {% if methods %} + .. rubric:: {{ _('Methods') }} + + .. autosummary:: + :toctree: generated + {% for item in methods if item != "__init__" %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block attributes %} + {% if attributes %} + .. rubric:: {{ _('Attributes') }} + + .. autosummary:: + :toctree: generated + {% for item in attributes %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} diff --git a/doc/_templates/autosummary/elementwise.rst b/doc/_templates/autosummary/elementwise.rst new file mode 100644 index 000000000000..47d3dc641598 --- /dev/null +++ b/doc/_templates/autosummary/elementwise.rst @@ -0,0 +1,12 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +{% if objtype == "data" %} +.. auto{{ objtype }}:: {{ objname }} + :no-value: +{% endif %} + +{% if objtype == "function" %} +.. auto{{ objtype }}:: {{ objname }} +{% endif %} diff --git a/doc/_templates/autosummary/usm_ndarray.rst b/doc/_templates/autosummary/usm_ndarray.rst new file mode 100644 index 000000000000..81026c7cd72f --- /dev/null +++ b/doc/_templates/autosummary/usm_ndarray.rst @@ -0,0 +1,45 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + + + +.. autoclass:: {{ name }} + + {% block methods %} + + {% if methods %} + .. rubric:: {{ _('Methods') }} + + .. autosummary:: + :toctree: generated + {% for item in methods if item != "__init__" %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block attributes %} + {% if attributes %} + .. rubric:: {{ _('Attributes') }} + + .. 
autosummary:: + :toctree: generated + {% for item in attributes %} + ~{{ name }}.{{ item }} + {%- endfor %} + + .. rubric:: {{ _('Special attributes') }} + + .. autosummary:: + :toctree: generated + + ~{{name}}.__dlpack_device__ + ~{{name}}.__dlpack__ + ~{{name}}.__sycl_usm_array_interface__ + ~{{name}}._pointer + ~{{name}}._element_offset + ~{{name}}._byte_bounds + + {% endif %} + {% endblock %} diff --git a/doc/index.rst b/doc/index.rst index 38c12489636b..beedd7c78aaf 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -12,6 +12,7 @@ Data Parallel Extension for NumPy* overview quick_start_guide + user_guides/index reference/index .. toctree:: diff --git a/doc/reference/index.rst b/doc/reference/index.rst index c6525004f71d..84c17bb8b930 100644 --- a/doc/reference/index.rst +++ b/doc/reference/index.rst @@ -13,6 +13,7 @@ API reference of the Data Parallel Extension for NumPy* .. toctree:: :maxdepth: 2 + tensor ndarray ufunc routines diff --git a/doc/reference/tensor.accumulation_functions.rst b/doc/reference/tensor.accumulation_functions.rst new file mode 100644 index 000000000000..de0da7c01898 --- /dev/null +++ b/doc/reference/tensor.accumulation_functions.rst @@ -0,0 +1,15 @@ +.. _dpnp_tensor_accumulation_functions: + +Accumulation functions +====================== + +Accumulation functions compute cumulative results along a given axis of the input array. + +.. currentmodule:: dpnp.tensor + +.. autosummary:: + :toctree: generated + + cumulative_logsumexp + cumulative_prod + cumulative_sum diff --git a/doc/reference/tensor.constants.rst b/doc/reference/tensor.constants.rst new file mode 100644 index 000000000000..6ce5b1155e7c --- /dev/null +++ b/doc/reference/tensor.constants.rst @@ -0,0 +1,35 @@ +.. _dpnp_tensor_constants: + +Constants +========= + +The following constants are defined in :py:mod:`dpnp.tensor`: + +.. currentmodule:: dpnp.tensor + +.. autodata:: DLDeviceType + +.. data:: e + + ``float``: + IEEE 754 floating-point representation of Euler's constant. 
+ +.. data:: inf + + ``float``: + IEEE 754 floating-point representation of (positive) infinity. + +.. data:: nan + + ``float``: + IEEE 754 floating-point representation of Not a Number (NaN). + +.. data:: newaxis + + ``NoneType``: + Alias for ``None`` which is useful for indexing. + +.. data:: pi + + ``float``: + IEEE 754 floating-point representation of the mathematical constant π. diff --git a/doc/reference/tensor.creation_functions.rst b/doc/reference/tensor.creation_functions.rst new file mode 100644 index 000000000000..670c7625843d --- /dev/null +++ b/doc/reference/tensor.creation_functions.rst @@ -0,0 +1,31 @@ +.. _dpnp_tensor_creation_functions: + +Array creation functions +======================== + +The following functions in :py:mod:`dpnp.tensor` can be used +to create new arrays: + +.. currentmodule:: dpnp.tensor + +.. autosummary:: + :toctree: generated + + arange + asarray + empty + empty_like + eye + from_dlpack + full + full_like + linspace + meshgrid + ones + ones_like + tril + triu + zeros + zeros_like + from_numpy + copy diff --git a/doc/reference/tensor.data_type_functions.rst b/doc/reference/tensor.data_type_functions.rst new file mode 100644 index 000000000000..6adca69081b2 --- /dev/null +++ b/doc/reference/tensor.data_type_functions.rst @@ -0,0 +1,21 @@ +.. _dpnp_tensor_data_type_functions: + +Data type functions +=================== + +The package :py:mod:`dpnp.tensor` contains the following data type functions conforming +to `Python Array API specification `_: + +.. _array_api_data_type_fns: https://data-apis.org/array-api/latest/API_specification/data_type_functions.html + +.. currentmodule:: dpnp.tensor + +.. autosummary:: + :toctree: generated + + astype + can_cast + finfo + iinfo + isdtype + result_type diff --git a/doc/reference/tensor.data_types.rst b/doc/reference/tensor.data_types.rst new file mode 100644 index 000000000000..52230a9f1e89 --- /dev/null +++ b/doc/reference/tensor.data_types.rst @@ -0,0 +1,127 @@ +.. 
_dpnp_tensor_data_types: + +.. currentmodule:: dpnp.tensor + +Data types +========== + +:py:mod:`dpnp.tensor` supports the following data types: + ++----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Data Type | Description | ++================+=========================================================================================================================================================================================+ +| ``bool`` | Boolean (``True`` or ``False``) | ++----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``int8`` | An 8-bit signed integer type capable of representing :math:`v` subject to :math:`-2^7 \le v < 2^7` | ++----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``int16`` | A 16-bit signed integer type capable of representing :math:`v` subject to :math:`-2^{15} \le v < 2^{15}` | ++----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``int32`` | A 32-bit signed integer type capable of representing :math:`v` subject to :math:`-2^{31} \le v < 2^{31}` | ++----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``int64`` | A 64-bit signed integer type capable of representing :math:`v` subject to :math:`-2^{63} \le v < 2^{63}` | 
++----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``uint8`` | An 8-bit unsigned integer type capable of representing :math:`v` subject to :math:`0 \le v < 2^8` | ++----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``uint16`` | A 16-bit unsigned integer type capable of representing :math:`v` subject to :math:`0 \le v < 2^{16}` | ++----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``uint32`` | A 32-bit unsigned integer type capable of representing :math:`v` subject to :math:`0 \le v < 2^{32}` | ++----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``uint64`` | A 64-bit unsigned integer type capable of representing :math:`v` subject to :math:`0 \le v < 2^{64}` | ++----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``float16`` | An IEEE-754 half-precision (16-bit) binary floating-point number (see `IEEE 754-2019`_) | ++----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``float32`` | An IEEE-754 single-precision (32-bit) binary floating-point number (see `IEEE 754-2019`_) | 
++----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``float64`` | An IEEE-754 double-precision (64-bit) binary floating-point number (see `IEEE 754-2019`_) | ++----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``complex64`` | Single-precision (64-bit) complex floating-point number whose real and imaginary components are IEEE 754 single-precision (32-bit) binary floating-point numbers (see `IEEE 754-2019`_) | ++----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``complex128`` | Double-precision (128-bit) complex floating-point number whose real and imaginary components are IEEE 754 double-precision (64-bit) binary floating-point numbers (see `IEEE 754-2019`_)| ++----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +.. _IEEE 754-2019: https://doi.org/10.1109%2FIEEESTD.2019.8766229 + +Data type support by array object :py:class:`usm_ndarray` depends on capabilities of :class:`dpctl.SyclDevice` where array is allocated. + +Half-precision floating-point type ``float16`` is supported only for devices whose attribute :attr:`dpctl.SyclDevice.has_aspect_fp16` evaluates to ``True``. + +Double-precision floating-point type ``float64`` and double-precision complex floating-point type ``complex128`` are supported only for devices whose attribute :attr:`dpctl.SyclDevice.has_aspect_fp64` +evaluates to ``True``. 
+ +If prerequisites are not met, requests to create an instance of an array object for these types will raise an exception. + +Data type objects are instances of :py:class:`dtype` object, and support equality comparison by implementing +special method :meth:`__eq__`. + +.. py:class:: dtype + + Same as :py:class:`numpy.dtype` + + .. py:method:: __eq__ + + Check if data-type instances are equal. + + +Default integral data type +-------------------------- + +The default integral data type is :attr:`int64` for all supported devices. + +Default indexing data type +-------------------------- + +The default indexing data type is :attr:`int64` for all supported devices. + +Default real floating-point data type +------------------------------------- + +The default real floating-point type depends on the capabilities of device where array is allocated. +If the device support double precision floating-point types, the default real floating-point type +is :attr:`float64`, otherwise :attr:`float32`. + +Make sure to select an appropriately capable device for an application that requires use of double +precision floating-point type. + +Default complex floating-point data type +---------------------------------------- + +Like for the default real floating-point type, the default complex floating-point type depends on +capabilities of device. If the device support double precision real floating-point types, the default +complex floating-point type is :attr:`complex128`, otherwise :attr:`complex64`. + + +Querying default data types programmatically +-------------------------------------------- + +The data type can be discovered programmatically using Array API :ref:`inspection functions `: + +.. 
code-block:: python + + import dpctl + from dpnp import tensor + + device = dpctl.select_default_device() + # get default data types for default-selected device + default_types = tensor.__array_namespace_info__().default_dtypes(device) + int_dt = default_types["integral"] + ind_dt = default_types["indexing"] + rfp_dt = default_types["real floating"] + cfp_dt = default_types["complex floating"] + + +Type promotion rules +-------------------- + +Type promotion rules govern the behavior of an array library when a function does not have +a dedicated implementation for the data type(s) of the input array(s). + +In such a case, input arrays may be cast to data types for which a dedicated implementation +exists. For example, when :data:`sin` is applied to array of integral values. + +Type promotion rules used in :py:mod:`dpnp.tensor` are consistent with the +Python Array API specification's `type promotion rules `_ +for devices that support double precision floating-point type. + + +For devices that do not support double precision floating-point type, the type promotion rule is +truncated by removing nodes corresponding to unsupported data types and edges that lead to them. diff --git a/doc/reference/tensor.elementwise_functions.rst b/doc/reference/tensor.elementwise_functions.rst new file mode 100644 index 000000000000..291f82abba15 --- /dev/null +++ b/doc/reference/tensor.elementwise_functions.rst @@ -0,0 +1,86 @@ +.. _dpnp_tensor_elementwise_functions: + +Element-wise functions +====================== + +Element-wise functions applied to input array(s) produce an output array of respective +function values computed for every element of input array(s). + +.. currentmodule:: dpnp.tensor + +.. 
autosummary:: + :toctree: generated + :template: autosummary/elementwise.rst + + abs + acos + acosh + add + angle + asin + asinh + atan + atan2 + atanh + bitwise_and + bitwise_left_shift + bitwise_invert + bitwise_or + bitwise_right_shift + bitwise_xor + cbrt + ceil + clip + conj + copysign + cos + cosh + divide + equal + exp + exp2 + expm1 + floor + floor_divide + greater + greater_equal + hypot + imag + isfinite + isinf + isnan + less + less_equal + log + log1p + log2 + log10 + logaddexp + logical_and + logical_not + logical_or + logical_xor + maximum + minimum + multiply + negative + nextafter + not_equal + positive + pow + proj + real + remainder + reciprocal + round + rsqrt + sign + signbit + sin + sinh + square + sqrt + subtract + tan + tanh + trunc diff --git a/doc/reference/tensor.flags.rst b/doc/reference/tensor.flags.rst new file mode 100644 index 000000000000..3aa9d812e095 --- /dev/null +++ b/doc/reference/tensor.flags.rst @@ -0,0 +1,22 @@ +.. _dpnp_tensor_flags_class: + +``Flags`` class +=================== + +.. autoclass:: dpnp.tensor._flags.Flags + :members: + + Note that dictionary-like access to some members is permitted: + + "C", "C_CONTIGUOUS": + Equivalent to ``c_contiguous`` + "F", "F_CONTIGUOUS": + Equivalent to ``f_contiguous`` + "W", "WRITABLE": + Equivalent to ``writable`` + "FC": + Equivalent to ``fc`` + "FNC": + Equivalent to ``fnc`` + "FORC", "CONTIGUOUS": + Equivalent to ``forc`` and ``contiguous`` diff --git a/doc/reference/tensor.indexing_functions.rst b/doc/reference/tensor.indexing_functions.rst new file mode 100644 index 000000000000..fee0b18a1875 --- /dev/null +++ b/doc/reference/tensor.indexing_functions.rst @@ -0,0 +1,19 @@ +.. _dpnp_tensor_indexing_functions: + +Indexing functions +================== + +These functions allow to retrieve or modify array elements indexed +by either integral arrays of indices or boolean mask arrays. + +.. currentmodule:: dpnp.tensor + +.. 
autosummary:: + :toctree: generated + + extract + place + put + put_along_axis + take + take_along_axis diff --git a/doc/reference/tensor.inspection.rst b/doc/reference/tensor.inspection.rst new file mode 100644 index 000000000000..3e0f7e54d4e8 --- /dev/null +++ b/doc/reference/tensor.inspection.rst @@ -0,0 +1,21 @@ +.. _dpnp_tensor_inspection: + +Inspection API +============== + + +:mod:`dpnp.tensor` implements a way to introspect implementation- and device- +capabilities of an array library as specified in +`Python Array API standard `_: + +.. currentmodule:: dpnp.tensor + +.. _array_api_inspection: https://data-apis.org/array-api/latest/API_specification/inspection.html + +.. autosummary:: + :toctree: generated + :template: autosummary/cython_class.rst + + __array_api_version__ + __array_namespace_info__ + _array_api.Info diff --git a/doc/reference/tensor.linear_algebra.rst b/doc/reference/tensor.linear_algebra.rst new file mode 100644 index 000000000000..58df8ebebc74 --- /dev/null +++ b/doc/reference/tensor.linear_algebra.rst @@ -0,0 +1,14 @@ +.. _dpnp_tensor_linear_algebra: + +Linear algebra functions +======================== + +.. currentmodule:: dpnp.tensor + +.. autosummary:: + :toctree: generated + + matmul + matrix_transpose + tensordot + vecdot diff --git a/doc/reference/tensor.manipulation_functions.rst b/doc/reference/tensor.manipulation_functions.rst new file mode 100644 index 000000000000..1749e432c66a --- /dev/null +++ b/doc/reference/tensor.manipulation_functions.rst @@ -0,0 +1,29 @@ +.. _dpnp_tensor_manipulation_functions: + +Array manipulation functions +============================ + +The following functions conform to `Python Array API standard `_: + +.. array_api_spec_manipulation_functions: https://data-apis.org/array-api/latest/API_specification/manipulation_functions.html + +.. currentmodule:: dpnp.tensor + +.. 
autosummary:: + :toctree: generated + + broadcast_arrays + broadcast_to + concat + expand_dims + flip + moveaxis + permute_dims + repeat + reshape + roll + squeeze + stack + swapaxes + tile + unstack diff --git a/doc/reference/tensor.print_functions.rst b/doc/reference/tensor.print_functions.rst new file mode 100644 index 000000000000..7005deafd65c --- /dev/null +++ b/doc/reference/tensor.print_functions.rst @@ -0,0 +1,17 @@ +.. _dpnp_tensor_print_functions: + +Printing functions +================== + +Functions for controlling and customizing the string representation of arrays. + +.. currentmodule:: dpnp.tensor + +.. autosummary:: + :toctree: generated + + get_print_options + set_print_options + print_options + usm_ndarray_repr + usm_ndarray_str diff --git a/doc/reference/tensor.rst b/doc/reference/tensor.rst new file mode 100644 index 000000000000..d482fecb69b6 --- /dev/null +++ b/doc/reference/tensor.rst @@ -0,0 +1,55 @@ +.. _dpnp_tensor_pyapi: + +Tensor (``dpnp.tensor``) +======================== + +.. py:module:: dpnp.tensor + +.. currentmodule:: dpnp.tensor + +:py:mod:`dpnp.tensor` provides a reference implementation of the +`Python Array API `_ specification. The implementation +uses data-parallel algorithms suitable for execution on accelerators, such as GPUs. + +:py:mod:`dpnp.tensor` is written using C++ and `SYCL `_ +and oneAPI extensions implemented in `Intel(R) oneAPI DPC++ compiler `_. + +This module contains: + +* Array object :py:class:`usm_ndarray` +* :ref:`Accumulation functions ` +* :ref:`Array creation functions ` +* :ref:`Array manipulation functions ` +* :ref:`Elementwise functions ` +* :ref:`Indexing functions ` +* :ref:`Introspection functions ` +* :ref:`Linear algebra functions ` +* :ref:`Searching functions ` +* :ref:`Set functions ` +* :ref:`Sorting functions ` +* :ref:`Statistical functions ` +* :ref:`Utility functions ` +* :ref:`Printing functions ` +* :ref:`Constants ` + + +.. 
toctree:: + :hidden: + + tensor.creation_functions + tensor.usm_ndarray + tensor.data_type_functions + tensor.data_types + tensor.elementwise_functions + tensor.accumulation_functions + tensor.indexing_functions + tensor.inspection + tensor.linear_algebra + tensor.manipulation_functions + tensor.searching_functions + tensor.set_functions + tensor.sorting_functions + tensor.statistical_functions + tensor.utility_functions + tensor.print_functions + tensor.constants diff --git a/doc/reference/tensor.searching_functions.rst b/doc/reference/tensor.searching_functions.rst new file mode 100644 index 000000000000..f451034672a6 --- /dev/null +++ b/doc/reference/tensor.searching_functions.rst @@ -0,0 +1,16 @@ +.. _dpnp_tensor_searching_functions: + +Searching functions +=================== + +.. currentmodule:: dpnp.tensor + +.. autosummary:: + :toctree: generated + + argmax + argmin + count_nonzero + nonzero + searchsorted + where diff --git a/doc/reference/tensor.set_functions.rst b/doc/reference/tensor.set_functions.rst new file mode 100644 index 000000000000..0cd2e8a47f1c --- /dev/null +++ b/doc/reference/tensor.set_functions.rst @@ -0,0 +1,15 @@ +.. _dpnp_tensor_set_functions: + +Set Functions +============= + +.. currentmodule:: dpnp.tensor + +.. autosummary:: + :toctree: generated + + isin + unique_all + unique_counts + unique_inverse + unique_values diff --git a/doc/reference/tensor.sorting_functions.rst b/doc/reference/tensor.sorting_functions.rst new file mode 100644 index 000000000000..b3dac1eff444 --- /dev/null +++ b/doc/reference/tensor.sorting_functions.rst @@ -0,0 +1,13 @@ +.. _dpnp_tensor_sorting_functions: + +Sorting functions +================= + +.. currentmodule:: dpnp.tensor + +.. 
autosummary:: + :toctree: generated + + argsort + sort + top_k diff --git a/doc/reference/tensor.statistical_functions.rst b/doc/reference/tensor.statistical_functions.rst new file mode 100644 index 000000000000..e8c2b26bffac --- /dev/null +++ b/doc/reference/tensor.statistical_functions.rst @@ -0,0 +1,19 @@ +.. _dpnp_tensor_statistical_functions: + +Statistical Functions +===================== + +.. currentmodule:: dpnp.tensor + +.. autosummary:: + :toctree: generated + + max + mean + min + prod + std + sum + var + logsumexp + reduce_hypot diff --git a/doc/reference/tensor.usm_ndarray.rst b/doc/reference/tensor.usm_ndarray.rst new file mode 100644 index 000000000000..5b958d673b21 --- /dev/null +++ b/doc/reference/tensor.usm_ndarray.rst @@ -0,0 +1,26 @@ +.. _dpnp_tensor_array_object: + +USM array object +================ + +.. currentmodule:: dpnp.tensor + +The array object represents a multi-dimensional tensor of uniform elemental datatype allocated on +a :py:class:`Device`. The tensor in stored in a USM allocation, which can be accessed via +:py:attr:`usm_ndarray.base` attribute. + +Implementation of :py:class:`usm_ndarray` conforms to +`Array API standard `_ specification. + +.. array_api_array_object: https://data-apis.org/array-api/latest/API_specification/array_object.html + +.. autosummary:: + :toctree: generated + :template: autosummary/usm_ndarray.rst + + usm_ndarray + +.. toctree:: + :hidden: + + tensor.flags diff --git a/doc/reference/tensor.utility_functions.rst b/doc/reference/tensor.utility_functions.rst new file mode 100644 index 000000000000..5f124667e9f9 --- /dev/null +++ b/doc/reference/tensor.utility_functions.rst @@ -0,0 +1,29 @@ +.. _dpnp_tensor_utility_functions: + +Utility functions +================= + +.. currentmodule:: dpnp.tensor + +.. autosummary:: + :toctree: generated + + all + any + allclose + diff + asnumpy + to_numpy + +Device object +------------- + +.. autoclass:: Device + + .. 
autosummary:: + ~create_device + ~sycl_queue + ~sycl_device + ~sycl_context + ~sycl_usm_shared_memory + ~usm_ndarray_to_device diff --git a/doc/user_guides/dlpack.rst b/doc/user_guides/dlpack.rst new file mode 100644 index 000000000000..38a7778cc4fb --- /dev/null +++ b/doc/user_guides/dlpack.rst @@ -0,0 +1,138 @@ +.. _dpnp_tensor_dlpack_support: + +DLPack exchange of USM allocated arrays +======================================= + +DLPack overview +--------------- + +`DLPack `_ is a commonly used C-ABI compatible data structure that allows data exchange +between major frameworks. DLPack strives to be minimal, intentionally leaves allocators API and +device API out of scope. + +Data shared via DLPack are owned by the producer who provides a deleter function stored in the +`DLManagedTensor `_, and are only accessed by consumer. +Python semantics of using the structure is `explained in dlpack docs `_. + +DLPack specifies data location in memory via ``void * data`` field of `DLTensor `_ struct, and via ``DLDevice device`` field. +The `DLDevice `_ struct has two members: an enumeration ``device_type`` and an integer ``device_id``. + +DLPack recognizes enumeration value ``DLDeviceType::kDLOneAPI`` reserved for sharing SYCL USM allocations. +It is not ``kDLSycl`` since importing USM-allocated tensor with this device type relies on oneAPI SYCL extensions +``sycl_ext_oneapi_filter_selector`` and ``sycl_ext_oneapi_default_platform_context`` to operate. + +.. _dlpack_docs: https://dmlc.github.io/dlpack/latest/ +.. _dlpack_managed_tensor: https://dmlc.github.io/dlpack/latest/c_api.html#c.DLManagedTensor +.. _dlpack_dltensor: https://dmlc.github.io/dlpack/latest/c_api.html#c.DLTensor +.. _dlpack_dldevice: https://dmlc.github.io/dlpack/latest/c_api.html#c.DLDevice +.. 
_dlpack_python_spec: https://dmlc.github.io/dlpack/latest/python_spec.html + +Exporting USM allocation to DLPack +----------------------------------- + +When sharing USM allocation (of any ``sycl::usm::kind``) with ``void * ptr`` bound to ``sycl::context ctx``: + +.. code-block:: cpp + :caption: Protocol for exporting USM allocation as DLPack + + // Input: void *ptr: + // USM allocation pointer + // sycl::context ctx: + // context the pointer is bound to + + // Get device where allocation was originally made + // Keep in mind, the device may be a sub-device + const sycl::device &ptr_dev = sycl::get_pointer_device(ptr, ctx); + + #if SYCL_KHR_DEFAULT_CONTEXT + const sycl::context &default_ctx = ptr_dev.get_platform().khr_get_default_context(); + #else + static_assert(false, "ext_oneapi_default_context extension is required"); + #endif + + // Assert that ctx is the default platform context, or throw + if (ctx != default_ctx) { + throw pybind11::type_error( + "Can not export USM allocations not " + "bound to default platform context." 
+ ); + } + + // Find parent root device if ptr_dev is a sub-device + const sycl::device &parent_root_device = get_parent_root_device(ptr_dev); + + // find position of parent_root_device in sycl::get_devices + const auto &all_root_devs = sycl::device::get_devices(); + auto beg = std::begin(all_root_devs); + auto end = std::end(all_root_devs); + auto selector_fn = [parent_root_device](const sycl::device &root_d) -> bool { + return parent_root_device == root_d; + }; + auto pos = std::find_if(beg, end, selector_fn); + + if (pos == end) { + throw pybind11::type_error("Could not produce DLPack: failed finding device_id"); + } + std::ptrdiff_t dev_idx = std::distance(beg, pos); + + // check that dev_idx can fit into int32_t if needed + int32_t device_id = static_cast<int32_t>(dev_idx); + + // populate DLTensor with DLDeviceType::kDLOneAPI and computed device_id + + +Importing DLPack with ``device_type == kDLOneAPI`` +-------------------------------------------------- + +.. code-block:: cpp + :caption: Protocol for recognizing DLPack as a valid USM allocation + + // Input: ptr = dlm_tensor->dl_tensor.data + // device_id = dlm_tensor->dl_tensor.device.device_id + + // Get root_device from device_id + const auto &device_vector = sycl::device::get_devices(); + const sycl::device &root_device = device_vector.at(device_id); + + // Check if the backend of the device is supported by consumer + // Perhaps for certain backends (CUDA, hip, etc.) 
we should dispatch + // different dlpack importers + + // alternatively + // sycl::device root_device = sycl::device( + // sycl::ext::oneapi::filter_selector{ std::to_string(device_id)} + // ); + + // Get default platform context + #if SYCL_KHR_DEFAULT_CONTEXT + const sycl::context &default_ctx = root_device.get_platform().khr_get_default_context(); + #else + static_assert(false, "ext_oneapi_default_context extension is required"); + #endif + + // Check that pointer is known in the context + const sycl::usm::kind &alloc_type = sycl::get_pointer_type(ptr, default_ctx); + + if (alloc_type == sycl::usm::kind::unknown) { + throw pybind11::type_error( + "Data pointer in DLPack is not bound to the " + "default platform context of specified device" + ); + } + + // Perform check that USM allocation type is supported by consumer if needed + + // Get sycl::device where the data was allocated + const sycl::device &ptr_dev = sycl::get_pointer_device(ptr, default_ctx); + + // Create object of consumer's library from ptr, ptr_dev, default_ctx + +Support of DLPack with ``kDLOneAPI`` device type +------------------------------------------------ + +:py:mod:`dpnp.tensor` supports DLPack v0.8. Exchange of USM allocations made using Level-Zero backend +is supported with ``torch.Tensor(device='xpu')`` for PyTorch when using `intel-extension-for-pytorch `_, +as well as for TensorFlow when `intel-extension-for-tensorflow `_ is used. + +.. _intel_ext_for_torch: https://github.com/intel/intel-extension-for-pytorch +.. _intel_ext_for_tf: https://github.com/intel/intel-extension-for-tensorflow diff --git a/doc/user_guides/execution_model.rst b/doc/user_guides/execution_model.rst new file mode 100644 index 000000000000..1964ef230f85 --- /dev/null +++ b/doc/user_guides/execution_model.rst @@ -0,0 +1,146 @@ +.. 
_dpnp_execution_model: + +######################## +oneAPI programming model +######################## + +oneAPI library and its Python interface +======================================= + +Using oneAPI libraries, a user calls functions that take ``sycl::queue`` and a collection of +``sycl::event`` objects among other arguments. For example: + +.. code-block:: cpp + :caption: Prototypical call signature of oneMKL function + + sycl::event + compute( + sycl::queue &exec_q, + ..., + const std::vector<sycl::event> &dependent_events + ); + +The function ``compute`` inserts computational tasks into the queue ``exec_q`` for DPC++ runtime to +execute on the device the queue targets. The execution may begin only after other tasks whose +execution status is represented by ``sycl::event`` objects in the provided ``dependent_events`` +vector complete. If the vector is empty, the runtime begins the execution as soon as the device is +ready. The function returns a ``sycl::event`` object representing completion of the set of +computational tasks submitted by the ``compute`` function. + +Hence, in the oneAPI programming model, the execution **queue** is used to specify which device the +function will execute on. To create a queue, one must specify a device to target. + +In :mod:`dpctl`, the ``sycl::queue`` is represented by :class:`dpctl.SyclQueue` Python type, +and a Python API to call such a function might look like + +.. code-block:: python + + def call_compute( + exec_q : dpctl.SyclQueue, + ..., + dependent_events : List[dpctl.SyclEvent] = [] + ) -> dpctl.SyclEvent: + ... + +When building Python API for a SYCL offloading function, and you choose to +map the SYCL API to a different API on the Python side, it must still translate to a +similar call under the hood. + +The arguments to the function must be suitable for use in the offloading functions. 
+Typically these are Python scalars, or objects representing USM allocations, such as +:class:`dpnp.tensor.usm_ndarray`, :class:`dpctl.memory.MemoryUSMDevice` and friends. + +.. note:: + The USM allocations these objects represent must not get deallocated before + offloaded tasks that access them complete. + + This is something authors of DPC++-based Python extensions must take care of, + and something that users of such extensions can assume is ensured. + + +USM allocations and compute-follows-data +======================================== + +To make a USM allocation on a device in SYCL, one needs to specify ``sycl::device`` in the +memory of which the allocation is made, and the ``sycl::context`` to which the allocation +is bound. + +A ``sycl::queue`` object is often used instead. In such cases ``sycl::context`` and ``sycl::device`` associated +with the queue are used to make the allocation. + +.. important:: + :mod:`dpnp.tensor` associates a queue object with every USM allocation. + + The associated queue may be queried using ``.sycl_queue`` property of the + Python type representing the USM allocation. + +This design choice allows :mod:`dpnp.tensor` to have a preferred queue to use when operating on any single +USM allocation. For example: + +.. code-block:: python + + def unary_func(x : dpnp.tensor.usm_ndarray): + code1 + _ = _func_impl(x.sycl_queue, ...) + code2 + +When combining several objects representing USM-allocations, the +:ref:`programming model ` +adopted in :mod:`dpnp.tensor` insists that queues associated with each object be the same, in which +case it is the execution queue used. Alternatively :exc:`dpctl.utils.ExecutionPlacementError` is raised. + +.. code-block:: python + + def binary_func( + x1 : dpnp.tensor.usm_ndarray, + x2 : dpnp.tensor.usm_ndarray + ): + exec_q = dpctl.utils.get_execution_queue((x1.sycl_queue, x2.sycl_queue)) + if exec_q is None: + raise dpctl.utils.ExecutionPlacementError + ...
+ +In order to ensure that compute-follows-data works seamlessly out-of-the-box, :mod:`dpnp.tensor` maintains +a cache with context and device as keys and queues as values used by :class:`dpnp.tensor.Device` class. + +.. code-block:: python + + >>> import dpctl + >>> from dpnp import tensor + + >>> sycl_dev = dpctl.SyclDevice("cpu") + >>> d1 = tensor.Device.create_device(sycl_dev) + >>> d2 = tensor.Device.create_device("cpu") + >>> d3 = tensor.Device.create_device(dpctl.select_cpu_device()) + + >>> d1.sycl_queue == d2.sycl_queue, d1.sycl_queue == d3.sycl_queue, d2.sycl_queue == d3.sycl_queue + (True, True, True) + +Since :class:`dpnp.tensor.Device` class is used by all :ref:`array creation functions ` +in :mod:`dpnp.tensor`, the same value used as ``device`` keyword argument results in array instances that +can be combined together in accordance with compute-follows-data programming model. + +.. code-block:: python + + >>> from dpnp import tensor + >>> import dpctl + + >>> # queue for default-constructed device is used + >>> x1 = tensor.arange(100, dtype="int32") + >>> x2 = tensor.zeros(100, dtype="int32") + >>> x12 = tensor.concat((x1, x2)) + >>> x12.sycl_queue == x1.sycl_queue, x12.sycl_queue == x2.sycl_queue + (True, True) + >>> # default constructors of SyclQueue class create different instance of the queue + >>> q1 = dpctl.SyclQueue() + >>> q2 = dpctl.SyclQueue() + >>> q1 == q2 + False + >>> y1 = tensor.arange(100, dtype="int32", sycl_queue=q1) + >>> y2 = tensor.zeros(100, dtype="int32", sycl_queue=q2) + >>> # this call raises ExecutionPlacementError since compute-follows-data + >>> # rules are not met + >>> tensor.concat((y1, y2)) + +Please refer to the :ref:`array migration ` section of the introduction to +:mod:`dpnp.tensor` for examples on how to resolve ``ExecutionPlacementError`` exceptions. 
diff --git a/doc/user_guides/index.rst b/doc/user_guides/index.rst new file mode 100644 index 000000000000..aeae74bc8591 --- /dev/null +++ b/doc/user_guides/index.rst @@ -0,0 +1,12 @@ +.. _user_guides: + +*********** +User Guides +*********** + +.. toctree:: + :maxdepth: 2 + + tensor_intro + execution_model + dlpack diff --git a/doc/user_guides/tensor_intro.rst b/doc/user_guides/tensor_intro.rst new file mode 100644 index 000000000000..a1f73a5208b5 --- /dev/null +++ b/doc/user_guides/tensor_intro.rst @@ -0,0 +1,287 @@ +.. _user_guide_tensor_intro: + +Intro to :py:mod:`dpnp.tensor` +=============================== + +Supported array data types +-------------------------- + +The tensor submodule provides an N-dimensional array object for a tensor whose values have the same data type +from the :ref:`following list `: + +.. currentmodule:: dpnp.tensor + +.. list-table:: + + * - + - :attr:`int8` + - :attr:`int16` + - :attr:`int32` + - :attr:`int64` + - + - :attr:`float16` + - :attr:`float32` + - :attr:`complex64` + + * - :attr:`bool` + - :attr:`uint8` + - :attr:`uint16` + - :attr:`uint32` + - :attr:`uint64` + - + - + - :attr:`float64` + - :attr:`complex128` + + +Creating an array +----------------- + +Array :ref:`creation functions ` support keyword arguments that +control the device where the array is allocated as well as aspects of +USM allocation for the array. + +These three keywords are: + +.. list-table:: + :header-rows: 1 + + * - Keyword arguments + - Default value + - Description + * - ``usm_type`` + - ``"device"`` + - type of USM allocation to make + * - ``device`` + - ``None`` + - :py:class:`dpnp.tensor.Device` instance + * - ``sycl_queue`` + - ``None`` + - Instance of :class:`dpctl.SyclQueue` associated with array + +Arguments ``sycl_queue`` and ``device`` are complementary to each other, and +a user need only provide one of these. 
+ +A valid setting for the ``device`` keyword argument is any object that can be passed to :py:meth:`dpnp.tensor.Device.create_device`. +If both ``device`` and ``sycl_queue`` keyword arguments are specified, they must correspond to :class:`dpctl.SyclQueue` instances which +compare equal to one another. + +A created instance of :class:`usm_ndarray` has an associated :class:`dpctl.SyclQueue` instance that can be retrieved +using :attr:`dpnp.tensor.usm_ndarray.sycl_queue` property. The underlying USM allocation +is allocated on :class:`dpctl.SyclDevice` and is bound to :class:`dpctl.SyclContext` targeted by this queue. + +.. _dpnp_tensor_compute_follows_data: + +Execution model +--------------- + +When one or more instances of ``usm_ndarray`` objects are passed to a function in :py:mod:`dpnp.tensor` other than a creation function, +a "compute follows data" execution model is followed. + +The model requires that :class:`dpctl.SyclQueue` instances associated with each array compare equal to one another, signifying that +each one corresponds to the same underlying ``sycl::queue`` object. In such a case, the output array is associated with the same +``sycl::queue`` and computations are scheduled for execution using this ``sycl::queue``. + +.. note:: + Two instances of :class:`dpctl.SyclQueue` may target the same ``sycl::device`` and be using the same ``sycl::context``, but correspond + to different scheduling entries, and hence be in violation of the compute-follows-data requirement. One common example of this are + ``SyclQueue`` instances corresponding to the default-selected device and using the platform default context but created using different properties, e.g. + one with `"enable_profiling"` set and another without it. + +If input arrays do not conform to the compute-follows-data requirements, :py:exc:`dpctl.utils.ExecutionPlacementError` is raised. +The user must explicitly migrate the data to unambiguously control the execution placement. + ..
_dpnp_tensor_array_migration: + +Migrating arrays +---------------- + +Array content can be migrated to a different device +using either :meth:`dpnp.tensor.usm_ndarray.to_device` method, or by using :func:`dpnp.tensor.asarray` function. + +The ``arr.to_device(device=target_device)`` method will be zero-copy if the ``arr.sycl_queue`` and the :class:`dpctl.SyclQueue` +instance associated with new target device have the same underlying ``sycl::device`` and ``sycl::context`` instances. + +Here is an example of migration without a copy using ``.to_device`` method: + +.. code-block:: python + :caption: Example: Use ``.to_device`` to zero-copy migrate array content to be associated with a different ``sycl::queue`` + + import dpctl + from dpnp import tensor + + x = tensor.linspace(0, 1, num=10**8) + q_prof = dpctl.SyclQueue(x.sycl_context, x.sycl_device, property="enable_profiling") + + timer = dpctl.SyclTimer() + # no data migration takes place here (zero-copy), + # but x and x1 arrays do not satisfy compute-follows-data requirements + x1 = x.to_device(q_prof) + + with timer(q_prof): + y1 = tensor.sin(2*x1)*tensor.exp(-tensor.square(x1)) + + # also a zero copy operation + y = y1.to_device(x.device) + + host_dt, device_dt = timer.dt + print(f"Execution on device {x.sycl_device.name} took {device_dt} seconds") + print(f"Execution on host took {host_dt} seconds") + +Data migration when the current and the target SYCL contexts are different is performed via host. That means that data are copied from +the current device to the host, and then from the host to the target device: + +.. 
 code-block:: python + :caption: Example: Using ``.to_device`` to migrate data may involve copy via host + + from dpnp import tensor + + x_cpu = tensor.concat((tensor.ones(10, device="cpu"), tensor.zeros(1000, device="cpu"))) + + # data migration is performed via host + x_gpu = x_cpu.to_device("gpu") + +An alternative way to migrate data is to use :py:func:`asarray` and specify device-placement keyword arguments: + +.. code-block:: python + :caption: Example: Using ``asarray`` to migrate data may involve copy via host + + from dpnp import tensor + + x_cpu = tensor.concat((tensor.ones(10, device="cpu"), tensor.zeros(1000, device="cpu"))) + + # data migration is performed via host + x_gpu = tensor.asarray(x_cpu, device="gpu") + +An advantage of using the function ``asarray`` is that migration from ``usm_ndarray`` instances allocated on different +devices as well as migration from :py:class:`numpy.ndarray` may be accomplished in a single call: + +.. code-block:: python + :caption: Example: ``asarray`` may migrate multiple arrays + + from dpnp import tensor + import numpy + + x_cpu = tensor.ones((10, 10), device="cpu") + x_gpu = tensor.zeros((10, 10), device="opencl:gpu") + x_np = numpy.random.randn(10, 10) + + # Array w has shape (3, 10, 10) + w = tensor.asarray([x_cpu, x_gpu, x_np], device="level_zero:gpu") + +Migration may also occur during calls to other array creation functions, e.g., :py:func:`full` when the ``fill_value`` parameter is an instance +of :py:class:`usm_ndarray`. In such a case, default values of device placement keywords are interpreted to avoid data migration, i.e., the +new array is created on the same device where the ``fill_value`` array was allocated. + ..
 code-block:: python + :caption: Example: Using ``usm_ndarray`` as arguments to array construction + + from dpnp import tensor + + # Zero-dimensional array allocated on CPU device + pi_on_device = tensor.asarray(tensor.pi, dtype=tensor.float32, device="cpu") + + # x will also be allocated on CPU device + x = tensor.full(shape=(100, 100), fill_value=pi_on_device) + + # Create array on GPU. Migration of `pi_on_device` to GPU via host + # takes place under the hood + y_gpu = tensor.full(shape=(100, 100), fill_value=pi_on_device, device="gpu") + + +Combining arrays with different USM types +----------------------------------------- + +For functions with a single argument, the returned array has the same ``usm_type`` as the input array. + +For functions that combine several ``usm_ndarray`` instances, the ``usm_type`` of the output array is determined +using the following coercion rule: + ++------------+----------+----------+----------+ +| | "device" | "shared" | "host" | ++------------+----------+----------+----------+ +| "device" | "device" | "device" | "device" | ++------------+----------+----------+----------+ +| "shared" | "device" | "shared" | "shared" | ++------------+----------+----------+----------+ +| "host" | "device" | "shared" | "host" | ++------------+----------+----------+----------+ + +If assigning USM-type "device" a score of 0, USM-type "shared" a score of 1, and USM-type "host" a score of 2, +the USM-type of the output array has the smallest score of all its inputs. + +.. currentmodule:: dpctl.utils + +The function :py:func:`get_coerced_usm_type` is a convenience function to determine the USM-type +following this convention: + ..
code-block:: python + + from dpctl.utils import get_coerced_usm_type + + # r1 has value "device" + r1 = get_coerced_usm_type(["device", "shared", "host"]) + + # r2 has value "shared" + r2 = get_coerced_usm_type(["shared", "shared", "host"]) + + # r3 has value "host" + r3 = get_coerced_usm_type(["host", "host", "host"]) + +Sharing data between devices and Python +--------------------------------------- + +Python objects, such as sequences of :class:`int`, :class:`float`, or :class:`complex` objects, +or NumPy arrays can be converted to :class:`dpnp.tensor.usm_ndarray` using :func:`dpnp.tensor.asarray` +function. + +.. code-block:: python + + >>> from dpnp import tensor as dpt + >>> import numpy as np + >>> import mkl_random + + >>> # Sample from true random number generator + >>> rs = mkl_random.RandomState(brng="nondeterm") + >>> x_np = rs.uniform(-1, 1, size=(6, 512)).astype(np.float32) + + >>> # copy data to USM-device (default) allocated array + >>> x_usm = dpt.asarray(x_np) + >>> dpt.max(x_usm, axis=1) + usm_ndarray([0.9998379 , 0.9963589 , 0.99818915, 0.9975991 , 0.9999802 , + 0.99851537], dtype=float32) + >>> np.max(x_np, axis=1) + array([0.9998379 , 0.9963589 , 0.99818915, 0.9975991 , 0.9999802 , + 0.99851537], dtype=float32) + +The content of :class:`dpnp.tensor.usm_ndarray` may be copied into +a NumPy array using :func:`dpnp.tensor.asnumpy` function: + +.. 
 code-block:: python + + from dpnp import tensor as dpt + import numpy as np + + def sieve_pass(r : dpt.usm_ndarray, v : dpt.usm_ndarray) -> dpt.usm_ndarray: + "Single pass of sieve of Eratosthenes" + m = dpt.min(r[r > v]) + r[ (r > m) & (r % m == 0) ] = 0 + return m + + def sieve(n : int) -> dpt.usm_ndarray: + "Find primes <=n using sieve of Eratosthenes" + idt = dpt.int32 + s = dpt.concat(( + dpt.arange(2, 3, dtype=idt), + dpt.arange(3, n + 1, 2, dtype=idt) + )) + lb = dpt.zeros(tuple(), dtype=idt) + while lb * lb < n + 1: + lb = sieve_pass(s, lb) + return s[s > 0] + + # get prime numbers <= a million into NumPy array + # to save to disk + ps_np = dpt.asnumpy(sieve(10**6)) + + np.savetxt("primes.txt", ps_np, fmt="%d") diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt index d7acf368bcd0..e24c6ee76693 100644 --- a/dpnp/CMakeLists.txt +++ b/dpnp/CMakeLists.txt @@ -186,6 +186,22 @@ add_subdirectory(backend/extensions/ufunc) add_subdirectory(backend/extensions/vm) add_subdirectory(backend/extensions/window) -add_subdirectory(dpnp_algo) -add_subdirectory(dpnp_utils) -add_subdirectory(random) +if(DPNP_BUILD_COMPONENTS STREQUAL "ALL" OR DPNP_BUILD_COMPONENTS STREQUAL "TENSOR_ONLY") + add_subdirectory(tensor) +endif() + +if(DPNP_BUILD_COMPONENTS STREQUAL "ALL" OR DPNP_BUILD_COMPONENTS STREQUAL "SKIP_TENSOR") + add_subdirectory(backend) + add_subdirectory(backend/extensions/blas) + add_subdirectory(backend/extensions/fft) + add_subdirectory(backend/extensions/indexing) + add_subdirectory(backend/extensions/lapack) + add_subdirectory(backend/extensions/statistics) + add_subdirectory(backend/extensions/ufunc) + add_subdirectory(backend/extensions/vm) + add_subdirectory(backend/extensions/window) + + add_subdirectory(dpnp_algo) + add_subdirectory(dpnp_utils) + add_subdirectory(random) +endif() diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt index 433ab298d476..a33e59e3bb02 100644 --- a/dpnp/backend/CMakeLists.txt +++ b/dpnp/backend/CMakeLists.txt
@@ -70,7 +70,12 @@ endif() # target_compile_definitions(${_trgt} PRIVATE _WIN=1) # endif() -target_link_options(${_trgt} PUBLIC -fsycl-device-code-split=per_kernel) +# For coverage builds, use per_source instead of per_kernel to reduce memory +if(DPNP_GENERATE_COVERAGE) + target_link_options(${_trgt} PUBLIC -fsycl-device-code-split=per_source) +else() + target_link_options(${_trgt} PUBLIC -fsycl-device-code-split=per_kernel) +endif() if(DPNP_GENERATE_COVERAGE) target_link_options(${_trgt} PRIVATE -fprofile-instr-generate -fcoverage-mapping) diff --git a/dpnp/backend/extensions/blas/CMakeLists.txt b/dpnp/backend/extensions/blas/CMakeLists.txt index 67e0d4cf02e1..34838c4109be 100644 --- a/dpnp/backend/extensions/blas/CMakeLists.txt +++ b/dpnp/backend/extensions/blas/CMakeLists.txt @@ -97,7 +97,18 @@ else() ) endif() -target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) +# For coverage builds, use per_source instead of per_kernel to reduce memory +if(DPNP_GENERATE_COVERAGE) + target_link_options( + ${python_module_name} + PUBLIC -fsycl-device-code-split=per_source + ) +else() + target_link_options( + ${python_module_name} + PUBLIC -fsycl-device-code-split=per_kernel + ) +endif() if(DPNP_GENERATE_COVERAGE) target_link_options( diff --git a/dpnp/backend/extensions/blas/gemm_batch.cpp b/dpnp/backend/extensions/blas/gemm_batch.cpp index a6cd7ac4e130..7ad631aee380 100644 --- a/dpnp/backend/extensions/blas/gemm_batch.cpp +++ b/dpnp/backend/extensions/blas/gemm_batch.cpp @@ -410,7 +410,8 @@ struct GemmBatchContigFactory fnT get() { if constexpr (types::GemmBatchTypePairSupportFactory::is_defined) { + Tc>::is_defined) + { return gemm_batch_impl; } else { diff --git a/dpnp/backend/extensions/blas/gemv.cpp b/dpnp/backend/extensions/blas/gemv.cpp index a9c5414ef8c7..afbf7b9dd139 100644 --- a/dpnp/backend/extensions/blas/gemv.cpp +++ b/dpnp/backend/extensions/blas/gemv.cpp @@ -269,7 +269,8 @@ std::pair const int vectorY_typenum = 
vectorY.get_typenum(); if (matrixA_typenum != vectorX_typenum || - matrixA_typenum != vectorY_typenum) { + matrixA_typenum != vectorY_typenum) + { throw py::value_error("Given arrays must be of the same type."); } diff --git a/dpnp/backend/extensions/common/ext/common.hpp b/dpnp/backend/extensions/common/ext/common.hpp index 036eb635a3bd..b0a0ed25bd44 100644 --- a/dpnp/backend/extensions/common/ext/common.hpp +++ b/dpnp/backend/extensions/common/ext/common.hpp @@ -106,7 +106,8 @@ struct IsNan return IsNan::isnan(real1) || IsNan::isnan(imag1); } else if constexpr (std::is_floating_point_v || - std::is_same_v) { + std::is_same_v) + { return sycl::isnan(v); } @@ -215,8 +216,7 @@ sycl::nd_range<1> pybind11::dtype dtype_from_typenum(int dst_typenum); template - typename factoryT, + template typename factoryT, int _num_types = type_dispatch::num_types> inline void init_dispatch_vector(dispatchT dispatch_vector[]) { @@ -225,8 +225,7 @@ inline void init_dispatch_vector(dispatchT dispatch_vector[]) } template - typename factoryT, + template typename factoryT, int _num_types = type_dispatch::num_types> inline void init_dispatch_table(dispatchT dispatch_table[][_num_types]) { diff --git a/dpnp/backend/extensions/common/ext/dispatch_table.hpp b/dpnp/backend/extensions/common/ext/dispatch_table.hpp index 4cfe1bd57250..6655f054f355 100644 --- a/dpnp/backend/extensions/common/ext/dispatch_table.hpp +++ b/dpnp/backend/extensions/common/ext/dispatch_table.hpp @@ -99,8 +99,7 @@ using SupportedDTypeList2 = std::vector; template - typename Func> + template typename Func> struct TableBuilder { template @@ -125,8 +124,7 @@ struct TableBuilder template - typename Func> + template typename Func> struct TableBuilder2 { template @@ -232,8 +230,7 @@ class DispatchTable2 } template - typename Func> + template typename Func> void populate_dispatch_table() { using TBulder = typename TableBuilder2::type; diff --git a/dpnp/backend/extensions/elementwise_functions/common.hpp 
b/dpnp/backend/extensions/elementwise_functions/common.hpp index df2b3afe53b9..4c0004676094 100644 --- a/dpnp/backend/extensions/elementwise_functions/common.hpp +++ b/dpnp/backend/extensions/elementwise_functions/common.hpp @@ -92,7 +92,8 @@ struct UnaryTwoOutputsContigFunctor /* NOTE: work-group size must be divisible by sub-group size */ if constexpr (enable_sg_loadstore && - UnaryTwoOutputsOpT::is_constant::value) { + UnaryTwoOutputsOpT::is_constant::value) + { // value of operator is known to be a known constant constexpr resT1 const_val1 = UnaryTwoOutputsOpT::constant_value1; constexpr resT2 const_val2 = UnaryTwoOutputsOpT::constant_value2; @@ -528,21 +529,18 @@ struct BinaryTwoOutputsStridedFunctor * dpctl::tensor::kernels::elementwise_common namespace. */ template - class UnaryTwoOutputsType, + template class UnaryTwoOutputsType, template - class UnaryTwoOutputsContigFunctorT, + bool enable> class UnaryTwoOutputsContigFunctorT, template - class kernel_name, + std::uint8_t nv> class kernel_name, std::uint8_t vec_sz = 4u, std::uint8_t n_vecs = 2u> sycl::event @@ -613,12 +611,15 @@ sycl::event * dpctl::tensor::kernels::elementwise_common namespace. */ template - class UnaryTwoOutputsType, - template - class UnaryTwoOutputsStridedFunctorT, - template - class kernel_name> + template class UnaryTwoOutputsType, + template class UnaryTwoOutputsStridedFunctorT, + template class kernel_name> sycl::event unary_two_outputs_strided_impl( sycl::queue &exec_q, std::size_t nelems, @@ -665,27 +666,25 @@ sycl::event unary_two_outputs_strided_impl( * @note It extends binary_contig_impl from * dpctl::tensor::kernels::elementwise_common namespace. 
*/ -template - class BinaryTwoOutputsType, - template - class BinaryTwoOutputsContigFunctorT, - template - class kernel_name, - std::uint8_t vec_sz = 4u, - std::uint8_t n_vecs = 2u> +template < + typename argTy1, + typename argTy2, + template class BinaryTwoOutputsType, + template class BinaryTwoOutputsContigFunctorT, + template class kernel_name, + std::uint8_t vec_sz = 4u, + std::uint8_t n_vecs = 2u> sycl::event binary_two_outputs_contig_impl(sycl::queue &exec_q, std::size_t nelems, @@ -761,15 +760,19 @@ sycl::event * @note It extends binary_strided_impl from * dpctl::tensor::kernels::elementwise_common namespace. */ -template < - typename argTy1, - typename argTy2, - template - class BinaryTwoOutputsType, - template - class BinaryTwoOutputsStridedFunctorT, - template - class kernel_name> +template class BinaryTwoOutputsType, + template class BinaryTwoOutputsStridedFunctorT, + template class kernel_name> sycl::event binary_two_outputs_strided_impl( sycl::queue &exec_q, std::size_t nelems, diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp index bd06ba1bd583..c31636221945 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp @@ -189,7 +189,8 @@ std::pair simplified_dst_strides, src_offset, dst_offset); if (nd == 1 && simplified_src_strides[0] == 1 && - simplified_dst_strides[0] == 1) { + simplified_dst_strides[0] == 1) + { // Special case of contiguous data auto contig_fn = contig_dispatch_vector[src_typeid]; @@ -893,7 +894,8 @@ std::pair output_types_table[src1_typeid][src2_typeid]; if (dst1_typeid != output_typeids.first || - dst2_typeid != output_typeids.second) { + dst2_typeid != output_typeids.second) + { throw py::value_error( "One of destination arrays has unexpected elemental data type."); } diff --git 
a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp index e34cb74fcb0a..d485e95677f0 100644 --- a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp +++ b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp @@ -183,7 +183,8 @@ void simplify_iteration_space_3( simplified_dst_strides.reserve(nd); if ((src1_strides[0] < 0) && (src2_strides[0] < 0) && - (dst_strides[0] < 0)) { + (dst_strides[0] < 0)) + { simplified_src1_strides.push_back(-src1_strides[0]); simplified_src2_strides.push_back(-src2_strides[0]); simplified_dst_strides.push_back(-dst_strides[0]); diff --git a/dpnp/backend/extensions/fft/CMakeLists.txt b/dpnp/backend/extensions/fft/CMakeLists.txt index 8f5179bbbd76..57791746cb62 100644 --- a/dpnp/backend/extensions/fft/CMakeLists.txt +++ b/dpnp/backend/extensions/fft/CMakeLists.txt @@ -90,7 +90,18 @@ else() ) endif() -target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) +# For coverage builds, use per_source instead of per_kernel to reduce memory +if(DPNP_GENERATE_COVERAGE) + target_link_options( + ${python_module_name} + PUBLIC -fsycl-device-code-split=per_source + ) +else() + target_link_options( + ${python_module_name} + PUBLIC -fsycl-device-code-split=per_kernel + ) +endif() if(DPNP_GENERATE_COVERAGE) target_link_options( diff --git a/dpnp/backend/extensions/fft/common.hpp b/dpnp/backend/extensions/fft/common.hpp index f76da9721316..b7c097449b2a 100644 --- a/dpnp/backend/extensions/fft/common.hpp +++ b/dpnp/backend/extensions/fft/common.hpp @@ -66,10 +66,7 @@ class DescriptorWrapper queue_ptr_ = std::make_unique(q); } - descr_type &get_descriptor() - { - return descr_; - } + descr_type &get_descriptor() { return descr_; } const sycl::queue &get_queue() const { diff --git a/dpnp/backend/extensions/indexing/CMakeLists.txt b/dpnp/backend/extensions/indexing/CMakeLists.txt index 
0ca611bfdc9f..73fee2553ec3 100644 --- a/dpnp/backend/extensions/indexing/CMakeLists.txt +++ b/dpnp/backend/extensions/indexing/CMakeLists.txt @@ -95,7 +95,18 @@ else() endif() target_compile_options(${python_module_name} PUBLIC -fno-sycl-id-queries-fit-in-int) -target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) +# For coverage builds, use per_source instead of per_kernel to reduce memory +if(DPNP_GENERATE_COVERAGE) + target_link_options( + ${python_module_name} + PUBLIC -fsycl-device-code-split=per_source + ) +else() + target_link_options( + ${python_module_name} + PUBLIC -fsycl-device-code-split=per_kernel + ) +endif() if(DPNP_GENERATE_COVERAGE) target_link_options( diff --git a/dpnp/backend/extensions/indexing/choose.cpp b/dpnp/backend/extensions/indexing/choose.cpp index 7b5284418b00..ad09984bde6a 100644 --- a/dpnp/backend/extensions/indexing/choose.cpp +++ b/dpnp/backend/extensions/indexing/choose.cpp @@ -299,7 +299,8 @@ std::pair } if (!(chc_type_id == - array_types.typenum_to_lookup_id(chc_.get_typenum()))) { + array_types.typenum_to_lookup_id(chc_.get_typenum()))) + { throw py::type_error( "Choice array data types are not all the same."); } @@ -420,7 +421,8 @@ struct ChooseFactory fnT get() { if constexpr (std::is_integral::value && - !std::is_same::value) { + !std::is_same::value) + { fnT fn = kernels::choose_impl; return fn; } diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt index 6bf25ee651d2..5a4f8a85299f 100644 --- a/dpnp/backend/extensions/lapack/CMakeLists.txt +++ b/dpnp/backend/extensions/lapack/CMakeLists.txt @@ -115,7 +115,18 @@ else() ) endif() -target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) +# For coverage builds, use per_source instead of per_kernel to reduce memory +if(DPNP_GENERATE_COVERAGE) + target_link_options( + ${python_module_name} + PUBLIC -fsycl-device-code-split=per_source + ) +else() + 
target_link_options( + ${python_module_name} + PUBLIC -fsycl-device-code-split=per_kernel + ) +endif() if(DPNP_GENERATE_COVERAGE) target_link_options( diff --git a/dpnp/backend/extensions/lapack/geqrf_batch.cpp b/dpnp/backend/extensions/lapack/geqrf_batch.cpp index e0821e23e440..033c3db01b10 100644 --- a/dpnp/backend/extensions/lapack/geqrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/geqrf_batch.cpp @@ -98,13 +98,13 @@ static sycl::event geqrf_batch_impl(sycl::queue &exec_q, geqrf_batch_event = mkl_lapack::geqrf_batch( exec_q, - m, // The number of rows in each matrix in the batch; (0 ≤ m). - // It must be a non-negative integer. - n, // The number of columns in each matrix in the batch; (0 ≤ n). - // It must be a non-negative integer. - a, // Pointer to the batch of matrices, each of size (m x n). - lda, // The leading dimension of each matrix in the batch. - // For row major layout, lda ≥ max(1, m). + m, // The number of rows in each matrix in the batch; (0 ≤ m). + // It must be a non-negative integer. + n, // The number of columns in each matrix in the batch; (0 ≤ n). + // It must be a non-negative integer. + a, // Pointer to the batch of matrices, each of size (m x n). + lda, // The leading dimension of each matrix in the batch. + // For row major layout, lda ≥ max(1, m). stride_a, // Stride between consecutive matrices in the batch. tau, // Pointer to the array of scalar factors of the elementary // reflectors for each matrix in the batch. diff --git a/dpnp/backend/extensions/lapack/gesv.cpp b/dpnp/backend/extensions/lapack/gesv.cpp index 0569fab2c350..9ab6d4c67799 100644 --- a/dpnp/backend/extensions/lapack/gesv.cpp +++ b/dpnp/backend/extensions/lapack/gesv.cpp @@ -114,14 +114,14 @@ static sycl::event gesv_impl(sycl::queue &exec_q, try { getrf_event = mkl_lapack::getrf( exec_q, - n, // The order of the square matrix A (0 ≤ n). - // It must be a non-negative integer. - n, // The number of columns in the square matrix A (0 ≤ n). 
- // It must be a non-negative integer. - a, // Pointer to the square matrix A (n x n). - lda, // The leading dimension of matrix A. - // It must be at least max(1, n). - ipiv, // Pointer to the output array of pivot indices. + n, // The order of the square matrix A (0 ≤ n). + // It must be a non-negative integer. + n, // The number of columns in the square matrix A (0 ≤ n). + // It must be a non-negative integer. + a, // Pointer to the square matrix A (n x n). + lda, // The leading dimension of matrix A. + // It must be at least max(1, n). + ipiv, // Pointer to the output array of pivot indices. scratchpad, // Pointer to scratchpad memory to be used by MKL // routine for storing intermediate results. scratchpad_size, depends); diff --git a/dpnp/backend/extensions/lapack/gesv_batch.cpp b/dpnp/backend/extensions/lapack/gesv_batch.cpp index ce02f8517eb5..958591ad38ef 100644 --- a/dpnp/backend/extensions/lapack/gesv_batch.cpp +++ b/dpnp/backend/extensions/lapack/gesv_batch.cpp @@ -258,10 +258,10 @@ static sycl::event gesv_batch_impl(sycl::queue &exec_q, try { gesv_event = mkl_lapack::gesv( exec_q, - n, // The order of the square matrix A - // and the number of rows in matrix B (0 ≤ n). - nrhs, // The number of right-hand sides, - // i.e., the number of columns in matrix B (0 ≤ nrhs). + n, // The order of the square matrix A + // and the number of rows in matrix B (0 ≤ n). + nrhs, // The number of right-hand sides, + // i.e., the number of columns in matrix B (0 ≤ nrhs). a_batch, // Pointer to the square coefficient matrix A (n x n). lda, // The leading dimension of a, must be at least max(1, n). 
current_ipiv, // The pivot indices that define the permutation diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index abf20aff643a..870ccc8e811a 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -91,14 +91,14 @@ static sycl::event getrf_impl(sycl::queue &exec_q, getrf_event = mkl_lapack::getrf( exec_q, - m, // The number of rows in the input matrix A (0 ≤ m). - // It must be a non-negative integer. - n, // The number of columns in the input matrix A (0 ≤ n). - // It must be a non-negative integer. - a, // Pointer to the input matrix A (m x n). - lda, // The leading dimension of matrix A. - // It must be at least max(1, m). - ipiv, // Pointer to the output array of pivot indices. + m, // The number of rows in the input matrix A (0 ≤ m). + // It must be a non-negative integer. + n, // The number of columns in the input matrix A (0 ≤ n). + // It must be a non-negative integer. + a, // Pointer to the input matrix A (m x n). + lda, // The leading dimension of matrix A. + // It must be at least max(1, m). + ipiv, // Pointer to the output array of pivot indices. scratchpad, // Pointer to scratchpad memory to be used by MKL // routine for storing intermediate results. 
scratchpad_size, depends); diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index 1927736fc454..5e1c7a15e192 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -264,7 +264,8 @@ std::pair const py::ssize_t *ipiv_array_shape = ipiv_array.get_shape_raw(); if (ipiv_array_shape[0] != batch_size || - ipiv_array_shape[1] != std::min(m, n)) { + ipiv_array_shape[1] != std::min(m, n)) + { throw py::value_error( "The shape of 'ipiv_array' must be (batch_size, min(m, n))"); } diff --git a/dpnp/backend/extensions/lapack/linalg_exceptions.hpp b/dpnp/backend/extensions/lapack/linalg_exceptions.hpp index d087adfbd2b6..c823d1995a4e 100644 --- a/dpnp/backend/extensions/lapack/linalg_exceptions.hpp +++ b/dpnp/backend/extensions/lapack/linalg_exceptions.hpp @@ -37,10 +37,7 @@ class LinAlgError : public std::exception public: explicit LinAlgError(const char *message) : msg_(message) {} - const char *what() const noexcept override - { - return msg_.c_str(); - } + const char *what() const noexcept override { return msg_.c_str(); } private: std::string msg_; diff --git a/dpnp/backend/extensions/lapack/orgqr_batch.cpp b/dpnp/backend/extensions/lapack/orgqr_batch.cpp index ef1c85b91f4a..a29fe9b342fc 100644 --- a/dpnp/backend/extensions/lapack/orgqr_batch.cpp +++ b/dpnp/backend/extensions/lapack/orgqr_batch.cpp @@ -100,15 +100,15 @@ static sycl::event orgqr_batch_impl(sycl::queue &exec_q, orgqr_batch_event = mkl_lapack::orgqr_batch( exec_q, - m, // The number of rows in each matrix in the batch; (0 ≤ m). - // It must be a non-negative integer. - n, // The number of columns in each matrix in the batch; (0 ≤ n). - // It must be a non-negative integer. - k, // The number of elementary reflectors - // whose product defines the matrices Qi; (0 ≤ k ≤ n). - a, // Pointer to the batch of matrices, each of size (m x n). 
- lda, // The leading dimension of each matrix in the batch. - // For row major layout, lda ≥ max(1, m). + m, // The number of rows in each matrix in the batch; (0 ≤ m). + // It must be a non-negative integer. + n, // The number of columns in each matrix in the batch; (0 ≤ n). + // It must be a non-negative integer. + k, // The number of elementary reflectors + // whose product defines the matrices Qi; (0 ≤ k ≤ n). + a, // Pointer to the batch of matrices, each of size (m x n). + lda, // The leading dimension of each matrix in the batch. + // For row major layout, lda ≥ max(1, m). stride_a, // Stride between consecutive matrices in the batch. tau, // Pointer to the array of scalar factors of the elementary // reflectors for each matrix in the batch. diff --git a/dpnp/backend/extensions/lapack/ungqr_batch.cpp b/dpnp/backend/extensions/lapack/ungqr_batch.cpp index 7c890d968b0a..04de27cb257c 100644 --- a/dpnp/backend/extensions/lapack/ungqr_batch.cpp +++ b/dpnp/backend/extensions/lapack/ungqr_batch.cpp @@ -100,15 +100,15 @@ static sycl::event ungqr_batch_impl(sycl::queue &exec_q, ungqr_batch_event = mkl_lapack::ungqr_batch( exec_q, - m, // The number of rows in each matrix in the batch; (0 ≤ m). - // It must be a non-negative integer. - n, // The number of columns in each matrix in the batch; (0 ≤ n). - // It must be a non-negative integer. - k, // The number of elementary reflectors - // whose product defines the matrices Qi; (0 ≤ k ≤ n). - a, // Pointer to the batch of matrices, each of size (m x n). - lda, // The leading dimension of each matrix in the batch. - // For row major layout, lda ≥ max(1, m). + m, // The number of rows in each matrix in the batch; (0 ≤ m). + // It must be a non-negative integer. + n, // The number of columns in each matrix in the batch; (0 ≤ n). + // It must be a non-negative integer. + k, // The number of elementary reflectors + // whose product defines the matrices Qi; (0 ≤ k ≤ n). 
+ a, // Pointer to the batch of matrices, each of size (m x n). + lda, // The leading dimension of each matrix in the batch. + // For row major layout, lda ≥ max(1, m). stride_a, // Stride between consecutive matrices in the batch. tau, // Pointer to the array of scalar factors of the elementary // reflectors for each matrix in the batch. diff --git a/dpnp/backend/extensions/statistics/CMakeLists.txt b/dpnp/backend/extensions/statistics/CMakeLists.txt index 701a852c5903..d1d7e2e8cf66 100644 --- a/dpnp/backend/extensions/statistics/CMakeLists.txt +++ b/dpnp/backend/extensions/statistics/CMakeLists.txt @@ -99,7 +99,18 @@ else() ) endif() -target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) +# For coverage builds, use per_source instead of per_kernel to reduce memory +if(DPNP_GENERATE_COVERAGE) + target_link_options( + ${python_module_name} + PUBLIC -fsycl-device-code-split=per_source + ) +else() + target_link_options( + ${python_module_name} + PUBLIC -fsycl-device-code-split=per_kernel + ) +endif() if(DPNP_GENERATE_COVERAGE) target_link_options( diff --git a/dpnp/backend/extensions/statistics/bincount.cpp b/dpnp/backend/extensions/statistics/bincount.cpp index ba258cd55447..9bfe5c2a2449 100644 --- a/dpnp/backend/extensions/statistics/bincount.cpp +++ b/dpnp/backend/extensions/statistics/bincount.cpp @@ -59,10 +59,7 @@ struct BincountEdges { } - boundsT get_bounds() const - { - return {min, max}; - } + boundsT get_bounds() const { return {min, max}; } template size_t get_bin(const sycl::nd_item<_Dims> &, diff --git a/dpnp/backend/extensions/statistics/histogram_common.hpp b/dpnp/backend/extensions/statistics/histogram_common.hpp index 539b42475fbf..02fc66f26610 100644 --- a/dpnp/backend/extensions/statistics/histogram_common.hpp +++ b/dpnp/backend/extensions/statistics/histogram_common.hpp @@ -64,10 +64,7 @@ struct CachedData local_data = LocalData(shape, cgh); } - T *get_ptr() const - { - return &local_data[0]; - } + T *get_ptr() 
const { return &local_data[0]; } template void init(const sycl::nd_item<_Dims> &item) const @@ -83,15 +80,9 @@ struct CachedData } } - size_t size() const - { - return local_data.size(); - } + size_t size() const { return local_data.size(); } - T &operator[](const sycl::id &id) const - { - return local_data[id]; - } + T &operator[](const sycl::id &id) const { return local_data[id]; } template > T &operator[](const size_t id) const @@ -119,25 +110,16 @@ struct UncachedData _shape = shape; } - T *get_ptr() const - { - return global_data; - } + T *get_ptr() const { return global_data; } template void init(const sycl::nd_item<_Dims> &) const { } - size_t size() const - { - return _shape.size(); - } + size_t size() const { return _shape.size(); } - T &operator[](const sycl::id &id) const - { - return global_data[id]; - } + T &operator[](const sycl::id &id) const { return global_data[id]; } template > T &operator[](const size_t id) const @@ -235,10 +217,7 @@ struct HistWithLocalCopies } } - uint32_t size() const - { - return local_hist.size(); - } + uint32_t size() const { return local_hist.size(); } private: LocalHist local_hist; @@ -251,10 +230,7 @@ struct HistGlobalMemory static constexpr bool const sync_after_init = false; static constexpr bool const sync_before_finalize = false; - HistGlobalMemory(T *global_data) - { - global_hist = global_data; - } + HistGlobalMemory(T *global_data) { global_hist = global_data; } template void init(const sycl::nd_item<_Dims> &) const @@ -280,24 +256,15 @@ struct HistGlobalMemory template struct NoWeights { - constexpr T get(size_t) const - { - return 1; - } + constexpr T get(size_t) const { return 1; } }; template struct Weights { - Weights(T *weights) - { - data = weights; - } + Weights(T *weights) { data = weights; } - T get(size_t id) const - { - return data[id]; - } + T get(size_t id) const { return data[id]; } private: T *data = nullptr; diff --git a/dpnp/backend/extensions/statistics/histogramdd.cpp 
b/dpnp/backend/extensions/statistics/histogramdd.cpp index a5ed4a8c7d1c..bd2177073333 100644 --- a/dpnp/backend/extensions/statistics/histogramdd.cpp +++ b/dpnp/backend/extensions/statistics/histogramdd.cpp @@ -90,10 +90,7 @@ struct EdgesDd } } - boundsT get_bounds() const - { - return {&min[0], &max[0]}; - } + boundsT get_bounds() const { return {&min[0], &max[0]}; } auto get_bin_for_dim(const EdgesT &val, const EdgesT *edges_data, diff --git a/dpnp/backend/extensions/statistics/sliding_window1d.hpp b/dpnp/backend/extensions/statistics/sliding_window1d.hpp index c5a5bac111dd..f33a23609666 100644 --- a/dpnp/backend/extensions/statistics/sliding_window1d.hpp +++ b/dpnp/backend/extensions/statistics/sliding_window1d.hpp @@ -129,30 +129,15 @@ class _RegistryDataStorage return sycl::shift_group_right(sbgroup, data[y], x); } - constexpr SizeT size_y() const - { - return _size; - } + constexpr SizeT size_y() const { return _size; } - SizeT size_x() const - { - return sbgroup.get_max_local_range()[0]; - } + SizeT size_x() const { return sbgroup.get_max_local_range()[0]; } - SizeT total_size() const - { - return size_x() * size_y(); - } + SizeT total_size() const { return size_x() * size_y(); } - ncT *ptr() - { - return data; - } + ncT *ptr() { return data; } - SizeT x() const - { - return sbgroup.get_local_linear_id(); - } + SizeT x() const { return sbgroup.get_local_linear_id(); } protected: const sycl::sub_group sbgroup; @@ -277,8 +262,7 @@ struct RegistryData : public _RegistryDataStorage T *load(const T *const data, const bool &mask, const T &default_v) { - return load( - data, [mask](auto &&) { return mask; }, default_v); + return load(data, [mask](auto &&) { return mask; }, default_v); } T *load(const T *const data) @@ -349,10 +333,7 @@ struct RegistryData : public _RegistryDataStorage return store(data, [mask](auto &&) { return mask; }); } - T *store(T *const data) - { - return store(data, true); - } + T *store(T *const data) { return store(data, true); } }; 
template @@ -379,10 +360,7 @@ struct RegistryWindow : public RegistryData } } - void advance_left(const T &fill_value) - { - advance_left(1, fill_value); - } + void advance_left(const T &fill_value) { advance_left(1, fill_value); } void advance_left() { @@ -400,25 +378,13 @@ class Span Span(T *const data, const SizeT size) : data_(data), size_(size) {} - T *begin() const - { - return data(); - } + T *begin() const { return data(); } - T *end() const - { - return data() + size(); - } + T *end() const { return data() + size(); } - SizeT size() const - { - return size_; - } + SizeT size() const { return size_; } - T *data() const - { - return data_; - } + T *data() const { return data_; } protected: T *const data_; @@ -443,15 +409,9 @@ class PaddedSpan : public Span { } - T *padded_begin() const - { - return this->begin() - pad(); - } + T *padded_begin() const { return this->begin() - pad(); } - SizeT pad() const - { - return pad_; - } + SizeT pad() const { return pad_; } protected: const SizeT pad_; diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt index 68e6bf29135d..782c4e2fa171 100644 --- a/dpnp/backend/extensions/ufunc/CMakeLists.txt +++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt @@ -126,7 +126,18 @@ else() ) endif() -target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) +# For coverage builds, use per_source instead of per_kernel to reduce memory +if(DPNP_GENERATE_COVERAGE) + target_link_options( + ${python_module_name} + PUBLIC -fsycl-device-code-split=per_source + ) +else() + target_link_options( + ${python_module_name} + PUBLIC -fsycl-device-code-split=per_kernel + ) +endif() if(DPNP_GENERATE_COVERAGE) target_link_options( diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp index fff0118d06aa..cae3d0402b9f 100644 --- 
a/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp @@ -141,7 +141,8 @@ using ew_cmn_ns::unary_strided_impl_fn_ptr_t; fnT get() \ { \ if constexpr (std::is_same_v::value_type, \ - void>) { \ + void>) \ + { \ fnT fn = nullptr; \ return fn; \ } \ @@ -176,7 +177,8 @@ using ew_cmn_ns::unary_strided_impl_fn_ptr_t; fnT get() \ { \ if constexpr (std::is_same_v::value_type, \ - void>) { \ + void>) \ + { \ fnT fn = nullptr; \ return fn; \ } \ @@ -188,8 +190,7 @@ using ew_cmn_ns::unary_strided_impl_fn_ptr_t; }; template