|  | CUTLASS
    CUDA Templates for Linear Algebra Subroutines and Solvers | 
| Namespaces | |
| detail | |
| kernel | |
| thread | |
| Classes | |
| struct | BlockForEach | 
| struct | Gemm | 
| struct | Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, AccumulatorType, arch::OpMultiplyAdd > | 
| Partial specialization for multiply-add.  More... | |
| struct | Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, AccumulatorType, arch::OpMultiplyAddSaturate > | 
| Partial specialization for multiply-add-saturate.  More... | |
| struct | Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, AccumulatorType, arch::OpXorPopc > | 
| Partial specialization for XOR-popc.  More... | |
| struct | TensorDiagonalForEach | 
| Launches a kernel calling a functor for each element along a tensor's diagonal.  More... | |
| struct | TensorForEach | 
| Launches a kernel calling a functor for each element in a tensor's index space.  More... | |
| Functions | |
| template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename AccumulatorType , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>> | |
| void | compute_gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, TensorRef< ElementB, LayoutB > tensor_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c, TensorRef< ElementC, LayoutC > tensor_d, AccumulatorType initial_accum) | 
| template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename AccumulatorType , typename InnerProductOp = multiply_add<AccumulatorType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>> | |
| void | compute_gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, TensorRef< ElementB, LayoutB > tensor_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c, AccumulatorType initial_accum) | 
| template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename InnerProductOp , typename ConvertOp > | |
| void | BatchedGemm (gemm::GemmCoord problem_size, int batch_count, ScalarType alpha, TensorRefCollectionA const &tensor_a, TensorRefCollectionB const &tensor_b, ScalarType beta, TensorRefCollectionC &tensor_c, AccumulatorType initial_accum) | 
| Computes a batch of GEMMs over a set of matrices of common dimension.  More... | |
| template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType > | |
| void | BatchedGemm (gemm::GemmCoord problem_size, int batch_count, ScalarType alpha, TensorRefCollectionA const &tensor_a, TensorRefCollectionB const &tensor_b, ScalarType beta, TensorRefCollectionC &tensor_c) | 
| template<typename Element > | |
| bool | BlockCompareEqual (Element const *ptr_A, Element const *ptr_B, size_t capacity, int grid_size=0, int block_size=0) | 
| Performs a bit-level equality check between two blocks.  More... | |
| template<typename Element > | |
| bool | BlockCompareRelativelyEqual (Element const *ptr_A, Element const *ptr_B, size_t capacity, Element epsilon, Element nonzero_floor, int grid_size=0, int block_size=0) | 
| Performs an approximate (relative) equality check between two blocks, parameterized by epsilon and nonzero_floor.  More... | |
| template<typename Element , typename Layout > | |
| void | TensorFillRandomGaussian (TensorView< Element, Layout > view, uint64_t seed, Element mean=Element(0), Element stddev=Element(1), int bits=-1) | 
| Fills a tensor with random values with a Gaussian distribution.  More... | |
| template<typename Element > | |
| void | BlockFillRandomGaussian (Element *ptr, size_t capacity, uint64_t seed, Element mean=Element(0), Element stddev=Element(1), int bits=-1) | 
| Fills a block of data with random values drawn from a Gaussian distribution.  More... | |
| template<typename Element , typename Layout > | |
| void | TensorFillRandomUniform (TensorView< Element, Layout > view, uint64_t seed, Element max=Element(1), Element min=Element(0), int bits=-1) | 
| Fills a tensor with random values with a uniform random distribution.  More... | |
| template<typename Element > | |
| void | BlockFillRandomUniform (Element *ptr, size_t capacity, uint64_t seed, Element max=Element(1), Element min=Element(0), int bits=-1) | 
| Fills a block of data with random values drawn from a uniform distribution.  More... | |
| template<typename Element , typename Layout > | |
| void | TensorFillDiagonal (TensorView< Element, Layout > view, Element diag=Element(1), Element other=Element(0)) | 
| Fills a tensor's diagonal with one value (diag) and every off-diagonal element with another (other).  More... | |
| template<typename Element , typename Layout > | |
| void | TensorFill (TensorView< Element, Layout > view, Element val=Element(0)) | 
| Fills a tensor with a uniform value.  More... | |
| template<typename Element , typename Layout > | |
| void | TensorFillIdentity (TensorView< Element, Layout > view) | 
| Fills a tensor's diagonal with 1 and 0 everywhere else.  More... | |
| template<typename Element , typename Layout > | |
| void | TensorUpdateDiagonal (TensorView< Element, Layout > view, Element diag=Element(1)) | 
| Writes a uniform value to the diagonal of a tensor without modifying off-diagonal elements.  More... | |
| template<typename Element , typename Layout > | |
| void | TensorUpdateOffDiagonal (TensorView< Element, Layout > view, Element other=Element(1)) | 
| Writes a uniform value to all elements in the tensor without modifying diagonal elements.  More... | |
| template<typename Element , typename Layout > | |
| void | TensorFillLinear (TensorView< Element, Layout > view, Array< Element, Layout::kRank > const &v, Element s=Element(0)) | 
| Fills tensor with a linear combination of its coordinate and another vector.  More... | |
| template<typename Element > | |
| void | BlockFillSequential (Element *ptr, int64_t capacity, Element v=Element(1), Element s=Element(0)) | 
| Fills a block of data with sequential elements.  More... | |
| template<typename Element > | |
| void | BlockFillRandom (Element *ptr, size_t capacity, uint64_t seed, Distribution dist) | 
| Fills a block of data with random elements drawn from the given distribution.  More... | |
| template<typename Element , typename Layout > | |
| void | TensorCopyDiagonalIn (TensorView< Element, Layout > view, Element const *ptr) | 
| Copies a diagonal in from host memory without modifying off-diagonal elements.  More... | |
| template<typename Element , typename Layout > | |
| void | TensorCopyDiagonalOut (Element *ptr, TensorView< Element, Layout > view) | 
| Copies the diagonal of a tensor into a dense buffer in host memory.  More... | |
| void cutlass::reference::device::BatchedGemm | ( | gemm::GemmCoord | problem_size, | 
| int | batch_count, | ||
| ScalarType | alpha, | ||
| TensorRefCollectionA const & | tensor_a, | ||
| TensorRefCollectionB const & | tensor_b, | ||
| ScalarType | beta, | ||
| TensorRefCollectionC & | tensor_c, | ||
| AccumulatorType | initial_accum | ||
| ) | 
| void cutlass::reference::device::BatchedGemm | ( | gemm::GemmCoord | problem_size, | 
| int | batch_count, | ||
| ScalarType | alpha, | ||
| TensorRefCollectionA const & | tensor_a, | ||
| TensorRefCollectionB const & | tensor_b, | ||
| ScalarType | beta, | ||
| TensorRefCollectionC & | tensor_c | ||
| ) | 
Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.
| bool cutlass::reference::device::BlockCompareEqual | ( | Element const * | ptr_A, | 
| Element const * | ptr_B, | ||
| size_t | capacity, | ||
| int | grid_size = 0, | ||
| int | block_size = 0 | ||
| ) | 
| bool cutlass::reference::device::BlockCompareRelativelyEqual | ( | Element const * | ptr_A, | 
| Element const * | ptr_B, | ||
| size_t | capacity, | ||
| Element | epsilon, | ||
| Element | nonzero_floor, | ||
| int | grid_size = 0, | ||
| int | block_size = 0 | ||
| ) | 
| void cutlass::reference::device::BlockFillRandom | ( | Element * | ptr, | 
| size_t | capacity, | ||
| uint64_t | seed, | ||
| Distribution | dist | ||
| ) | 
| void cutlass::reference::device::BlockFillRandomGaussian | ( | Element * | ptr, | 
| size_t | capacity, | ||
| uint64_t | seed, | ||
| Element | mean = Element(0), | ||
| Element | stddev = Element(1), | ||
| int | bits = -1 | ||
| ) | 
| Element | Element type | 
| bits | If non-negative, specifies number of fractional bits that are not truncated to zero. Permits reducing precision of data. | 
| seed | seed for RNG | 
| mean | Gaussian distribution's mean | 
| stddev | Gaussian distribution's standard deviation | 
| void cutlass::reference::device::BlockFillRandomUniform | ( | Element * | ptr, | 
| size_t | capacity, | ||
| uint64_t | seed, | ||
| Element | max = Element(1), | ||
| Element | min = Element(0), | ||
| int | bits = -1 | ||
| ) | 
| bits | If non-negative, specifies number of fractional bits that are not truncated to zero. Permits reducing precision of data. | 
| seed | seed for RNG | 
| max | upper bound of distribution | 
| min | lower bound for distribution | 
| void cutlass::reference::device::BlockFillSequential | ( | Element * | ptr, | 
| int64_t | capacity, | ||
| Element | v = Element(1), | ||
| Element | s = Element(0) | ||
| ) | 
| void cutlass::reference::device::compute_gemm | ( | gemm::GemmCoord | problem_size, | 
| ScalarType | alpha, | ||
| TensorRef< ElementA, LayoutA > | tensor_a, | ||
| TensorRef< ElementB, LayoutB > | tensor_b, | ||
| ScalarType | beta, | ||
| TensorRef< ElementC, LayoutC > | tensor_c, | ||
| TensorRef< ElementC, LayoutC > | tensor_d, | ||
| AccumulatorType | initial_accum | ||
| ) | 
Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.
Explicitly naming types needed by this template can be cumbersome, particularly for the accumulator type, so a function argument 'initial_accum' is exposed. Passing AccumulatorType(0) as the last function argument can be easier than naming all template arguments explicitly.
| void cutlass::reference::device::compute_gemm | ( | gemm::GemmCoord | problem_size, | 
| ScalarType | alpha, | ||
| TensorRef< ElementA, LayoutA > | tensor_a, | ||
| TensorRef< ElementB, LayoutB > | tensor_b, | ||
| ScalarType | beta, | ||
| TensorRef< ElementC, LayoutC > | tensor_c, | ||
| AccumulatorType | initial_accum | ||
| ) | 
Computes a general matrix product among matrices (tensors of rank=2) pointed to by TensorRef objects.
This assumes the accumulator type is the same type as the scalars.
| void cutlass::reference::device::TensorCopyDiagonalIn | ( | TensorView< Element, Layout > | view, | 
| Element const * | ptr | ||
| ) | 
| Layout | Layout function | 
| ptr | dense buffer of elements | 
| view | destination tensor | 
| void cutlass::reference::device::TensorCopyDiagonalOut | ( | Element * | ptr, | 
| TensorView< Element, Layout > | view | ||
| ) | 
| Layout | Layout function | 
| view | source tensor | 
| ptr | dense buffer of elements | 
| void cutlass::reference::device::TensorFill | ( | TensorView< Element, Layout > | view, | 
| Element | val = Element(0) | ||
| ) | 
| Layout | Layout function | 
| val | value to uniformly fill it with | 
| view | destination tensor | 
| void cutlass::reference::device::TensorFillDiagonal | ( | TensorView< Element, Layout > | view, | 
| Element | diag = Element(1), | ||
| Element | other = Element(0) | ||
| ) | 
| Layout | Layout function | 
| other | value to write off the diagonal | 
| view | destination tensor | 
| diag | value to write in the diagonal | 
| void cutlass::reference::device::TensorFillIdentity | ( | TensorView< Element, Layout > | view | ) | 
| Layout | Layout function | 
| view | destination tensor | 
| void cutlass::reference::device::TensorFillLinear | ( | TensorView< Element, Layout > | view, | 
| Array< Element, Layout::kRank > const & | v, | ||
| Element | s = Element(0) | ||
| ) | 
| Layout | Layout function | 
| view | destination tensor | 
| void cutlass::reference::device::TensorFillRandomGaussian | ( | TensorView< Element, Layout > | view, | 
| uint64_t | seed, | ||
| Element | mean = Element(0), | ||
| Element | stddev = Element(1), | ||
| int | bits = -1 | ||
| ) | 
| Layout | Layout function | 
| bits | If non-negative, specifies number of fractional bits that are not truncated to zero. Permits reducing precision of data. | 
| view | destination tensor | 
| seed | seed for RNG | 
| mean | Gaussian distribution's mean | 
| stddev | Gaussian distribution's standard deviation | 
| void cutlass::reference::device::TensorFillRandomUniform | ( | TensorView< Element, Layout > | view, | 
| uint64_t | seed, | ||
| Element | max = Element(1), | ||
| Element | min = Element(0), | ||
| int | bits = -1 | ||
| ) | 
| Layout | Layout function | 
| bits | If non-negative, specifies number of fractional bits that are not truncated to zero. Permits reducing precision of data. | 
| view | destination tensor | 
| seed | seed for RNG | 
| max | upper bound of distribution | 
| min | lower bound for distribution | 
| void cutlass::reference::device::TensorUpdateDiagonal | ( | TensorView< Element, Layout > | view, | 
| Element | diag = Element(1) | ||
| ) | 
| Layout | Layout function | 
| view | destination tensor | 
| void cutlass::reference::device::TensorUpdateOffDiagonal | ( | TensorView< Element, Layout > | view, | 
| Element | other = Element(1) | ||
| ) | 
| Layout | Layout function | 
| view | destination tensor | 
Generated by Doxygen 1.8.11