| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #ifndef CLBLAST_CLBLAST_H_ |
| | #define CLBLAST_CLBLAST_H_ |
| |
|
| | #include <cstdlib> |
| | #include <string> |
| | #include <unordered_map> |
| |
|
| | |
| | #if defined(__APPLE__) || defined(__MACOSX) |
| | #include <OpenCL/opencl.h> |
| | #else |
| | #include <CL/opencl.h> |
| | #endif |
| |
|
| | |
| | |
// PUBLIC_API: linkage annotation placed on the non-template functions declared in this header.
// It expands to a __declspec attribute only when both _WIN32 and CLBLAST_DLL are defined:
// dllexport while COMPILING_DLL is defined, dllimport otherwise. In every other
// configuration it expands to nothing.
| | #if defined(_WIN32) && defined(CLBLAST_DLL) |
| | #if defined(COMPILING_DLL) |
| | #define PUBLIC_API __declspec(dllexport) |
| | #else |
| | #define PUBLIC_API __declspec(dllimport) |
| | #endif |
| | #else |
| | #define PUBLIC_API |
| | #endif |
| |
|
| | |
// Library version exposed as three separate integer macros (major.minor.patch = 1.6.0 here).
| | #define CLBLAST_VERSION_MAJOR 1 |
| | #define CLBLAST_VERSION_MINOR 6 |
| | #define CLBLAST_VERSION_PATCH 0 |
| |
|
| | namespace clblast { |
| | |
| |
|
| | |
| | |
// StatusCode: scoped enumeration used as the return type of every function declared in this header.
// kSuccess is 0; every other enumerator has a distinct negative value. The explicit values are part
// of the public interface, so they must not be renumbered.
| | enum class StatusCode { |
| |
|
| | |
// Group 1 (values 0 .. -63).
// NOTE(review): these numbers appear to mirror OpenCL cl_int error codes — confirm against CL/cl.h.
| | kSuccess = 0, |
| | kOpenCLCompilerNotAvailable= -3, |
| | kTempBufferAllocFailure = -4, |
| | kOpenCLOutOfResources = -5, |
| | kOpenCLOutOfHostMemory = -6, |
| | kOpenCLBuildProgramFailure = -11, |
| | kInvalidValue = -30, |
| | kInvalidCommandQueue = -36, |
| | kInvalidMemObject = -38, |
| | kInvalidBinary = -42, |
| | kInvalidBuildOptions = -43, |
| | kInvalidProgram = -44, |
| | kInvalidProgramExecutable = -45, |
| | kInvalidKernelName = -46, |
| | kInvalidKernelDefinition = -47, |
| | kInvalidKernel = -48, |
| | kInvalidArgIndex = -49, |
| | kInvalidArgValue = -50, |
| | kInvalidArgSize = -51, |
| | kInvalidKernelArgs = -52, |
| | kInvalidLocalNumDimensions = -53, |
| | kInvalidLocalThreadsTotal = -54, |
| | kInvalidLocalThreadsDim = -55, |
| | kInvalidGlobalOffset = -56, |
| | kInvalidEventWaitList = -57, |
| | kInvalidEvent = -58, |
| | kInvalidOperation = -59, |
| | kInvalidBufferSize = -61, |
| | kInvalidGlobalWorkSize = -63, |
| |
|
| | |
// Group 2 (values -1024 .. -1007; -1023 is unused): argument-validation codes for matrices,
// vectors, dimensions, leading dimensions, increments and buffer sizes.
// NOTE(review): presumably kept numerically compatible with another BLAS library — confirm.
| | kNotImplemented = -1024, |
| | kInvalidMatrixA = -1022, |
| | kInvalidMatrixB = -1021, |
| | kInvalidMatrixC = -1020, |
| | kInvalidVectorX = -1019, |
| | kInvalidVectorY = -1018, |
| | kInvalidDimension = -1017, |
| | kInvalidLeadDimA = -1016, |
| | kInvalidLeadDimB = -1015, |
| | kInvalidLeadDimC = -1014, |
| | kInvalidIncrementX = -1013, |
| | kInvalidIncrementY = -1012, |
| | kInsufficientMemoryA = -1011, |
| | kInsufficientMemoryB = -1010, |
| | kInsufficientMemoryC = -1009, |
| | kInsufficientMemoryX = -1008, |
| | kInsufficientMemoryY = -1007, |
| |
|
| | |
// Group 3 (values -2050 .. -2039, contiguous).
// NOTE(review): presumably codes specific to this library — confirm.
| | kInsufficientMemoryTemp = -2050, |
| | kInvalidBatchCount = -2049, |
| | kInvalidOverrideKernel = -2048, |
| | kMissingOverrideParameter = -2047, |
| | kInvalidLocalMemUsage = -2046, |
| | kNoHalfPrecision = -2045, |
| | kNoDoublePrecision = -2044, |
| | kInvalidVectorScalar = -2043, |
| | kInsufficientMemoryScalar = -2042, |
| | kDatabaseError = -2041, |
| | kUnknownError = -2040, |
| | kUnexpectedError = -2039, |
| | }; |
| |
|
| | |
// Layout: matrix storage order selector taken as the first argument by most matrix routines below.
// NOTE(review): the values 101/102 look like the CBLAS layout constants — confirm before changing them.
| | enum class Layout { kRowMajor = 101, kColMajor = 102 }; |
// Transpose: three-way selector used for the `*_transpose` arguments of the matrix routines below.
// NOTE(review): kConjugate presumably means conjugate-transpose, as in CBLAS (111/112/113) — confirm.
| | enum class Transpose { kNo = 111, kYes = 112, kConjugate = 113 }; |
// Triangle: upper/lower selector used for the `triangle` argument of the matrix routines below.
// NOTE(review): presumably says which triangle of the matrix is referenced — confirm.
| | enum class Triangle { kUpper = 121, kLower = 122 }; |
// Diagonal: unit/non-unit selector used for the `diagonal` argument of the Tr*/Tb*/Tp* routines below.
// NOTE(review): kUnit presumably means the diagonal is assumed to be all ones — confirm.
| | enum class Diagonal { kNonUnit = 131, kUnit = 132 }; |
// Side: left/right selector used for the `side` argument of Symm, Hemm, Trmm and Trsm.
// NOTE(review): presumably whether matrix A is applied from the left or the right — confirm.
| | enum class Side { kLeft = 141, kRight = 142 }; |
// KernelMode: selects between cross-correlation and convolution; it is the first argument of
// Im2col, Col2im and Convgemm declared further down.
| | enum class KernelMode { kCrossCorrelation = 151, kConvolution = 152 }; |
| |
|
| | |
// Precision: data-type selector passed to RetrieveParameters and OverrideParameters.
// NOTE(review): 16/32/64 presumably encode the bit width, and 3232/6464 the two halves of a
// complex number; kAny (-1) presumably acts as a wildcard — confirm against the implementation.
| | enum class Precision { kHalf = 16, kSingle = 32, kDouble = 64, |
| | kComplexSingle = 3232, kComplexDouble = 6464, kAny = -1 }; |
| |
|
| | |
| | |
| | |
| |
|
| | |
// Rotg: declaration only; the implementation is not part of this header.
// NOTE(review): the name matches BLAS xROTG (set up a Givens plane rotation) — confirm.
// Takes four device buffers (sa, sb, sc, ss), each with its own offset; none is const-qualified.
// `queue` is passed by pointer and `event` is optional (defaults to nullptr).
| | template <typename T> |
| | StatusCode Rotg(cl_mem sa_buffer, const size_t sa_offset, |
| | cl_mem sb_buffer, const size_t sb_offset, |
| | cl_mem sc_buffer, const size_t sc_offset, |
| | cl_mem ss_buffer, const size_t ss_offset, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Rotmg: declaration only.
// NOTE(review): the name matches BLAS xROTMG (set up a modified Givens rotation) — confirm.
// Takes five device buffers (sd1, sd2, sx1, sy1, sparam) with offsets; only sy1 is const-qualified.
// `event` is optional (defaults to nullptr).
| | template <typename T> |
| | StatusCode Rotmg(cl_mem sd1_buffer, const size_t sd1_offset, |
| | cl_mem sd2_buffer, const size_t sd2_offset, |
| | cl_mem sx1_buffer, const size_t sx1_offset, |
| | const cl_mem sy1_buffer, const size_t sy1_offset, |
| | cl_mem sparam_buffer, const size_t sparam_offset, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Rot: declaration only.
// NOTE(review): the name matches BLAS xROT (apply a Givens rotation to vectors x and y) — confirm.
// x and y are both non-const buffers with offset and increment; `cos` and `sin` are host scalars of type T.
// `event` is optional (defaults to nullptr).
| | template <typename T> |
| | StatusCode Rot(const size_t n, |
| | cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | const T cos, |
| | const T sin, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Rotm: declaration only.
// NOTE(review): the name matches BLAS xROTM (apply a modified Givens rotation) — confirm.
// x, y and sparam are all non-const device buffers; sparam has an offset but no increment.
// `event` is optional (defaults to nullptr).
| | template <typename T> |
| | StatusCode Rotm(const size_t n, |
| | cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_mem sparam_buffer, const size_t sparam_offset, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Swap: declaration only.
// NOTE(review): the name matches BLAS xSWAP (exchange the contents of vectors x and y) — confirm.
// Both x and y are non-const buffers with offset and increment. `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Swap(const size_t n, |
| | cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Scal: declaration only.
// NOTE(review): the name matches BLAS xSCAL (scale vector x by alpha in place) — confirm.
// `alpha` is a host scalar of type T; x is a non-const buffer. `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Scal(const size_t n, |
| | const T alpha, |
| | cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Copy: declaration only.
// NOTE(review): the name matches BLAS xCOPY (copy vector x into y) — confirm.
// x is const-qualified, y is not (presumably the destination). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Copy(const size_t n, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Axpy: declaration only.
// NOTE(review): the name matches BLAS xAXPY (y = alpha * x + y) — confirm.
// `alpha` is a host scalar; x is const-qualified, y is not (presumably updated in place).
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Axpy(const size_t n, |
| | const T alpha, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Dot: declaration only.
// NOTE(review): the name matches BLAS xDOT (dot product of x and y) — confirm.
// The result goes through a device buffer (`dot_buffer` + `dot_offset`), not a return value;
// x and y are const-qualified. `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Dot(const size_t n, |
| | cl_mem dot_buffer, const size_t dot_offset, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Dotu: declaration only; same parameter list as Dot.
// NOTE(review): the name matches BLAS xDOTU (unconjugated complex dot product) — confirm.
// `dot_buffer` is the only non-const buffer (presumably receives the result). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Dotu(const size_t n, |
| | cl_mem dot_buffer, const size_t dot_offset, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Dotc: declaration only; same parameter list as Dot.
// NOTE(review): the name matches BLAS xDOTC (complex dot product with the first vector conjugated) — confirm.
// `dot_buffer` is the only non-const buffer (presumably receives the result). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Dotc(const size_t n, |
| | cl_mem dot_buffer, const size_t dot_offset, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Nrm2: declaration only.
// NOTE(review): the name matches BLAS xNRM2 (Euclidean norm of x) — confirm.
// `nrm2_buffer` is non-const (presumably receives the result); x is const-qualified.
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Nrm2(const size_t n, |
| | cl_mem nrm2_buffer, const size_t nrm2_offset, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Asum: declaration only.
// NOTE(review): the name matches BLAS xASUM (sum of absolute values of x) — confirm.
// `asum_buffer` is non-const (presumably receives the result); x is const-qualified.
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Asum(const size_t n, |
| | cl_mem asum_buffer, const size_t asum_offset, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Sum: declaration only; same parameter shape as Asum.
// NOTE(review): presumably the plain (signed) sum of the elements of x, i.e. not a standard BLAS routine — confirm.
// `sum_buffer` is non-const (presumably receives the result); x is const-qualified.
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Sum(const size_t n, |
| | cl_mem sum_buffer, const size_t sum_offset, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Amax: declaration only.
// NOTE(review): the name matches BLAS IxAMAX (index of the element with the largest absolute value) — confirm,
// including whether the stored index is zero- or one-based.
// `imax_buffer` is non-const (presumably receives the index); x is const-qualified. `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Amax(const size_t n, |
| | cl_mem imax_buffer, const size_t imax_offset, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Amin: declaration only; same parameter shape as Amax.
// NOTE(review): presumably the index of the element with the smallest absolute value — confirm.
// `imin_buffer` is non-const (presumably receives the index); x is const-qualified. `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Amin(const size_t n, |
| | cl_mem imin_buffer, const size_t imin_offset, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Max: declaration only; same parameter shape as Amax.
// NOTE(review): presumably the index of the largest (signed, not absolute) element — confirm.
// `imax_buffer` is non-const (presumably receives the index); x is const-qualified. `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Max(const size_t n, |
| | cl_mem imax_buffer, const size_t imax_offset, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Min: declaration only; same parameter shape as Amin.
// NOTE(review): presumably the index of the smallest (signed, not absolute) element — confirm.
// `imin_buffer` is non-const (presumably receives the index); x is const-qualified. `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Min(const size_t n, |
| | cl_mem imin_buffer, const size_t imin_offset, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
| | |
| | |
| |
|
| | |
// Gemv: declaration only.
// NOTE(review): the name matches BLAS xGEMV (general matrix-vector multiply, y = alpha*op(A)*x + beta*y) — confirm.
// a and x are const-qualified; y is not (presumably the output). `alpha` and `beta` are host scalars of type T.
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Gemv(const Layout layout, const Transpose a_transpose, |
| | const size_t m, const size_t n, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const T beta, |
| | cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Gbmv: declaration only; like Gemv plus the two extra sizes `kl` and `ku`.
// NOTE(review): the name matches BLAS xGBMV (banded matrix-vector multiply; kl/ku presumably the
// number of sub- and super-diagonals) — confirm.
// a and x are const-qualified; y is not (presumably the output). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Gbmv(const Layout layout, const Transpose a_transpose, |
| | const size_t m, const size_t n, const size_t kl, const size_t ku, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const T beta, |
| | cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Hemv: declaration only.
// NOTE(review): the name matches BLAS xHEMV (Hermitian matrix-vector multiply) — confirm.
// Square problem (single size `n`) with a `triangle` selector; a and x are const-qualified, y is not.
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Hemv(const Layout layout, const Triangle triangle, |
| | const size_t n, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const T beta, |
| | cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Hbmv: declaration only; like Hemv plus the extra size `k`.
// NOTE(review): the name matches BLAS xHBMV (Hermitian banded matrix-vector multiply; k presumably
// the band width) — confirm.
// a and x are const-qualified; y is not. `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Hbmv(const Layout layout, const Triangle triangle, |
| | const size_t n, const size_t k, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const T beta, |
| | cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Hpmv: declaration only.
// NOTE(review): the name matches BLAS xHPMV (Hermitian packed matrix-vector multiply) — confirm.
// The matrix argument `ap_buffer` has an offset but no leading dimension, unlike Hemv.
// ap and x are const-qualified; y is not. `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Hpmv(const Layout layout, const Triangle triangle, |
| | const size_t n, |
| | const T alpha, |
| | const cl_mem ap_buffer, const size_t ap_offset, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const T beta, |
| | cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Symv: declaration only; same parameter list as Hemv.
// NOTE(review): the name matches BLAS xSYMV (symmetric matrix-vector multiply) — confirm.
// a and x are const-qualified; y is not. `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Symv(const Layout layout, const Triangle triangle, |
| | const size_t n, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const T beta, |
| | cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Sbmv: declaration only; same parameter list as Hbmv.
// NOTE(review): the name matches BLAS xSBMV (symmetric banded matrix-vector multiply) — confirm.
// a and x are const-qualified; y is not. `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Sbmv(const Layout layout, const Triangle triangle, |
| | const size_t n, const size_t k, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const T beta, |
| | cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Spmv: declaration only; same parameter list as Hpmv.
// NOTE(review): the name matches BLAS xSPMV (symmetric packed matrix-vector multiply) — confirm.
// ap and x are const-qualified; y is not. `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Spmv(const Layout layout, const Triangle triangle, |
| | const size_t n, |
| | const T alpha, |
| | const cl_mem ap_buffer, const size_t ap_offset, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const T beta, |
| | cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Trmv: declaration only.
// NOTE(review): the name matches BLAS xTRMV (triangular matrix-vector multiply) — confirm.
// No alpha/beta scalars; a is const-qualified while x is not (presumably overwritten with the result).
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Trmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, |
| | const size_t n, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Tbmv: declaration only; like Trmv plus the extra size `k`.
// NOTE(review): the name matches BLAS xTBMV (triangular banded matrix-vector multiply) — confirm.
// a is const-qualified; x is not (presumably overwritten with the result). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Tbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, |
| | const size_t n, const size_t k, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Tpmv: declaration only; like Trmv but the matrix argument (`ap_buffer`) has no leading dimension.
// NOTE(review): the name matches BLAS xTPMV (triangular packed matrix-vector multiply) — confirm.
// ap is const-qualified; x is not (presumably overwritten with the result). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Tpmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, |
| | const size_t n, |
| | const cl_mem ap_buffer, const size_t ap_offset, |
| | cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Trsv: declaration only; same parameter list as Trmv.
// NOTE(review): the name matches BLAS xTRSV (solve a triangular system for vector x) — confirm.
// a is const-qualified; x is not (presumably holds the right-hand side on entry and the solution on exit).
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Trsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, |
| | const size_t n, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Tbsv: declaration only; same parameter list as Tbmv.
// NOTE(review): the name matches BLAS xTBSV (solve a banded triangular system) — confirm, and check
// whether it is actually implemented (StatusCode has a kNotImplemented value).
// a is const-qualified; x is not. `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Tbsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, |
| | const size_t n, const size_t k, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Tpsv: declaration only; same parameter list as Tpmv.
// NOTE(review): the name matches BLAS xTPSV (solve a packed triangular system) — confirm, and check
// whether it is actually implemented (StatusCode has a kNotImplemented value).
// ap is const-qualified; x is not. `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Tpsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, |
| | const size_t n, |
| | const cl_mem ap_buffer, const size_t ap_offset, |
| | cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Ger: declaration only.
// NOTE(review): the name matches BLAS xGER (rank-1 update, A = alpha*x*y^T + A) — confirm.
// x and y are const-qualified; the matrix a is not (presumably updated in place).
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Ger(const Layout layout, |
| | const size_t m, const size_t n, |
| | const T alpha, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Geru: declaration only; same parameter list as Ger.
// NOTE(review): the name matches BLAS xGERU (unconjugated complex rank-1 update) — confirm.
// x and y are const-qualified; a is not (presumably updated in place). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Geru(const Layout layout, |
| | const size_t m, const size_t n, |
| | const T alpha, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Gerc: declaration only; same parameter list as Ger.
// NOTE(review): the name matches BLAS xGERC (conjugated complex rank-1 update) — confirm.
// x and y are const-qualified; a is not (presumably updated in place). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Gerc(const Layout layout, |
| | const size_t m, const size_t n, |
| | const T alpha, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Her: declaration only.
// NOTE(review): the name matches BLAS xHER (Hermitian rank-1 update) — confirm, including which
// type T stands for here (the scalar `alpha` is declared as T).
// x is const-qualified; a is not (presumably updated in place). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Her(const Layout layout, const Triangle triangle, |
| | const size_t n, |
| | const T alpha, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Hpr: declaration only; like Her but the matrix argument (`ap_buffer`) has no leading dimension.
// NOTE(review): the name matches BLAS xHPR (Hermitian packed rank-1 update) — confirm.
// x is const-qualified; ap is not (presumably updated in place). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Hpr(const Layout layout, const Triangle triangle, |
| | const size_t n, |
| | const T alpha, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_mem ap_buffer, const size_t ap_offset, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Her2: declaration only.
// NOTE(review): the name matches BLAS xHER2 (Hermitian rank-2 update) — confirm.
// x and y are const-qualified; a is not (presumably updated in place). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Her2(const Layout layout, const Triangle triangle, |
| | const size_t n, |
| | const T alpha, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Hpr2: declaration only; like Her2 but the matrix argument (`ap_buffer`) has no leading dimension.
// NOTE(review): the name matches BLAS xHPR2 (Hermitian packed rank-2 update) — confirm.
// x and y are const-qualified; ap is not (presumably updated in place). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Hpr2(const Layout layout, const Triangle triangle, |
| | const size_t n, |
| | const T alpha, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_mem ap_buffer, const size_t ap_offset, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Syr: declaration only; same parameter list as Her.
// NOTE(review): the name matches BLAS xSYR (symmetric rank-1 update) — confirm.
// x is const-qualified; a is not (presumably updated in place). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Syr(const Layout layout, const Triangle triangle, |
| | const size_t n, |
| | const T alpha, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Spr: declaration only; same parameter list as Hpr.
// NOTE(review): the name matches BLAS xSPR (symmetric packed rank-1 update) — confirm.
// x is const-qualified; ap is not (presumably updated in place). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Spr(const Layout layout, const Triangle triangle, |
| | const size_t n, |
| | const T alpha, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | cl_mem ap_buffer, const size_t ap_offset, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Syr2: declaration only; same parameter list as Her2.
// NOTE(review): the name matches BLAS xSYR2 (symmetric rank-2 update) — confirm.
// x and y are const-qualified; a is not (presumably updated in place). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Syr2(const Layout layout, const Triangle triangle, |
| | const size_t n, |
| | const T alpha, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Spr2: declaration only; same parameter list as Hpr2.
// NOTE(review): the name matches BLAS xSPR2 (symmetric packed rank-2 update) — confirm.
// x and y are const-qualified; ap is not (presumably updated in place). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Spr2(const Layout layout, const Triangle triangle, |
| | const size_t n, |
| | const T alpha, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | cl_mem ap_buffer, const size_t ap_offset, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
| | |
| | |
| |
|
| | |
// Gemm: declaration only.
// NOTE(review): the name matches BLAS xGEMM (general matrix-matrix multiply,
// C = alpha*op(A)*op(B) + beta*C) — confirm.
// a and b are const-qualified; c is not (presumably the output).
// Two trailing arguments are optional: `event` and `temp_buffer` both default to nullptr.
// NOTE(review): `temp_buffer` is presumably caller-provided scratch space sized via
// GemmTempBufferSize (declared later in this header) — confirm.
| | template <typename T> |
| | StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, |
| | const size_t m, const size_t n, const size_t k, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, |
| | const T beta, |
| | cl_mem c_buffer, const size_t c_offset, const size_t c_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr, |
| | cl_mem temp_buffer = nullptr); |
| |
|
| | |
// Symm: declaration only.
// NOTE(review): the name matches BLAS xSYMM (symmetric matrix-matrix multiply) — confirm.
// Takes `side` and `triangle` selectors; a and b are const-qualified, c is not (presumably the output).
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Symm(const Layout layout, const Side side, const Triangle triangle, |
| | const size_t m, const size_t n, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, |
| | const T beta, |
| | cl_mem c_buffer, const size_t c_offset, const size_t c_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Hemm: declaration only; same parameter list as Symm.
// NOTE(review): the name matches BLAS xHEMM (Hermitian matrix-matrix multiply) — confirm.
// a and b are const-qualified; c is not (presumably the output). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle, |
| | const size_t m, const size_t n, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, |
| | const T beta, |
| | cl_mem c_buffer, const size_t c_offset, const size_t c_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Syrk: declaration only.
// NOTE(review): the name matches BLAS xSYRK (symmetric rank-k update) — confirm.
// Single input matrix a (const-qualified); c is not const (presumably updated in place).
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_transpose, |
| | const size_t n, const size_t k, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | const T beta, |
| | cl_mem c_buffer, const size_t c_offset, const size_t c_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Herk: declaration only; same parameter list as Syrk.
// NOTE(review): the name matches BLAS xHERK (Hermitian rank-k update) — confirm, including which
// type T stands for here (both scalars `alpha` and `beta` are declared as T).
// a is const-qualified; c is not (presumably updated in place). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_transpose, |
| | const size_t n, const size_t k, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | const T beta, |
| | cl_mem c_buffer, const size_t c_offset, const size_t c_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Syr2k: declaration only.
// NOTE(review): the name matches BLAS xSYR2K (symmetric rank-2k update) — confirm.
// One transpose selector (`ab_transpose`) covers both inputs; a and b are const-qualified,
// c is not (presumably updated in place). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, |
| | const size_t n, const size_t k, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, |
| | const T beta, |
| | cl_mem c_buffer, const size_t c_offset, const size_t c_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Her2k: declaration only. Unlike the other routines it has two template parameters:
// `alpha` is of type T and `beta` is of type U.
// NOTE(review): the name matches BLAS xHER2K (Hermitian rank-2k update); T/U are presumably the
// complex type and its real counterpart — confirm.
// a and b are const-qualified; c is not (presumably updated in place). `event` defaults to nullptr.
| | template <typename T, typename U> |
| | StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, |
| | const size_t n, const size_t k, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, |
| | const U beta, |
| | cl_mem c_buffer, const size_t c_offset, const size_t c_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Trmm: declaration only.
// NOTE(review): the name matches BLAS xTRMM (triangular matrix-matrix multiply) — confirm.
// a is const-qualified; b is not (presumably overwritten with the result). There is no beta scalar.
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, |
| | const size_t m, const size_t n, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | cl_mem b_buffer, const size_t b_offset, const size_t b_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Trsm: declaration only; same parameter list as Trmm.
// NOTE(review): the name matches BLAS xTRSM (solve a triangular system with multiple right-hand sides) — confirm.
// a is const-qualified; b is not (presumably holds the right-hand sides on entry and the solution on exit).
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, |
| | const size_t m, const size_t n, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | cl_mem b_buffer, const size_t b_offset, const size_t b_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
| | |
| | |
| |
|
| | |
// Had: declaration only.
// NOTE(review): presumably an element-wise (Hadamard) product, z = alpha*x*y + beta*z — inferred
// from the name and the alpha/beta/x/y/z parameter list; confirm.
// x and y are const-qualified; z is not (presumably the output). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Had(const size_t n, |
| | const T alpha, |
| | const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, |
| | const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, |
| | const T beta, |
| | cl_mem z_buffer, const size_t z_offset, const size_t z_inc, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Omatcopy: declaration only.
// NOTE(review): presumably an out-of-place scaled matrix copy/transpose, B = alpha*op(A) — confirm.
// a is const-qualified; b is not (presumably the destination). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Omatcopy(const Layout layout, const Transpose a_transpose, |
| | const size_t m, const size_t n, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, |
| | cl_mem b_buffer, const size_t b_offset, const size_t b_ld, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Im2col: declaration only.
// NOTE(review): presumably rearranges image patches into columns (the usual im2col step used to
// express convolution as a matrix multiply) — confirm.
// Takes the image shape (channels, height, width) and per-axis kernel size, padding, stride and
// dilation. `im_buffer` is const-qualified; `col_buffer` is not (presumably the output).
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Im2col(const KernelMode kernel_mode, |
| | const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, |
| | const cl_mem im_buffer, const size_t im_offset, |
| | cl_mem col_buffer, const size_t col_offset, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Col2im: declaration only; same size arguments as Im2col with the buffer roles swapped.
// NOTE(review): presumably the inverse rearrangement of Im2col (columns back into an image) — confirm.
// `col_buffer` is const-qualified; `im_buffer` is not (presumably the output). `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Col2im(const KernelMode kernel_mode, |
| | const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, |
| | const cl_mem col_buffer, const size_t col_offset, |
| | cl_mem im_buffer, const size_t im_offset, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// Convgemm: declaration only.
// NOTE(review): presumably a batched convolution implemented through a GEMM — confirm.
// Takes the same shape arguments as Im2col plus `num_kernels` and `batch_count`.
// `im_buffer` and `kernel_buffer` are const-qualified; `result_buffer` is not (presumably the output).
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode Convgemm(const KernelMode kernel_mode, |
| | const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, const size_t num_kernels, const size_t batch_count, |
| | const cl_mem im_buffer, const size_t im_offset, |
| | const cl_mem kernel_buffer, const size_t kernel_offset, |
| | cl_mem result_buffer, const size_t result_offset, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// AxpyBatched: declaration only; batched counterpart of Axpy.
// Scalars and offsets are passed as host pointers (`alphas`, `x_offsets`, `y_offsets`) instead of
// single values, while x and y are each a single device buffer.
// NOTE(review): each pointer presumably refers to `batch_count` elements — confirm.
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode AxpyBatched(const size_t n, |
| | const T *alphas, |
| | const cl_mem x_buffer, const size_t *x_offsets, const size_t x_inc, |
| | cl_mem y_buffer, const size_t *y_offsets, const size_t y_inc, |
| | const size_t batch_count, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// GemmBatched: declaration only; batched counterpart of Gemm (without the temp_buffer argument).
// Scalars and offsets are passed as host pointers (`alphas`, `betas`, `a_offsets`, `b_offsets`,
// `c_offsets`); m, n, k and the leading dimensions are single values shared by the whole call.
// NOTE(review): each pointer presumably refers to `batch_count` elements — confirm.
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode GemmBatched(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, |
| | const size_t m, const size_t n, const size_t k, |
| | const T *alphas, |
| | const cl_mem a_buffer, const size_t *a_offsets, const size_t a_ld, |
| | const cl_mem b_buffer, const size_t *b_offsets, const size_t b_ld, |
| | const T *betas, |
| | cl_mem c_buffer, const size_t *c_offsets, const size_t c_ld, |
| | const size_t batch_count, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
// GemmStridedBatched: declaration only; batched Gemm variant with one alpha/beta and one base
// offset per matrix, plus a per-matrix stride (`a_stride`, `b_stride`, `c_stride`).
// NOTE(review): the stride is presumably the distance between consecutive batch entries inside
// each buffer — confirm, including its unit.
// `event` defaults to nullptr.
| | template <typename T> |
| | StatusCode GemmStridedBatched(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, |
| | const size_t m, const size_t n, const size_t k, |
| | const T alpha, |
| | const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const size_t a_stride, |
| | const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const size_t b_stride, |
| | const T beta, |
| | cl_mem c_buffer, const size_t c_offset, const size_t c_ld, const size_t c_stride, |
| | const size_t batch_count, |
| | cl_command_queue* queue, cl_event* event = nullptr); |
| |
|
| | |
| |
|
| | |
// GemmTempBufferSize: declaration only. Takes the same layout, transpose, size, offset and
// leading-dimension arguments as Gemm, but no buffers or scalars.
// `temp_buffer_size` is a non-const reference, so the callee can write a value back through it.
// NOTE(review): presumably reports the size needed for Gemm's optional `temp_buffer` — confirm,
// including the unit (bytes vs. elements).
| | template <typename T> |
| | StatusCode GemmTempBufferSize(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, |
| | const size_t m, const size_t n, const size_t k, |
| | const size_t a_offset, const size_t a_ld, |
| | const size_t b_offset, const size_t b_ld, |
| | const size_t c_offset, const size_t c_ld, |
| | cl_command_queue* queue, size_t& temp_buffer_size); |
| |
|
| | |
| |
|
| | |
| | |
// ClearCache: non-template function carrying the PUBLIC_API annotation; takes no arguments.
// NOTE(review): presumably empties the library's internal cache of compiled kernels — confirm.
| | StatusCode PUBLIC_API ClearCache(); |
| |
|
| | |
| | |
// FillCache: non-template function carrying the PUBLIC_API annotation; takes a single device id.
// NOTE(review): presumably pre-compiles kernels for `device` so later calls skip compilation — confirm.
| | StatusCode PUBLIC_API FillCache(const cl_device_id device); |
| |
|
| | |
| |
|
| | |
// RetrieveParameters: non-template function carrying the PUBLIC_API annotation.
// Identifies a kernel by device, name and precision, and takes `parameters` as a mutable
// reference to a name -> value map so the callee can write into the caller's map.
// NOTE(review): presumably fills `parameters` with the tuning values currently in use for that
// kernel — confirm against the implementation.
| | StatusCode PUBLIC_API RetrieveParameters(const cl_device_id device, const std::string &kernel_name, |
| | const Precision precision, |
| | std::unordered_map<std::string,size_t> &parameters); |
| |
|
| | |
| | |
// OverrideParameters: non-template function carrying the PUBLIC_API annotation.
// Identifies a kernel by device, name and precision, and takes `parameters` as a const reference
// to a name -> value map (read-only input, no copy of the caller's map).
// NOTE(review): presumably replaces the tuning values used for that kernel; the StatusCode values
// kInvalidOverrideKernel and kMissingOverrideParameter look related — confirm.
| | StatusCode PUBLIC_API OverrideParameters(const cl_device_id device, const std::string &kernel_name, |
| | const Precision precision, |
| | const std::unordered_map<std::string,size_t> &parameters); |
| |
|
| | |
| |
|
| | |
// TuneXaxpy: declaration only.
// NOTE(review): presumably runs the auto-tuner for the Xaxpy kernel on `queue` for vector size n — confirm.
// `fraction` is a double whose meaning is not visible here (presumably the share of the search
// space to try — TODO confirm).
// `parameters` is a mutable reference to a name -> value map, so the callee can write results into it.
| | template <typename T> |
| | StatusCode TuneXaxpy(cl_command_queue* queue, const size_t n, |
| | const double fraction, std::unordered_map<std::string,size_t> &parameters); |
| |
|
| | |
// TuneXdot: declaration only.
// NOTE(review): presumably runs the auto-tuner for the Xdot kernel on `queue` for vector size n — confirm.
// `fraction` is a double whose meaning is not visible here (presumably the share of the search
// space to try — TODO confirm).
// `parameters` is a mutable reference to a name -> value map, so the callee can write results into it.
| | template <typename T> |
| | StatusCode TuneXdot(cl_command_queue* queue, const size_t n, |
| | const double fraction, std::unordered_map<std::string,size_t> &parameters); |
| |
|
| | |
// TuneXgemv: declaration only.
// NOTE(review): presumably runs the auto-tuner for the Xgemv kernel on `queue` for an m x n problem — confirm.
// `fraction` is a double whose meaning is not visible here (presumably the share of the search
// space to try — TODO confirm).
// `parameters` is a mutable reference to a name -> value map, so the callee can write results into it.
| | template <typename T> |
| | StatusCode TuneXgemv(cl_command_queue* queue, const size_t m, const size_t n, |
| | const double fraction, std::unordered_map<std::string,size_t> &parameters); |
| |
|
| | |
// TuneXger: declaration only.
// NOTE(review): presumably runs the auto-tuner for the Xger kernel on `queue` for an m x n problem — confirm.
// `fraction` is a double whose meaning is not visible here (presumably the share of the search
// space to try — TODO confirm).
// `parameters` is a mutable reference to a name -> value map, so the callee can write results into it.
| | template <typename T> |
| | StatusCode TuneXger(cl_command_queue* queue, const size_t m, const size_t n, |
| | const double fraction, std::unordered_map<std::string,size_t> &parameters); |
| |
|
| | |
// TuneXgemm: declaration only.
// NOTE(review): presumably runs the auto-tuner for the Xgemm kernel on `queue` for an m x n x k problem — confirm.
// `fraction` is a double whose meaning is not visible here (presumably the share of the search
// space to try — TODO confirm).
// `parameters` is a mutable reference to a name -> value map, so the callee can write results into it.
| | template <typename T> |
| | StatusCode TuneXgemm(cl_command_queue* queue, const size_t m, const size_t n, const size_t k, |
| | const double fraction, std::unordered_map<std::string,size_t> &parameters); |
| |
|
| | |
// TuneXgemmDirect: declaration only; same parameter list as TuneXgemm.
// NOTE(review): presumably runs the auto-tuner for a "direct" GEMM kernel variant on `queue` for an
// m x n x k problem — confirm.
// `fraction` is a double whose meaning is not visible here (presumably the share of the search
// space to try — TODO confirm).
// `parameters` is a mutable reference to a name -> value map, so the callee can write results into it.
| | template <typename T> |
| | StatusCode TuneXgemmDirect(cl_command_queue* queue, const size_t m, const size_t n, const size_t k, |
| | const double fraction, std::unordered_map<std::string,size_t> &parameters); |
| |
|
| | |
// TuneCopy: declaration only.
// NOTE(review): presumably runs the auto-tuner for a matrix copy kernel on `queue` for an m x n problem — confirm.
// `fraction` is a double whose meaning is not visible here (presumably the share of the search
// space to try — TODO confirm).
// `parameters` is a mutable reference to a name -> value map, so the callee can write results into it.
| | template <typename T> |
| | StatusCode TuneCopy(cl_command_queue* queue, const size_t m, const size_t n, |
| | const double fraction, std::unordered_map<std::string,size_t> &parameters); |
| |
|
| | |
// TunePad: declaration only.
// NOTE(review): presumably runs the auto-tuner for a matrix padding kernel on `queue` for an m x n problem — confirm.
// `fraction` is a double whose meaning is not visible here (presumably the share of the search
// space to try — TODO confirm).
// `parameters` is a mutable reference to a name -> value map, so the callee can write results into it.
| | template <typename T> |
| | StatusCode TunePad(cl_command_queue* queue, const size_t m, const size_t n, |
| | const double fraction, std::unordered_map<std::string,size_t> &parameters); |
| |
|
| | |
// TuneTranspose: declaration only.
// NOTE(review): presumably runs the auto-tuner for a matrix transpose kernel on `queue` for an m x n problem — confirm.
// `fraction` is a double whose meaning is not visible here (presumably the share of the search
// space to try — TODO confirm).
// `parameters` is a mutable reference to a name -> value map, so the callee can write results into it.
| | template <typename T> |
| | StatusCode TuneTranspose(cl_command_queue* queue, const size_t m, const size_t n, |
| | const double fraction, std::unordered_map<std::string,size_t> &parameters); |
| |
|
| | |
// TunePadtranspose: declaration only.
// NOTE(review): presumably runs the auto-tuner for a combined pad-and-transpose kernel on `queue`
// for an m x n problem — confirm.
// `fraction` is a double whose meaning is not visible here (presumably the share of the search
// space to try — TODO confirm).
// `parameters` is a mutable reference to a name -> value map, so the callee can write results into it.
| | template <typename T> |
| | StatusCode TunePadtranspose(cl_command_queue* queue, const size_t m, const size_t n, |
| | const double fraction, std::unordered_map<std::string,size_t> &parameters); |
| |
|
| | |
// TuneInvert: declaration only; takes three sizes (m, n, k) like TuneXgemm.
// NOTE(review): presumably runs the auto-tuner for a matrix inversion kernel on `queue` — confirm,
// including what m, n and k mean for this kernel.
// `fraction` is a double whose meaning is not visible here (presumably the share of the search
// space to try — TODO confirm).
// `parameters` is a mutable reference to a name -> value map, so the callee can write results into it.
| | template <typename T> |
| | StatusCode TuneInvert(cl_command_queue* queue, const size_t m, const size_t n, const size_t k, |
| | const double fraction, std::unordered_map<std::string,size_t> &parameters); |
| |
|
| | |
| |
|
| | } |
| |
|
| | |
| | #endif |
| |
|