/* ************************************************************************
 * Copyright (C) 2016-2025 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * ************************************************************************ */

//! HIP = Heterogeneous-compute Interface for Portability
//!
//! Defines an extremely thin runtime layer that allows source code to be compiled unmodified
//! through either AMD HCC or NVCC.   Key features tend to be in the spirit
//! and terminology of CUDA, but with a portable path to other accelerators as well.
//!
//!  This is the master include file for hipblas, wrapping around rocblas and cublas "version 2"

#ifndef HIPBLAS_H
#define HIPBLAS_H

#include "hipblas-export.h"
#include "hipblas-version.h"
#include <hip/hip_complex.h>
#include <hip/hip_runtime_api.h>
#include <hip/library_types.h>
#include <hipblas-common/hipblas-common.h>
#include <stddef.h>
#include <stdint.h>

#ifdef __HIP_PLATFORM_NVCC__
#include <cublas_v2.h>
#endif

/* Workaround clang bug:

   This macro expands to static if clang is used; otherwise it expands empty.
   It is intended to be used in variable template specializations, where clang
   requires static in order for the specializations to have internal linkage,
   while technically, storage class specifiers besides thread_local are not
   allowed in template specializations, and static in the primary template
   definition should imply internal linkage for all specializations.

   If clang shows an error for improperly using a storage class specifier in
   a specialization, then HIPBLAS_CLANG_STATIC should be redefined as empty,
   and perhaps removed entirely, if the above bug has been fixed.

   The compiler is detected with #ifdef rather than #if so that compilers which
   do not define __clang__ do not report an undefined-macro (-Wundef) diagnostic
   for every translation unit that includes this header.
*/
#ifdef __clang__
#define HIPBLAS_CLANG_STATIC static
#else
#define HIPBLAS_CLANG_STATIC
#endif

/* Deprecation marker for hipBLAS declarations.
 *
 * A definition of HIPBLAS_DEPRECATED_MSG supplied before this header is included
 * is left untouched; otherwise it is defined here.
 *
 * The compiler-specific attributes are currently disabled, so the macro expands
 * to nothing on every path. To turn the warnings back on, expand to
 *     __attribute__((deprecated(#MSG)))    in the non-MSVC branch, and
 *     __declspec(deprecated(#MSG))         in the MSVC branch.
 * Defining HIPBLAS_NO_DEPRECATED_WARNINGS always selects the empty expansion.
 */
#ifndef HIPBLAS_DEPRECATED_MSG
#if defined(HIPBLAS_NO_DEPRECATED_WARNINGS)
#define HIPBLAS_DEPRECATED_MSG(MSG)
#elif defined(_MSC_VER)
#define HIPBLAS_DEPRECATED_MSG(MSG) /* MSVC: attribute currently disabled */
#else
#define HIPBLAS_DEPRECATED_MSG(MSG) /* non-MSVC: attribute currently disabled */
#endif
#endif

/*
 *  Naming conventions and Notations:
 *
 *  1. Please follow the naming convention
 *      Big case for matrix, e.g. matrix A, B, C   GEMM (C = A*B)
 *      Lower case for vector, e.g. vector x, y    GEMV (y = A*x)
 *
 *  2. Specify the supported precisions for both rocBLAS and cuBLAS backend for any new functions:
 *     For example:
 *          - Supported precisions in rocBLAS : h,bf,s,d,c,z
 *          - Supported precisions in cuBLAS  : s,d,c,z
 *     Use the following notations,
 *     h  = half
 *     bf = 16 bit brain floating point
 *     s  = single
 *     d  = double
 *     c  = single complex
 *     z  = double complex
 */

/*! \brief ``hipblasHandle_t`` is an opaque handle (declared as a void pointer) that stores the library context (either rocBLAS or cuBLAS).
 *         A handle is created with hipblasCreate() and released with hipblasDestroy(). */
typedef void* hipblasHandle_t;

/*! \brief 16-bit half-precision storage type.
 *
 *  By default this is a plain ``uint16_t``. It becomes HIP's ``__half`` only when
 *  compiling as C++11 or later with ``HIPBLAS_USE_HIP_HALF`` defined. */

#if defined(__cplusplus) && __cplusplus >= 201103L && defined(HIPBLAS_USE_HIP_HALF)

#include <hip/hip_fp16.h>
typedef __half hipblasHalf;

#else

typedef uint16_t hipblasHalf;

#endif

/*! \brief  8-bit signed integer type (an alias of ``int8_t``). */
typedef int8_t hipblasInt8;

/*! \brief  Stride between consecutive matrices or vectors in strided_batched functions, held as a signed 64-bit integer.*/
typedef int64_t hipblasStride;

/*! \brief  Struct to represent a 16-bit Brain floating-point number.*/
#if defined(HIPBLAS_USE_HIP_BFLOAT16)

// HIPBLAS_USE_HIP_BFLOAT16 makes hipblasBfloat16 an alias of HIP's own hip_bfloat16 type.
// note that hip_bfloat16 isn't currently supported with cuda backend
#include <hip/hip_bfloat16.h>
typedef hip_bfloat16 hipblasBfloat16;

#elif __cplusplus < 201103L || !defined(HIPBLAS_BFLOAT16_CLASS)

// If this is a C or C++ compiler below C++11, or not requesting HIPBLAS_BFLOAT16_CLASS,
// we only include a minimal definition of hipblasBfloat16: a struct holding the raw
// 16-bit pattern, with no conversion helpers.
typedef struct hipblasBfloat16
{
    uint16_t data; // raw bfloat16 bits
} hipblasBfloat16;

#else

// C++ (>= C++11, HIPBLAS_BFLOAT16_CLASS) form of hipblasBfloat16.
//
// The value is stored in `data` as the upper 16 bits of an IEEE-754 single-precision
// float. The class has a single public data member and a defaulted default
// constructor so that it can remain layout-compatible with the plain C struct form.
class hipblasBfloat16
{
public:
    uint16_t data; // raw bfloat16 bit pattern

    // Place the 16 bfloat16 bits in the upper half of a 32-bit word (lower 16 bits
    // zero) and reinterpret that word as an IEEE float.
    static float bfloat16_to_float(hipblasBfloat16 val)
    {
        union
        {
            uint32_t int32;
            float    fp32;
        } u = {uint32_t(val.data) << 16};
        return u.fp32;
    }

    // Convert an IEEE float to bfloat16 by keeping its upper 16 bits, rounded.
    static hipblasBfloat16 float_to_bfloat16(float f)
    {
        hipblasBfloat16 rv;
        union
        {
            float    fp32;
            uint32_t int32;
        } u = {f};
        if(~u.int32 & 0x7f800000)
        {
            // Exponent is not all ones, i.e. the value is finite.
            u.int32 += 0x7fff + ((u.int32 >> 16) & 1); // Round to nearest, round to even
        }
        else if(u.int32 & 0xffff)
        {
            // Exponent all ones with low mantissa bits set: a NaN. Set bit 16 so the
            // mantissa kept after truncation is non-zero and the result stays a NaN.
            u.int32 |= 0x10000; // Preserve signaling NaN
        }
        rv.data = uint16_t(u.int32 >> 16);
        return rv;
    }

    hipblasBfloat16() = default;

    // round upper 16 bits of IEEE float to convert to bfloat16
    //
    // Only the raw bits of the converted value are stored. Initialising `data`
    // from the hipblasBfloat16 object itself would go through the implicit
    // operator float() and then truncate that float to an integer, storing the
    // numeric value (e.g. 1 for 1.0f) instead of the bit pattern (0x3f80).
    explicit hipblasBfloat16(float f)
        : data(float_to_bfloat16(f).data)
    {
    }

    // Implicit widening conversion back to IEEE float.
    operator float() const
    {
        return bfloat16_to_float(*this);
    }

    // True when any bit other than the sign bit is set, so +0 and -0 are false.
    explicit operator bool() const
    {
        return data & 0x7fff;
    }

    // Adds in single precision and rounds the sum back to bfloat16.
    inline hipblasBfloat16& operator+=(const hipblasBfloat16& a)
    {
        *this = hipblasBfloat16(float(*this) + float(a));
        return *this;
    }
};

// Plain-struct mirror of the layout that C code sees for hipblasBfloat16. It is used
// only by the compile-time checks below.
typedef struct
{
    uint16_t data;
} hipblasBfloat16_public;

// The class form must stay standard-layout and trivial, with the same size and member
// offset as the plain struct, so that C and C++ translation units agree on the type.
// NOTE(review): std::is_standard_layout / std::is_trivial come from <type_traits>, which
// this header does not include directly — presumably it arrives through the HIP
// headers; confirm.
static_assert(std::is_standard_layout<hipblasBfloat16>{},
              "hipblasBfloat16 is not a standard layout type, and thus is "
              "incompatible with C.");

static_assert(std::is_trivial<hipblasBfloat16>{},
              "hipblasBfloat16 is not a trivial type, and thus is "
              "incompatible with C.");

static_assert(sizeof(hipblasBfloat16) == sizeof(hipblasBfloat16_public)
                  && offsetof(hipblasBfloat16, data) == offsetof(hipblasBfloat16_public, data),
              "internal hipblasBfloat16 does not match public hipblasBfloat16_public");

#endif

/*! \brief Indicates whether scalar pointers are on the host or device. This is used for scalars alpha and beta and for scalar function return values.
 *         The mode is set per handle with hipblasSetPointerMode() and queried with hipblasGetPointerMode(). */
typedef enum
{
    HIPBLAS_POINTER_MODE_HOST, /**< Scalar values affected by this variable will be located on the host. */
    HIPBLAS_POINTER_MODE_DEVICE /**<  Scalar values affected by this variable will be located on the device. */
} hipblasPointerMode_t;

/* The enum is declared only if HIPBLAS_FILL_MODE_DECLARED is not already defined, so
 * another header may provide the same declaration first. When it has, C++11 builds
 * verify at compile time that the enumerator values match the ones expected here. */
#ifndef HIPBLAS_FILL_MODE_DECLARED
#define HIPBLAS_FILL_MODE_DECLARED
/*! \brief Used by the Hermitian, symmetric, and triangular matrix routines to specify whether the upper or lower triangle is being referenced. */
typedef enum
{
    HIPBLAS_FILL_MODE_UPPER = 121, /**<  Upper triangle. */
    HIPBLAS_FILL_MODE_LOWER = 122, /**<  Lower triangle. */
    HIPBLAS_FILL_MODE_FULL  = 123 /**<  NOTE(review): undocumented here; presumably the full matrix is referenced — confirm. */
} hipblasFillMode_t;

#elif __cplusplus >= 201103L
static_assert(HIPBLAS_FILL_MODE_UPPER == 121,
              "Inconsistent declaration of HIPBLAS_FILL_MODE_UPPER");
static_assert(HIPBLAS_FILL_MODE_LOWER == 122,
              "Inconsistent declaration of HIPBLAS_FILL_MODE_LOWER");
static_assert(HIPBLAS_FILL_MODE_FULL == 123, "Inconsistent declaration of HIPBLAS_FILL_MODE_FULL");
#endif // HIPBLAS_FILL_MODE_DECLARED

/* Declared only if HIPBLAS_DIAG_TYPE_DECLARED is not already defined; otherwise C++11
 * builds check that the previously declared enumerators carry the expected values. */
#ifndef HIPBLAS_DIAG_TYPE_DECLARED
#define HIPBLAS_DIAG_TYPE_DECLARED
/*! \brief Used by the triangular matrix routines to specify whether the matrix is unit triangular.*/
typedef enum
{
    HIPBLAS_DIAG_NON_UNIT = 131, /**<  Non-unit triangular. */
    HIPBLAS_DIAG_UNIT     = 132 /**<  Unit triangular. */
} hipblasDiagType_t;

#elif __cplusplus >= 201103L
static_assert(HIPBLAS_DIAG_NON_UNIT == 131, "Inconsistent declaration of HIPBLAS_DIAG_NON_UNIT");
static_assert(HIPBLAS_DIAG_UNIT == 132, "Inconsistent declaration of HIPBLAS_DIAG_UNIT");
#endif // HIPBLAS_DIAG_TYPE_DECLARED

/* Declared only if HIPBLAS_SIDE_MODE_DECLARED is not already defined; otherwise C++11
 * builds check that the previously declared enumerators carry the expected values. */
#ifndef HIPBLAS_SIDE_MODE_DECLARED
#define HIPBLAS_SIDE_MODE_DECLARED
/*! \brief Indicates the side matrix A is located on, relative to matrix B, during multiplication. */
typedef enum
{
    HIPBLAS_SIDE_LEFT
    = 141, /**< Multiply general matrix by symmetric, Hermitian, or triangular matrix on the left. */
    HIPBLAS_SIDE_RIGHT
    = 142, /**< Multiply general matrix by symmetric, Hermitian, or triangular matrix on the right. */
    HIPBLAS_SIDE_BOTH = 143 /**< NOTE(review): undocumented here; presumably refers to both sides — confirm. */
} hipblasSideMode_t;
#elif __cplusplus >= 201103L
static_assert(HIPBLAS_SIDE_LEFT == 141, "Inconsistent declaration of HIPBLAS_SIDE_LEFT");
static_assert(HIPBLAS_SIDE_RIGHT == 142, "Inconsistent declaration of HIPBLAS_SIDE_RIGHT");
static_assert(HIPBLAS_SIDE_BOTH == 143, "Inconsistent declaration of HIPBLAS_SIDE_BOTH");
#endif // HIPBLAS_SIDE_MODE_DECLARED

/*! \brief Math mode of a handle, set with hipblasSetMathMode() and queried with hipblasGetMathMode().
 *         Support for each mode depends on the backend, as noted on the individual enumerators. */
typedef enum
{
    HIPBLAS_DEFAULT_MATH,
    HIPBLAS_XF32_XDL_MATH, /* equivalent to rocblas_xf32_xdl_math_op, not supported in cuBLAS */
    HIPBLAS_PEDANTIC_MATH, /* equivalent to CUBLAS_PEDANTIC_MATH, not yet supported in rocBLAS */
    HIPBLAS_TF32_TENSOR_OP_MATH, /* use TF32 tensor cores with cuBLAS backend, not supported in rocBLAS */
    HIPBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION, /* see cuBLAS documentation, not supported in rocBLAS */
    HIPBLAS_TENSOR_OP_MATH /* DEPRECATED, use Tensor Core operations with cuBLAS backend */
} hipblasMath_t;

/*! \brief GEMM algorithm selector. ``HIPBLAS_GEMM_DEFAULT`` is the only enumerator defined. */
typedef enum
{
    HIPBLAS_GEMM_DEFAULT = 160 /**<  enumerator ``rocblas_gemm_algo_standard`` */
} hipblasGemmAlgo_t;

/*! \brief Indicates whether atomic operations are allowed. Not allowing atomic operations can generally improve determinism and repeatability of results at a cost of performance.
 *         By default, the rocBLAS backend allows atomic operations, while the cuBLAS backend disallows atomic operations. See the backend documentation
 *         for more details. The mode is set with hipblasSetAtomicsMode() and queried with hipblasGetAtomicsMode(). */
typedef enum
{
    HIPBLAS_ATOMICS_NOT_ALLOWED = 0, /**<  Algorithms will refrain from atomics where applicable. */
    HIPBLAS_ATOMICS_ALLOWED = 1 /**< Algorithms will take advantage of atomics where applicable. */
} hipblasAtomicsMode_t;

/*! \brief Control flags passed into the gemm-ex-with-flags routines. Only relevant with the rocBLAS backend. See the rocBLAS documentation
 *         for more information. Each non-zero enumerator is a distinct single bit. */
typedef enum
{
    HIPBLAS_GEMM_FLAGS_NONE = 0x0, /**< Default empty flags */
    HIPBLAS_GEMM_FLAGS_USE_CU_EFFICIENCY
    = 0x2, /**< enumerator rocblas_gemm_flags_use_cu_efficiency */
    HIPBLAS_GEMM_FLAGS_FP16_ALT_IMPL = 0x4, /**< enumerator rocblas_gemm_flags_fp16_alt_impl */
    HIPBLAS_GEMM_FLAGS_CHECK_SOLUTION_INDEX
    = 0x8, /**< enumerator rocblas_gemm_flags_check_solution_index */
    HIPBLAS_GEMM_FLAGS_FP16_ALT_IMPL_RNZ
    = 0x10 /**< enumerator rocblas_gemm_flags_fp16_alt_impl_rnz */
} hipblasGemmFlags_t;

#ifdef __cplusplus
extern "C" {
#endif

/*! \brief Create the hipBLAS handle.
    @param[out]
    handle      [hipblasHandle_t*]
                pointer through which the newly created handle is returned.
 */
HIPBLAS_EXPORT hipblasStatus_t hipblasCreate(hipblasHandle_t* handle);

/*! \brief Destroys the library context created using hipblasCreate().
    @param[in]
    handle      [hipblasHandle_t]
                the handle to destroy.
 */
HIPBLAS_EXPORT hipblasStatus_t hipblasDestroy(hipblasHandle_t handle);

/*! \brief Sets the stream for the handle
    @param[in]
    handle      [hipblasHandle_t]
                handle to the hipBLAS library context.
    @param[in]
    streamId    [hipStream_t]
                the stream to associate with the handle.
 */
HIPBLAS_EXPORT hipblasStatus_t hipblasSetStream(hipblasHandle_t handle, hipStream_t streamId);

/*! \brief Gets stream[0] for the handle
    @param[in]
    handle      [hipblasHandle_t]
                handle to the hipBLAS library context.
    @param[out]
    streamId    [hipStream_t*]
                pointer through which the stream is returned.
 */
HIPBLAS_EXPORT hipblasStatus_t hipblasGetStream(hipblasHandle_t handle, hipStream_t* streamId);

/*! \brief Sets hipBLAS pointer mode
    @param[in]
    handle      [hipblasHandle_t]
                handle to the hipBLAS library context.
    @param[in]
    mode        [hipblasPointerMode_t]
                whether scalar pointers are located on the host or on the device.
 */
HIPBLAS_EXPORT hipblasStatus_t hipblasSetPointerMode(hipblasHandle_t      handle,
                                                     hipblasPointerMode_t mode);
/*! \brief Gets hipBLAS pointer mode
    @param[in]
    handle      [hipblasHandle_t]
                handle to the hipBLAS library context.
    @param[out]
    mode        [hipblasPointerMode_t*]
                pointer through which the current pointer mode is returned.
 */
HIPBLAS_EXPORT hipblasStatus_t hipblasGetPointerMode(hipblasHandle_t       handle,
                                                     hipblasPointerMode_t* mode);

/*! \brief Set hipblas math mode
    @param[in]
    handle      [hipblasHandle_t]
                handle to the hipBLAS library context.
    @param[in]
    mode        [hipblasMath_t]
                the math mode to use; see hipblasMath_t for backend support of each mode.
 */
HIPBLAS_EXPORT hipblasStatus_t hipblasSetMathMode(hipblasHandle_t handle, hipblasMath_t mode);

/*! \brief Get hipblas math mode
    @param[in]
    handle      [hipblasHandle_t]
                handle to the hipBLAS library context.
    @param[out]
    mode        [hipblasMath_t*]
                pointer through which the current math mode is returned.
 */
HIPBLAS_EXPORT hipblasStatus_t hipblasGetMathMode(hipblasHandle_t handle, hipblasMath_t* mode);

/*! \brief Set hipblas workspace to user-owned device buffer
    @param[in]
    handle               [hipblasHandle_t]
                         handle to the hipBLAS library context.
    @param[in]
    workspace            pointer to the user-owned device buffer.
    @param[in]
    workspaceSizeInBytes [size_t]
                         size of the workspace buffer in bytes.
 */
HIPBLAS_EXPORT hipblasStatus_t hipblasSetWorkspace(hipblasHandle_t handle,
                                                   void*           workspace,
                                                   size_t          workspaceSizeInBytes);

/*! \brief Copy vector from host to device.
    @param[in]
    n           [int]
                number of elements in the vector.
    @param[in]
    elemSize    [int]
                number of bytes per element in the vector.
    @param[in]
    x           pointer to vector on the host.
    @param[in]
    incx        [int]
                specifies the increment for the elements of the vector x.
    @param[out]
    y           pointer to vector on the device.
    @param[in]
    incy        [int]
                specifies the increment for the elements of the vector y.
     ********************************************************************/
HIPBLAS_EXPORT hipblasStatus_t
    hipblasSetVector(int n, int elemSize, const void* x, int incx, void* y, int incy);

/*! \brief Copy vector from device to host.
    @param[in]
    n           [int]
                number of elements in the vector.
    @param[in]
    elemSize    [int]
                number of bytes per element in the vector.
    @param[in]
    x           pointer to vector on the device.
    @param[in]
    incx        [int]
                specifies the increment for the elements of the vector x.
    @param[out]
    y           pointer to vector on the host.
    @param[in]
    incy        [int]
                specifies the increment for the elements of the vector y.
     ********************************************************************/
HIPBLAS_EXPORT hipblasStatus_t
    hipblasGetVector(int n, int elemSize, const void* x, int incx, void* y, int incy);

/*! \brief Copy matrix from host to device.
    @param[in]
    rows        [int]
                number of rows in the matrix.
    @param[in]
    cols        [int]
                number of columns in the matrix.
    @param[in]
    elemSize   [int]
                number of bytes per element in the matrix.
    @param[in]
    AP          pointer to the source matrix on the host.
    @param[in]
    lda         [int]
                specifies the leading dimension of AP. lda >= rows.
    @param[out]
    BP           pointer to the destination matrix on the device.
    @param[in]
    ldb         [int]
                specifies the leading dimension of BP. ldb >= rows.
     ********************************************************************/
HIPBLAS_EXPORT hipblasStatus_t
    hipblasSetMatrix(int rows, int cols, int elemSize, const void* AP, int lda, void* BP, int ldb);

/*! \brief Copy matrix from device to host.
    @param[in]
    rows        [int]
                number of rows in the matrix.
    @param[in]
    cols        [int]
                number of columns in the matrix.
    @param[in]
    elemSize   [int]
                number of bytes per element in the matrix.
    @param[in]
    AP          pointer to the source matrix on the device.
    @param[in]
    lda         [int]
                specifies the leading dimension of AP. lda >= rows.
    @param[out]
    BP          pointer to the destination matrix on the host.
    @param[in]
    ldb         [int]
                specifies the leading dimension of BP. ldb >= rows.
     ********************************************************************/
HIPBLAS_EXPORT hipblasStatus_t
    hipblasGetMatrix(int rows, int cols, int elemSize, const void* AP, int lda, void* BP, int ldb);

/*! \brief Asynchronously copy vector from host to device.
    \details
    ``hipblasSetVectorAsync`` copies a vector from pinned host memory to device memory asynchronously.
    Memory on the host must be allocated with ``hipHostMalloc`` or the transfer will be synchronous.
    @param[in]
    n           [int]
                number of elements in the vector.
    @param[in]
    elemSize   [int]
                number of bytes per element in the vector.
    @param[in]
    x           pointer to vector on the host.
    @param[in]
    incx        [int]
                specifies the increment for the elements of the vector x.
    @param[out]
    y           pointer to vector on the device.
    @param[in]
    incy        [int]
                specifies the increment for the elements of the vector y.
    @param[in]
    stream      specifies the stream into which this transfer request is queued.
     ********************************************************************/
HIPBLAS_EXPORT hipblasStatus_t hipblasSetVectorAsync(
    int n, int elemSize, const void* x, int incx, void* y, int incy, hipStream_t stream);

/*! \brief Asynchronously copy vector from device to host.
    \details
    ``hipblasGetVectorAsync`` copies a vector from device memory to pinned host memory asynchronously.
    Memory on the host must be allocated with ``hipHostMalloc`` or the transfer will be synchronous.
    @param[in]
    n           [int]
                number of elements in the vector.
    @param[in]
    elemSize   [int]
                number of bytes per element in the vector.
    @param[in]
    x           pointer to vector on the device.
    @param[in]
    incx        [int]
                specifies the increment for the elements of the vector x.
    @param[out]
    y           pointer to vector on the host.
    @param[in]
    incy        [int]
                specifies the increment for the elements of the vector y.
    @param[in]
    stream      specifies the stream into which this transfer request is queued.
     ********************************************************************/
HIPBLAS_EXPORT hipblasStatus_t hipblasGetVectorAsync(
    int n, int elemSize, const void* x, int incx, void* y, int incy, hipStream_t stream);

/*! \brief Asynchronously copy matrix from host to device.
    \details
    ``hipblasSetMatrixAsync`` copies a matrix from pinned host memory to device memory asynchronously.
    Memory on the host must be allocated with ``hipHostMalloc`` or the transfer will be synchronous.
    @param[in]
    rows        [int]
                number of rows in matrices.
    @param[in]
    cols        [int]
                number of columns in matrices.
    @param[in]
    elemSize   [int]
                number of bytes per element in the matrix.
    @param[in]
    AP           pointer to the source matrix on the host.
    @param[in]
    lda         [int]
                specifies the leading dimension of AP. lda >= rows.
    @param[out]
    BP           pointer to the destination matrix on the device.
    @param[in]
    ldb         [int]
                specifies the leading dimension of BP. ldb >= rows.
    @param[in]
    stream      specifies the stream into which this transfer request is queued.
     ********************************************************************/
HIPBLAS_EXPORT hipblasStatus_t hipblasSetMatrixAsync(int         rows,
                                                     int         cols,
                                                     int         elemSize,
                                                     const void* AP,
                                                     int         lda,
                                                     void*       BP,
                                                     int         ldb,
                                                     hipStream_t stream);
/*! \brief Asynchronously copy matrix from device to host.
    \details
    ``hipblasGetMatrixAsync`` copies a matrix from device memory to pinned host memory asynchronously.
    Memory on the host must be allocated with ``hipHostMalloc`` or the transfer will be synchronous.
    @param[in]
    rows        [int]
                number of rows in matrices.
    @param[in]
    cols        [int]
                number of columns in matrices.
    @param[in]
    elemSize   [int]
                number of bytes per element in the matrix.
    @param[in]
    AP          pointer to the source matrix on the device.
    @param[in]
    lda         [int]
                specifies the leading dimension of AP. lda >= rows.
    @param[out]
    BP           pointer to the destination matrix on the host.
    @param[in]
    ldb         [int]
                specifies the leading dimension of BP. ldb >= rows.
    @param[in]
    stream      specifies the stream into which this transfer request is queued.
     ********************************************************************/
HIPBLAS_EXPORT hipblasStatus_t hipblasGetMatrixAsync(int         rows,
                                                     int         cols,
                                                     int         elemSize,
                                                     const void* AP,
                                                     int         lda,
                                                     void*       BP,
                                                     int         ldb,
                                                     hipStream_t stream);

/*! \brief Sets the atomics mode of the handle.
    @param[in]
    handle       [hipblasHandle_t]
                 handle to the hipBLAS library context.
    @param[in]
    atomics_mode [hipblasAtomicsMode_t]
                 whether atomic operations are allowed.
 */
HIPBLAS_EXPORT hipblasStatus_t hipblasSetAtomicsMode(hipblasHandle_t      handle,
                                                     hipblasAtomicsMode_t atomics_mode);

/*! \brief Gets the atomics mode of the handle.
    @param[in]
    handle       [hipblasHandle_t]
                 handle to the hipBLAS library context.
    @param[out]
    atomics_mode [hipblasAtomicsMode_t*]
                 pointer through which the current atomics mode is returned.
 */
HIPBLAS_EXPORT hipblasStatus_t hipblasGetAtomicsMode(hipblasHandle_t       handle,
                                                     hipblasAtomicsMode_t* atomics_mode);

/*
 * ===========================================================================
 *    level 1 BLAS
 * ===========================================================================
 */

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The amax functions find the first index of the element of maximum magnitude of a vector ``x``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    The ``_64`` variants take ``int64_t`` for n and incx and store the index as ``int64_t``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              the number of elements in x.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[inout]
    result
              device pointer or host pointer to store the amax index.
              Return value is 0 if n, incx<=0.

    ********************************************************************/
HIPBLAS_EXPORT hipblasStatus_t
    hipblasIsamax(hipblasHandle_t handle, int n, const float* x, int incx, int* result);

HIPBLAS_EXPORT hipblasStatus_t
    hipblasIdamax(hipblasHandle_t handle, int n, const double* x, int incx, int* result);

HIPBLAS_EXPORT hipblasStatus_t
    hipblasIcamax(hipblasHandle_t handle, int n, const hipComplex* x, int incx, int* result);

HIPBLAS_EXPORT hipblasStatus_t
    hipblasIzamax(hipblasHandle_t handle, int n, const hipDoubleComplex* x, int incx, int* result);

// 64-bit interface
HIPBLAS_EXPORT hipblasStatus_t hipblasIsamax_64(
    hipblasHandle_t handle, int64_t n, const float* x, int64_t incx, int64_t* result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIdamax_64(
    hipblasHandle_t handle, int64_t n, const double* x, int64_t incx, int64_t* result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIcamax_64(
    hipblasHandle_t handle, int64_t n, const hipComplex* x, int64_t incx, int64_t* result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIzamax_64(
    hipblasHandle_t handle, int64_t n, const hipDoubleComplex* x, int64_t incx, int64_t* result);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
     The amaxBatched functions find the first index of the element of maximum magnitude of each vector ``x_i`` in a batch, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    The ``_64`` variants take ``int64_t`` for n, incx, and batchCount and store the indices as ``int64_t``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              number of elements in each vector x_i.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i. incx must be > 0.
    @param[in]
    batchCount [int]
              number of instances in the batch. Must be > 0.
    @param[out]
    result
              device or host array of batchCount size for results.
              Return value is 0 if n, incx<=0.

    ********************************************************************/
HIPBLAS_EXPORT hipblasStatus_t hipblasIsamaxBatched(
    hipblasHandle_t handle, int n, const float* const x[], int incx, int batchCount, int* result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIdamaxBatched(
    hipblasHandle_t handle, int n, const double* const x[], int incx, int batchCount, int* result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIcamaxBatched(hipblasHandle_t         handle,
                                                    int                     n,
                                                    const hipComplex* const x[],
                                                    int                     incx,
                                                    int                     batchCount,
                                                    int*                    result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIzamaxBatched(hipblasHandle_t               handle,
                                                    int                           n,
                                                    const hipDoubleComplex* const x[],
                                                    int                           incx,
                                                    int                           batchCount,
                                                    int*                          result);

// 64-bit interface
HIPBLAS_EXPORT hipblasStatus_t hipblasIsamaxBatched_64(hipblasHandle_t    handle,
                                                       int64_t            n,
                                                       const float* const x[],
                                                       int64_t            incx,
                                                       int64_t            batchCount,
                                                       int64_t*           result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIdamaxBatched_64(hipblasHandle_t     handle,
                                                       int64_t             n,
                                                       const double* const x[],
                                                       int64_t             incx,
                                                       int64_t             batchCount,
                                                       int64_t*            result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIcamaxBatched_64(hipblasHandle_t         handle,
                                                       int64_t                 n,
                                                       const hipComplex* const x[],
                                                       int64_t                 incx,
                                                       int64_t                 batchCount,
                                                       int64_t*                result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIzamaxBatched_64(hipblasHandle_t               handle,
                                                       int64_t                       n,
                                                       const hipDoubleComplex* const x[],
                                                       int64_t                       incx,
                                                       int64_t                       batchCount,
                                                       int64_t*                      result);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
     The amaxStridedBatched functions find the first index of the element of maximum magnitude of each vector ``x_i`` in a batch, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              number of elements in each vector x_i.
    @param[in]
    x         device pointer to the first vector x_1.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i. incx must be > 0.
    @param[in]
    stridex   [hipblasStride]
              specifies the pointer increment between one x_i and the next x_(i + 1).
    @param[in]
    batchCount [int]
              number of instances in the batch.
    @param[out]
    result
              device or host pointer for storing contiguous batchCount results.
              Return value is 0 if n <= 0, incx<=0.

    ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasIsamaxStridedBatched(hipblasHandle_t handle,
                                                           int             n,
                                                           const float*    x,
                                                           int             incx,
                                                           hipblasStride   stridex,
                                                           int             batchCount,
                                                           int*            result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIdamaxStridedBatched(hipblasHandle_t handle,
                                                           int             n,
                                                           const double*   x,
                                                           int             incx,
                                                           hipblasStride   stridex,
                                                           int             batchCount,
                                                           int*            result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIcamaxStridedBatched(hipblasHandle_t   handle,
                                                           int               n,
                                                           const hipComplex* x,
                                                           int               incx,
                                                           hipblasStride     stridex,
                                                           int               batchCount,
                                                           int*              result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIzamaxStridedBatched(hipblasHandle_t         handle,
                                                           int                     n,
                                                           const hipDoubleComplex* x,
                                                           int                     incx,
                                                           hipblasStride           stridex,
                                                           int                     batchCount,
                                                           int*                    result);

// 64-bit interface: n, incx, and batchCount are int64_t, and result is an int64_t*.
// stridex keeps the hipblasStride type used by the 32-bit declarations.
HIPBLAS_EXPORT hipblasStatus_t hipblasIsamaxStridedBatched_64(hipblasHandle_t handle,
                                                              int64_t         n,
                                                              const float*    x,
                                                              int64_t         incx,
                                                              hipblasStride   stridex,
                                                              int64_t         batchCount,
                                                              int64_t*        result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIdamaxStridedBatched_64(hipblasHandle_t handle,
                                                              int64_t         n,
                                                              const double*   x,
                                                              int64_t         incx,
                                                              hipblasStride   stridex,
                                                              int64_t         batchCount,
                                                              int64_t*        result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIcamaxStridedBatched_64(hipblasHandle_t   handle,
                                                              int64_t           n,
                                                              const hipComplex* x,
                                                              int64_t           incx,
                                                              hipblasStride     stridex,
                                                              int64_t           batchCount,
                                                              int64_t*          result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIzamaxStridedBatched_64(hipblasHandle_t         handle,
                                                              int64_t                 n,
                                                              const hipDoubleComplex* x,
                                                              int64_t                 incx,
                                                              hipblasStride           stridex,
                                                              int64_t                 batchCount,
                                                              int64_t*                result);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The amin functions find the first index of the element of minimum magnitude of a vector ``x``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              the number of elements in x.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[inout]
    result
              device pointer or host pointer to store the amin index.
              Return value is 0 if n <= 0 or incx <= 0.

    ********************************************************************/

// 32-bit interface: n and incx are int, and result is an int*.
// The s, d, c, and z variants take x as float, double, hipComplex, and hipDoubleComplex.
HIPBLAS_EXPORT hipblasStatus_t
    hipblasIsamin(hipblasHandle_t handle, int n, const float* x, int incx, int* result);

HIPBLAS_EXPORT hipblasStatus_t
    hipblasIdamin(hipblasHandle_t handle, int n, const double* x, int incx, int* result);

HIPBLAS_EXPORT hipblasStatus_t
    hipblasIcamin(hipblasHandle_t handle, int n, const hipComplex* x, int incx, int* result);

HIPBLAS_EXPORT hipblasStatus_t
    hipblasIzamin(hipblasHandle_t handle, int n, const hipDoubleComplex* x, int incx, int* result);

// 64-bit interface: n and incx are int64_t, and result is an int64_t*.
HIPBLAS_EXPORT hipblasStatus_t hipblasIsamin_64(
    hipblasHandle_t handle, int64_t n, const float* x, int64_t incx, int64_t* result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIdamin_64(
    hipblasHandle_t handle, int64_t n, const double* x, int64_t incx, int64_t* result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIcamin_64(
    hipblasHandle_t handle, int64_t n, const hipComplex* x, int64_t incx, int64_t* result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIzamin_64(
    hipblasHandle_t handle, int64_t n, const hipDoubleComplex* x, int64_t incx, int64_t* result);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The aminBatched functions find the first index of the element of minimum magnitude of each vector ``x_i`` in a batch, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              number of elements in each vector x_i.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i. incx must be > 0.
    @param[in]
    batchCount [int]
              number of instances in the batch. Must be > 0.
    @param[out]
    result
              device or host pointer to an array of batchCount size for results.
              Return value is 0 if n <= 0 or incx <= 0.

    ********************************************************************/
// 32-bit interface: n, incx, and batchCount are int, and result is an int*.
// x is declared as an array of const pointers to const elements (const T* const x[]).
HIPBLAS_EXPORT hipblasStatus_t hipblasIsaminBatched(
    hipblasHandle_t handle, int n, const float* const x[], int incx, int batchCount, int* result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIdaminBatched(
    hipblasHandle_t handle, int n, const double* const x[], int incx, int batchCount, int* result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIcaminBatched(hipblasHandle_t         handle,
                                                    int                     n,
                                                    const hipComplex* const x[],
                                                    int                     incx,
                                                    int                     batchCount,
                                                    int*                    result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIzaminBatched(hipblasHandle_t               handle,
                                                    int                           n,
                                                    const hipDoubleComplex* const x[],
                                                    int                           incx,
                                                    int                           batchCount,
                                                    int*                          result);

// 64-bit interface: n, incx, and batchCount are int64_t, and result is an int64_t*.
// x keeps the same array-of-pointers form (const T* const x[]) as the 32-bit declarations.
HIPBLAS_EXPORT hipblasStatus_t hipblasIsaminBatched_64(hipblasHandle_t    handle,
                                                       int64_t            n,
                                                       const float* const x[],
                                                       int64_t            incx,
                                                       int64_t            batchCount,
                                                       int64_t*           result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIdaminBatched_64(hipblasHandle_t     handle,
                                                       int64_t             n,
                                                       const double* const x[],
                                                       int64_t             incx,
                                                       int64_t             batchCount,
                                                       int64_t*            result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIcaminBatched_64(hipblasHandle_t         handle,
                                                       int64_t                 n,
                                                       const hipComplex* const x[],
                                                       int64_t                 incx,
                                                       int64_t                 batchCount,
                                                       int64_t*                result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIzaminBatched_64(hipblasHandle_t               handle,
                                                       int64_t                       n,
                                                       const hipDoubleComplex* const x[],
                                                       int64_t                       incx,
                                                       int64_t                       batchCount,
                                                       int64_t*                      result);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
     The aminStridedBatched functions find the first index of the element of minimum magnitude of each vector ``x_i`` in a batch, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              number of elements in each vector x_i.
    @param[in]
    x         device pointer to the first vector x_1.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i. incx must be > 0.
    @param[in]
    stridex   [hipblasStride]
              specifies the pointer increment between one x_i and the next x_(i + 1).
    @param[in]
    batchCount [int]
              number of instances in the batch.
    @param[out]
    result
              device or host pointer to array for storing contiguous batchCount results.
              Return value is 0 if n <= 0 or incx <= 0.

    ********************************************************************/

// 32-bit interface: n, incx, and batchCount are int, and result is an int*.
// x is a single const pointer paired with a hipblasStride stridex.
HIPBLAS_EXPORT hipblasStatus_t hipblasIsaminStridedBatched(hipblasHandle_t handle,
                                                           int             n,
                                                           const float*    x,
                                                           int             incx,
                                                           hipblasStride   stridex,
                                                           int             batchCount,
                                                           int*            result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIdaminStridedBatched(hipblasHandle_t handle,
                                                           int             n,
                                                           const double*   x,
                                                           int             incx,
                                                           hipblasStride   stridex,
                                                           int             batchCount,
                                                           int*            result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIcaminStridedBatched(hipblasHandle_t   handle,
                                                           int               n,
                                                           const hipComplex* x,
                                                           int               incx,
                                                           hipblasStride     stridex,
                                                           int               batchCount,
                                                           int*              result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIzaminStridedBatched(hipblasHandle_t         handle,
                                                           int                     n,
                                                           const hipDoubleComplex* x,
                                                           int                     incx,
                                                           hipblasStride           stridex,
                                                           int                     batchCount,
                                                           int*                    result);

// 64-bit interface: n, incx, and batchCount are int64_t, and result is an int64_t*.
// stridex keeps the hipblasStride type used by the 32-bit declarations.
HIPBLAS_EXPORT hipblasStatus_t hipblasIsaminStridedBatched_64(hipblasHandle_t handle,
                                                              int64_t         n,
                                                              const float*    x,
                                                              int64_t         incx,
                                                              hipblasStride   stridex,
                                                              int64_t         batchCount,
                                                              int64_t*        result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIdaminStridedBatched_64(hipblasHandle_t handle,
                                                              int64_t         n,
                                                              const double*   x,
                                                              int64_t         incx,
                                                              hipblasStride   stridex,
                                                              int64_t         batchCount,
                                                              int64_t*        result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIcaminStridedBatched_64(hipblasHandle_t   handle,
                                                              int64_t           n,
                                                              const hipComplex* x,
                                                              int64_t           incx,
                                                              hipblasStride     stridex,
                                                              int64_t           batchCount,
                                                              int64_t*          result);

HIPBLAS_EXPORT hipblasStatus_t hipblasIzaminStridedBatched_64(hipblasHandle_t         handle,
                                                              int64_t                 n,
                                                              const hipDoubleComplex* x,
                                                              int64_t                 incx,
                                                              hipblasStride           stridex,
                                                              int64_t                 batchCount,
                                                              int64_t*                result);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The asum functions compute the sum of the magnitudes of elements of a real vector ``x``,
         or the sum of the magnitudes of the real and imaginary parts of elements if ``x`` is a complex vector.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              the number of elements in x.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x. incx must be > 0.
    @param[inout]
    result
              device pointer or host pointer to store the asum result.
              Return value is 0.0 if n <= 0.

    ********************************************************************/

// 32-bit interface: n and incx are int.
// result is a real-valued pointer even for complex input: float* for Sasum/Scasum,
// double* for Dasum/Dzasum.
HIPBLAS_EXPORT hipblasStatus_t
    hipblasSasum(hipblasHandle_t handle, int n, const float* x, int incx, float* result);

HIPBLAS_EXPORT hipblasStatus_t
    hipblasDasum(hipblasHandle_t handle, int n, const double* x, int incx, double* result);

HIPBLAS_EXPORT hipblasStatus_t
    hipblasScasum(hipblasHandle_t handle, int n, const hipComplex* x, int incx, float* result);

HIPBLAS_EXPORT hipblasStatus_t hipblasDzasum(
    hipblasHandle_t handle, int n, const hipDoubleComplex* x, int incx, double* result);

// 64-bit interface: n and incx are int64_t; the result pointer types (float* / double*)
// are the same as in the 32-bit declarations.
HIPBLAS_EXPORT hipblasStatus_t
    hipblasSasum_64(hipblasHandle_t handle, int64_t n, const float* x, int64_t incx, float* result);

HIPBLAS_EXPORT hipblasStatus_t hipblasDasum_64(
    hipblasHandle_t handle, int64_t n, const double* x, int64_t incx, double* result);

HIPBLAS_EXPORT hipblasStatus_t hipblasScasum_64(
    hipblasHandle_t handle, int64_t n, const hipComplex* x, int64_t incx, float* result);

HIPBLAS_EXPORT hipblasStatus_t hipblasDzasum_64(
    hipblasHandle_t handle, int64_t n, const hipDoubleComplex* x, int64_t incx, double* result);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The asumBatched functions compute the sum of the magnitudes of the elements in a batch of real vectors ``x_i``,
        or the sum of the magnitudes of the real and imaginary parts of elements if ``x_i`` is a complex
        vector, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              number of elements in each vector x_i.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i. incx must be > 0.
    @param[in]
    batchCount [int]
              number of instances in the batch.
    @param[out]
    result
              device array or host array of batchCount size for results.
              Return value is 0.0 if n <= 0 or incx <= 0.

    ********************************************************************/

// 32-bit interface: n, incx, and batchCount are int.
// x is declared as an array of const pointers to const elements (const T* const x[]).
// result is float* for the Sasum/Scasum variants and double* for the Dasum/Dzasum variants.
HIPBLAS_EXPORT hipblasStatus_t hipblasSasumBatched(
    hipblasHandle_t handle, int n, const float* const x[], int incx, int batchCount, float* result);

HIPBLAS_EXPORT hipblasStatus_t hipblasDasumBatched(hipblasHandle_t     handle,
                                                   int                 n,
                                                   const double* const x[],
                                                   int                 incx,
                                                   int                 batchCount,
                                                   double*             result);

HIPBLAS_EXPORT hipblasStatus_t hipblasScasumBatched(hipblasHandle_t         handle,
                                                    int                     n,
                                                    const hipComplex* const x[],
                                                    int                     incx,
                                                    int                     batchCount,
                                                    float*                  result);

HIPBLAS_EXPORT hipblasStatus_t hipblasDzasumBatched(hipblasHandle_t               handle,
                                                    int                           n,
                                                    const hipDoubleComplex* const x[],
                                                    int                           incx,
                                                    int                           batchCount,
                                                    double*                       result);

// 64-bit interface: n, incx, and batchCount are int64_t; the result pointer types
// (float* / double*) are the same as in the 32-bit declarations.
HIPBLAS_EXPORT hipblasStatus_t hipblasSasumBatched_64(hipblasHandle_t    handle,
                                                      int64_t            n,
                                                      const float* const x[],
                                                      int64_t            incx,
                                                      int64_t            batchCount,
                                                      float*             result);

HIPBLAS_EXPORT hipblasStatus_t hipblasDasumBatched_64(hipblasHandle_t     handle,
                                                      int64_t             n,
                                                      const double* const x[],
                                                      int64_t             incx,
                                                      int64_t             batchCount,
                                                      double*             result);

HIPBLAS_EXPORT hipblasStatus_t hipblasScasumBatched_64(hipblasHandle_t         handle,
                                                       int64_t                 n,
                                                       const hipComplex* const x[],
                                                       int64_t                 incx,
                                                       int64_t                 batchCount,
                                                       float*                  result);

HIPBLAS_EXPORT hipblasStatus_t hipblasDzasumBatched_64(hipblasHandle_t               handle,
                                                       int64_t                       n,
                                                       const hipDoubleComplex* const x[],
                                                       int64_t                       incx,
                                                       int64_t                       batchCount,
                                                       double*                       result);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The asumStridedBatched functions compute the sum of the magnitudes of elements of real vectors ``x_i``,
        or the sum of the magnitudes of the real and imaginary parts of elements if ``x_i`` is a complex
        vector, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              number of elements in each vector x_i.
    @param[in]
    x         device pointer to the first vector x_1.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i. incx must be > 0.
    @param[in]
    stridex   [hipblasStride]
              stride from the start of one vector (x_i) to the next one (x_(i+1)).
              There are no restrictions placed on stridex. However, the user should
              take care to ensure that stridex is of an appropriate size. For a typical
              case, this means stridex >= n * incx.
    @param[in]
    batchCount [int]
              number of instances in the batch.
    @param[out]
    result
              device pointer or host pointer to array for storing contiguous batchCount results.
              Return value is 0.0 if n <= 0 or incx <= 0.

    ********************************************************************/

// 32-bit interface: n, incx, and batchCount are int.
// x is a single const pointer paired with a hipblasStride stridex.
// result is float* for the Sasum/Scasum variants and double* for the Dasum/Dzasum variants.
HIPBLAS_EXPORT hipblasStatus_t hipblasSasumStridedBatched(hipblasHandle_t handle,
                                                          int             n,
                                                          const float*    x,
                                                          int             incx,
                                                          hipblasStride   stridex,
                                                          int             batchCount,
                                                          float*          result);

HIPBLAS_EXPORT hipblasStatus_t hipblasDasumStridedBatched(hipblasHandle_t handle,
                                                          int             n,
                                                          const double*   x,
                                                          int             incx,
                                                          hipblasStride   stridex,
                                                          int             batchCount,
                                                          double*         result);

HIPBLAS_EXPORT hipblasStatus_t hipblasScasumStridedBatched(hipblasHandle_t   handle,
                                                           int               n,
                                                           const hipComplex* x,
                                                           int               incx,
                                                           hipblasStride     stridex,
                                                           int               batchCount,
                                                           float*            result);

HIPBLAS_EXPORT hipblasStatus_t hipblasDzasumStridedBatched(hipblasHandle_t         handle,
                                                           int                     n,
                                                           const hipDoubleComplex* x,
                                                           int                     incx,
                                                           hipblasStride           stridex,
                                                           int                     batchCount,
                                                           double*                 result);

// 64-bit interface: n, incx, and batchCount are int64_t; stridex stays hipblasStride and
// the result pointer types (float* / double*) are the same as in the 32-bit declarations.
HIPBLAS_EXPORT hipblasStatus_t hipblasSasumStridedBatched_64(hipblasHandle_t handle,
                                                             int64_t         n,
                                                             const float*    x,
                                                             int64_t         incx,
                                                             hipblasStride   stridex,
                                                             int64_t         batchCount,
                                                             float*          result);

HIPBLAS_EXPORT hipblasStatus_t hipblasDasumStridedBatched_64(hipblasHandle_t handle,
                                                             int64_t         n,
                                                             const double*   x,
                                                             int64_t         incx,
                                                             hipblasStride   stridex,
                                                             int64_t         batchCount,
                                                             double*         result);

HIPBLAS_EXPORT hipblasStatus_t hipblasScasumStridedBatched_64(hipblasHandle_t   handle,
                                                              int64_t           n,
                                                              const hipComplex* x,
                                                              int64_t           incx,
                                                              hipblasStride     stridex,
                                                              int64_t           batchCount,
                                                              float*            result);

HIPBLAS_EXPORT hipblasStatus_t hipblasDzasumStridedBatched_64(hipblasHandle_t         handle,
                                                              int64_t                 n,
                                                              const hipDoubleComplex* x,
                                                              int64_t                 incx,
                                                              hipblasStride           stridex,
                                                              int64_t                 batchCount,
                                                              double*                 result);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The axpy functions compute a constant ``alpha`` multiplied by vector ``x`` plus vector ``y``.

        y := alpha * x + y

    - Supported precisions in rocBLAS : ``h``, ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              the number of elements in x and y.
    @param[in]
    alpha     device pointer or host pointer to specify the scalar alpha.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[inout]
    y         device pointer storing vector y.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.

    ********************************************************************/
// 32-bit interface: n, incx, and incy are int.
// In each variant alpha and x are const pointers and y is a non-const pointer, all of the
// same element type (hipblasHalf, float, double, hipComplex, or hipDoubleComplex).
HIPBLAS_EXPORT hipblasStatus_t hipblasHaxpy(hipblasHandle_t    handle,
                                            int                n,
                                            const hipblasHalf* alpha,
                                            const hipblasHalf* x,
                                            int                incx,
                                            hipblasHalf*       y,
                                            int                incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasSaxpy(hipblasHandle_t handle,
                                            int             n,
                                            const float*    alpha,
                                            const float*    x,
                                            int             incx,
                                            float*          y,
                                            int             incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasDaxpy(hipblasHandle_t handle,
                                            int             n,
                                            const double*   alpha,
                                            const double*   x,
                                            int             incx,
                                            double*         y,
                                            int             incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasCaxpy(hipblasHandle_t   handle,
                                            int               n,
                                            const hipComplex* alpha,
                                            const hipComplex* x,
                                            int               incx,
                                            hipComplex*       y,
                                            int               incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasZaxpy(hipblasHandle_t         handle,
                                            int                     n,
                                            const hipDoubleComplex* alpha,
                                            const hipDoubleComplex* x,
                                            int                     incx,
                                            hipDoubleComplex*       y,
                                            int                     incy);

// 64-bit interface
HIPBLAS_EXPORT hipblasStatus_t hipblasHaxpy_64(hipblasHandle_t    handle,
                                               int64_t            n,
                                               const hipblasHalf* alpha,
                                               const hipblasHalf* x,
                                               int64_t            incx,
                                               hipblasHalf*       y,
                                               int64_t            incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasSaxpy_64(hipblasHandle_t handle,
                                               int64_t         n,
                                               const float*    alpha,
                                               const float*    x,
                                               int64_t         incx,
                                               float*          y,
                                               int64_t         incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasDaxpy_64(hipblasHandle_t handle,
                                               int64_t         n,
                                               const double*   alpha,
                                               const double*   x,
                                               int64_t         incx,
                                               double*         y,
                                               int64_t         incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasCaxpy_64(hipblasHandle_t   handle,
                                               int64_t           n,
                                               const hipComplex* alpha,
                                               const hipComplex* x,
                                               int64_t           incx,
                                               hipComplex*       y,
                                               int64_t           incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasZaxpy_64(hipblasHandle_t         handle,
                                               int64_t                 n,
                                               const hipDoubleComplex* alpha,
                                               const hipDoubleComplex* x,
                                               int64_t                 incx,
                                               hipDoubleComplex*       y,
                                               int64_t                 incy);

//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The axpyBatched functions compute ``y := alpha * x + y`` over a set of batched vectors.

    The functions with the ``_64`` suffix take ``int64_t`` instead of ``int`` for
    n, incx, incy, and batchCount.

    - Supported precisions in rocBLAS : ``h``, ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              the number of elements in x and y.
    @param[in]
    alpha     specifies the scalar alpha.
    @param[in]
    x         array of pointers, one per batch instance, each storing a vector x on the GPU.
    @param[inout]
    y         array of pointers, one per batch instance, each storing a vector y on the GPU.
              Each y is read as an input and overwritten with the result.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.

    @param[in]
    batchCount [int]
              number of instances in the batch.

    ********************************************************************/
HIPBLAS_EXPORT hipblasStatus_t hipblasHaxpyBatched(hipblasHandle_t          handle,
                                                   int                      n,
                                                   const hipblasHalf*       alpha,
                                                   const hipblasHalf* const x[],
                                                   int                      incx,
                                                   hipblasHalf* const       y[],
                                                   int                      incy,
                                                   int                      batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasSaxpyBatched(hipblasHandle_t    handle,
                                                   int                n,
                                                   const float*       alpha,
                                                   const float* const x[],
                                                   int                incx,
                                                   float* const       y[],
                                                   int                incy,
                                                   int                batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDaxpyBatched(hipblasHandle_t     handle,
                                                   int                 n,
                                                   const double*       alpha,
                                                   const double* const x[],
                                                   int                 incx,
                                                   double* const       y[],
                                                   int                 incy,
                                                   int                 batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCaxpyBatched(hipblasHandle_t         handle,
                                                   int                     n,
                                                   const hipComplex*       alpha,
                                                   const hipComplex* const x[],
                                                   int                     incx,
                                                   hipComplex* const       y[],
                                                   int                     incy,
                                                   int                     batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZaxpyBatched(hipblasHandle_t               handle,
                                                   int                           n,
                                                   const hipDoubleComplex*       alpha,
                                                   const hipDoubleComplex* const x[],
                                                   int                           incx,
                                                   hipDoubleComplex* const       y[],
                                                   int                           incy,
                                                   int                           batchCount);

// 64-bit interface
HIPBLAS_EXPORT hipblasStatus_t hipblasHaxpyBatched_64(hipblasHandle_t          handle,
                                                      int64_t                  n,
                                                      const hipblasHalf*       alpha,
                                                      const hipblasHalf* const x[],
                                                      int64_t                  incx,
                                                      hipblasHalf* const       y[],
                                                      int64_t                  incy,
                                                      int64_t                  batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasSaxpyBatched_64(hipblasHandle_t    handle,
                                                      int64_t            n,
                                                      const float*       alpha,
                                                      const float* const x[],
                                                      int64_t            incx,
                                                      float* const       y[],
                                                      int64_t            incy,
                                                      int64_t            batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDaxpyBatched_64(hipblasHandle_t     handle,
                                                      int64_t             n,
                                                      const double*       alpha,
                                                      const double* const x[],
                                                      int64_t             incx,
                                                      double* const       y[],
                                                      int64_t             incy,
                                                      int64_t             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCaxpyBatched_64(hipblasHandle_t         handle,
                                                      int64_t                 n,
                                                      const hipComplex*       alpha,
                                                      const hipComplex* const x[],
                                                      int64_t                 incx,
                                                      hipComplex* const       y[],
                                                      int64_t                 incy,
                                                      int64_t                 batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZaxpyBatched_64(hipblasHandle_t               handle,
                                                      int64_t                       n,
                                                      const hipDoubleComplex*       alpha,
                                                      const hipDoubleComplex* const x[],
                                                      int64_t                       incx,
                                                      hipDoubleComplex* const       y[],
                                                      int64_t                       incy,
                                                      int64_t                       batchCount);

//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The axpyStridedBatched functions compute ``y := alpha * x + y`` over a set of strided batched vectors.

    The functions with the ``_64`` suffix take ``int64_t`` instead of ``int`` for
    n, incx, incy, and batchCount.

    - Supported precisions in rocBLAS : ``h``, ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              the number of elements in each vector x and y.
    @param[in]
    alpha     specifies the scalar alpha.
    @param[in]
    x         pointer to the first vector x of the batch on the GPU.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    stridex   [hipblasStride]
              specifies the increment between vectors of x.
    @param[inout]
    y         pointer to the first vector y of the batch on the GPU.
              Each y is read as an input and overwritten with the result.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.
    @param[in]
    stridey   [hipblasStride]
              specifies the increment between vectors of y.

    @param[in]
    batchCount [int]
              number of instances in the batch.

            ********************************************************************/
HIPBLAS_EXPORT hipblasStatus_t hipblasHaxpyStridedBatched(hipblasHandle_t    handle,
                                                          int                n,
                                                          const hipblasHalf* alpha,
                                                          const hipblasHalf* x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          hipblasHalf*       y,
                                                          int                incy,
                                                          hipblasStride      stridey,
                                                          int                batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasSaxpyStridedBatched(hipblasHandle_t handle,
                                                          int             n,
                                                          const float*    alpha,
                                                          const float*    x,
                                                          int             incx,
                                                          hipblasStride   stridex,
                                                          float*          y,
                                                          int             incy,
                                                          hipblasStride   stridey,
                                                          int             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDaxpyStridedBatched(hipblasHandle_t handle,
                                                          int             n,
                                                          const double*   alpha,
                                                          const double*   x,
                                                          int             incx,
                                                          hipblasStride   stridex,
                                                          double*         y,
                                                          int             incy,
                                                          hipblasStride   stridey,
                                                          int             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCaxpyStridedBatched(hipblasHandle_t   handle,
                                                          int               n,
                                                          const hipComplex* alpha,
                                                          const hipComplex* x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          hipComplex*       y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          int               batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZaxpyStridedBatched(hipblasHandle_t         handle,
                                                          int                     n,
                                                          const hipDoubleComplex* alpha,
                                                          const hipDoubleComplex* x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          hipDoubleComplex*       y,
                                                          int                     incy,
                                                          hipblasStride           stridey,
                                                          int                     batchCount);

// 64-bit interface
HIPBLAS_EXPORT hipblasStatus_t hipblasHaxpyStridedBatched_64(hipblasHandle_t    handle,
                                                             int64_t            n,
                                                             const hipblasHalf* alpha,
                                                             const hipblasHalf* x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             hipblasHalf*       y,
                                                             int64_t            incy,
                                                             hipblasStride      stridey,
                                                             int64_t            batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasSaxpyStridedBatched_64(hipblasHandle_t handle,
                                                             int64_t         n,
                                                             const float*    alpha,
                                                             const float*    x,
                                                             int64_t         incx,
                                                             hipblasStride   stridex,
                                                             float*          y,
                                                             int64_t         incy,
                                                             hipblasStride   stridey,
                                                             int64_t         batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDaxpyStridedBatched_64(hipblasHandle_t handle,
                                                             int64_t         n,
                                                             const double*   alpha,
                                                             const double*   x,
                                                             int64_t         incx,
                                                             hipblasStride   stridex,
                                                             double*         y,
                                                             int64_t         incy,
                                                             hipblasStride   stridey,
                                                             int64_t         batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCaxpyStridedBatched_64(hipblasHandle_t   handle,
                                                             int64_t           n,
                                                             const hipComplex* alpha,
                                                             const hipComplex* x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             hipComplex*       y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             int64_t           batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZaxpyStridedBatched_64(hipblasHandle_t         handle,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* alpha,
                                                             const hipDoubleComplex* x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             hipDoubleComplex*       y,
                                                             int64_t                 incy,
                                                             hipblasStride           stridey,
                                                             int64_t                 batchCount);

//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The copy functions copy each element ``x[i]`` into ``y[i]``, for  ``i`` = 1 , ... , ``n``.

        y := x

    The functions with the ``_64`` suffix take ``int64_t`` instead of ``int`` for
    n, incx, and incy.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              the number of elements in x to be copied to y.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[out]
    y         device pointer storing vector y.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.

            ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t
    hipblasScopy(hipblasHandle_t handle, int n, const float* x, int incx, float* y, int incy);

HIPBLAS_EXPORT hipblasStatus_t
    hipblasDcopy(hipblasHandle_t handle, int n, const double* x, int incx, double* y, int incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasCcopy(
    hipblasHandle_t handle, int n, const hipComplex* x, int incx, hipComplex* y, int incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasZcopy(hipblasHandle_t         handle,
                                            int                     n,
                                            const hipDoubleComplex* x,
                                            int                     incx,
                                            hipDoubleComplex*       y,
                                            int                     incy);

// 64-bit interface
HIPBLAS_EXPORT hipblasStatus_t hipblasScopy_64(
    hipblasHandle_t handle, int64_t n, const float* x, int64_t incx, float* y, int64_t incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasDcopy_64(
    hipblasHandle_t handle, int64_t n, const double* x, int64_t incx, double* y, int64_t incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasCcopy_64(hipblasHandle_t   handle,
                                               int64_t           n,
                                               const hipComplex* x,
                                               int64_t           incx,
                                               hipComplex*       y,
                                               int64_t           incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasZcopy_64(hipblasHandle_t         handle,
                                               int64_t                 n,
                                               const hipDoubleComplex* x,
                                               int64_t                 incx,
                                               hipDoubleComplex*       y,
                                               int64_t                 incy);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The copyBatched functions copy each element ``x_i[j]`` into ``y_i[j]``, for  ``j`` = 1 , ... , ``n``; ``i`` = 1 , ... , ``batchCount``.

        y_i := x_i

    where (``x_i``, ``y_i``) is the ``i``-th instance of the batch.
    ``x_i`` and ``y_i`` are vectors.

    The functions with the ``_64`` suffix take ``int64_t`` instead of ``int`` for
    n, incx, incy, and batchCount.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              the number of elements in each x_i to be copied to y_i.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each vector x_i.
    @param[out]
    y         device array of device pointers storing each vector y_i.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each vector y_i.
    @param[in]
    batchCount [int]
              number of instances in the batch.

            ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasScopyBatched(hipblasHandle_t    handle,
                                                   int                n,
                                                   const float* const x[],
                                                   int                incx,
                                                   float* const       y[],
                                                   int                incy,
                                                   int                batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDcopyBatched(hipblasHandle_t     handle,
                                                   int                 n,
                                                   const double* const x[],
                                                   int                 incx,
                                                   double* const       y[],
                                                   int                 incy,
                                                   int                 batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCcopyBatched(hipblasHandle_t         handle,
                                                   int                     n,
                                                   const hipComplex* const x[],
                                                   int                     incx,
                                                   hipComplex* const       y[],
                                                   int                     incy,
                                                   int                     batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZcopyBatched(hipblasHandle_t               handle,
                                                   int                           n,
                                                   const hipDoubleComplex* const x[],
                                                   int                           incx,
                                                   hipDoubleComplex* const       y[],
                                                   int                           incy,
                                                   int                           batchCount);

// 64-bit interface
HIPBLAS_EXPORT hipblasStatus_t hipblasScopyBatched_64(hipblasHandle_t    handle,
                                                      int64_t            n,
                                                      const float* const x[],
                                                      int64_t            incx,
                                                      float* const       y[],
                                                      int64_t            incy,
                                                      int64_t            batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDcopyBatched_64(hipblasHandle_t     handle,
                                                      int64_t             n,
                                                      const double* const x[],
                                                      int64_t             incx,
                                                      double* const       y[],
                                                      int64_t             incy,
                                                      int64_t             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCcopyBatched_64(hipblasHandle_t         handle,
                                                      int64_t                 n,
                                                      const hipComplex* const x[],
                                                      int64_t                 incx,
                                                      hipComplex* const       y[],
                                                      int64_t                 incy,
                                                      int64_t                 batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZcopyBatched_64(hipblasHandle_t               handle,
                                                      int64_t                       n,
                                                      const hipDoubleComplex* const x[],
                                                      int64_t                       incx,
                                                      hipDoubleComplex* const       y[],
                                                      int64_t                       incy,
                                                      int64_t                       batchCount);
//! @}

/*! @{
    \brief  <b> BLAS Level 1 API </b>

    \details
    The copyStridedBatched functions copy each element ``x_i[j]`` into ``y_i[j]``, for  ``j`` = 1 , ... , ``n``; ``i`` = 1 , ... , ``batchCount``.

        y_i := x_i,

    where (``x_i``, ``y_i``) is the ``i``-th instance of the batch.
    ``x_i`` and ``y_i`` are vectors.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              the number of elements in each x_i to be copied to y_i.
    @param[in]
    x         device pointer to the first vector (x_1) in the batch.
    @param[in]
    incx      [int]
              specifies the increment for the elements of vectors x_i.
    @param[in]
    stridex     [hipblasStride]
                stride from the start of one vector (x_i) to the next one (x_i+1).
                There are no restrictions placed on stridex. However, the user should
                ensure that stridex is of an appropriate size. For a typical
                case, this means stridex >= n * incx.
    @param[out]
    y         device pointer to the first vector (y_1) in the batch.
    @param[in]
    incy      [int]
              specifies the increment for the elements of vectors y_i.
    @param[in]
    stridey     [hipblasStride]
                stride from the start of one vector (y_i) to the next one (y_i+1).
                There are no restrictions placed on stridey. However, the user should
                ensure that stridey is of an appropriate size. For a typical
                case this means stridey >= n * incy. stridey should be non zero.
    @param[in]
    batchCount [int]
                number of instances in the batch.

            ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasScopyStridedBatched(hipblasHandle_t handle,
                                                          int             n,
                                                          const float*    x,
                                                          int             incx,
                                                          hipblasStride   stridex,
                                                          float*          y,
                                                          int             incy,
                                                          hipblasStride   stridey,
                                                          int             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDcopyStridedBatched(hipblasHandle_t handle,
                                                          int             n,
                                                          const double*   x,
                                                          int             incx,
                                                          hipblasStride   stridex,
                                                          double*         y,
                                                          int             incy,
                                                          hipblasStride   stridey,
                                                          int             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCcopyStridedBatched(hipblasHandle_t   handle,
                                                          int               n,
                                                          const hipComplex* x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          hipComplex*       y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          int               batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZcopyStridedBatched(hipblasHandle_t         handle,
                                                          int                     n,
                                                          const hipDoubleComplex* x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          hipDoubleComplex*       y,
                                                          int                     incy,
                                                          hipblasStride           stridey,
                                                          int                     batchCount);

// 64-bit interface
// float variant of the strided-batched copy, 64-bit interface: n, incx, incy and
// batchCount are int64_t; stridex and stridey are hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasScopyStridedBatched_64(hipblasHandle_t handle,
                                                             int64_t         n,
                                                             const float*    x,
                                                             int64_t         incx,
                                                             hipblasStride   stridex,
                                                             float*          y,
                                                             int64_t         incy,
                                                             hipblasStride   stridey,
                                                             int64_t         batchCount);

// double variant of the strided-batched copy, 64-bit interface: n, incx, incy and
// batchCount are int64_t; stridex and stridey are hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasDcopyStridedBatched_64(hipblasHandle_t handle,
                                                             int64_t         n,
                                                             const double*   x,
                                                             int64_t         incx,
                                                             hipblasStride   stridex,
                                                             double*         y,
                                                             int64_t         incy,
                                                             hipblasStride   stridey,
                                                             int64_t         batchCount);

// hipComplex variant of the strided-batched copy, 64-bit interface: n, incx, incy
// and batchCount are int64_t; stridex and stridey are hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasCcopyStridedBatched_64(hipblasHandle_t   handle,
                                                             int64_t           n,
                                                             const hipComplex* x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             hipComplex*       y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             int64_t           batchCount);

// hipDoubleComplex variant of the strided-batched copy, 64-bit interface: n, incx,
// incy and batchCount are int64_t; stridex and stridey are hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasZcopyStridedBatched_64(hipblasHandle_t         handle,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             hipDoubleComplex*       y,
                                                             int64_t                 incy,
                                                             hipblasStride           stridey,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The dot(u) functions perform the dot product of vectors ``x`` and ``y``.

        result = x * y;

    The dotc functions perform the dot product of the conjugate of complex vector ``x`` and complex vector ``y``.

        result = conjugate (x) * y;

    - Supported precisions in rocBLAS : ``h``, ``bf``, ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              the number of elements in x and y.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    y         device pointer storing vector y.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.
    @param[inout]
    result
              device pointer or host pointer to store the dot product.
              Return value is 0.0 if n <= 0.

            ********************************************************************/

// dot for hipblasHalf vectors (result = x * y); n, incx and incy are int.
// x and y are const inputs; the product is stored through result.
HIPBLAS_EXPORT hipblasStatus_t hipblasHdot(hipblasHandle_t    handle,
                                           int                n,
                                           const hipblasHalf* x,
                                           int                incx,
                                           const hipblasHalf* y,
                                           int                incy,
                                           hipblasHalf*       result);

// dot for hipblasBfloat16 vectors (result = x * y); n, incx and incy are int.
// x and y are const inputs; the product is stored through result.
HIPBLAS_EXPORT hipblasStatus_t hipblasBfdot(hipblasHandle_t        handle,
                                            int                    n,
                                            const hipblasBfloat16* x,
                                            int                    incx,
                                            const hipblasBfloat16* y,
                                            int                    incy,
                                            hipblasBfloat16*       result);

// dot for float vectors (result = x * y); n, incx and incy are int.
// x and y are const inputs; the product is stored through result.
HIPBLAS_EXPORT hipblasStatus_t hipblasSdot(hipblasHandle_t handle,
                                           int             n,
                                           const float*    x,
                                           int             incx,
                                           const float*    y,
                                           int             incy,
                                           float*          result);

// dot for double vectors (result = x * y); n, incx and incy are int.
// x and y are const inputs; the product is stored through result.
HIPBLAS_EXPORT hipblasStatus_t hipblasDdot(hipblasHandle_t handle,
                                           int             n,
                                           const double*   x,
                                           int             incx,
                                           const double*   y,
                                           int             incy,
                                           double*         result);

// dotc for hipComplex vectors: result = conjugate(x) * y; n, incx and incy are int.
// x and y are const inputs; the product is stored through result.
HIPBLAS_EXPORT hipblasStatus_t hipblasCdotc(hipblasHandle_t   handle,
                                            int               n,
                                            const hipComplex* x,
                                            int               incx,
                                            const hipComplex* y,
                                            int               incy,
                                            hipComplex*       result);

// dotu for hipComplex vectors: result = x * y (no conjugation); n, incx and incy
// are int. x and y are const inputs; the product is stored through result.
HIPBLAS_EXPORT hipblasStatus_t hipblasCdotu(hipblasHandle_t   handle,
                                            int               n,
                                            const hipComplex* x,
                                            int               incx,
                                            const hipComplex* y,
                                            int               incy,
                                            hipComplex*       result);

// dotc for hipDoubleComplex vectors: result = conjugate(x) * y; n, incx and incy
// are int. x and y are const inputs; the product is stored through result.
HIPBLAS_EXPORT hipblasStatus_t hipblasZdotc(hipblasHandle_t         handle,
                                            int                     n,
                                            const hipDoubleComplex* x,
                                            int                     incx,
                                            const hipDoubleComplex* y,
                                            int                     incy,
                                            hipDoubleComplex*       result);

// dotu for hipDoubleComplex vectors: result = x * y (no conjugation); n, incx and
// incy are int. x and y are const inputs; the product is stored through result.
HIPBLAS_EXPORT hipblasStatus_t hipblasZdotu(hipblasHandle_t         handle,
                                            int                     n,
                                            const hipDoubleComplex* x,
                                            int                     incx,
                                            const hipDoubleComplex* y,
                                            int                     incy,
                                            hipDoubleComplex*       result);

// 64-bit interface
// 64-bit interface of dot for hipblasHalf vectors (result = x * y): n, incx and
// incy are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasHdot_64(hipblasHandle_t    handle,
                                              int64_t            n,
                                              const hipblasHalf* x,
                                              int64_t            incx,
                                              const hipblasHalf* y,
                                              int64_t            incy,
                                              hipblasHalf*       result);

// 64-bit interface of dot for hipblasBfloat16 vectors (result = x * y): n, incx
// and incy are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasBfdot_64(hipblasHandle_t        handle,
                                               int64_t                n,
                                               const hipblasBfloat16* x,
                                               int64_t                incx,
                                               const hipblasBfloat16* y,
                                               int64_t                incy,
                                               hipblasBfloat16*       result);

// 64-bit interface of dot for float vectors (result = x * y): n, incx and incy
// are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSdot_64(hipblasHandle_t handle,
                                              int64_t         n,
                                              const float*    x,
                                              int64_t         incx,
                                              const float*    y,
                                              int64_t         incy,
                                              float*          result);

// 64-bit interface of dot for double vectors (result = x * y): n, incx and incy
// are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDdot_64(hipblasHandle_t handle,
                                              int64_t         n,
                                              const double*   x,
                                              int64_t         incx,
                                              const double*   y,
                                              int64_t         incy,
                                              double*         result);

// 64-bit interface of dotc for hipComplex vectors (result = conjugate(x) * y):
// n, incx and incy are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCdotc_64(hipblasHandle_t   handle,
                                               int64_t           n,
                                               const hipComplex* x,
                                               int64_t           incx,
                                               const hipComplex* y,
                                               int64_t           incy,
                                               hipComplex*       result);

// 64-bit interface of dotu for hipComplex vectors (result = x * y, no
// conjugation): n, incx and incy are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCdotu_64(hipblasHandle_t   handle,
                                               int64_t           n,
                                               const hipComplex* x,
                                               int64_t           incx,
                                               const hipComplex* y,
                                               int64_t           incy,
                                               hipComplex*       result);

// 64-bit interface of dotc for hipDoubleComplex vectors
// (result = conjugate(x) * y): n, incx and incy are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZdotc_64(hipblasHandle_t         handle,
                                               int64_t                 n,
                                               const hipDoubleComplex* x,
                                               int64_t                 incx,
                                               const hipDoubleComplex* y,
                                               int64_t                 incy,
                                               hipDoubleComplex*       result);

// 64-bit interface of dotu for hipDoubleComplex vectors (result = x * y, no
// conjugation): n, incx and incy are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZdotu_64(hipblasHandle_t         handle,
                                               int64_t                 n,
                                               const hipDoubleComplex* x,
                                               int64_t                 incx,
                                               const hipDoubleComplex* y,
                                               int64_t                 incy,
                                               hipDoubleComplex*       result);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The dot(u)Batched functions perform a batch of dot products of vectors ``x`` and ``y``.

        result_i = x_i * y_i;

    The dotcBatched functions perform a batch of dot products of the conjugate of complex vector ``x`` and complex vector ``y``.

        result_i = conjugate (x_i) * y_i;

    where (``x_i``, ``y_i``) is the ``i``-th instance of the batch and
    ``x_i`` and ``y_i`` are vectors, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``h``, ``bf``, ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              the number of elements in each x_i and y_i.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    y         device array of device pointers storing each vector y_i.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[in]
    batchCount [int]
                number of instances in the batch.
    @param[inout]
    result
              device array or host array of batchCount size to store the dot products of each batch.
              Returns 0.0 for each element if n <= 0.

            ********************************************************************/

// Batched dot for hipblasHalf vectors (result_i = x_i * y_i). x and y are arrays
// of const pointers, one per batch instance; result holds batchCount values.
HIPBLAS_EXPORT hipblasStatus_t hipblasHdotBatched(hipblasHandle_t          handle,
                                                  int                      n,
                                                  const hipblasHalf* const x[],
                                                  int                      incx,
                                                  const hipblasHalf* const y[],
                                                  int                      incy,
                                                  int                      batchCount,
                                                  hipblasHalf*             result);

// Batched dot for hipblasBfloat16 vectors (result_i = x_i * y_i). x and y are
// arrays of const pointers, one per batch instance; result holds batchCount values.
HIPBLAS_EXPORT hipblasStatus_t hipblasBfdotBatched(hipblasHandle_t              handle,
                                                   int                          n,
                                                   const hipblasBfloat16* const x[],
                                                   int                          incx,
                                                   const hipblasBfloat16* const y[],
                                                   int                          incy,
                                                   int                          batchCount,
                                                   hipblasBfloat16*             result);

// Batched dot for float vectors (result_i = x_i * y_i). x and y are arrays of
// const pointers, one per batch instance; result holds batchCount values.
HIPBLAS_EXPORT hipblasStatus_t hipblasSdotBatched(hipblasHandle_t    handle,
                                                  int                n,
                                                  const float* const x[],
                                                  int                incx,
                                                  const float* const y[],
                                                  int                incy,
                                                  int                batchCount,
                                                  float*             result);

// Batched dot for double vectors (result_i = x_i * y_i). x and y are arrays of
// const pointers, one per batch instance; result holds batchCount values.
HIPBLAS_EXPORT hipblasStatus_t hipblasDdotBatched(hipblasHandle_t     handle,
                                                  int                 n,
                                                  const double* const x[],
                                                  int                 incx,
                                                  const double* const y[],
                                                  int                 incy,
                                                  int                 batchCount,
                                                  double*             result);

// Batched dotc for hipComplex vectors (result_i = conjugate(x_i) * y_i). x and y
// are arrays of const pointers, one per batch instance; result holds batchCount values.
HIPBLAS_EXPORT hipblasStatus_t hipblasCdotcBatched(hipblasHandle_t         handle,
                                                   int                     n,
                                                   const hipComplex* const x[],
                                                   int                     incx,
                                                   const hipComplex* const y[],
                                                   int                     incy,
                                                   int                     batchCount,
                                                   hipComplex*             result);

// Batched dotu for hipComplex vectors (result_i = x_i * y_i, no conjugation). x and
// y are arrays of const pointers, one per batch instance; result holds batchCount values.
HIPBLAS_EXPORT hipblasStatus_t hipblasCdotuBatched(hipblasHandle_t         handle,
                                                   int                     n,
                                                   const hipComplex* const x[],
                                                   int                     incx,
                                                   const hipComplex* const y[],
                                                   int                     incy,
                                                   int                     batchCount,
                                                   hipComplex*             result);

// Batched dotc for hipDoubleComplex vectors (result_i = conjugate(x_i) * y_i). x and
// y are arrays of const pointers, one per batch instance; result holds batchCount values.
HIPBLAS_EXPORT hipblasStatus_t hipblasZdotcBatched(hipblasHandle_t               handle,
                                                   int                           n,
                                                   const hipDoubleComplex* const x[],
                                                   int                           incx,
                                                   const hipDoubleComplex* const y[],
                                                   int                           incy,
                                                   int                           batchCount,
                                                   hipDoubleComplex*             result);

// Batched dotu for hipDoubleComplex vectors (result_i = x_i * y_i, no conjugation).
// x and y are arrays of const pointers, one per batch instance; result holds
// batchCount values.
HIPBLAS_EXPORT hipblasStatus_t hipblasZdotuBatched(hipblasHandle_t               handle,
                                                   int                           n,
                                                   const hipDoubleComplex* const x[],
                                                   int                           incx,
                                                   const hipDoubleComplex* const y[],
                                                   int                           incy,
                                                   int                           batchCount,
                                                   hipDoubleComplex*             result);

// 64-bit interface
// 64-bit interface of batched dot for hipblasHalf vectors (result_i = x_i * y_i):
// n, incx, incy and batchCount are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasHdotBatched_64(hipblasHandle_t          handle,
                                                     int64_t                  n,
                                                     const hipblasHalf* const x[],
                                                     int64_t                  incx,
                                                     const hipblasHalf* const y[],
                                                     int64_t                  incy,
                                                     int64_t                  batchCount,
                                                     hipblasHalf*             result);

// 64-bit interface of batched dot for hipblasBfloat16 vectors
// (result_i = x_i * y_i): n, incx, incy and batchCount are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasBfdotBatched_64(hipblasHandle_t              handle,
                                                      int64_t                      n,
                                                      const hipblasBfloat16* const x[],
                                                      int64_t                      incx,
                                                      const hipblasBfloat16* const y[],
                                                      int64_t                      incy,
                                                      int64_t                      batchCount,
                                                      hipblasBfloat16*             result);

// 64-bit interface of batched dot for float vectors (result_i = x_i * y_i):
// n, incx, incy and batchCount are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSdotBatched_64(hipblasHandle_t    handle,
                                                     int64_t            n,
                                                     const float* const x[],
                                                     int64_t            incx,
                                                     const float* const y[],
                                                     int64_t            incy,
                                                     int64_t            batchCount,
                                                     float*             result);

// 64-bit interface of batched dot for double vectors (result_i = x_i * y_i):
// n, incx, incy and batchCount are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDdotBatched_64(hipblasHandle_t     handle,
                                                     int64_t             n,
                                                     const double* const x[],
                                                     int64_t             incx,
                                                     const double* const y[],
                                                     int64_t             incy,
                                                     int64_t             batchCount,
                                                     double*             result);

// 64-bit interface of batched dotc for hipComplex vectors
// (result_i = conjugate(x_i) * y_i): n, incx, incy and batchCount are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCdotcBatched_64(hipblasHandle_t         handle,
                                                      int64_t                 n,
                                                      const hipComplex* const x[],
                                                      int64_t                 incx,
                                                      const hipComplex* const y[],
                                                      int64_t                 incy,
                                                      int64_t                 batchCount,
                                                      hipComplex*             result);

// 64-bit interface of batched dotu for hipComplex vectors
// (result_i = x_i * y_i, no conjugation): n, incx, incy and batchCount are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCdotuBatched_64(hipblasHandle_t         handle,
                                                      int64_t                 n,
                                                      const hipComplex* const x[],
                                                      int64_t                 incx,
                                                      const hipComplex* const y[],
                                                      int64_t                 incy,
                                                      int64_t                 batchCount,
                                                      hipComplex*             result);

// 64-bit interface of batched dotc for hipDoubleComplex vectors
// (result_i = conjugate(x_i) * y_i): n, incx, incy and batchCount are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZdotcBatched_64(hipblasHandle_t               handle,
                                                      int64_t                       n,
                                                      const hipDoubleComplex* const x[],
                                                      int64_t                       incx,
                                                      const hipDoubleComplex* const y[],
                                                      int64_t                       incy,
                                                      int64_t                       batchCount,
                                                      hipDoubleComplex*             result);

// 64-bit interface of batched dotu for hipDoubleComplex vectors
// (result_i = x_i * y_i, no conjugation): n, incx, incy and batchCount are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZdotuBatched_64(hipblasHandle_t               handle,
                                                      int64_t                       n,
                                                      const hipDoubleComplex* const x[],
                                                      int64_t                       incx,
                                                      const hipDoubleComplex* const y[],
                                                      int64_t                       incy,
                                                      int64_t                       batchCount,
                                                      hipDoubleComplex*             result);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The dot(u)StridedBatched functions perform a batch of dot products of vectors ``x`` and ``y``.

        result_i = x_i * y_i;

    The dotcStridedBatched functions perform a batch of dot products of the conjugate of complex vector ``x`` and complex vector ``y``.

        result_i = conjugate (x_i) * y_i;

    where (``x_i``, ``y_i``) is the ``i``-th instance of the batch and
    ``x_i`` and ``y_i`` are vectors, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``h``, ``bf``, ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              the number of elements in each x_i and y_i.
    @param[in]
    x         device pointer to the first vector (x_1) in the batch.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    stridex     [hipblasStride]
                stride from the start of one vector (x_i) to the next one (x_i+1).
    @param[in]
    y         device pointer to the first vector (y_1) in the batch.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[in]
    stridey     [hipblasStride]
                stride from the start of one vector (y_i) to the next one (y_i+1).
    @param[in]
    batchCount [int]
                number of instances in the batch.
    @param[inout]
    result
              device array or host array of batchCount size to store the dot products of each batch.
              Returns 0.0 for each element if n <= 0.

            ********************************************************************/

// Strided-batched dot for hipblasHalf vectors (result_i = x_i * y_i). x and y point
// to the first vector of each batch; stridex / stridey step from one vector to the
// next. result holds batchCount values.
HIPBLAS_EXPORT hipblasStatus_t hipblasHdotStridedBatched(hipblasHandle_t    handle,
                                                         int                n,
                                                         const hipblasHalf* x,
                                                         int                incx,
                                                         hipblasStride      stridex,
                                                         const hipblasHalf* y,
                                                         int                incy,
                                                         hipblasStride      stridey,
                                                         int                batchCount,
                                                         hipblasHalf*       result);

// Strided-batched dot for hipblasBfloat16 vectors (result_i = x_i * y_i). x and y
// point to the first vector of each batch; stridex / stridey step from one vector
// to the next. result holds batchCount values.
HIPBLAS_EXPORT hipblasStatus_t hipblasBfdotStridedBatched(hipblasHandle_t        handle,
                                                          int                    n,
                                                          const hipblasBfloat16* x,
                                                          int                    incx,
                                                          hipblasStride          stridex,
                                                          const hipblasBfloat16* y,
                                                          int                    incy,
                                                          hipblasStride          stridey,
                                                          int                    batchCount,
                                                          hipblasBfloat16*       result);

// Strided-batched dot for float vectors (result_i = x_i * y_i). x and y point to
// the first vector of each batch; stridex / stridey step from one vector to the
// next. result holds batchCount values.
HIPBLAS_EXPORT hipblasStatus_t hipblasSdotStridedBatched(hipblasHandle_t handle,
                                                         int             n,
                                                         const float*    x,
                                                         int             incx,
                                                         hipblasStride   stridex,
                                                         const float*    y,
                                                         int             incy,
                                                         hipblasStride   stridey,
                                                         int             batchCount,
                                                         float*          result);

// Strided-batched dot for double vectors (result_i = x_i * y_i). x and y point to
// the first vector of each batch; stridex / stridey step from one vector to the
// next. result holds batchCount values.
HIPBLAS_EXPORT hipblasStatus_t hipblasDdotStridedBatched(hipblasHandle_t handle,
                                                         int             n,
                                                         const double*   x,
                                                         int             incx,
                                                         hipblasStride   stridex,
                                                         const double*   y,
                                                         int             incy,
                                                         hipblasStride   stridey,
                                                         int             batchCount,
                                                         double*         result);

// Strided-batched dotc for hipComplex vectors (result_i = conjugate(x_i) * y_i).
// x and y point to the first vector of each batch; stridex / stridey step from one
// vector to the next. result holds batchCount values.
HIPBLAS_EXPORT hipblasStatus_t hipblasCdotcStridedBatched(hipblasHandle_t   handle,
                                                          int               n,
                                                          const hipComplex* x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const hipComplex* y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          int               batchCount,
                                                          hipComplex*       result);

// Strided-batched dotu for hipComplex vectors (result_i = x_i * y_i, no
// conjugation). x and y point to the first vector of each batch; stridex / stridey
// step from one vector to the next. result holds batchCount values.
HIPBLAS_EXPORT hipblasStatus_t hipblasCdotuStridedBatched(hipblasHandle_t   handle,
                                                          int               n,
                                                          const hipComplex* x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const hipComplex* y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          int               batchCount,
                                                          hipComplex*       result);

// Strided-batched dotc for hipDoubleComplex vectors
// (result_i = conjugate(x_i) * y_i). x and y point to the first vector of each
// batch; stridex / stridey step from one vector to the next. result holds
// batchCount values.
HIPBLAS_EXPORT hipblasStatus_t hipblasZdotcStridedBatched(hipblasHandle_t         handle,
                                                          int                     n,
                                                          const hipDoubleComplex* x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          const hipDoubleComplex* y,
                                                          int                     incy,
                                                          hipblasStride           stridey,
                                                          int                     batchCount,
                                                          hipDoubleComplex*       result);

// Strided-batched dotu for hipDoubleComplex vectors (result_i = x_i * y_i, no
// conjugation). x and y point to the first vector of each batch; stridex / stridey
// step from one vector to the next. result holds batchCount values.
HIPBLAS_EXPORT hipblasStatus_t hipblasZdotuStridedBatched(hipblasHandle_t         handle,
                                                          int                     n,
                                                          const hipDoubleComplex* x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          const hipDoubleComplex* y,
                                                          int                     incy,
                                                          hipblasStride           stridey,
                                                          int                     batchCount,
                                                          hipDoubleComplex*       result);

// 64-bit interface: n, incx, incy, and batchCount are int64_t instead of int;
// stridex and stridey remain hipblasStride.

// hipblasHalf x, y, and result.
HIPBLAS_EXPORT hipblasStatus_t hipblasHdotStridedBatched_64(hipblasHandle_t    handle,
                                                            int64_t            n,
                                                            const hipblasHalf* x,
                                                            int64_t            incx,
                                                            hipblasStride      stridex,
                                                            const hipblasHalf* y,
                                                            int64_t            incy,
                                                            hipblasStride      stridey,
                                                            int64_t            batchCount,
                                                            hipblasHalf*       result);

// hipblasBfloat16 x, y, and result.
HIPBLAS_EXPORT hipblasStatus_t hipblasBfdotStridedBatched_64(hipblasHandle_t        handle,
                                                             int64_t                n,
                                                             const hipblasBfloat16* x,
                                                             int64_t                incx,
                                                             hipblasStride          stridex,
                                                             const hipblasBfloat16* y,
                                                             int64_t                incy,
                                                             hipblasStride          stridey,
                                                             int64_t                batchCount,
                                                             hipblasBfloat16*       result);

// float x, y, and result.
HIPBLAS_EXPORT hipblasStatus_t hipblasSdotStridedBatched_64(hipblasHandle_t handle,
                                                            int64_t         n,
                                                            const float*    x,
                                                            int64_t         incx,
                                                            hipblasStride   stridex,
                                                            const float*    y,
                                                            int64_t         incy,
                                                            hipblasStride   stridey,
                                                            int64_t         batchCount,
                                                            float*          result);

// double x, y, and result.
HIPBLAS_EXPORT hipblasStatus_t hipblasDdotStridedBatched_64(hipblasHandle_t handle,
                                                            int64_t         n,
                                                            const double*   x,
                                                            int64_t         incx,
                                                            hipblasStride   stridex,
                                                            const double*   y,
                                                            int64_t         incy,
                                                            hipblasStride   stridey,
                                                            int64_t         batchCount,
                                                            double*         result);

// hipComplex x, y, and result; dotc variant.
// NOTE(review): by BLAS naming, dotc presumably conjugates x while dotu does not —
// confirm against the group documentation for this family.
HIPBLAS_EXPORT hipblasStatus_t hipblasCdotcStridedBatched_64(hipblasHandle_t   handle,
                                                             int64_t           n,
                                                             const hipComplex* x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const hipComplex* y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             int64_t           batchCount,
                                                             hipComplex*       result);

// hipComplex x, y, and result; dotu variant.
HIPBLAS_EXPORT hipblasStatus_t hipblasCdotuStridedBatched_64(hipblasHandle_t   handle,
                                                             int64_t           n,
                                                             const hipComplex* x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const hipComplex* y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             int64_t           batchCount,
                                                             hipComplex*       result);

// hipDoubleComplex x, y, and result; dotc variant.
HIPBLAS_EXPORT hipblasStatus_t hipblasZdotcStridedBatched_64(hipblasHandle_t         handle,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             const hipDoubleComplex* y,
                                                             int64_t                 incy,
                                                             hipblasStride           stridey,
                                                             int64_t                 batchCount,
                                                             hipDoubleComplex*       result);

// hipDoubleComplex x, y, and result; dotu variant.
HIPBLAS_EXPORT hipblasStatus_t hipblasZdotuStridedBatched_64(hipblasHandle_t         handle,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             const hipDoubleComplex* y,
                                                             int64_t                 incy,
                                                             hipblasStride           stridey,
                                                             int64_t                 batchCount,
                                                             hipDoubleComplex*       result);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The nrm2 functions compute the Euclidean norm of a real or complex vector.

              result := sqrt( x'*x ) for real vectors
              result := sqrt( x**H*x ) for complex vectors

    - Supported precisions in rocBLAS : ``s``, ``d``, ``sc``, and ``dz``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``sc``, and ``dz``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              the number of elements in x.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[out]
    result
              device pointer or host pointer to store the nrm2 result.
              Return value is 0.0 if n <= 0 or incx <= 0.

            ********************************************************************/

// float input vector x; float result.
HIPBLAS_EXPORT hipblasStatus_t
    hipblasSnrm2(hipblasHandle_t handle, int n, const float* x, int incx, float* result);

// double input vector x; double result.
HIPBLAS_EXPORT hipblasStatus_t
    hipblasDnrm2(hipblasHandle_t handle, int n, const double* x, int incx, double* result);

// hipComplex input vector x; the norm is written as a real float.
HIPBLAS_EXPORT hipblasStatus_t
    hipblasScnrm2(hipblasHandle_t handle, int n, const hipComplex* x, int incx, float* result);

// hipDoubleComplex input vector x; the norm is written as a real double.
HIPBLAS_EXPORT hipblasStatus_t hipblasDznrm2(
    hipblasHandle_t handle, int n, const hipDoubleComplex* x, int incx, double* result);

// 64-bit interface: n and incx are int64_t instead of int.

// float input vector x; float result.
HIPBLAS_EXPORT hipblasStatus_t
    hipblasSnrm2_64(hipblasHandle_t handle, int64_t n, const float* x, int64_t incx, float* result);

// double input vector x; double result.
HIPBLAS_EXPORT hipblasStatus_t hipblasDnrm2_64(
    hipblasHandle_t handle, int64_t n, const double* x, int64_t incx, double* result);

// hipComplex input vector x; the norm is written as a real float.
HIPBLAS_EXPORT hipblasStatus_t hipblasScnrm2_64(
    hipblasHandle_t handle, int64_t n, const hipComplex* x, int64_t incx, float* result);

// hipDoubleComplex input vector x; the norm is written as a real double.
HIPBLAS_EXPORT hipblasStatus_t hipblasDznrm2_64(
    hipblasHandle_t handle, int64_t n, const hipDoubleComplex* x, int64_t incx, double* result);

//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The nrm2Batched functions compute the Euclidean norm over a batch of real or complex vectors.

              result_i := sqrt( x_i'*x_i ) for real vectors x_i, for i = 1, ..., batchCount
              result_i := sqrt( x_i**H*x_i ) for complex vectors x_i, for i = 1, ..., batchCount

    - Supported precisions in rocBLAS : ``s``, ``d``, ``sc``, and ``dz``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              number of elements in each x_i.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i. incx must be > 0.
    @param[in]
    batchCount [int]
              number of instances in the batch.
    @param[out]
    result
              device pointer or host pointer to array of batchCount size for nrm2 results.
              Return value is 0.0 for each element if n <= 0 or incx <= 0.

            ********************************************************************/
// x is an array of pointers to float vectors; result holds float norms.
HIPBLAS_EXPORT hipblasStatus_t hipblasSnrm2Batched(
    hipblasHandle_t handle, int n, const float* const x[], int incx, int batchCount, float* result);

// x is an array of pointers to double vectors; result holds double norms.
HIPBLAS_EXPORT hipblasStatus_t hipblasDnrm2Batched(hipblasHandle_t     handle,
                                                   int                 n,
                                                   const double* const x[],
                                                   int                 incx,
                                                   int                 batchCount,
                                                   double*             result);

// x is an array of pointers to hipComplex vectors; result holds real float norms.
HIPBLAS_EXPORT hipblasStatus_t hipblasScnrm2Batched(hipblasHandle_t         handle,
                                                    int                     n,
                                                    const hipComplex* const x[],
                                                    int                     incx,
                                                    int                     batchCount,
                                                    float*                  result);

// x is an array of pointers to hipDoubleComplex vectors; result holds real double norms.
HIPBLAS_EXPORT hipblasStatus_t hipblasDznrm2Batched(hipblasHandle_t               handle,
                                                    int                           n,
                                                    const hipDoubleComplex* const x[],
                                                    int                           incx,
                                                    int                           batchCount,
                                                    double*                       result);

// 64-bit interface: n, incx, and batchCount are int64_t instead of int.

// x is an array of pointers to float vectors; result holds float norms.
HIPBLAS_EXPORT hipblasStatus_t hipblasSnrm2Batched_64(hipblasHandle_t    handle,
                                                      int64_t            n,
                                                      const float* const x[],
                                                      int64_t            incx,
                                                      int64_t            batchCount,
                                                      float*             result);

// x is an array of pointers to double vectors; result holds double norms.
HIPBLAS_EXPORT hipblasStatus_t hipblasDnrm2Batched_64(hipblasHandle_t     handle,
                                                      int64_t             n,
                                                      const double* const x[],
                                                      int64_t             incx,
                                                      int64_t             batchCount,
                                                      double*             result);

// x is an array of pointers to hipComplex vectors; result holds real float norms.
HIPBLAS_EXPORT hipblasStatus_t hipblasScnrm2Batched_64(hipblasHandle_t         handle,
                                                       int64_t                 n,
                                                       const hipComplex* const x[],
                                                       int64_t                 incx,
                                                       int64_t                 batchCount,
                                                       float*                  result);

// x is an array of pointers to hipDoubleComplex vectors; result holds real double norms.
HIPBLAS_EXPORT hipblasStatus_t hipblasDznrm2Batched_64(hipblasHandle_t               handle,
                                                       int64_t                       n,
                                                       const hipDoubleComplex* const x[],
                                                       int64_t                       incx,
                                                       int64_t                       batchCount,
                                                       double*                       result);

//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The nrm2StridedBatched functions compute the Euclidean norm over a batch of real or complex vectors.

              result_i := sqrt( x_i'*x_i ) for real vectors x_i, for i = 1, ..., batchCount
              result_i := sqrt( x_i**H*x_i ) for complex vectors x_i, for i = 1, ..., batchCount

    - Supported precisions in rocBLAS : ``s``, ``d``, ``sc``, and ``dz``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              number of elements in each x_i.
    @param[in]
    x         device pointer to the first vector x_1.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i. incx must be > 0.
    @param[in]
    stridex   [hipblasStride]
              stride from the start of one vector (x_i) to the next one (x_{i+1}).
              There are no restrictions placed on stridex. However, the user should
              ensure that stridex is of an appropriate size. For a typical
              case, this means stridex >= n * incx.
    @param[in]
    batchCount [int]
              number of instances in the batch.
    @param[out]
    result
              device pointer or host pointer to array for storing contiguous batchCount results.
              Return value is 0.0 for each element if n <= 0 or incx <= 0.

            ********************************************************************/

// float vectors spaced stridex apart starting at x; result holds float norms.
HIPBLAS_EXPORT hipblasStatus_t hipblasSnrm2StridedBatched(hipblasHandle_t handle,
                                                          int             n,
                                                          const float*    x,
                                                          int             incx,
                                                          hipblasStride   stridex,
                                                          int             batchCount,
                                                          float*          result);

// double vectors spaced stridex apart starting at x; result holds double norms.
HIPBLAS_EXPORT hipblasStatus_t hipblasDnrm2StridedBatched(hipblasHandle_t handle,
                                                          int             n,
                                                          const double*   x,
                                                          int             incx,
                                                          hipblasStride   stridex,
                                                          int             batchCount,
                                                          double*         result);

// hipComplex vectors spaced stridex apart starting at x; result holds real float norms.
HIPBLAS_EXPORT hipblasStatus_t hipblasScnrm2StridedBatched(hipblasHandle_t   handle,
                                                           int               n,
                                                           const hipComplex* x,
                                                           int               incx,
                                                           hipblasStride     stridex,
                                                           int               batchCount,
                                                           float*            result);

// hipDoubleComplex vectors spaced stridex apart starting at x; result holds real double norms.
HIPBLAS_EXPORT hipblasStatus_t hipblasDznrm2StridedBatched(hipblasHandle_t         handle,
                                                           int                     n,
                                                           const hipDoubleComplex* x,
                                                           int                     incx,
                                                           hipblasStride           stridex,
                                                           int                     batchCount,
                                                           double*                 result);

// 64-bit interface: n, incx, and batchCount are int64_t instead of int;
// stridex remains hipblasStride.

// float vectors spaced stridex apart starting at x; result holds float norms.
HIPBLAS_EXPORT hipblasStatus_t hipblasSnrm2StridedBatched_64(hipblasHandle_t handle,
                                                             int64_t         n,
                                                             const float*    x,
                                                             int64_t         incx,
                                                             hipblasStride   stridex,
                                                             int64_t         batchCount,
                                                             float*          result);

// double vectors spaced stridex apart starting at x; result holds double norms.
HIPBLAS_EXPORT hipblasStatus_t hipblasDnrm2StridedBatched_64(hipblasHandle_t handle,
                                                             int64_t         n,
                                                             const double*   x,
                                                             int64_t         incx,
                                                             hipblasStride   stridex,
                                                             int64_t         batchCount,
                                                             double*         result);

// hipComplex vectors spaced stridex apart starting at x; result holds real float norms.
HIPBLAS_EXPORT hipblasStatus_t hipblasScnrm2StridedBatched_64(hipblasHandle_t   handle,
                                                              int64_t           n,
                                                              const hipComplex* x,
                                                              int64_t           incx,
                                                              hipblasStride     stridex,
                                                              int64_t           batchCount,
                                                              float*            result);

// hipDoubleComplex vectors spaced stridex apart starting at x; result holds real double norms.
HIPBLAS_EXPORT hipblasStatus_t hipblasDznrm2StridedBatched_64(hipblasHandle_t         handle,
                                                              int64_t                 n,
                                                              const hipDoubleComplex* x,
                                                              int64_t                 incx,
                                                              hipblasStride           stridex,
                                                              int64_t                 batchCount,
                                                              double*                 result);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The rot functions apply the Givens rotation matrix defined by ``c=cos(alpha)`` and ``s=sin(alpha)`` to vectors ``x`` and ``y``.
        Scalars ``c`` and ``s`` can be stored in either host or device memory. The location is specified by calling ``hipblasSetPointerMode``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, ``z``, ``cs``, and ``zd``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, ``z``, ``cs``, and ``zd``.

    @param[in]
    handle  [hipblasHandle_t]
            handle to the hipBLAS library context queue.
    @param[in]
    n       [int]
            number of elements in the x and y vectors.
    @param[inout]
    x       device pointer storing vector x.
    @param[in]
    incx    [int]
            specifies the increment between elements of x.
    @param[inout]
    y       device pointer storing vector y.
    @param[in]
    incy    [int]
            specifies the increment between elements of y.
    @param[in]
    c       device pointer or host pointer storing the scalar cosine component of the rotation matrix.
    @param[in]
    s       device pointer or host pointer storing the scalar sine component of the rotation matrix.

            ********************************************************************/

// float x and y (both modified in place); float c and s.
HIPBLAS_EXPORT hipblasStatus_t hipblasSrot(hipblasHandle_t handle,
                                           int             n,
                                           float*          x,
                                           int             incx,
                                           float*          y,
                                           int             incy,
                                           const float*    c,
                                           const float*    s);

// double x and y; double c and s.
HIPBLAS_EXPORT hipblasStatus_t hipblasDrot(hipblasHandle_t handle,
                                           int             n,
                                           double*         x,
                                           int             incx,
                                           double*         y,
                                           int             incy,
                                           const double*   c,
                                           const double*   s);

// hipComplex x and y; real c (float) and complex s (hipComplex).
HIPBLAS_EXPORT hipblasStatus_t hipblasCrot(hipblasHandle_t   handle,
                                           int               n,
                                           hipComplex*       x,
                                           int               incx,
                                           hipComplex*       y,
                                           int               incy,
                                           const float*      c,
                                           const hipComplex* s);

// hipComplex x and y; both c and s are real (float).
HIPBLAS_EXPORT hipblasStatus_t hipblasCsrot(hipblasHandle_t handle,
                                            int             n,
                                            hipComplex*     x,
                                            int             incx,
                                            hipComplex*     y,
                                            int             incy,
                                            const float*    c,
                                            const float*    s);

// hipDoubleComplex x and y; real c (double) and complex s (hipDoubleComplex).
HIPBLAS_EXPORT hipblasStatus_t hipblasZrot(hipblasHandle_t         handle,
                                           int                     n,
                                           hipDoubleComplex*       x,
                                           int                     incx,
                                           hipDoubleComplex*       y,
                                           int                     incy,
                                           const double*           c,
                                           const hipDoubleComplex* s);

// hipDoubleComplex x and y; both c and s are real (double).
HIPBLAS_EXPORT hipblasStatus_t hipblasZdrot(hipblasHandle_t   handle,
                                            int               n,
                                            hipDoubleComplex* x,
                                            int               incx,
                                            hipDoubleComplex* y,
                                            int               incy,
                                            const double*     c,
                                            const double*     s);

// 64-bit interface: n, incx, and incy are int64_t instead of int.

// float x and y (both modified in place); float c and s.
HIPBLAS_EXPORT hipblasStatus_t hipblasSrot_64(hipblasHandle_t handle,
                                              int64_t         n,
                                              float*          x,
                                              int64_t         incx,
                                              float*          y,
                                              int64_t         incy,
                                              const float*    c,
                                              const float*    s);

// double x and y; double c and s.
HIPBLAS_EXPORT hipblasStatus_t hipblasDrot_64(hipblasHandle_t handle,
                                              int64_t         n,
                                              double*         x,
                                              int64_t         incx,
                                              double*         y,
                                              int64_t         incy,
                                              const double*   c,
                                              const double*   s);

// hipComplex x and y; real c (float) and complex s (hipComplex).
HIPBLAS_EXPORT hipblasStatus_t hipblasCrot_64(hipblasHandle_t   handle,
                                              int64_t           n,
                                              hipComplex*       x,
                                              int64_t           incx,
                                              hipComplex*       y,
                                              int64_t           incy,
                                              const float*      c,
                                              const hipComplex* s);

// hipComplex x and y; both c and s are real (float).
HIPBLAS_EXPORT hipblasStatus_t hipblasCsrot_64(hipblasHandle_t handle,
                                               int64_t         n,
                                               hipComplex*     x,
                                               int64_t         incx,
                                               hipComplex*     y,
                                               int64_t         incy,
                                               const float*    c,
                                               const float*    s);

// hipDoubleComplex x and y; real c (double) and complex s (hipDoubleComplex).
HIPBLAS_EXPORT hipblasStatus_t hipblasZrot_64(hipblasHandle_t         handle,
                                              int64_t                 n,
                                              hipDoubleComplex*       x,
                                              int64_t                 incx,
                                              hipDoubleComplex*       y,
                                              int64_t                 incy,
                                              const double*           c,
                                              const hipDoubleComplex* s);

// hipDoubleComplex x and y; both c and s are real (double).
HIPBLAS_EXPORT hipblasStatus_t hipblasZdrot_64(hipblasHandle_t   handle,
                                               int64_t           n,
                                               hipDoubleComplex* x,
                                               int64_t           incx,
                                               hipDoubleComplex* y,
                                               int64_t           incy,
                                               const double*     c,
                                               const double*     s);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The rotBatched functions apply the Givens rotation matrix defined by ``c=cos(alpha)`` and ``s=sin(alpha)`` to batched vectors ``x_i`` and ``y_i``, for ``i`` = 1, ..., ``batchCount``.
        Scalars ``c`` and ``s`` can be stored in either host or device memory. The location is specified by calling ``hipblasSetPointerMode``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, ``z``, ``cs``, and ``zd``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle  [hipblasHandle_t]
            handle to the hipBLAS library context queue.
    @param[in]
    n       [int]
            number of elements in each x_i and y_i vectors.
    @param[inout]
    x       device array of device pointers storing each vector x_i.
    @param[in]
    incx    [int]
            specifies the increment between elements of each x_i.
    @param[inout]
    y       device array of device pointers storing each vector y_i.
    @param[in]
    incy    [int]
            specifies the increment between elements of each y_i.
    @param[in]
    c       device pointer or host pointer to the scalar cosine component of the rotation matrix.
    @param[in]
    s       device pointer or host pointer to the scalar sine component of the rotation matrix.
    @param[in]
    batchCount [int]
                the number of x and y arrays, that is, the number of batches.

            ********************************************************************/

// x and y are arrays of pointers to float vectors; float c and s.
HIPBLAS_EXPORT hipblasStatus_t hipblasSrotBatched(hipblasHandle_t handle,
                                                  int             n,
                                                  float* const    x[],
                                                  int             incx,
                                                  float* const    y[],
                                                  int             incy,
                                                  const float*    c,
                                                  const float*    s,
                                                  int             batchCount);

// x and y are arrays of pointers to double vectors; double c and s.
HIPBLAS_EXPORT hipblasStatus_t hipblasDrotBatched(hipblasHandle_t handle,
                                                  int             n,
                                                  double* const   x[],
                                                  int             incx,
                                                  double* const   y[],
                                                  int             incy,
                                                  const double*   c,
                                                  const double*   s,
                                                  int             batchCount);

// x and y are arrays of pointers to hipComplex vectors; real c (float), complex s (hipComplex).
HIPBLAS_EXPORT hipblasStatus_t hipblasCrotBatched(hipblasHandle_t   handle,
                                                  int               n,
                                                  hipComplex* const x[],
                                                  int               incx,
                                                  hipComplex* const y[],
                                                  int               incy,
                                                  const float*      c,
                                                  const hipComplex* s,
                                                  int               batchCount);

// x and y are arrays of pointers to hipComplex vectors; both c and s are real (float).
HIPBLAS_EXPORT hipblasStatus_t hipblasCsrotBatched(hipblasHandle_t   handle,
                                                   int               n,
                                                   hipComplex* const x[],
                                                   int               incx,
                                                   hipComplex* const y[],
                                                   int               incy,
                                                   const float*      c,
                                                   const float*      s,
                                                   int               batchCount);

// x and y are arrays of pointers to hipDoubleComplex vectors; real c (double),
// complex s (hipDoubleComplex).
HIPBLAS_EXPORT hipblasStatus_t hipblasZrotBatched(hipblasHandle_t         handle,
                                                  int                     n,
                                                  hipDoubleComplex* const x[],
                                                  int                     incx,
                                                  hipDoubleComplex* const y[],
                                                  int                     incy,
                                                  const double*           c,
                                                  const hipDoubleComplex* s,
                                                  int                     batchCount);

// x and y are arrays of pointers to hipDoubleComplex vectors; both c and s are real (double).
HIPBLAS_EXPORT hipblasStatus_t hipblasZdrotBatched(hipblasHandle_t         handle,
                                                   int                     n,
                                                   hipDoubleComplex* const x[],
                                                   int                     incx,
                                                   hipDoubleComplex* const y[],
                                                   int                     incy,
                                                   const double*           c,
                                                   const double*           s,
                                                   int                     batchCount);

// 64-bit interface
// float overload with int64_t n, incx, incy and batchCount: x[] and y[] are
// arrays of float pointers, c and s are const float*.
HIPBLAS_EXPORT hipblasStatus_t hipblasSrotBatched_64(hipblasHandle_t handle,
                                                     int64_t         n,
                                                     float* const    x[],
                                                     int64_t         incx,
                                                     float* const    y[],
                                                     int64_t         incy,
                                                     const float*    c,
                                                     const float*    s,
                                                     int64_t         batchCount);

// double overload with int64_t n, incx, incy and batchCount: x[] and y[] are
// arrays of double pointers, c and s are const double*.
HIPBLAS_EXPORT hipblasStatus_t hipblasDrotBatched_64(hipblasHandle_t handle,
                                                     int64_t         n,
                                                     double* const   x[],
                                                     int64_t         incx,
                                                     double* const   y[],
                                                     int64_t         incy,
                                                     const double*   c,
                                                     const double*   s,
                                                     int64_t         batchCount);

// hipComplex overload with int64_t n, incx, incy and batchCount: x[] and y[] are
// arrays of hipComplex pointers, c is a const float* and s is a const hipComplex*.
HIPBLAS_EXPORT hipblasStatus_t hipblasCrotBatched_64(hipblasHandle_t   handle,
                                                     int64_t           n,
                                                     hipComplex* const x[],
                                                     int64_t           incx,
                                                     hipComplex* const y[],
                                                     int64_t           incy,
                                                     const float*      c,
                                                     const hipComplex* s,
                                                     int64_t           batchCount);

// Differs from hipblasCrotBatched_64 only in s, which is a const float* here
// instead of a const hipComplex*.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsrotBatched_64(hipblasHandle_t   handle,
                                                      int64_t           n,
                                                      hipComplex* const x[],
                                                      int64_t           incx,
                                                      hipComplex* const y[],
                                                      int64_t           incy,
                                                      const float*      c,
                                                      const float*      s,
                                                      int64_t           batchCount);

// Same pointer parameters as hipblasZrotBatched; n, incx, incy and batchCount
// are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZrotBatched_64(hipblasHandle_t         handle,
                                                     int64_t                 n,
                                                     hipDoubleComplex* const x[],
                                                     int64_t                 incx,
                                                     hipDoubleComplex* const y[],
                                                     int64_t                 incy,
                                                     const double*           c,
                                                     const hipDoubleComplex* s,
                                                     int64_t                 batchCount);

// Same pointer parameters as hipblasZdrotBatched (s is a const double*); n, incx,
// incy and batchCount are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZdrotBatched_64(hipblasHandle_t         handle,
                                                      int64_t                 n,
                                                      hipDoubleComplex* const x[],
                                                      int64_t                 incx,
                                                      hipDoubleComplex* const y[],
                                                      int64_t                 incy,
                                                      const double*           c,
                                                      const double*           s,
                                                      int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The rotStridedBatched functions apply the Givens rotation matrix defined by ``c=cos(alpha)`` and ``s=sin(alpha)`` to strided batched vectors ``x_i`` and ``y_i``, for ``i`` = 1, ..., ``batchCount``.
        Scalars ``c`` and ``s`` can be stored in either host or device memory. The location is specified by calling ``hipblasSetPointerMode``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, ``z``, ``sc``, and ``dz``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle  [hipblasHandle_t]
            handle to the hipBLAS library context queue.
    @param[in]
    n       [int]
            number of elements in each x_i and y_i vectors.
    @param[inout]
    x       device pointer to the first vector x_1.
    @param[in]
    incx    [int]
            specifies the increment between elements of each x_i.
    @param[in]
    stridex [hipblasStride]
             specifies the increment from the beginning of x_i to the beginning of x_(i+1).
    @param[inout]
    y       device pointer to the first vector y_1.
    @param[in]
    incy    [int]
            specifies the increment between elements of each y_i.
    @param[in]
    stridey  [hipblasStride]
             specifies the increment from the beginning of y_i to the beginning of y_(i+1).
    @param[in]
    c       device pointer or host pointer to the scalar cosine component of the rotation matrix.
    @param[in]
    s       device pointer or host pointer to the scalar sine component of the rotation matrix.
    @param[in]
    batchCount [int]
            the number of x and y arrays, that is, the number of batches.

            ********************************************************************/

// float overload: x and y are float*, c and s are const float*; stridex and
// stridey are hipblasStride, the remaining integer arguments are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSrotStridedBatched(hipblasHandle_t handle,
                                                         int             n,
                                                         float*          x,
                                                         int             incx,
                                                         hipblasStride   stridex,
                                                         float*          y,
                                                         int             incy,
                                                         hipblasStride   stridey,
                                                         const float*    c,
                                                         const float*    s,
                                                         int             batchCount);

// double overload: x and y are double*, c and s are const double*.
HIPBLAS_EXPORT hipblasStatus_t hipblasDrotStridedBatched(hipblasHandle_t handle,
                                                         int             n,
                                                         double*         x,
                                                         int             incx,
                                                         hipblasStride   stridex,
                                                         double*         y,
                                                         int             incy,
                                                         hipblasStride   stridey,
                                                         const double*   c,
                                                         const double*   s,
                                                         int             batchCount);

// hipComplex overload: x and y are hipComplex*, c is a const float* and s is a
// const hipComplex*.
HIPBLAS_EXPORT hipblasStatus_t hipblasCrotStridedBatched(hipblasHandle_t   handle,
                                                         int               n,
                                                         hipComplex*       x,
                                                         int               incx,
                                                         hipblasStride     stridex,
                                                         hipComplex*       y,
                                                         int               incy,
                                                         hipblasStride     stridey,
                                                         const float*      c,
                                                         const hipComplex* s,
                                                         int               batchCount);

// Differs from hipblasCrotStridedBatched only in s, which is a const float* here
// instead of a const hipComplex*.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsrotStridedBatched(hipblasHandle_t handle,
                                                          int             n,
                                                          hipComplex*     x,
                                                          int             incx,
                                                          hipblasStride   stridex,
                                                          hipComplex*     y,
                                                          int             incy,
                                                          hipblasStride   stridey,
                                                          const float*    c,
                                                          const float*    s,
                                                          int             batchCount);

// hipDoubleComplex overload: x and y are hipDoubleComplex*, c is a const double*
// and s is a const hipDoubleComplex*.
HIPBLAS_EXPORT hipblasStatus_t hipblasZrotStridedBatched(hipblasHandle_t         handle,
                                                         int                     n,
                                                         hipDoubleComplex*       x,
                                                         int                     incx,
                                                         hipblasStride           stridex,
                                                         hipDoubleComplex*       y,
                                                         int                     incy,
                                                         hipblasStride           stridey,
                                                         const double*           c,
                                                         const hipDoubleComplex* s,
                                                         int                     batchCount);

// Differs from hipblasZrotStridedBatched only in s, which is a const double* here
// instead of a const hipDoubleComplex*.
HIPBLAS_EXPORT hipblasStatus_t hipblasZdrotStridedBatched(hipblasHandle_t   handle,
                                                          int               n,
                                                          hipDoubleComplex* x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          hipDoubleComplex* y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          const double*     c,
                                                          const double*     s,
                                                          int               batchCount);

// 64-bit interface
// Same pointer and stride parameters as hipblasSrotStridedBatched; n, incx, incy
// and batchCount are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSrotStridedBatched_64(hipblasHandle_t handle,
                                                            int64_t         n,
                                                            float*          x,
                                                            int64_t         incx,
                                                            hipblasStride   stridex,
                                                            float*          y,
                                                            int64_t         incy,
                                                            hipblasStride   stridey,
                                                            const float*    c,
                                                            const float*    s,
                                                            int64_t         batchCount);

// Same pointer and stride parameters as hipblasDrotStridedBatched; n, incx, incy
// and batchCount are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDrotStridedBatched_64(hipblasHandle_t handle,
                                                            int64_t         n,
                                                            double*         x,
                                                            int64_t         incx,
                                                            hipblasStride   stridex,
                                                            double*         y,
                                                            int64_t         incy,
                                                            hipblasStride   stridey,
                                                            const double*   c,
                                                            const double*   s,
                                                            int64_t         batchCount);

// Same pointer and stride parameters as hipblasCrotStridedBatched (s is a const
// hipComplex*); n, incx, incy and batchCount are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCrotStridedBatched_64(hipblasHandle_t   handle,
                                                            int64_t           n,
                                                            hipComplex*       x,
                                                            int64_t           incx,
                                                            hipblasStride     stridex,
                                                            hipComplex*       y,
                                                            int64_t           incy,
                                                            hipblasStride     stridey,
                                                            const float*      c,
                                                            const hipComplex* s,
                                                            int64_t           batchCount);

// Same pointer and stride parameters as hipblasCsrotStridedBatched (s is a const
// float*); n, incx, incy and batchCount are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsrotStridedBatched_64(hipblasHandle_t handle,
                                                             int64_t         n,
                                                             hipComplex*     x,
                                                             int64_t         incx,
                                                             hipblasStride   stridex,
                                                             hipComplex*     y,
                                                             int64_t         incy,
                                                             hipblasStride   stridey,
                                                             const float*    c,
                                                             const float*    s,
                                                             int64_t         batchCount);

// Same pointer and stride parameters as hipblasZrotStridedBatched (s is a const
// hipDoubleComplex*); n, incx, incy and batchCount are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZrotStridedBatched_64(hipblasHandle_t         handle,
                                                            int64_t                 n,
                                                            hipDoubleComplex*       x,
                                                            int64_t                 incx,
                                                            hipblasStride           stridex,
                                                            hipDoubleComplex*       y,
                                                            int64_t                 incy,
                                                            hipblasStride           stridey,
                                                            const double*           c,
                                                            const hipDoubleComplex* s,
                                                            int64_t                 batchCount);

// Same pointer and stride parameters as hipblasZdrotStridedBatched (s is a const
// double*); n, incx, incy and batchCount are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZdrotStridedBatched_64(hipblasHandle_t   handle,
                                                             int64_t           n,
                                                             hipDoubleComplex* x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             hipDoubleComplex* y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             const double*     c,
                                                             const double*     s,
                                                             int64_t           batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The rotg functions create the Givens rotation matrix for the vector ``(a b)``.
         Scalars ``c`` and ``s`` and arrays ``a`` and ``b`` can be stored in either host or device memory. The location is specified by calling ``hipblasSetPointerMode``.
         If the pointer mode is set to ``HIPBLAS_POINTER_MODE_HOST``, this function blocks the CPU until the GPU has finished and the results are available in host memory.
         If the pointer mode is set to ``HIPBLAS_POINTER_MODE_DEVICE``, this function returns immediately and synchronization is required to read the results.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle  [hipblasHandle_t]
            handle to the hipBLAS library context queue.
    @param[inout]
    a       device pointer or host pointer to the input vector element, overwritten with r.
    @param[inout]
    b       device pointer or host pointer to the input vector element, overwritten with z.
    @param[inout]
    c       device pointer or host pointer to the cosine element of the Givens rotation.
    @param[inout]
    s       device pointer or host pointer to the sine element of the Givens rotation.

            ********************************************************************/

// float overload: a, b, c and s are all float*.
HIPBLAS_EXPORT hipblasStatus_t
    hipblasSrotg(hipblasHandle_t handle, float* a, float* b, float* c, float* s);

// double overload: a, b, c and s are all double*.
HIPBLAS_EXPORT hipblasStatus_t
    hipblasDrotg(hipblasHandle_t handle, double* a, double* b, double* c, double* s);

// hipComplex overload: a, b and s are hipComplex*, while c is a float*.
HIPBLAS_EXPORT hipblasStatus_t
    hipblasCrotg(hipblasHandle_t handle, hipComplex* a, hipComplex* b, float* c, hipComplex* s);

// hipDoubleComplex overload: a, b and s are hipDoubleComplex*, while c is a double*.
HIPBLAS_EXPORT hipblasStatus_t hipblasZrotg(hipblasHandle_t   handle,
                                            hipDoubleComplex* a,
                                            hipDoubleComplex* b,
                                            double*           c,
                                            hipDoubleComplex* s);

// 64-bit interface
// _64 entry point; its parameter list is identical to hipblasSrotg because rotg
// takes no integer size arguments.
HIPBLAS_EXPORT hipblasStatus_t
    hipblasSrotg_64(hipblasHandle_t handle, float* a, float* b, float* c, float* s);

// _64 entry point; its parameter list is identical to hipblasDrotg.
HIPBLAS_EXPORT hipblasStatus_t
    hipblasDrotg_64(hipblasHandle_t handle, double* a, double* b, double* c, double* s);

// _64 entry point; its parameter list is identical to hipblasCrotg (c is float*,
// the other pointers are hipComplex*).
HIPBLAS_EXPORT hipblasStatus_t
    hipblasCrotg_64(hipblasHandle_t handle, hipComplex* a, hipComplex* b, float* c, hipComplex* s);

// _64 entry point; its parameter list is identical to hipblasZrotg (c is double*,
// the other pointers are hipDoubleComplex*).
HIPBLAS_EXPORT hipblasStatus_t hipblasZrotg_64(hipblasHandle_t   handle,
                                               hipDoubleComplex* a,
                                               hipDoubleComplex* b,
                                               double*           c,
                                               hipDoubleComplex* s);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The rotgBatched functions create the Givens rotation matrix for the batched vectors ``(a_i b_i)``, for ``i`` = 1, ..., ``batchCount``.
         ``a``, ``b``, ``c``, and ``s`` can be stored in either host or device memory. The location is specified by calling ``hipblasSetPointerMode``.
         If the pointer mode is set to ``HIPBLAS_POINTER_MODE_HOST``, this function blocks the CPU until the GPU has finished and the results are available in host memory.
         If the pointer mode is set to ``HIPBLAS_POINTER_MODE_DEVICE``, this function returns immediately and synchronization is required to read the results.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle  [hipblasHandle_t]
            handle to the hipBLAS library context queue.
    @param[inout]
    a       device array of device pointers storing each single input vector element a_i, overwritten with r_i.
    @param[inout]
    b       device array of device pointers storing each single input vector element b_i, overwritten with z_i.
    @param[inout]
    c       device array of device pointers storing each cosine element of the Givens rotation for the batch.
    @param[inout]
    s       device array of device pointers storing each sine element of the Givens rotation for the batch.
    @param[in]
    batchCount [int]
                number of batches (length of arrays a, b, c, and s).

            ********************************************************************/

// float overload: a[], b[], c[] and s[] are arrays of float pointers; batchCount is int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSrotgBatched(hipblasHandle_t handle,
                                                   float* const    a[],
                                                   float* const    b[],
                                                   float* const    c[],
                                                   float* const    s[],
                                                   int             batchCount);

// double overload: a[], b[], c[] and s[] are arrays of double pointers; batchCount is int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDrotgBatched(hipblasHandle_t handle,
                                                   double* const   a[],
                                                   double* const   b[],
                                                   double* const   c[],
                                                   double* const   s[],
                                                   int             batchCount);

// hipComplex overload: a[], b[] and s[] are arrays of hipComplex pointers, while
// c[] is an array of float pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasCrotgBatched(hipblasHandle_t   handle,
                                                   hipComplex* const a[],
                                                   hipComplex* const b[],
                                                   float* const      c[],
                                                   hipComplex* const s[],
                                                   int               batchCount);

// hipDoubleComplex overload: a[], b[] and s[] are arrays of hipDoubleComplex
// pointers, while c[] is an array of double pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasZrotgBatched(hipblasHandle_t         handle,
                                                   hipDoubleComplex* const a[],
                                                   hipDoubleComplex* const b[],
                                                   double* const           c[],
                                                   hipDoubleComplex* const s[],
                                                   int                     batchCount);

// 64-bit interface
// Same pointer-array parameters as hipblasSrotgBatched; batchCount is int64_t
// instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSrotgBatched_64(hipblasHandle_t handle,
                                                      float* const    a[],
                                                      float* const    b[],
                                                      float* const    c[],
                                                      float* const    s[],
                                                      int64_t         batchCount);

// Same pointer-array parameters as hipblasDrotgBatched; batchCount is int64_t
// instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDrotgBatched_64(hipblasHandle_t handle,
                                                      double* const   a[],
                                                      double* const   b[],
                                                      double* const   c[],
                                                      double* const   s[],
                                                      int64_t         batchCount);

// Same pointer-array parameters as hipblasCrotgBatched (c[] holds float pointers);
// batchCount is int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCrotgBatched_64(hipblasHandle_t   handle,
                                                      hipComplex* const a[],
                                                      hipComplex* const b[],
                                                      float* const      c[],
                                                      hipComplex* const s[],
                                                      int64_t           batchCount);

// Same pointer-array parameters as hipblasZrotgBatched (c[] holds double pointers);
// batchCount is int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZrotgBatched_64(hipblasHandle_t         handle,
                                                      hipDoubleComplex* const a[],
                                                      hipDoubleComplex* const b[],
                                                      double* const           c[],
                                                      hipDoubleComplex* const s[],
                                                      int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The rotgStridedBatched functions create the Givens rotation matrix for the strided batched vectors ``(a_i b_i)``, for ``i`` = 1, ..., ``batchCount``.
         ``a``, ``b``, ``c``, and ``s`` can be stored in either host or device memory. The location is specified by calling ``hipblasSetPointerMode``.
         If the pointer mode is set to ``HIPBLAS_POINTER_MODE_HOST``, this function blocks the CPU until the GPU has finished and the results are available in host memory.
         If the pointer mode is set to ``HIPBLAS_POINTER_MODE_DEVICE``, this function returns immediately and synchronization is required to read the results.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle  [hipblasHandle_t]
            handle to the hipBLAS library context queue.
    @param[inout]
    a       device strided_batched pointer or host strided_batched pointer to the first single input vector element a_1, overwritten with r.
    @param[in]
    stridea [hipblasStride]
             distance between elements of a in batch (distance between a_i and a_(i + 1)).
    @param[inout]
    b       device strided_batched pointer or host strided_batched pointer to the first single input vector element b_1, overwritten with z.
    @param[in]
    strideb [hipblasStride]
             distance between elements of b in batch (distance between b_i and b_(i + 1)).
    @param[inout]
    c       device strided_batched pointer or host strided_batched pointer to the first cosine element of the Givens rotations c_1.
    @param[in]
    stridec [hipblasStride]
             distance between elements of c in batch (distance between c_i and c_(i + 1)).
    @param[inout]
    s       device strided_batched pointer or host strided_batched pointer to the first sine element of the Givens rotations s_1.
    @param[in]
    strides [hipblasStride]
             distance between elements of s in batch (distance between s_i and s_(i + 1)).
    @param[in]
    batchCount [int]
                number of batches (length of arrays a, b, c, and s).

            ********************************************************************/

// float overload: a, b, c and s are float*, each followed by its own
// hipblasStride; batchCount is int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSrotgStridedBatched(hipblasHandle_t handle,
                                                          float*          a,
                                                          hipblasStride   stridea,
                                                          float*          b,
                                                          hipblasStride   strideb,
                                                          float*          c,
                                                          hipblasStride   stridec,
                                                          float*          s,
                                                          hipblasStride   strides,
                                                          int             batchCount);

// double overload: a, b, c and s are double*, each followed by its own
// hipblasStride; batchCount is int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDrotgStridedBatched(hipblasHandle_t handle,
                                                          double*         a,
                                                          hipblasStride   stridea,
                                                          double*         b,
                                                          hipblasStride   strideb,
                                                          double*         c,
                                                          hipblasStride   stridec,
                                                          double*         s,
                                                          hipblasStride   strides,
                                                          int             batchCount);

// hipComplex overload: a, b and s are hipComplex*, while c is a float*; each
// pointer is followed by its own hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasCrotgStridedBatched(hipblasHandle_t handle,
                                                          hipComplex*     a,
                                                          hipblasStride   stridea,
                                                          hipComplex*     b,
                                                          hipblasStride   strideb,
                                                          float*          c,
                                                          hipblasStride   stridec,
                                                          hipComplex*     s,
                                                          hipblasStride   strides,
                                                          int             batchCount);

// hipDoubleComplex overload: a, b and s are hipDoubleComplex*, while c is a
// double*; each pointer is followed by its own hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasZrotgStridedBatched(hipblasHandle_t   handle,
                                                          hipDoubleComplex* a,
                                                          hipblasStride     stridea,
                                                          hipDoubleComplex* b,
                                                          hipblasStride     strideb,
                                                          double*           c,
                                                          hipblasStride     stridec,
                                                          hipDoubleComplex* s,
                                                          hipblasStride     strides,
                                                          int               batchCount);

// 64-bit interface: batchCount is int64_t; the strides remain hipblasStride.
// Real float variant: a, b, c, and s are all float.
HIPBLAS_EXPORT hipblasStatus_t hipblasSrotgStridedBatched_64(hipblasHandle_t handle,
                                                             float*          a,
                                                             hipblasStride   stridea,
                                                             float*          b,
                                                             hipblasStride   strideb,
                                                             float*          c,
                                                             hipblasStride   stridec,
                                                             float*          s,
                                                             hipblasStride   strides,
                                                             int64_t         batchCount);

// Real double variant with int64_t batchCount: a, b, c, and s are all double.
HIPBLAS_EXPORT hipblasStatus_t hipblasDrotgStridedBatched_64(hipblasHandle_t handle,
                                                             double*         a,
                                                             hipblasStride   stridea,
                                                             double*         b,
                                                             hipblasStride   strideb,
                                                             double*         c,
                                                             hipblasStride   stridec,
                                                             double*         s,
                                                             hipblasStride   strides,
                                                             int64_t         batchCount);

// Complex variant with int64_t batchCount: a, b, and s are hipComplex; c is a real float.
HIPBLAS_EXPORT hipblasStatus_t hipblasCrotgStridedBatched_64(hipblasHandle_t handle,
                                                             hipComplex*     a,
                                                             hipblasStride   stridea,
                                                             hipComplex*     b,
                                                             hipblasStride   strideb,
                                                             float*          c,
                                                             hipblasStride   stridec,
                                                             hipComplex*     s,
                                                             hipblasStride   strides,
                                                             int64_t         batchCount);

// Double-complex variant with int64_t batchCount: a, b, and s are hipDoubleComplex; c is a real double.
HIPBLAS_EXPORT hipblasStatus_t hipblasZrotgStridedBatched_64(hipblasHandle_t   handle,
                                                             hipDoubleComplex* a,
                                                             hipblasStride     stridea,
                                                             hipDoubleComplex* b,
                                                             hipblasStride     strideb,
                                                             double*           c,
                                                             hipblasStride     stridec,
                                                             hipDoubleComplex* s,
                                                             hipblasStride     strides,
                                                             int64_t           batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The rotm functions apply the modified Givens rotation matrix defined by ``param`` to vectors ``x`` and ``y``.

    - Supported precisions in rocBLAS : ``s`` and ``d``.
    - Supported precisions in cuBLAS  : ``s`` and ``d``.

    @param[in]
    handle  [hipblasHandle_t]
            handle to the hipBLAS library context queue.
    @param[in]
    n       [int] ([int64_t] for the _64 functions)
            number of elements in the x and y vectors.
    @param[inout]
    x       device pointer storing vector x.
    @param[in]
    incx    [int] ([int64_t] for the _64 functions)
            specifies the increment between elements of x.
    @param[inout]
    y       device pointer storing vector y.
    @param[in]
    incy    [int] ([int64_t] for the _64 functions)
            specifies the increment between elements of y.
    @param[in]
    param   device vector or host vector of five elements defining the rotation.
            param can be stored in either host or device memory. The location is specified by calling ``hipblasSetPointerMode``.
            - param[0] = flag
            - param[1] = H11
            - param[2] = H21
            - param[3] = H12
            - param[4] = H22  
            The flag parameter defines the form of H:
            - flag = -1 => H = ( H11 H12 H21 H22 )
            - flag =  0 => H = ( 1.0 H12 H21 1.0 )
            - flag =  1 => H = ( H11 1.0 -1.0 H22 )
            - flag = -2 => H = ( 1.0 0.0 0.0 1.0 )

            ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasSrotm(
    hipblasHandle_t handle, int n, float* x, int incx, float* y, int incy, const float* param);

HIPBLAS_EXPORT hipblasStatus_t hipblasDrotm(
    hipblasHandle_t handle, int n, double* x, int incx, double* y, int incy, const double* param);

// 64-bit interface: n, incx, and incy are int64_t
HIPBLAS_EXPORT hipblasStatus_t hipblasSrotm_64(hipblasHandle_t handle,
                                               int64_t         n,
                                               float*          x,
                                               int64_t         incx,
                                               float*          y,
                                               int64_t         incy,
                                               const float*    param);

HIPBLAS_EXPORT hipblasStatus_t hipblasDrotm_64(hipblasHandle_t handle,
                                               int64_t         n,
                                               double*         x,
                                               int64_t         incx,
                                               double*         y,
                                               int64_t         incy,
                                               const double*   param);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The rotmBatched functions apply the modified Givens rotation matrix defined by ``param_i`` to batched vectors ``x_i`` and ``y_i``, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s`` and ``d``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle  [hipblasHandle_t]
            handle to the hipBLAS library context queue.
    @param[in]
    n       [int] ([int64_t] for the _64 functions)
            number of elements in the x and y vectors.
    @param[inout]
    x       device array of device pointers storing each vector x_i.
    @param[in]
    incx    [int] ([int64_t] for the _64 functions)
            specifies the increment between elements of each x_i.
    @param[inout]
    y       device array of device pointers storing each vector y_i.
    @param[in]
    incy    [int] ([int64_t] for the _64 functions)
            specifies the increment between elements of each y_i.
    @param[in]
    param   device array of device vectors of five elements defining the rotation.
            param can ONLY be stored on the device for the batched version of this function.
            - param[0] = flag
            - param[1] = H11
            - param[2] = H21
            - param[3] = H12
            - param[4] = H22  
            The flag parameter defines the form of H:
            - flag = -1 => H = ( H11 H12 H21 H22 )
            - flag =  0 => H = ( 1.0 H12 H21 1.0 )
            - flag =  1 => H = ( H11 1.0 -1.0 H22 )
            - flag = -2 => H = ( 1.0 0.0 0.0 1.0 )
    @param[in]
    batchCount [int] ([int64_t] for the _64 functions)
                the number of x and y arrays, that is, the number of batches.

            ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasSrotmBatched(hipblasHandle_t    handle,
                                                   int                n,
                                                   float* const       x[],
                                                   int                incx,
                                                   float* const       y[],
                                                   int                incy,
                                                   const float* const param[],
                                                   int                batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDrotmBatched(hipblasHandle_t     handle,
                                                   int                 n,
                                                   double* const       x[],
                                                   int                 incx,
                                                   double* const       y[],
                                                   int                 incy,
                                                   const double* const param[],
                                                   int                 batchCount);

// 64-bit interface: n, incx, incy, and batchCount are int64_t
HIPBLAS_EXPORT hipblasStatus_t hipblasSrotmBatched_64(hipblasHandle_t    handle,
                                                      int64_t            n,
                                                      float* const       x[],
                                                      int64_t            incx,
                                                      float* const       y[],
                                                      int64_t            incy,
                                                      const float* const param[],
                                                      int64_t            batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDrotmBatched_64(hipblasHandle_t     handle,
                                                      int64_t             n,
                                                      double* const       x[],
                                                      int64_t             incx,
                                                      double* const       y[],
                                                      int64_t             incy,
                                                      const double* const param[],
                                                      int64_t             batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The rotmStridedBatched functions apply the modified Givens rotation matrix defined by ``param_i`` to strided batched vectors ``x_i`` and ``y_i``, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s`` and ``d``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle  [hipblasHandle_t]
            handle to the hipBLAS library context queue.
    @param[in]
    n       [int] ([int64_t] for the _64 functions)
            number of elements in the x and y vectors.
    @param[inout]
    x       device pointer pointing to the first strided batched vector x_1.
    @param[in]
    incx    [int] ([int64_t] for the _64 functions)
            specifies the increment between elements of each x_i.
    @param[in]
    stridex [hipblasStride]
            specifies the increment between the beginning of x_i and x_(i + 1).
    @param[inout]
    y       device pointer pointing to the first strided batched vector y_1.
    @param[in]
    incy    [int] ([int64_t] for the _64 functions)
            specifies the increment between elements of each y_i.
    @param[in]
    stridey [hipblasStride]
            specifies the increment between the beginning of y_i and y_(i + 1).
    @param[in]
    param   device pointer pointing to the first array of five elements defining the rotation (param_1).
            param can ONLY be stored on the device for the strided_batched version of this function.
            - param[0] = flag
            - param[1] = H11
            - param[2] = H21
            - param[3] = H12
            - param[4] = H22  
            The flag parameter defines the form of H:
            - flag = -1 => H = ( H11 H12 H21 H22 )
            - flag =  0 => H = ( 1.0 H12 H21 1.0 )
            - flag =  1 => H = ( H11 1.0 -1.0 H22 )
            - flag = -2 => H = ( 1.0 0.0 0.0 1.0 )
    @param[in]
    strideParam [hipblasStride]
                 specifies the increment between the beginning of param_i and param_(i + 1).
    @param[in]
    batchCount [int] ([int64_t] for the _64 functions)
                the number of x and y arrays, that is, the number of batches.

            ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasSrotmStridedBatched(hipblasHandle_t handle,
                                                          int             n,
                                                          float*          x,
                                                          int             incx,
                                                          hipblasStride   stridex,
                                                          float*          y,
                                                          int             incy,
                                                          hipblasStride   stridey,
                                                          const float*    param,
                                                          hipblasStride   strideParam,
                                                          int             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDrotmStridedBatched(hipblasHandle_t handle,
                                                          int             n,
                                                          double*         x,
                                                          int             incx,
                                                          hipblasStride   stridex,
                                                          double*         y,
                                                          int             incy,
                                                          hipblasStride   stridey,
                                                          const double*   param,
                                                          hipblasStride   strideParam,
                                                          int             batchCount);

// 64-bit interface: n, incx, incy, and batchCount are int64_t; strides remain hipblasStride
HIPBLAS_EXPORT hipblasStatus_t hipblasSrotmStridedBatched_64(hipblasHandle_t handle,
                                                             int64_t         n,
                                                             float*          x,
                                                             int64_t         incx,
                                                             hipblasStride   stridex,
                                                             float*          y,
                                                             int64_t         incy,
                                                             hipblasStride   stridey,
                                                             const float*    param,
                                                             hipblasStride   strideParam,
                                                             int64_t         batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDrotmStridedBatched_64(hipblasHandle_t handle,
                                                             int64_t         n,
                                                             double*         x,
                                                             int64_t         incx,
                                                             hipblasStride   stridex,
                                                             double*         y,
                                                             int64_t         incy,
                                                             hipblasStride   stridey,
                                                             const double*   param,
                                                             hipblasStride   strideParam,
                                                             int64_t         batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The rotmg functions create the modified Givens rotation matrix for the vector ``(d1 * x1, d2 * y1)``.
          Parameters can be stored in either host or device memory. The location is specified by calling ``hipblasSetPointerMode``.
          If the pointer mode is set to ``HIPBLAS_POINTER_MODE_HOST``, this function blocks the CPU until the GPU has finished and the results are available in host memory.
          If the pointer mode is set to ``HIPBLAS_POINTER_MODE_DEVICE``, this function returns immediately and synchronization is required to read the results.

    - Supported precisions in rocBLAS : ``s`` and ``d``.
    - Supported precisions in cuBLAS  : ``s`` and ``d``.

    @param[in]
    handle  [hipblasHandle_t]
            handle to the hipBLAS library context queue.
    @param[inout]
    d1      device pointer or host pointer to input scalar that is overwritten.
    @param[inout]
    d2      device pointer or host pointer to input scalar that is overwritten.
    @param[inout]
    x1      device pointer or host pointer to input scalar that is overwritten.
    @param[in]
    y1      device pointer or host pointer to input scalar.
    @param[out]
    param   device vector or host vector of five elements defining the rotation.
            param can be stored in either host or device memory. The location is specified by calling ``hipblasSetPointerMode``.
            - param[0] = flag
            - param[1] = H11
            - param[2] = H21
            - param[3] = H12
            - param[4] = H22  
            The flag parameter defines the form of H:
            - flag = -1 => H = ( H11 H12 H21 H22 )
            - flag =  0 => H = ( 1.0 H12 H21 1.0 )
            - flag =  1 => H = ( H11 1.0 -1.0 H22 )
            - flag = -2 => H = ( 1.0 0.0 0.0 1.0 )

            ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasSrotmg(
    hipblasHandle_t handle, float* d1, float* d2, float* x1, const float* y1, float* param);

HIPBLAS_EXPORT hipblasStatus_t hipblasDrotmg(
    hipblasHandle_t handle, double* d1, double* d2, double* x1, const double* y1, double* param);

// 64-bit interface: rotmg takes no integer arguments, so the _64 parameter lists
// are the same as those of the functions above
HIPBLAS_EXPORT hipblasStatus_t hipblasSrotmg_64(
    hipblasHandle_t handle, float* d1, float* d2, float* x1, const float* y1, float* param);

HIPBLAS_EXPORT hipblasStatus_t hipblasDrotmg_64(
    hipblasHandle_t handle, double* d1, double* d2, double* x1, const double* y1, double* param);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The rotmgBatched functions create the modified Givens rotation matrix for the batched vectors ``(d1_i * x1_i, d2_i * y1_i)``, for ``i`` = 1, ..., ``batchCount``.
          Parameters can be stored in either host or device memory. The location is specified by calling ``hipblasSetPointerMode``.
          If the pointer mode is set to ``HIPBLAS_POINTER_MODE_HOST``, this function blocks the CPU until the GPU has finished and the results are available in host memory.
          If the pointer mode is set to ``HIPBLAS_POINTER_MODE_DEVICE``, this function returns immediately and synchronization is required to read the results.

    - Supported precisions in rocBLAS : ``s`` and ``d``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle  [hipblasHandle_t]
            handle to the hipBLAS library context queue.
    @param[inout]
    d1      device batched array or host batched array of input scalars that is overwritten.
    @param[inout]
    d2      device batched array or host batched array of input scalars that is overwritten.
    @param[inout]
    x1      device batched array or host batched array of input scalars that is overwritten.
    @param[in]
    y1      device batched array or host batched array of input scalars.
    @param[out]
    param   device batched array or host batched array of vectors of five elements defining the rotation.
            param can be stored in either host or device memory. The location is specified by calling ``hipblasSetPointerMode``.
            - param[0] = flag
            - param[1] = H11
            - param[2] = H21
            - param[3] = H12
            - param[4] = H22  
            The flag parameter defines the form of H:
            - flag = -1 => H = ( H11 H12 H21 H22 )
            - flag =  0 => H = ( 1.0 H12 H21 1.0 )
            - flag =  1 => H = ( H11 1.0 -1.0 H22 )
            - flag = -2 => H = ( 1.0 0.0 0.0 1.0 )
    @param[in]
    batchCount [int] ([int64_t] for the _64 functions)
                the number of instances in the batch.

            ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasSrotmgBatched(hipblasHandle_t    handle,
                                                    float* const       d1[],
                                                    float* const       d2[],
                                                    float* const       x1[],
                                                    const float* const y1[],
                                                    float* const       param[],
                                                    int                batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDrotmgBatched(hipblasHandle_t     handle,
                                                    double* const       d1[],
                                                    double* const       d2[],
                                                    double* const       x1[],
                                                    const double* const y1[],
                                                    double* const       param[],
                                                    int                 batchCount);

// 64-bit interface: batchCount is int64_t
HIPBLAS_EXPORT hipblasStatus_t hipblasSrotmgBatched_64(hipblasHandle_t    handle,
                                                       float* const       d1[],
                                                       float* const       d2[],
                                                       float* const       x1[],
                                                       const float* const y1[],
                                                       float* const       param[],
                                                       int64_t            batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDrotmgBatched_64(hipblasHandle_t     handle,
                                                       double* const       d1[],
                                                       double* const       d2[],
                                                       double* const       x1[],
                                                       const double* const y1[],
                                                       double* const       param[],
                                                       int64_t             batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The rotmgStridedBatched functions create the modified Givens rotation matrix for the strided batched vectors ``(d1_i * x1_i, d2_i * y1_i)``, for ``i`` = 1, ..., ``batchCount``.
          Parameters can be stored in either host or device memory. The location is specified by calling ``hipblasSetPointerMode``.
          If the pointer mode is set to ``HIPBLAS_POINTER_MODE_HOST``, this function blocks the CPU until the GPU has finished and the results are available in host memory.
          If the pointer mode is set to ``HIPBLAS_POINTER_MODE_DEVICE``, this function returns immediately and synchronization is required to read the results.

    - Supported precisions in rocBLAS : ``s`` and ``d``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle  [hipblasHandle_t]
            handle to the hipBLAS library context queue.
    @param[inout]
    d1      device strided_batched array or host strided_batched array of input scalars that is overwritten.
    @param[in]
    strided1 [hipblasStride]
              specifies the increment between the beginning of d1_i and d1_(i+1).
    @param[inout]
    d2      device strided_batched array or host strided_batched array of input scalars that is overwritten.
    @param[in]
    strided2 [hipblasStride]
              specifies the increment between the beginning of d2_i and d2_(i+1).
    @param[inout]
    x1      device strided_batched array or host strided_batched array of input scalars that is overwritten.
    @param[in]
    stridex1 [hipblasStride]
              specifies the increment between the beginning of x1_i and x1_(i+1).
    @param[in]
    y1      device strided_batched array or host strided_batched array of input scalars.
    @param[in]
    stridey1 [hipblasStride]
              specifies the increment between the beginning of y1_i and y1_(i+1).
    @param[out]
    param   device strided_batched array or host strided_batched array of vectors of five elements defining the rotation.
            param can be stored in either host or device memory. The location is specified by calling ``hipblasSetPointerMode``.
            - param[0] = flag
            - param[1] = H11
            - param[2] = H21
            - param[3] = H12
            - param[4] = H22  
            The flag parameter defines the form of H:
            - flag = -1 => H = ( H11 H12 H21 H22 )
            - flag =  0 => H = ( 1.0 H12 H21 1.0 )
            - flag =  1 => H = ( H11 1.0 -1.0 H22 )
            - flag = -2 => H = ( 1.0 0.0 0.0 1.0 )
    @param[in]
    strideParam [hipblasStride]
                 specifies the increment between the beginning of param_i and param_(i + 1).
    @param[in]
    batchCount [int] ([int64_t] for the _64 functions)
                the number of instances in the batch.

            ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasSrotmgStridedBatched(hipblasHandle_t handle,
                                                           float*          d1,
                                                           hipblasStride   strided1,
                                                           float*          d2,
                                                           hipblasStride   strided2,
                                                           float*          x1,
                                                           hipblasStride   stridex1,
                                                           const float*    y1,
                                                           hipblasStride   stridey1,
                                                           float*          param,
                                                           hipblasStride   strideParam,
                                                           int             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDrotmgStridedBatched(hipblasHandle_t handle,
                                                           double*         d1,
                                                           hipblasStride   strided1,
                                                           double*         d2,
                                                           hipblasStride   strided2,
                                                           double*         x1,
                                                           hipblasStride   stridex1,
                                                           const double*   y1,
                                                           hipblasStride   stridey1,
                                                           double*         param,
                                                           hipblasStride   strideParam,
                                                           int             batchCount);

// 64-bit interface: batchCount is int64_t; strides remain hipblasStride
HIPBLAS_EXPORT hipblasStatus_t hipblasSrotmgStridedBatched_64(hipblasHandle_t handle,
                                                              float*          d1,
                                                              hipblasStride   strided1,
                                                              float*          d2,
                                                              hipblasStride   strided2,
                                                              float*          x1,
                                                              hipblasStride   stridex1,
                                                              const float*    y1,
                                                              hipblasStride   stridey1,
                                                              float*          param,
                                                              hipblasStride   strideParam,
                                                              int64_t         batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDrotmgStridedBatched_64(hipblasHandle_t handle,
                                                              double*         d1,
                                                              hipblasStride   strided1,
                                                              double*         d2,
                                                              hipblasStride   strided2,
                                                              double*         x1,
                                                              hipblasStride   stridex1,
                                                              const double*   y1,
                                                              hipblasStride   stridey1,
                                                              double*         param,
                                                              hipblasStride   strideParam,
                                                              int64_t         batchCount);
//! @}

/*! @{
    \brief  <b> BLAS Level 1 API </b>

    \details
    The scal functions scale each element of vector ``x`` with scalar ``alpha``.

        x := alpha * x

    The ``cs`` and ``zd`` variants (``hipblasCsscal`` and ``hipblasZdscal``) take a real ``alpha`` and a complex ``x``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, ``z``, ``cs``, and ``zd``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, ``z``, ``cs``, and ``zd``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int] ([int64_t] for the _64 functions)
              the number of elements in x.
    @param[in]
    alpha     device pointer or host pointer for the scalar alpha.
    @param[inout]
    x         device pointer storing vector x.
    @param[in]
    incx      [int] ([int64_t] for the _64 functions)
              specifies the increment for the elements of x.

            ********************************************************************/
HIPBLAS_EXPORT hipblasStatus_t
    hipblasSscal(hipblasHandle_t handle, int n, const float* alpha, float* x, int incx);

HIPBLAS_EXPORT hipblasStatus_t
    hipblasDscal(hipblasHandle_t handle, int n, const double* alpha, double* x, int incx);

HIPBLAS_EXPORT hipblasStatus_t
    hipblasCscal(hipblasHandle_t handle, int n, const hipComplex* alpha, hipComplex* x, int incx);

// Real (float) alpha applied to a hipComplex vector
HIPBLAS_EXPORT hipblasStatus_t
    hipblasCsscal(hipblasHandle_t handle, int n, const float* alpha, hipComplex* x, int incx);

HIPBLAS_EXPORT hipblasStatus_t hipblasZscal(
    hipblasHandle_t handle, int n, const hipDoubleComplex* alpha, hipDoubleComplex* x, int incx);

// Real (double) alpha applied to a hipDoubleComplex vector
HIPBLAS_EXPORT hipblasStatus_t hipblasZdscal(
    hipblasHandle_t handle, int n, const double* alpha, hipDoubleComplex* x, int incx);

// 64-bit interface: n and incx are int64_t
HIPBLAS_EXPORT hipblasStatus_t
    hipblasSscal_64(hipblasHandle_t handle, int64_t n, const float* alpha, float* x, int64_t incx);

HIPBLAS_EXPORT hipblasStatus_t hipblasDscal_64(
    hipblasHandle_t handle, int64_t n, const double* alpha, double* x, int64_t incx);

HIPBLAS_EXPORT hipblasStatus_t hipblasCscal_64(
    hipblasHandle_t handle, int64_t n, const hipComplex* alpha, hipComplex* x, int64_t incx);

HIPBLAS_EXPORT hipblasStatus_t hipblasCsscal_64(
    hipblasHandle_t handle, int64_t n, const float* alpha, hipComplex* x, int64_t incx);

HIPBLAS_EXPORT hipblasStatus_t hipblasZscal_64(hipblasHandle_t         handle,
                                               int64_t                 n,
                                               const hipDoubleComplex* alpha,
                                               hipDoubleComplex*       x,
                                               int64_t                 incx);

HIPBLAS_EXPORT hipblasStatus_t hipblasZdscal_64(
    hipblasHandle_t handle, int64_t n, const double* alpha, hipDoubleComplex* x, int64_t incx);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The scalBatched functions scale each element of vector ``x_i`` by the scalar ``alpha``,
    for ``i`` = 1, ... , ``batchCount``.

         x_i := alpha * x_i

    where ``x_i`` is the ``i``-th instance of the batch. The same ``alpha`` is used
    for every instance. In the ``cs`` and ``zd`` variants, ``alpha`` is a real scalar
    (``float`` / ``double``) that is applied to complex vectors ``x_i``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, ``z``, ``cs``, and ``zd``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle      [hipblasHandle_t]
                handle to the hipBLAS library context queue.
    @param[in]
    n           [int]
                the number of elements in each x_i
                (``int64_t`` in the ``_64`` interface).
    @param[in]
    alpha       host pointer or device pointer for the scalar alpha.
    @param[inout]
    x           device array of device pointers storing each vector x_i.
    @param[in]
    incx        [int]
                specifies the increment for the elements of each x_i
                (``int64_t`` in the ``_64`` interface).
    @param[in]
    batchCount  [int]
                specifies the number of instances (vectors x_i) in the batch
                (``int64_t`` in the ``_64`` interface).

    @return     a ``hipblasStatus_t`` status code.

             ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasSscalBatched(
    hipblasHandle_t handle, int n, const float* alpha, float* const x[], int incx, int batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDscalBatched(hipblasHandle_t handle,
                                                   int             n,
                                                   const double*   alpha,
                                                   double* const   x[],
                                                   int             incx,
                                                   int             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCscalBatched(hipblasHandle_t   handle,
                                                   int               n,
                                                   const hipComplex* alpha,
                                                   hipComplex* const x[],
                                                   int               incx,
                                                   int               batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZscalBatched(hipblasHandle_t         handle,
                                                   int                     n,
                                                   const hipDoubleComplex* alpha,
                                                   hipDoubleComplex* const x[],
                                                   int                     incx,
                                                   int                     batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCsscalBatched(hipblasHandle_t   handle,
                                                    int               n,
                                                    const float*      alpha,
                                                    hipComplex* const x[],
                                                    int               incx,
                                                    int               batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZdscalBatched(hipblasHandle_t         handle,
                                                    int                     n,
                                                    const double*           alpha,
                                                    hipDoubleComplex* const x[],
                                                    int                     incx,
                                                    int                     batchCount);

// 64-bit interface
HIPBLAS_EXPORT hipblasStatus_t hipblasSscalBatched_64(hipblasHandle_t handle,
                                                      int64_t         n,
                                                      const float*    alpha,
                                                      float* const    x[],
                                                      int64_t         incx,
                                                      int64_t         batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDscalBatched_64(hipblasHandle_t handle,
                                                      int64_t         n,
                                                      const double*   alpha,
                                                      double* const   x[],
                                                      int64_t         incx,
                                                      int64_t         batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCscalBatched_64(hipblasHandle_t   handle,
                                                      int64_t           n,
                                                      const hipComplex* alpha,
                                                      hipComplex* const x[],
                                                      int64_t           incx,
                                                      int64_t           batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZscalBatched_64(hipblasHandle_t         handle,
                                                      int64_t                 n,
                                                      const hipDoubleComplex* alpha,
                                                      hipDoubleComplex* const x[],
                                                      int64_t                 incx,
                                                      int64_t                 batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCsscalBatched_64(hipblasHandle_t   handle,
                                                       int64_t           n,
                                                       const float*      alpha,
                                                       hipComplex* const x[],
                                                       int64_t           incx,
                                                       int64_t           batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZdscalBatched_64(hipblasHandle_t         handle,
                                                       int64_t                 n,
                                                       const double*           alpha,
                                                       hipDoubleComplex* const x[],
                                                       int64_t                 incx,
                                                       int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The scalStridedBatched functions scale each element of vector ``x_i`` by the scalar ``alpha``,
    for ``i`` = 1, ... , ``batchCount``.

         x_i := alpha * x_i ,

    where ``x_i`` is the ``i``-th instance of the batch. The same ``alpha`` is used
    for every instance. In the ``cs`` and ``zd`` variants, ``alpha`` is a real scalar
    (``float`` / ``double``) that is applied to complex vectors ``x_i``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, ``z``, ``cs``, and ``zd``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle      [hipblasHandle_t]
                handle to the hipBLAS library context queue.
    @param[in]
    n           [int]
                the number of elements in each x_i
                (``int64_t`` in the ``_64`` interface).
    @param[in]
    alpha       host pointer or device pointer for the scalar alpha.
    @param[inout]
    x           device pointer to the first vector (x_1) in the batch.
    @param[in]
    incx        [int]
                specifies the increment for the elements of x
                (``int64_t`` in the ``_64`` interface).
    @param[in]
    stridex     [hipblasStride]
                stride from the start of one vector (x_i) to the next one (x_i+1).
                There are no restrictions placed on stridex. However, the user should
                ensure that stridex is of an appropriate size. For a typical
                case, this means stridex >= n * incx.
    @param[in]
    batchCount  [int]
                specifies the number of instances (vectors x_i) in the batch
                (``int64_t`` in the ``_64`` interface).

    @return     a ``hipblasStatus_t`` status code.

             ********************************************************************/
HIPBLAS_EXPORT hipblasStatus_t hipblasSscalStridedBatched(hipblasHandle_t handle,
                                                          int             n,
                                                          const float*    alpha,
                                                          float*          x,
                                                          int             incx,
                                                          hipblasStride   stridex,
                                                          int             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDscalStridedBatched(hipblasHandle_t handle,
                                                          int             n,
                                                          const double*   alpha,
                                                          double*         x,
                                                          int             incx,
                                                          hipblasStride   stridex,
                                                          int             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCscalStridedBatched(hipblasHandle_t   handle,
                                                          int               n,
                                                          const hipComplex* alpha,
                                                          hipComplex*       x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          int               batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZscalStridedBatched(hipblasHandle_t         handle,
                                                          int                     n,
                                                          const hipDoubleComplex* alpha,
                                                          hipDoubleComplex*       x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          int                     batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCsscalStridedBatched(hipblasHandle_t handle,
                                                           int             n,
                                                           const float*    alpha,
                                                           hipComplex*     x,
                                                           int             incx,
                                                           hipblasStride   stridex,
                                                           int             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZdscalStridedBatched(hipblasHandle_t   handle,
                                                           int               n,
                                                           const double*     alpha,
                                                           hipDoubleComplex* x,
                                                           int               incx,
                                                           hipblasStride     stridex,
                                                           int               batchCount);

// 64-bit interface
HIPBLAS_EXPORT hipblasStatus_t hipblasSscalStridedBatched_64(hipblasHandle_t handle,
                                                             int64_t         n,
                                                             const float*    alpha,
                                                             float*          x,
                                                             int64_t         incx,
                                                             hipblasStride   stridex,
                                                             int64_t         batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDscalStridedBatched_64(hipblasHandle_t handle,
                                                             int64_t         n,
                                                             const double*   alpha,
                                                             double*         x,
                                                             int64_t         incx,
                                                             hipblasStride   stridex,
                                                             int64_t         batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCscalStridedBatched_64(hipblasHandle_t   handle,
                                                             int64_t           n,
                                                             const hipComplex* alpha,
                                                             hipComplex*       x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             int64_t           batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZscalStridedBatched_64(hipblasHandle_t         handle,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* alpha,
                                                             hipDoubleComplex*       x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             int64_t                 batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCsscalStridedBatched_64(hipblasHandle_t handle,
                                                              int64_t         n,
                                                              const float*    alpha,
                                                              hipComplex*     x,
                                                              int64_t         incx,
                                                              hipblasStride   stridex,
                                                              int64_t         batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZdscalStridedBatched_64(hipblasHandle_t   handle,
                                                              int64_t           n,
                                                              const double*     alpha,
                                                              hipDoubleComplex* x,
                                                              int64_t           incx,
                                                              hipblasStride     stridex,
                                                              int64_t           batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The swap functions interchange vectors ``x`` and ``y``.

        y := x; x := y

    The two assignments above are shorthand for an exchange: after the call,
    ``x`` holds the former contents of ``y`` and ``y`` holds the former contents of ``x``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    n         [int]
              the number of elements in x and y
              (``int64_t`` in the ``_64`` interface).
    @param[inout]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x
              (``int64_t`` in the ``_64`` interface).
    @param[inout]
    y         device pointer storing vector y.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y
              (``int64_t`` in the ``_64`` interface).

    @return   a ``hipblasStatus_t`` status code.

            ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t
    hipblasSswap(hipblasHandle_t handle, int n, float* x, int incx, float* y, int incy);

HIPBLAS_EXPORT hipblasStatus_t
    hipblasDswap(hipblasHandle_t handle, int n, double* x, int incx, double* y, int incy);

HIPBLAS_EXPORT hipblasStatus_t
    hipblasCswap(hipblasHandle_t handle, int n, hipComplex* x, int incx, hipComplex* y, int incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasZswap(
    hipblasHandle_t handle, int n, hipDoubleComplex* x, int incx, hipDoubleComplex* y, int incy);

// 64-bit interface
HIPBLAS_EXPORT hipblasStatus_t hipblasSswap_64(
    hipblasHandle_t handle, int64_t n, float* x, int64_t incx, float* y, int64_t incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasDswap_64(
    hipblasHandle_t handle, int64_t n, double* x, int64_t incx, double* y, int64_t incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasCswap_64(
    hipblasHandle_t handle, int64_t n, hipComplex* x, int64_t incx, hipComplex* y, int64_t incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasZswap_64(hipblasHandle_t   handle,
                                               int64_t           n,
                                               hipDoubleComplex* x,
                                               int64_t           incx,
                                               hipDoubleComplex* y,
                                               int64_t           incy);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The swapBatched functions interchange vectors ``x_i`` and ``y_i``, for ``i`` = 1 , ... , ``batchCount``.

        y_i := x_i; x_i := y_i

    The two assignments above are shorthand for an exchange: after the call, each
    ``x_i`` holds the former contents of ``y_i`` and each ``y_i`` holds the former
    contents of ``x_i``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle     [hipblasHandle_t]
               handle to the hipBLAS library context queue.
    @param[in]
    n          [int]
               the number of elements in each x_i and y_i
               (``int64_t`` in the ``_64`` interface).
    @param[inout]
    x          device array of device pointers storing each vector x_i.
    @param[in]
    incx       [int]
               specifies the increment for the elements of each x_i
               (``int64_t`` in the ``_64`` interface).
    @param[inout]
    y          device array of device pointers storing each vector y_i.
    @param[in]
    incy       [int]
               specifies the increment for the elements of each y_i
               (``int64_t`` in the ``_64`` interface).
    @param[in]
    batchCount [int]
               number of instances in the batch
               (``int64_t`` in the ``_64`` interface).

    @return    a ``hipblasStatus_t`` status code.

            ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasSswapBatched(hipblasHandle_t handle,
                                                   int             n,
                                                   float* const    x[],
                                                   int             incx,
                                                   float* const    y[],
                                                   int             incy,
                                                   int             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDswapBatched(hipblasHandle_t handle,
                                                   int             n,
                                                   double* const   x[],
                                                   int             incx,
                                                   double* const   y[],
                                                   int             incy,
                                                   int             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCswapBatched(hipblasHandle_t   handle,
                                                   int               n,
                                                   hipComplex* const x[],
                                                   int               incx,
                                                   hipComplex* const y[],
                                                   int               incy,
                                                   int               batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZswapBatched(hipblasHandle_t         handle,
                                                   int                     n,
                                                   hipDoubleComplex* const x[],
                                                   int                     incx,
                                                   hipDoubleComplex* const y[],
                                                   int                     incy,
                                                   int                     batchCount);

// 64-bit interface
HIPBLAS_EXPORT hipblasStatus_t hipblasSswapBatched_64(hipblasHandle_t handle,
                                                      int64_t         n,
                                                      float* const    x[],
                                                      int64_t         incx,
                                                      float* const    y[],
                                                      int64_t         incy,
                                                      int64_t         batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDswapBatched_64(hipblasHandle_t handle,
                                                      int64_t         n,
                                                      double* const   x[],
                                                      int64_t         incx,
                                                      double* const   y[],
                                                      int64_t         incy,
                                                      int64_t         batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCswapBatched_64(hipblasHandle_t   handle,
                                                      int64_t           n,
                                                      hipComplex* const x[],
                                                      int64_t           incx,
                                                      hipComplex* const y[],
                                                      int64_t           incy,
                                                      int64_t           batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZswapBatched_64(hipblasHandle_t         handle,
                                                      int64_t                 n,
                                                      hipDoubleComplex* const x[],
                                                      int64_t                 incx,
                                                      hipDoubleComplex* const y[],
                                                      int64_t                 incy,
                                                      int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 1 API </b>

    \details
    The swapStridedBatched functions interchange vectors ``x_i`` and ``y_i``, for ``i`` = 1 , ... , ``batchCount``.

        y_i := x_i; x_i := y_i

    The two assignments above are shorthand for an exchange: after the call, each
    ``x_i`` holds the former contents of ``y_i`` and each ``y_i`` holds the former
    contents of ``x_i``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle     [hipblasHandle_t]
               handle to the hipBLAS library context queue.
    @param[in]
    n          [int]
               the number of elements in each x_i and y_i
               (``int64_t`` in the ``_64`` interface).
    @param[inout]
    x          device pointer to the first vector x_1.
    @param[in]
    incx       [int]
               specifies the increment for the elements of x
               (``int64_t`` in the ``_64`` interface).
    @param[in]
    stridex    [hipblasStride]
               stride from the start of one vector (x_i) to the next one (x_i+1).
               There are no restrictions placed on stridex. However, the user should
               ensure that stridex is of an appropriate size. For a typical
               case, this means stridex >= n * incx.
    @param[inout]
    y          device pointer to the first vector y_1.
    @param[in]
    incy       [int]
               specifies the increment for the elements of y
               (``int64_t`` in the ``_64`` interface).
    @param[in]
    stridey    [hipblasStride]
               stride from the start of one vector (y_i) to the next one (y_i+1).
               There are no restrictions placed on stridey. However, the user should
               ensure that stridey is of an appropriate size. For a typical
               case, this means stridey >= n * incy. stridey should be non zero.
    @param[in]
    batchCount [int]
               number of instances in the batch
               (``int64_t`` in the ``_64`` interface).

    @return    a ``hipblasStatus_t`` status code.

            ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasSswapStridedBatched(hipblasHandle_t handle,
                                                          int             n,
                                                          float*          x,
                                                          int             incx,
                                                          hipblasStride   stridex,
                                                          float*          y,
                                                          int             incy,
                                                          hipblasStride   stridey,
                                                          int             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDswapStridedBatched(hipblasHandle_t handle,
                                                          int             n,
                                                          double*         x,
                                                          int             incx,
                                                          hipblasStride   stridex,
                                                          double*         y,
                                                          int             incy,
                                                          hipblasStride   stridey,
                                                          int             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCswapStridedBatched(hipblasHandle_t handle,
                                                          int             n,
                                                          hipComplex*     x,
                                                          int             incx,
                                                          hipblasStride   stridex,
                                                          hipComplex*     y,
                                                          int             incy,
                                                          hipblasStride   stridey,
                                                          int             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZswapStridedBatched(hipblasHandle_t   handle,
                                                          int               n,
                                                          hipDoubleComplex* x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          hipDoubleComplex* y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          int               batchCount);

// 64-bit interface
HIPBLAS_EXPORT hipblasStatus_t hipblasSswapStridedBatched_64(hipblasHandle_t handle,
                                                             int64_t         n,
                                                             float*          x,
                                                             int64_t         incx,
                                                             hipblasStride   stridex,
                                                             float*          y,
                                                             int64_t         incy,
                                                             hipblasStride   stridey,
                                                             int64_t         batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDswapStridedBatched_64(hipblasHandle_t handle,
                                                             int64_t         n,
                                                             double*         x,
                                                             int64_t         incx,
                                                             hipblasStride   stridex,
                                                             double*         y,
                                                             int64_t         incy,
                                                             hipblasStride   stridey,
                                                             int64_t         batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCswapStridedBatched_64(hipblasHandle_t handle,
                                                             int64_t         n,
                                                             hipComplex*     x,
                                                             int64_t         incx,
                                                             hipblasStride   stridex,
                                                             hipComplex*     y,
                                                             int64_t         incy,
                                                             hipblasStride   stridey,
                                                             int64_t         batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZswapStridedBatched_64(hipblasHandle_t   handle,
                                                             int64_t           n,
                                                             hipDoubleComplex* x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             hipDoubleComplex* y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             int64_t           batchCount);
//! @}

/*
 * ===========================================================================
 *    level 2 BLAS
 * ===========================================================================
 */

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The gbmv functions perform one of the matrix-vector operations:

        y := alpha*A*x    + beta*y,   or
        y := alpha*A**T*x + beta*y,   or
        y := alpha*A**H*x + beta*y,

    where ``alpha`` and ``beta`` are scalars, ``x`` and ``y`` are vectors, and ``A`` is an
    ``m`` by ``n`` banded matrix with ``kl`` sub-diagonals and ``ku`` super-diagonals.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    trans     [hipblasOperation_t]
              indicates whether matrix A is transposed (conjugated) or not.
    @param[in]
    m         [int]
              number of rows of matrix A.
    @param[in]
    n         [int]
              number of columns of matrix A.
    @param[in]
    kl        [int]
              number of sub-diagonals of A.
    @param[in]
    ku        [int]
              number of super-diagonals of A.
    @param[in]
    alpha     device pointer or host pointer to scalar alpha.
    @param[in]
        AP    device pointer storing banded matrix A.
              The leading (kl + ku + 1) by n part of the matrix contains the coefficients
              of the banded matrix. The leading diagonal resides in row (ku + 1) with
              the first super-diagonal above on the RHS of row ku. The first sub-diagonal
              resides below on the LHS of row ku + 2. This propagates up and down across
              sub/super-diagonals.  
                Ex: (m = n = 7; ku = 2, kl = 2)  
                1 2 3 0 0 0 0    ->    0 0 3 3 3 3 3  
                4 1 2 3 0 0 0    ->    0 2 2 2 2 2 2  
                5 4 1 2 3 0 0    ->    1 1 1 1 1 1 1  
                0 5 4 1 2 3 0    ->    4 4 4 4 4 4 0  
                0 0 5 4 1 2 3    ->    5 5 5 5 5 0 0  
                0 0 0 5 4 1 2    ->    0 0 0 0 0 0 0  
                0 0 0 0 5 4 1    ->    0 0 0 0 0 0 0  
              Note that empty elements that don't correspond to data will not
              be referenced.
    @param[in]
    lda       [int]
              specifies the leading dimension of A. Must be >= (kl + ku + 1).
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device pointer storing vector y.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.

    ********************************************************************/

// float variant of gbmv.
// alpha, AP, x and beta are read-only (const float*); y is the only writable
// buffer. All sizes, the leading dimension and the increments are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgbmv(hipblasHandle_t    handle,
                                            hipblasOperation_t trans,
                                            int                m,
                                            int                n,
                                            int                kl,
                                            int                ku,
                                            const float*       alpha,
                                            const float*       AP,
                                            int                lda,
                                            const float*       x,
                                            int                incx,
                                            const float*       beta,
                                            float*             y,
                                            int                incy);

// double variant of gbmv.
// alpha, AP, x and beta are read-only (const double*); y is the only writable
// buffer. All sizes, the leading dimension and the increments are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgbmv(hipblasHandle_t    handle,
                                            hipblasOperation_t trans,
                                            int                m,
                                            int                n,
                                            int                kl,
                                            int                ku,
                                            const double*      alpha,
                                            const double*      AP,
                                            int                lda,
                                            const double*      x,
                                            int                incx,
                                            const double*      beta,
                                            double*            y,
                                            int                incy);

// hipComplex variant of gbmv.
// alpha, AP, x and beta are read-only (const hipComplex*); y is the only
// writable buffer. All sizes, the leading dimension and the increments are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgbmv(hipblasHandle_t    handle,
                                            hipblasOperation_t trans,
                                            int                m,
                                            int                n,
                                            int                kl,
                                            int                ku,
                                            const hipComplex*  alpha,
                                            const hipComplex*  AP,
                                            int                lda,
                                            const hipComplex*  x,
                                            int                incx,
                                            const hipComplex*  beta,
                                            hipComplex*        y,
                                            int                incy);

// hipDoubleComplex variant of gbmv.
// alpha, AP, x and beta are read-only (const hipDoubleComplex*); y is the only
// writable buffer. All sizes, the leading dimension and the increments are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgbmv(hipblasHandle_t         handle,
                                            hipblasOperation_t      trans,
                                            int                     m,
                                            int                     n,
                                            int                     kl,
                                            int                     ku,
                                            const hipDoubleComplex* alpha,
                                            const hipDoubleComplex* AP,
                                            int                     lda,
                                            const hipDoubleComplex* x,
                                            int                     incx,
                                            const hipDoubleComplex* beta,
                                            hipDoubleComplex*       y,
                                            int                     incy);

// 64-bit interface
// float variant of gbmv with the _64 signature: same parameter list as
// hipblasSgbmv, but m, n, kl, ku, lda, incx and incy are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgbmv_64(hipblasHandle_t    handle,
                                               hipblasOperation_t trans,
                                               int64_t            m,
                                               int64_t            n,
                                               int64_t            kl,
                                               int64_t            ku,
                                               const float*       alpha,
                                               const float*       AP,
                                               int64_t            lda,
                                               const float*       x,
                                               int64_t            incx,
                                               const float*       beta,
                                               float*             y,
                                               int64_t            incy);

// double variant of gbmv with the _64 signature: same parameter list as
// hipblasDgbmv, but m, n, kl, ku, lda, incx and incy are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgbmv_64(hipblasHandle_t    handle,
                                               hipblasOperation_t trans,
                                               int64_t            m,
                                               int64_t            n,
                                               int64_t            kl,
                                               int64_t            ku,
                                               const double*      alpha,
                                               const double*      AP,
                                               int64_t            lda,
                                               const double*      x,
                                               int64_t            incx,
                                               const double*      beta,
                                               double*            y,
                                               int64_t            incy);

// hipComplex variant of gbmv with the _64 signature: same parameter list as
// hipblasCgbmv, but m, n, kl, ku, lda, incx and incy are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgbmv_64(hipblasHandle_t    handle,
                                               hipblasOperation_t trans,
                                               int64_t            m,
                                               int64_t            n,
                                               int64_t            kl,
                                               int64_t            ku,
                                               const hipComplex*  alpha,
                                               const hipComplex*  AP,
                                               int64_t            lda,
                                               const hipComplex*  x,
                                               int64_t            incx,
                                               const hipComplex*  beta,
                                               hipComplex*        y,
                                               int64_t            incy);

// hipDoubleComplex variant of gbmv with the _64 signature: same parameter list
// as hipblasZgbmv, but m, n, kl, ku, lda, incx and incy are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgbmv_64(hipblasHandle_t         handle,
                                               hipblasOperation_t      trans,
                                               int64_t                 m,
                                               int64_t                 n,
                                               int64_t                 kl,
                                               int64_t                 ku,
                                               const hipDoubleComplex* alpha,
                                               const hipDoubleComplex* AP,
                                               int64_t                 lda,
                                               const hipDoubleComplex* x,
                                               int64_t                 incx,
                                               const hipDoubleComplex* beta,
                                               hipDoubleComplex*       y,
                                               int64_t                 incy);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The gbmvBatched functions perform one of the matrix-vector operations:

        y_i := alpha*A_i*x_i    + beta*y_i,   or
        y_i := alpha*A_i**T*x_i + beta*y_i,   or
        y_i := alpha*A_i**H*x_i + beta*y_i,

    where ``(A_i, x_i, y_i)`` is the ``i``-th instance of the batch,
    ``alpha`` and ``beta`` are scalars, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``m`` by ``n`` banded matrix with ``kl`` sub-diagonals and ``ku`` super-diagonals,
    for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    trans     [hipblasOperation_t]
              indicates whether matrix A is transposed (conjugated) or not.
    @param[in]
    m         [int]
              number of rows of each matrix A_i.
    @param[in]
    n         [int]
              number of columns of each matrix A_i.
    @param[in]
    kl        [int]
              number of sub-diagonals of each A_i.
    @param[in]
    ku        [int]
              number of super-diagonals of each A_i.
    @param[in]
    alpha     device pointer or host pointer to scalar alpha.
    @param[in]
        AP    device array of device pointers storing each banded matrix A_i.
              The leading (kl + ku + 1) by n part of the matrix contains the coefficients
              of the banded matrix. The leading diagonal resides in row (ku + 1) with
              the first super-diagonal above on the RHS of row ku. The first sub-diagonal
              resides below on the LHS of row ku + 2. This propagates up and down across
              sub/super-diagonals.  
                Ex: (m = n = 7; ku = 2, kl = 2)  
                1 2 3 0 0 0 0    ->    0 0 3 3 3 3 3  
                4 1 2 3 0 0 0    ->    0 2 2 2 2 2 2  
                5 4 1 2 3 0 0    ->    1 1 1 1 1 1 1  
                0 5 4 1 2 3 0    ->    4 4 4 4 4 4 0  
                0 0 5 4 1 2 3    ->    5 5 5 5 5 0 0  
                0 0 0 5 4 1 2    ->    0 0 0 0 0 0 0  
                0 0 0 0 5 4 1    ->    0 0 0 0 0 0 0  
              Note that empty elements that don't correspond to data will not
              be referenced.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i. Must be >= (kl + ku + 1).
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device array of device pointers storing each vector y_i.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[in]
    batchCount [int]
                specifies the number of instances in the batch.

    ********************************************************************/

// float variant of gbmvBatched.
// AP, x and y are arrays of per-instance pointers (the pointer entries are
// const); only the data reached through y is writable. alpha and beta are
// single scalar pointers, not arrays. Sizes, increments and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgbmvBatched(hipblasHandle_t    handle,
                                                   hipblasOperation_t trans,
                                                   int                m,
                                                   int                n,
                                                   int                kl,
                                                   int                ku,
                                                   const float*       alpha,
                                                   const float* const AP[],
                                                   int                lda,
                                                   const float* const x[],
                                                   int                incx,
                                                   const float*       beta,
                                                   float* const       y[],
                                                   int                incy,
                                                   int                batchCount);

// double variant of gbmvBatched.
// AP, x and y are arrays of per-instance pointers (the pointer entries are
// const); only the data reached through y is writable. alpha and beta are
// single scalar pointers, not arrays. Sizes, increments and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgbmvBatched(hipblasHandle_t     handle,
                                                   hipblasOperation_t  trans,
                                                   int                 m,
                                                   int                 n,
                                                   int                 kl,
                                                   int                 ku,
                                                   const double*       alpha,
                                                   const double* const AP[],
                                                   int                 lda,
                                                   const double* const x[],
                                                   int                 incx,
                                                   const double*       beta,
                                                   double* const       y[],
                                                   int                 incy,
                                                   int                 batchCount);

// hipComplex variant of gbmvBatched.
// AP, x and y are arrays of per-instance pointers (the pointer entries are
// const); only the data reached through y is writable. alpha and beta are
// single scalar pointers, not arrays. Sizes, increments and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgbmvBatched(hipblasHandle_t         handle,
                                                   hipblasOperation_t      trans,
                                                   int                     m,
                                                   int                     n,
                                                   int                     kl,
                                                   int                     ku,
                                                   const hipComplex*       alpha,
                                                   const hipComplex* const AP[],
                                                   int                     lda,
                                                   const hipComplex* const x[],
                                                   int                     incx,
                                                   const hipComplex*       beta,
                                                   hipComplex* const       y[],
                                                   int                     incy,
                                                   int                     batchCount);

// hipDoubleComplex variant of gbmvBatched.
// AP, x and y are arrays of per-instance pointers (the pointer entries are
// const); only the data reached through y is writable. alpha and beta are
// single scalar pointers, not arrays. Sizes, increments and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgbmvBatched(hipblasHandle_t               handle,
                                                   hipblasOperation_t            trans,
                                                   int                           m,
                                                   int                           n,
                                                   int                           kl,
                                                   int                           ku,
                                                   const hipDoubleComplex*       alpha,
                                                   const hipDoubleComplex* const AP[],
                                                   int                           lda,
                                                   const hipDoubleComplex* const x[],
                                                   int                           incx,
                                                   const hipDoubleComplex*       beta,
                                                   hipDoubleComplex* const       y[],
                                                   int                           incy,
                                                   int                           batchCount);

// 64-bit interface
// float variant of gbmvBatched with the _64 signature: same parameter list as
// hipblasSgbmvBatched, but m, n, kl, ku, lda, incx, incy and batchCount are
// int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgbmvBatched_64(hipblasHandle_t    handle,
                                                      hipblasOperation_t trans,
                                                      int64_t            m,
                                                      int64_t            n,
                                                      int64_t            kl,
                                                      int64_t            ku,
                                                      const float*       alpha,
                                                      const float* const AP[],
                                                      int64_t            lda,
                                                      const float* const x[],
                                                      int64_t            incx,
                                                      const float*       beta,
                                                      float* const       y[],
                                                      int64_t            incy,
                                                      int64_t            batchCount);

// double variant of gbmvBatched with the _64 signature: same parameter list as
// hipblasDgbmvBatched, but m, n, kl, ku, lda, incx, incy and batchCount are
// int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgbmvBatched_64(hipblasHandle_t     handle,
                                                      hipblasOperation_t  trans,
                                                      int64_t             m,
                                                      int64_t             n,
                                                      int64_t             kl,
                                                      int64_t             ku,
                                                      const double*       alpha,
                                                      const double* const AP[],
                                                      int64_t             lda,
                                                      const double* const x[],
                                                      int64_t             incx,
                                                      const double*       beta,
                                                      double* const       y[],
                                                      int64_t             incy,
                                                      int64_t             batchCount);

// hipComplex variant of gbmvBatched with the _64 signature: same parameter list
// as hipblasCgbmvBatched, but m, n, kl, ku, lda, incx, incy and batchCount are
// int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgbmvBatched_64(hipblasHandle_t         handle,
                                                      hipblasOperation_t      trans,
                                                      int64_t                 m,
                                                      int64_t                 n,
                                                      int64_t                 kl,
                                                      int64_t                 ku,
                                                      const hipComplex*       alpha,
                                                      const hipComplex* const AP[],
                                                      int64_t                 lda,
                                                      const hipComplex* const x[],
                                                      int64_t                 incx,
                                                      const hipComplex*       beta,
                                                      hipComplex* const       y[],
                                                      int64_t                 incy,
                                                      int64_t                 batchCount);

// hipDoubleComplex variant of gbmvBatched with the _64 signature: same
// parameter list as hipblasZgbmvBatched, but m, n, kl, ku, lda, incx, incy and
// batchCount are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgbmvBatched_64(hipblasHandle_t               handle,
                                                      hipblasOperation_t            trans,
                                                      int64_t                       m,
                                                      int64_t                       n,
                                                      int64_t                       kl,
                                                      int64_t                       ku,
                                                      const hipDoubleComplex*       alpha,
                                                      const hipDoubleComplex* const AP[],
                                                      int64_t                       lda,
                                                      const hipDoubleComplex* const x[],
                                                      int64_t                       incx,
                                                      const hipDoubleComplex*       beta,
                                                      hipDoubleComplex* const       y[],
                                                      int64_t                       incy,
                                                      int64_t                       batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The gbmvStridedBatched functions perform one of the matrix-vector operations:

        y_i := alpha*A_i*x_i    + beta*y_i,   or
        y_i := alpha*A_i**T*x_i + beta*y_i,   or
        y_i := alpha*A_i**H*x_i + beta*y_i,

    where ``(A_i, x_i, y_i)`` is the ``i``-th instance of the batch,
    ``alpha`` and ``beta`` are scalars, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``m`` by ``n`` banded matrix with ``kl`` sub-diagonals and ``ku`` super-diagonals,
    for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    trans     [hipblasOperation_t]
              indicates whether matrix A is transposed (conjugated) or not.
    @param[in]
    m         [int]
              number of rows of matrix A.
    @param[in]
    n         [int]
              number of columns of matrix A.
    @param[in]
    kl        [int]
              number of sub-diagonals of A.
    @param[in]
    ku        [int]
              number of super-diagonals of A.
    @param[in]
    alpha     device pointer or host pointer to scalar alpha.
    @param[in]
        AP    device pointer to first banded matrix (A_1).
              The leading (kl + ku + 1) by n part of the matrix contains the coefficients
              of the banded matrix. The leading diagonal resides in row (ku + 1) with
              the first super-diagonal above on the RHS of row ku. The first sub-diagonal
              resides below on the LHS of row ku + 2. This propagates up and down across
              sub/super-diagonals.  
                Ex: (m = n = 7; ku = 2, kl = 2)  
                1 2 3 0 0 0 0    ->   0 0 3 3 3 3 3  
                4 1 2 3 0 0 0    ->   0 2 2 2 2 2 2  
                5 4 1 2 3 0 0    ->   1 1 1 1 1 1 1  
                0 5 4 1 2 3 0    ->   4 4 4 4 4 4 0  
                0 0 5 4 1 2 3    ->   5 5 5 5 5 0 0  
                0 0 0 5 4 1 2    ->   0 0 0 0 0 0 0  
                0 0 0 0 5 4 1    ->   0 0 0 0 0 0 0  
              Note that empty elements that don't correspond to data will not
              be referenced.
    @param[in]
    lda       [int]
              specifies the leading dimension of A. Must be >= (kl + ku + 1).
    @param[in]
    strideA  [hipblasStride]
              stride from the start of one matrix (A_i) to the next one (A_i+1).
    @param[in]
    x         device pointer to first vector (x_1).
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    stridex  [hipblasStride]
              stride from the start of one vector (x_i) to the next one (x_i+1).
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device pointer to first vector (y_1).
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.
    @param[in]
    stridey  [hipblasStride]
              stride from the start of one vector (y_i) to the next one (y_i+1).
    @param[in]
    batchCount [int]
                specifies the number of instances in the batch.

    ********************************************************************/

// float variant of gbmvStridedBatched.
// AP, x and y are single base pointers; strideA, stridex and stridey
// (hipblasStride) give the offset from one batch instance to the next.
// Only y is writable. Sizes, increments and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgbmvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasOperation_t trans,
                                                          int                m,
                                                          int                n,
                                                          int                kl,
                                                          int                ku,
                                                          const float*       alpha,
                                                          const float*       AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          const float*       x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          const float*       beta,
                                                          float*             y,
                                                          int                incy,
                                                          hipblasStride      stridey,
                                                          int                batchCount);

// double variant of gbmvStridedBatched.
// AP, x and y are single base pointers; strideA, stridex and stridey
// (hipblasStride) give the offset from one batch instance to the next.
// Only y is writable. Sizes, increments and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgbmvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasOperation_t trans,
                                                          int                m,
                                                          int                n,
                                                          int                kl,
                                                          int                ku,
                                                          const double*      alpha,
                                                          const double*      AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          const double*      x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          const double*      beta,
                                                          double*            y,
                                                          int                incy,
                                                          hipblasStride      stridey,
                                                          int                batchCount);

// hipComplex variant of gbmvStridedBatched.
// AP, x and y are single base pointers; strideA, stridex and stridey
// (hipblasStride) give the offset from one batch instance to the next.
// Only y is writable. Sizes, increments and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgbmvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasOperation_t trans,
                                                          int                m,
                                                          int                n,
                                                          int                kl,
                                                          int                ku,
                                                          const hipComplex*  alpha,
                                                          const hipComplex*  AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          const hipComplex*  x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          const hipComplex*  beta,
                                                          hipComplex*        y,
                                                          int                incy,
                                                          hipblasStride      stridey,
                                                          int                batchCount);

// hipDoubleComplex variant of gbmvStridedBatched.
// AP, x and y are single base pointers; strideA, stridex and stridey
// (hipblasStride) give the offset from one batch instance to the next.
// Only y is writable. Sizes, increments and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgbmvStridedBatched(hipblasHandle_t         handle,
                                                          hipblasOperation_t      trans,
                                                          int                     m,
                                                          int                     n,
                                                          int                     kl,
                                                          int                     ku,
                                                          const hipDoubleComplex* alpha,
                                                          const hipDoubleComplex* AP,
                                                          int                     lda,
                                                          hipblasStride           strideA,
                                                          const hipDoubleComplex* x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          const hipDoubleComplex* beta,
                                                          hipDoubleComplex*       y,
                                                          int                     incy,
                                                          hipblasStride           stridey,
                                                          int                     batchCount);

// 64-bit interface
// float (``s`` precision) declaration of gbmvStridedBatched for the 64-bit
// interface: m, n, kl, ku, lda, incx, incy, and batchCount are int64_t;
// strideA, stridex, and stridey are hipblasStride. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgbmvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasOperation_t trans,
                                                             int64_t            m,
                                                             int64_t            n,
                                                             int64_t            kl,
                                                             int64_t            ku,
                                                             const float*       alpha,
                                                             const float*       AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             const float*       x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             const float*       beta,
                                                             float*             y,
                                                             int64_t            incy,
                                                             hipblasStride      stridey,
                                                             int64_t            batchCount);

// double (``d`` precision) declaration of gbmvStridedBatched for the 64-bit
// interface: m, n, kl, ku, lda, incx, incy, and batchCount are int64_t;
// strideA, stridex, and stridey are hipblasStride. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgbmvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasOperation_t trans,
                                                             int64_t            m,
                                                             int64_t            n,
                                                             int64_t            kl,
                                                             int64_t            ku,
                                                             const double*      alpha,
                                                             const double*      AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             const double*      x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             const double*      beta,
                                                             double*            y,
                                                             int64_t            incy,
                                                             hipblasStride      stridey,
                                                             int64_t            batchCount);

// hipComplex (``c`` precision) declaration of gbmvStridedBatched for the
// 64-bit interface: m, n, kl, ku, lda, incx, incy, and batchCount are int64_t;
// strideA, stridex, and stridey are hipblasStride. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgbmvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasOperation_t trans,
                                                             int64_t            m,
                                                             int64_t            n,
                                                             int64_t            kl,
                                                             int64_t            ku,
                                                             const hipComplex*  alpha,
                                                             const hipComplex*  AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             const hipComplex*  x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             const hipComplex*  beta,
                                                             hipComplex*        y,
                                                             int64_t            incy,
                                                             hipblasStride      stridey,
                                                             int64_t            batchCount);

// hipDoubleComplex (``z`` precision) declaration of gbmvStridedBatched for the
// 64-bit interface: m, n, kl, ku, lda, incx, incy, and batchCount are int64_t;
// strideA, stridex, and stridey are hipblasStride. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgbmvStridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasOperation_t      trans,
                                                             int64_t                 m,
                                                             int64_t                 n,
                                                             int64_t                 kl,
                                                             int64_t                 ku,
                                                             const hipDoubleComplex* alpha,
                                                             const hipDoubleComplex* AP,
                                                             int64_t                 lda,
                                                             hipblasStride           strideA,
                                                             const hipDoubleComplex* x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             const hipDoubleComplex* beta,
                                                             hipDoubleComplex*       y,
                                                             int64_t                 incy,
                                                             hipblasStride           stridey,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The gemv functions perform one of the matrix-vector operations:

        y := alpha*A*x    + beta*y,   or
        y := alpha*A**T*x + beta*y,   or
        y := alpha*A**H*x + beta*y,

    where ``alpha`` and ``beta`` are scalars, ``x`` and ``y`` are vectors, and ``A`` is an
    ``m`` by ``n`` matrix.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    trans     [hipblasOperation_t]
              indicates whether matrix A is transposed (conjugated) or not.
    @param[in]
    m         [int]
              number of rows of matrix A.
    @param[in]
    n         [int]
              number of columns of matrix A.
    @param[in]
    alpha     device pointer or host pointer to scalar alpha.
    @param[in]
    AP        device pointer storing matrix A.
    @param[in]
    lda       [int]
              specifies the leading dimension of A.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device pointer storing vector y.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.

    ********************************************************************/

// float (``s`` precision) declaration of gemv. m, n, lda, incx, and incy are
// int; y is the only non-const data pointer. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgemv(hipblasHandle_t    handle,
                                            hipblasOperation_t trans,
                                            int                m,
                                            int                n,
                                            const float*       alpha,
                                            const float*       AP,
                                            int                lda,
                                            const float*       x,
                                            int                incx,
                                            const float*       beta,
                                            float*             y,
                                            int                incy);

// double (``d`` precision) declaration of gemv. m, n, lda, incx, and incy are
// int; y is the only non-const data pointer. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgemv(hipblasHandle_t    handle,
                                            hipblasOperation_t trans,
                                            int                m,
                                            int                n,
                                            const double*      alpha,
                                            const double*      AP,
                                            int                lda,
                                            const double*      x,
                                            int                incx,
                                            const double*      beta,
                                            double*            y,
                                            int                incy);

// hipComplex (``c`` precision) declaration of gemv. m, n, lda, incx, and incy
// are int; y is the only non-const data pointer. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgemv(hipblasHandle_t    handle,
                                            hipblasOperation_t trans,
                                            int                m,
                                            int                n,
                                            const hipComplex*  alpha,
                                            const hipComplex*  AP,
                                            int                lda,
                                            const hipComplex*  x,
                                            int                incx,
                                            const hipComplex*  beta,
                                            hipComplex*        y,
                                            int                incy);

// hipDoubleComplex (``z`` precision) declaration of gemv. m, n, lda, incx, and
// incy are int; y is the only non-const data pointer. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgemv(hipblasHandle_t         handle,
                                            hipblasOperation_t      trans,
                                            int                     m,
                                            int                     n,
                                            const hipDoubleComplex* alpha,
                                            const hipDoubleComplex* AP,
                                            int                     lda,
                                            const hipDoubleComplex* x,
                                            int                     incx,
                                            const hipDoubleComplex* beta,
                                            hipDoubleComplex*       y,
                                            int                     incy);

// 64-bit interface
// float (``s`` precision) declaration of gemv for the 64-bit interface:
// same parameter list as hipblasSgemv, but m, n, lda, incx, and incy are
// int64_t. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgemv_64(hipblasHandle_t    handle,
                                               hipblasOperation_t trans,
                                               int64_t            m,
                                               int64_t            n,
                                               const float*       alpha,
                                               const float*       AP,
                                               int64_t            lda,
                                               const float*       x,
                                               int64_t            incx,
                                               const float*       beta,
                                               float*             y,
                                               int64_t            incy);

// double (``d`` precision) declaration of gemv for the 64-bit interface:
// same parameter list as hipblasDgemv, but m, n, lda, incx, and incy are
// int64_t. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgemv_64(hipblasHandle_t    handle,
                                               hipblasOperation_t trans,
                                               int64_t            m,
                                               int64_t            n,
                                               const double*      alpha,
                                               const double*      AP,
                                               int64_t            lda,
                                               const double*      x,
                                               int64_t            incx,
                                               const double*      beta,
                                               double*            y,
                                               int64_t            incy);

// hipComplex (``c`` precision) declaration of gemv for the 64-bit interface:
// same parameter list as hipblasCgemv, but m, n, lda, incx, and incy are
// int64_t. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgemv_64(hipblasHandle_t    handle,
                                               hipblasOperation_t trans,
                                               int64_t            m,
                                               int64_t            n,
                                               const hipComplex*  alpha,
                                               const hipComplex*  AP,
                                               int64_t            lda,
                                               const hipComplex*  x,
                                               int64_t            incx,
                                               const hipComplex*  beta,
                                               hipComplex*        y,
                                               int64_t            incy);

// hipDoubleComplex (``z`` precision) declaration of gemv for the 64-bit
// interface: same parameter list as hipblasZgemv, but m, n, lda, incx, and
// incy are int64_t. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgemv_64(hipblasHandle_t         handle,
                                               hipblasOperation_t      trans,
                                               int64_t                 m,
                                               int64_t                 n,
                                               const hipDoubleComplex* alpha,
                                               const hipDoubleComplex* AP,
                                               int64_t                 lda,
                                               const hipDoubleComplex* x,
                                               int64_t                 incx,
                                               const hipDoubleComplex* beta,
                                               hipDoubleComplex*       y,
                                               int64_t                 incy);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The gemvBatched functions perform a batch of matrix-vector operations:

        y_i := alpha*A_i*x_i    + beta*y_i,   or
        y_i := alpha*A_i**T*x_i + beta*y_i,   or
        y_i := alpha*A_i**H*x_i + beta*y_i,

    where ``(A_i, x_i, y_i)`` is the ``i``-th instance of the batch,
    ``alpha`` and ``beta`` are scalars, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``m`` by ``n`` matrix, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle      [hipblasHandle_t]
                handle to the hipBLAS library context queue.
    @param[in]
    trans       [hipblasOperation_t]
                indicates whether matrices A_i are transposed (conjugated) or not.
    @param[in]
    m           [int]
                number of rows of each matrix A_i.
    @param[in]
    n           [int]
                number of columns of each matrix A_i.
    @param[in]
    alpha       device pointer or host pointer to scalar alpha.
    @param[in]
    AP         device array of device pointers storing each matrix A_i.
    @param[in]
    lda         [int]
                specifies the leading dimension of each matrix A_i.
    @param[in]
    x           device array of device pointers storing each vector x_i.
    @param[in]
    incx        [int]
                specifies the increment for the elements of each vector x_i.
    @param[in]
    beta        device pointer or host pointer to scalar beta.
    @param[inout]
    y           device array of device pointers storing each vector y_i.
    @param[in]
    incy        [int]
                specifies the increment for the elements of each vector y_i.
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// float (``s`` precision) declaration of gemvBatched. AP, x, and y are arrays
// of per-instance pointers (the pointers themselves are const); m, n, lda,
// incx, incy, and batchCount are int. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgemvBatched(hipblasHandle_t    handle,
                                                   hipblasOperation_t trans,
                                                   int                m,
                                                   int                n,
                                                   const float*       alpha,
                                                   const float* const AP[],
                                                   int                lda,
                                                   const float* const x[],
                                                   int                incx,
                                                   const float*       beta,
                                                   float* const       y[],
                                                   int                incy,
                                                   int                batchCount);

// double (``d`` precision) declaration of gemvBatched. AP, x, and y are arrays
// of per-instance pointers (the pointers themselves are const); m, n, lda,
// incx, incy, and batchCount are int. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgemvBatched(hipblasHandle_t     handle,
                                                   hipblasOperation_t  trans,
                                                   int                 m,
                                                   int                 n,
                                                   const double*       alpha,
                                                   const double* const AP[],
                                                   int                 lda,
                                                   const double* const x[],
                                                   int                 incx,
                                                   const double*       beta,
                                                   double* const       y[],
                                                   int                 incy,
                                                   int                 batchCount);

// hipComplex (``c`` precision) declaration of gemvBatched. AP, x, and y are
// arrays of per-instance pointers (the pointers themselves are const); m, n,
// lda, incx, incy, and batchCount are int. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgemvBatched(hipblasHandle_t         handle,
                                                   hipblasOperation_t      trans,
                                                   int                     m,
                                                   int                     n,
                                                   const hipComplex*       alpha,
                                                   const hipComplex* const AP[],
                                                   int                     lda,
                                                   const hipComplex* const x[],
                                                   int                     incx,
                                                   const hipComplex*       beta,
                                                   hipComplex* const       y[],
                                                   int                     incy,
                                                   int                     batchCount);

// hipDoubleComplex (``z`` precision) declaration of gemvBatched. AP, x, and y
// are arrays of per-instance pointers (the pointers themselves are const);
// m, n, lda, incx, incy, and batchCount are int. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgemvBatched(hipblasHandle_t               handle,
                                                   hipblasOperation_t            trans,
                                                   int                           m,
                                                   int                           n,
                                                   const hipDoubleComplex*       alpha,
                                                   const hipDoubleComplex* const AP[],
                                                   int                           lda,
                                                   const hipDoubleComplex* const x[],
                                                   int                           incx,
                                                   const hipDoubleComplex*       beta,
                                                   hipDoubleComplex* const       y[],
                                                   int                           incy,
                                                   int                           batchCount);

// 64-bit interface
// float (``s`` precision) declaration of gemvBatched for the 64-bit interface:
// same parameter list as hipblasSgemvBatched, but m, n, lda, incx, incy, and
// batchCount are int64_t. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgemvBatched_64(hipblasHandle_t    handle,
                                                      hipblasOperation_t trans,
                                                      int64_t            m,
                                                      int64_t            n,
                                                      const float*       alpha,
                                                      const float* const AP[],
                                                      int64_t            lda,
                                                      const float* const x[],
                                                      int64_t            incx,
                                                      const float*       beta,
                                                      float* const       y[],
                                                      int64_t            incy,
                                                      int64_t            batchCount);

// double (``d`` precision) declaration of gemvBatched for the 64-bit interface:
// same parameter list as hipblasDgemvBatched, but m, n, lda, incx, incy, and
// batchCount are int64_t. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgemvBatched_64(hipblasHandle_t     handle,
                                                      hipblasOperation_t  trans,
                                                      int64_t             m,
                                                      int64_t             n,
                                                      const double*       alpha,
                                                      const double* const AP[],
                                                      int64_t             lda,
                                                      const double* const x[],
                                                      int64_t             incx,
                                                      const double*       beta,
                                                      double* const       y[],
                                                      int64_t             incy,
                                                      int64_t             batchCount);

// hipComplex (``c`` precision) declaration of gemvBatched for the 64-bit
// interface: same parameter list as hipblasCgemvBatched, but m, n, lda, incx,
// incy, and batchCount are int64_t. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgemvBatched_64(hipblasHandle_t         handle,
                                                      hipblasOperation_t      trans,
                                                      int64_t                 m,
                                                      int64_t                 n,
                                                      const hipComplex*       alpha,
                                                      const hipComplex* const AP[],
                                                      int64_t                 lda,
                                                      const hipComplex* const x[],
                                                      int64_t                 incx,
                                                      const hipComplex*       beta,
                                                      hipComplex* const       y[],
                                                      int64_t                 incy,
                                                      int64_t                 batchCount);

// hipDoubleComplex (``z`` precision) declaration of gemvBatched for the 64-bit
// interface: same parameter list as hipblasZgemvBatched, but m, n, lda, incx,
// incy, and batchCount are int64_t. Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgemvBatched_64(hipblasHandle_t               handle,
                                                      hipblasOperation_t            trans,
                                                      int64_t                       m,
                                                      int64_t                       n,
                                                      const hipDoubleComplex*       alpha,
                                                      const hipDoubleComplex* const AP[],
                                                      int64_t                       lda,
                                                      const hipDoubleComplex* const x[],
                                                      int64_t                       incx,
                                                      const hipDoubleComplex*       beta,
                                                      hipDoubleComplex* const       y[],
                                                      int64_t                       incy,
                                                      int64_t                       batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The gemvStridedBatched functions perform a batch of matrix-vector operations:

        y_i := alpha*A_i*x_i    + beta*y_i,   or
        y_i := alpha*A_i**T*x_i + beta*y_i,   or
        y_i := alpha*A_i**H*x_i + beta*y_i,

    where ``(A_i, x_i, y_i)`` is the ``i``-th instance of the batch,
    ``alpha`` and ``beta`` are scalars, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``m`` by ``n`` matrix, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle      [hipblasHandle_t]
                handle to the hipBLAS library context queue.
    @param[in]
    transA      [hipblasOperation_t]
                indicates whether matrices A_i are transposed (conjugated) or not.
    @param[in]
    m           [int]
                number of rows of matrices A_i.
    @param[in]
    n           [int]
                number of columns of matrices A_i.
    @param[in]
    alpha       device pointer or host pointer to scalar alpha.
    @param[in]
    AP          device pointer to the first matrix (A_1) in the batch.
    @param[in]
    lda         [int]
                specifies the leading dimension of matrices A_i.
    @param[in]
    strideA     [hipblasStride]
                stride from the start of one matrix (A_i) to the next one (A_i+1).
    @param[in]
    x           device pointer to the first vector (x_1) in the batch.
    @param[in]
    incx        [int]
                specifies the increment for the elements of vectors x_i.
    @param[in]
    stridex     [hipblasStride]
                stride from the start of one vector (x_i) to the next one (x_i+1).
                There are no restrictions placed on stridex. However, the user should
                ensure that stridex is of an appropriate size. When transA equals HIPBLAS_OP_N,
                this typically means stridex >= n * incx. Otherwise, stridex >= m * incx.
    @param[in]
    beta        device pointer or host pointer to scalar beta.
    @param[inout]
    y           device pointer to the first vector (y_1) in the batch.
    @param[in]
    incy        [int]
                specifies the increment for the elements of vectors y_i.
    @param[in]
    stridey     [hipblasStride]
                stride from the start of one vector (y_i) to the next one (y_i+1).
                There are no restrictions placed on stridey. However, the user should
                ensure that stridey is of an appropriate size. When transA equals HIPBLAS_OP_N,
                this typically means stridey >= m * incy. Otherwise, stridey >= n * incy. stridey should be nonzero.
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// float (``s`` precision) declaration of gemvStridedBatched. AP, x, and y are
// single base pointers paired with hipblasStride strides (strideA, stridex,
// stridey); m, n, lda, incx, incy, and batchCount are int.
// Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgemvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasOperation_t transA,
                                                          int                m,
                                                          int                n,
                                                          const float*       alpha,
                                                          const float*       AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          const float*       x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          const float*       beta,
                                                          float*             y,
                                                          int                incy,
                                                          hipblasStride      stridey,
                                                          int                batchCount);

// double (``d`` precision) declaration of gemvStridedBatched. AP, x, and y are
// single base pointers paired with hipblasStride strides (strideA, stridex,
// stridey); m, n, lda, incx, incy, and batchCount are int.
// Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgemvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasOperation_t transA,
                                                          int                m,
                                                          int                n,
                                                          const double*      alpha,
                                                          const double*      AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          const double*      x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          const double*      beta,
                                                          double*            y,
                                                          int                incy,
                                                          hipblasStride      stridey,
                                                          int                batchCount);

// hipComplex (``c`` precision) declaration of gemvStridedBatched. AP, x, and y
// are single base pointers paired with hipblasStride strides (strideA,
// stridex, stridey); m, n, lda, incx, incy, and batchCount are int.
// Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgemvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasOperation_t transA,
                                                          int                m,
                                                          int                n,
                                                          const hipComplex*  alpha,
                                                          const hipComplex*  AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          const hipComplex*  x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          const hipComplex*  beta,
                                                          hipComplex*        y,
                                                          int                incy,
                                                          hipblasStride      stridey,
                                                          int                batchCount);

// hipDoubleComplex (``z`` precision) declaration of gemvStridedBatched. AP, x,
// and y are single base pointers paired with hipblasStride strides (strideA,
// stridex, stridey); m, n, lda, incx, incy, and batchCount are int.
// Returns a hipblasStatus_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgemvStridedBatched(hipblasHandle_t         handle,
                                                          hipblasOperation_t      transA,
                                                          int                     m,
                                                          int                     n,
                                                          const hipDoubleComplex* alpha,
                                                          const hipDoubleComplex* AP,
                                                          int                     lda,
                                                          hipblasStride           strideA,
                                                          const hipDoubleComplex* x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          const hipDoubleComplex* beta,
                                                          hipDoubleComplex*       y,
                                                          int                     incy,
                                                          hipblasStride           stridey,
                                                          int                     batchCount);

// 64-bit interface
// float variant of gemvStridedBatched with int64_t m, n, lda, incx, incy, and batchCount.
// The strides (strideA, stridex, stridey) are hipblasStride, as in the int interface.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgemvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasOperation_t transA,
                                                             int64_t            m,
                                                             int64_t            n,
                                                             const float*       alpha,
                                                             const float*       AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             const float*       x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             const float*       beta,
                                                             float*             y,
                                                             int64_t            incy,
                                                             hipblasStride      stridey,
                                                             int64_t            batchCount);

// double variant of gemvStridedBatched with int64_t m, n, lda, incx, incy, and batchCount.
// The strides (strideA, stridex, stridey) are hipblasStride, as in the int interface.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgemvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasOperation_t transA,
                                                             int64_t            m,
                                                             int64_t            n,
                                                             const double*      alpha,
                                                             const double*      AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             const double*      x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             const double*      beta,
                                                             double*            y,
                                                             int64_t            incy,
                                                             hipblasStride      stridey,
                                                             int64_t            batchCount);

// hipComplex variant of gemvStridedBatched with int64_t m, n, lda, incx, incy, and batchCount.
// The strides (strideA, stridex, stridey) are hipblasStride, as in the int interface.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgemvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasOperation_t transA,
                                                             int64_t            m,
                                                             int64_t            n,
                                                             const hipComplex*  alpha,
                                                             const hipComplex*  AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             const hipComplex*  x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             const hipComplex*  beta,
                                                             hipComplex*        y,
                                                             int64_t            incy,
                                                             hipblasStride      stridey,
                                                             int64_t            batchCount);

// hipDoubleComplex variant of gemvStridedBatched with int64_t m, n, lda, incx, incy, and batchCount.
// The strides (strideA, stridex, stridey) are hipblasStride, as in the int interface.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgemvStridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasOperation_t      transA,
                                                             int64_t                 m,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* alpha,
                                                             const hipDoubleComplex* AP,
                                                             int64_t                 lda,
                                                             hipblasStride           strideA,
                                                             const hipDoubleComplex* x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             const hipDoubleComplex* beta,
                                                             hipDoubleComplex*       y,
                                                             int64_t                 incy,
                                                             hipblasStride           stridey,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief  <b> BLAS Level 2 API </b>

    \details
    The ger, geru, and gerc functions perform the matrix-vector operations:

        A := A + alpha*x*y**T , OR
        A := A + alpha*x*y**H for gerc

    where ``alpha`` is a scalar, ``x`` and ``y`` are vectors, and ``A`` is an
    ``m`` by ``n`` matrix.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    m         [int]
              the number of rows of the matrix A.
    @param[in]
    n         [int]
              the number of columns of the matrix A.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    y         device pointer storing vector y.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.
    @param[inout]
    AP         device pointer storing matrix A.
    @param[in]
    lda       [int]
              specifies the leading dimension of A.

    ********************************************************************/

// float ger: A := A + alpha*x*y**T. x and y are read-only; AP is updated in place.
HIPBLAS_EXPORT hipblasStatus_t hipblasSger(hipblasHandle_t handle,
                                           int             m,
                                           int             n,
                                           const float*    alpha,
                                           const float*    x,
                                           int             incx,
                                           const float*    y,
                                           int             incy,
                                           float*          AP,
                                           int             lda);

// double ger: A := A + alpha*x*y**T. x and y are read-only; AP is updated in place.
HIPBLAS_EXPORT hipblasStatus_t hipblasDger(hipblasHandle_t handle,
                                           int             m,
                                           int             n,
                                           const double*   alpha,
                                           const double*   x,
                                           int             incx,
                                           const double*   y,
                                           int             incy,
                                           double*         AP,
                                           int             lda);

// hipComplex geru: A := A + alpha*x*y**T. x and y are read-only; AP is updated in place.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgeru(hipblasHandle_t   handle,
                                            int               m,
                                            int               n,
                                            const hipComplex* alpha,
                                            const hipComplex* x,
                                            int               incx,
                                            const hipComplex* y,
                                            int               incy,
                                            hipComplex*       AP,
                                            int               lda);

// hipComplex gerc: A := A + alpha*x*y**H. x and y are read-only; AP is updated in place.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgerc(hipblasHandle_t   handle,
                                            int               m,
                                            int               n,
                                            const hipComplex* alpha,
                                            const hipComplex* x,
                                            int               incx,
                                            const hipComplex* y,
                                            int               incy,
                                            hipComplex*       AP,
                                            int               lda);

// hipDoubleComplex geru: A := A + alpha*x*y**T. x and y are read-only; AP is updated in place.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgeru(hipblasHandle_t         handle,
                                            int                     m,
                                            int                     n,
                                            const hipDoubleComplex* alpha,
                                            const hipDoubleComplex* x,
                                            int                     incx,
                                            const hipDoubleComplex* y,
                                            int                     incy,
                                            hipDoubleComplex*       AP,
                                            int                     lda);

// hipDoubleComplex gerc: A := A + alpha*x*y**H. x and y are read-only; AP is updated in place.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgerc(hipblasHandle_t         handle,
                                            int                     m,
                                            int                     n,
                                            const hipDoubleComplex* alpha,
                                            const hipDoubleComplex* x,
                                            int                     incx,
                                            const hipDoubleComplex* y,
                                            int                     incy,
                                            hipDoubleComplex*       AP,
                                            int                     lda);

// 64-bit interface
// float ger (A := A + alpha*x*y**T) with int64_t m, n, incx, incy, and lda.
HIPBLAS_EXPORT hipblasStatus_t hipblasSger_64(hipblasHandle_t handle,
                                              int64_t         m,
                                              int64_t         n,
                                              const float*    alpha,
                                              const float*    x,
                                              int64_t         incx,
                                              const float*    y,
                                              int64_t         incy,
                                              float*          AP,
                                              int64_t         lda);

// double ger (A := A + alpha*x*y**T) with int64_t m, n, incx, incy, and lda.
HIPBLAS_EXPORT hipblasStatus_t hipblasDger_64(hipblasHandle_t handle,
                                              int64_t         m,
                                              int64_t         n,
                                              const double*   alpha,
                                              const double*   x,
                                              int64_t         incx,
                                              const double*   y,
                                              int64_t         incy,
                                              double*         AP,
                                              int64_t         lda);

// hipComplex geru (A := A + alpha*x*y**T) with int64_t m, n, incx, incy, and lda.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgeru_64(hipblasHandle_t   handle,
                                               int64_t           m,
                                               int64_t           n,
                                               const hipComplex* alpha,
                                               const hipComplex* x,
                                               int64_t           incx,
                                               const hipComplex* y,
                                               int64_t           incy,
                                               hipComplex*       AP,
                                               int64_t           lda);

// hipComplex gerc (A := A + alpha*x*y**H) with int64_t m, n, incx, incy, and lda.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgerc_64(hipblasHandle_t   handle,
                                               int64_t           m,
                                               int64_t           n,
                                               const hipComplex* alpha,
                                               const hipComplex* x,
                                               int64_t           incx,
                                               const hipComplex* y,
                                               int64_t           incy,
                                               hipComplex*       AP,
                                               int64_t           lda);

// hipDoubleComplex geru (A := A + alpha*x*y**T) with int64_t m, n, incx, incy, and lda.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgeru_64(hipblasHandle_t         handle,
                                               int64_t                 m,
                                               int64_t                 n,
                                               const hipDoubleComplex* alpha,
                                               const hipDoubleComplex* x,
                                               int64_t                 incx,
                                               const hipDoubleComplex* y,
                                               int64_t                 incy,
                                               hipDoubleComplex*       AP,
                                               int64_t                 lda);

// hipDoubleComplex gerc (A := A + alpha*x*y**H) with int64_t m, n, incx, incy, and lda.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgerc_64(hipblasHandle_t         handle,
                                               int64_t                 m,
                                               int64_t                 n,
                                               const hipDoubleComplex* alpha,
                                               const hipDoubleComplex* x,
                                               int64_t                 incx,
                                               const hipDoubleComplex* y,
                                               int64_t                 incy,
                                               hipDoubleComplex*       AP,
                                               int64_t                 lda);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The gerBatched, geruBatched, and gercBatched functions perform a batch of the matrix-vector operations:

        A_i := A_i + alpha*x_i*y_i**T, OR
        A_i := A_i + alpha*x_i*y_i**H  for gerc

    where ``(A_i, x_i, y_i)`` is the ``i``-th instance of the batch,
    ``alpha`` is a scalar, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``m`` by ``n`` matrix, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    m         [int]
              the number of rows of each matrix A_i.
    @param[in]
    n         [int]
              the number of columns of each matrix A_i.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each vector x_i.
    @param[in]
    y         device array of device pointers storing each vector y_i.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each vector y_i.
    @param[inout]
    AP        device array of device pointers storing each matrix A_i.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i.
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// float gerBatched: the y**T update applied to each batch instance.
// x, y, and AP are arrays of per-instance pointers; a single alpha is shared by the batch.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgerBatched(hipblasHandle_t    handle,
                                                  int                m,
                                                  int                n,
                                                  const float*       alpha,
                                                  const float* const x[],
                                                  int                incx,
                                                  const float* const y[],
                                                  int                incy,
                                                  float* const       AP[],
                                                  int                lda,
                                                  int                batchCount);

// double gerBatched: the y**T update applied to each batch instance.
// x, y, and AP are arrays of per-instance pointers; a single alpha is shared by the batch.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgerBatched(hipblasHandle_t     handle,
                                                  int                 m,
                                                  int                 n,
                                                  const double*       alpha,
                                                  const double* const x[],
                                                  int                 incx,
                                                  const double* const y[],
                                                  int                 incy,
                                                  double* const       AP[],
                                                  int                 lda,
                                                  int                 batchCount);

// hipComplex geruBatched: the y**T update applied to each batch instance.
// x, y, and AP are arrays of per-instance pointers; a single alpha is shared by the batch.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgeruBatched(hipblasHandle_t         handle,
                                                   int                     m,
                                                   int                     n,
                                                   const hipComplex*       alpha,
                                                   const hipComplex* const x[],
                                                   int                     incx,
                                                   const hipComplex* const y[],
                                                   int                     incy,
                                                   hipComplex* const       AP[],
                                                   int                     lda,
                                                   int                     batchCount);

// hipComplex gercBatched: the y**H update applied to each batch instance.
// x, y, and AP are arrays of per-instance pointers; a single alpha is shared by the batch.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgercBatched(hipblasHandle_t         handle,
                                                   int                     m,
                                                   int                     n,
                                                   const hipComplex*       alpha,
                                                   const hipComplex* const x[],
                                                   int                     incx,
                                                   const hipComplex* const y[],
                                                   int                     incy,
                                                   hipComplex* const       AP[],
                                                   int                     lda,
                                                   int                     batchCount);

// hipDoubleComplex geruBatched: the y**T update applied to each batch instance.
// x, y, and AP are arrays of per-instance pointers; a single alpha is shared by the batch.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgeruBatched(hipblasHandle_t               handle,
                                                   int                           m,
                                                   int                           n,
                                                   const hipDoubleComplex*       alpha,
                                                   const hipDoubleComplex* const x[],
                                                   int                           incx,
                                                   const hipDoubleComplex* const y[],
                                                   int                           incy,
                                                   hipDoubleComplex* const       AP[],
                                                   int                           lda,
                                                   int                           batchCount);

// hipDoubleComplex gercBatched: the y**H update applied to each batch instance.
// x, y, and AP are arrays of per-instance pointers; a single alpha is shared by the batch.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgercBatched(hipblasHandle_t               handle,
                                                   int                           m,
                                                   int                           n,
                                                   const hipDoubleComplex*       alpha,
                                                   const hipDoubleComplex* const x[],
                                                   int                           incx,
                                                   const hipDoubleComplex* const y[],
                                                   int                           incy,
                                                   hipDoubleComplex* const       AP[],
                                                   int                           lda,
                                                   int                           batchCount);

// 64-bit interface
// float gerBatched (y**T update) with int64_t m, n, incx, incy, lda, and batchCount.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgerBatched_64(hipblasHandle_t    handle,
                                                     int64_t            m,
                                                     int64_t            n,
                                                     const float*       alpha,
                                                     const float* const x[],
                                                     int64_t            incx,
                                                     const float* const y[],
                                                     int64_t            incy,
                                                     float* const       AP[],
                                                     int64_t            lda,
                                                     int64_t            batchCount);

// double gerBatched (y**T update) with int64_t m, n, incx, incy, lda, and batchCount.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgerBatched_64(hipblasHandle_t     handle,
                                                     int64_t             m,
                                                     int64_t             n,
                                                     const double*       alpha,
                                                     const double* const x[],
                                                     int64_t             incx,
                                                     const double* const y[],
                                                     int64_t             incy,
                                                     double* const       AP[],
                                                     int64_t             lda,
                                                     int64_t             batchCount);

// hipComplex geruBatched (y**T update) with int64_t m, n, incx, incy, lda, and batchCount.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgeruBatched_64(hipblasHandle_t         handle,
                                                      int64_t                 m,
                                                      int64_t                 n,
                                                      const hipComplex*       alpha,
                                                      const hipComplex* const x[],
                                                      int64_t                 incx,
                                                      const hipComplex* const y[],
                                                      int64_t                 incy,
                                                      hipComplex* const       AP[],
                                                      int64_t                 lda,
                                                      int64_t                 batchCount);

// hipComplex gercBatched (y**H update) with int64_t m, n, incx, incy, lda, and batchCount.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgercBatched_64(hipblasHandle_t         handle,
                                                      int64_t                 m,
                                                      int64_t                 n,
                                                      const hipComplex*       alpha,
                                                      const hipComplex* const x[],
                                                      int64_t                 incx,
                                                      const hipComplex* const y[],
                                                      int64_t                 incy,
                                                      hipComplex* const       AP[],
                                                      int64_t                 lda,
                                                      int64_t                 batchCount);

// hipDoubleComplex geruBatched (y**T update) with int64_t m, n, incx, incy, lda, and batchCount.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgeruBatched_64(hipblasHandle_t               handle,
                                                      int64_t                       m,
                                                      int64_t                       n,
                                                      const hipDoubleComplex*       alpha,
                                                      const hipDoubleComplex* const x[],
                                                      int64_t                       incx,
                                                      const hipDoubleComplex* const y[],
                                                      int64_t                       incy,
                                                      hipDoubleComplex* const       AP[],
                                                      int64_t                       lda,
                                                      int64_t                       batchCount);

// hipDoubleComplex gercBatched (y**H update) with int64_t m, n, incx, incy, lda, and batchCount.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgercBatched_64(hipblasHandle_t               handle,
                                                      int64_t                       m,
                                                      int64_t                       n,
                                                      const hipDoubleComplex*       alpha,
                                                      const hipDoubleComplex* const x[],
                                                      int64_t                       incx,
                                                      const hipDoubleComplex* const y[],
                                                      int64_t                       incy,
                                                      hipDoubleComplex* const       AP[],
                                                      int64_t                       lda,
                                                      int64_t                       batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The gerStridedBatched, geruStridedBatched, and gercStridedBatched functions perform a batch of the matrix-vector operations:

        A_i := A_i + alpha*x_i*y_i**T, OR
        A_i := A_i + alpha*x_i*y_i**H  for gerc

    where ``(A_i, x_i, y_i)`` is the ``i``-th instance of the batch,
    ``alpha`` is a scalar, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``m`` by ``n`` matrix, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    m         [int]
              the number of rows of each matrix A_i.
    @param[in]
    n         [int]
              the number of columns of each matrix A_i.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer to the first vector (x_1) in the batch.
    @param[in]
    incx      [int]
              specifies the increments for the elements of each vector x_i.
    @param[in]
    stridex   [hipblasStride]
              stride from the start of one vector (x_i) to the next one (x_i+1).
              There are no restrictions placed on stridex. However, the user should
              ensure that stridex is of an appropriate size. For a typical
              case, this means stridex >= m * incx.
    @param[in]
    y         device pointer to the first vector (y_1) in the batch.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each vector y_i.
    @param[in]
    stridey   [hipblasStride]
              stride from the start of one vector (y_i) to the next one (y_i+1).
              There are no restrictions placed on stridey. However, the user should
              ensure that stridey is of an appropriate size. For a typical
              case, this means stridey >= n * incy.
    @param[inout]
    AP        device pointer to the first matrix (A_1) in the batch.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i.
    @param[in]
    strideA     [hipblasStride]
                stride from the start of one matrix (A_i) to the next one (A_i+1).
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// float gerStridedBatched: A_i := A_i + alpha*x_i*y_i**T for each batch instance.
// x, y, and AP point at the first instance; stridex, stridey, and strideA step to the next one.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgerStridedBatched(hipblasHandle_t handle,
                                                         int             m,
                                                         int             n,
                                                         const float*    alpha,
                                                         const float*    x,
                                                         int             incx,
                                                         hipblasStride   stridex,
                                                         const float*    y,
                                                         int             incy,
                                                         hipblasStride   stridey,
                                                         float*          AP,
                                                         int             lda,
                                                         hipblasStride   strideA,
                                                         int             batchCount);

// double gerStridedBatched: A_i := A_i + alpha*x_i*y_i**T for each batch instance.
// x, y, and AP point at the first instance; stridex, stridey, and strideA step to the next one.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgerStridedBatched(hipblasHandle_t handle,
                                                         int             m,
                                                         int             n,
                                                         const double*   alpha,
                                                         const double*   x,
                                                         int             incx,
                                                         hipblasStride   stridex,
                                                         const double*   y,
                                                         int             incy,
                                                         hipblasStride   stridey,
                                                         double*         AP,
                                                         int             lda,
                                                         hipblasStride   strideA,
                                                         int             batchCount);

// hipComplex "geru" variant, 32-bit int sizes. AP is the only non-const data pointer.
// NOTE(review): the 'u' suffix presumably selects the unconjugated update as in reference
// BLAS -- confirm against the group documentation.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgeruStridedBatched(hipblasHandle_t   handle,
                                                          int               m,
                                                          int               n,
                                                          const hipComplex* alpha,
                                                          const hipComplex* x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const hipComplex* y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          hipComplex*       AP,
                                                          int               lda,
                                                          hipblasStride     strideA,
                                                          int               batchCount);

// hipComplex "gerc" variant, 32-bit int sizes. AP is the only non-const data pointer.
// NOTE(review): the 'c' suffix presumably selects the conjugated update as in reference
// BLAS -- confirm against the group documentation.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgercStridedBatched(hipblasHandle_t   handle,
                                                          int               m,
                                                          int               n,
                                                          const hipComplex* alpha,
                                                          const hipComplex* x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const hipComplex* y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          hipComplex*       AP,
                                                          int               lda,
                                                          hipblasStride     strideA,
                                                          int               batchCount);

// hipDoubleComplex "geru" variant, 32-bit int sizes. AP is the only non-const data pointer.
// NOTE(review): the 'u' suffix presumably selects the unconjugated update as in reference
// BLAS -- confirm against the group documentation.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgeruStridedBatched(hipblasHandle_t         handle,
                                                          int                     m,
                                                          int                     n,
                                                          const hipDoubleComplex* alpha,
                                                          const hipDoubleComplex* x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          const hipDoubleComplex* y,
                                                          int                     incy,
                                                          hipblasStride           stridey,
                                                          hipDoubleComplex*       AP,
                                                          int                     lda,
                                                          hipblasStride           strideA,
                                                          int                     batchCount);

// hipDoubleComplex "gerc" variant, 32-bit int sizes. AP is the only non-const data pointer.
// NOTE(review): the 'c' suffix presumably selects the conjugated update as in reference
// BLAS -- confirm against the group documentation.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgercStridedBatched(hipblasHandle_t         handle,
                                                          int                     m,
                                                          int                     n,
                                                          const hipDoubleComplex* alpha,
                                                          const hipDoubleComplex* x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          const hipDoubleComplex* y,
                                                          int                     incy,
                                                          hipblasStride           stridey,
                                                          hipDoubleComplex*       AP,
                                                          int                     lda,
                                                          hipblasStride           strideA,
                                                          int                     batchCount);

// 64-bit interface
// Same parameter list as hipblasSgerStridedBatched, but m, n, incx, incy, lda, and
// batchCount are int64_t. The strides keep the hipblasStride type.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgerStridedBatched_64(hipblasHandle_t handle,
                                                            int64_t         m,
                                                            int64_t         n,
                                                            const float*    alpha,
                                                            const float*    x,
                                                            int64_t         incx,
                                                            hipblasStride   stridex,
                                                            const float*    y,
                                                            int64_t         incy,
                                                            hipblasStride   stridey,
                                                            float*          AP,
                                                            int64_t         lda,
                                                            hipblasStride   strideA,
                                                            int64_t         batchCount);

// Same parameter list as hipblasDgerStridedBatched, but m, n, incx, incy, lda, and
// batchCount are int64_t. The strides keep the hipblasStride type.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgerStridedBatched_64(hipblasHandle_t handle,
                                                            int64_t         m,
                                                            int64_t         n,
                                                            const double*   alpha,
                                                            const double*   x,
                                                            int64_t         incx,
                                                            hipblasStride   stridex,
                                                            const double*   y,
                                                            int64_t         incy,
                                                            hipblasStride   stridey,
                                                            double*         AP,
                                                            int64_t         lda,
                                                            hipblasStride   strideA,
                                                            int64_t         batchCount);

// Same parameter list as hipblasCgeruStridedBatched, but m, n, incx, incy, lda, and
// batchCount are int64_t. The strides keep the hipblasStride type.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgeruStridedBatched_64(hipblasHandle_t   handle,
                                                             int64_t           m,
                                                             int64_t           n,
                                                             const hipComplex* alpha,
                                                             const hipComplex* x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const hipComplex* y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             hipComplex*       AP,
                                                             int64_t           lda,
                                                             hipblasStride     strideA,
                                                             int64_t           batchCount);

// Same parameter list as hipblasCgercStridedBatched, but m, n, incx, incy, lda, and
// batchCount are int64_t. The strides keep the hipblasStride type.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgercStridedBatched_64(hipblasHandle_t   handle,
                                                             int64_t           m,
                                                             int64_t           n,
                                                             const hipComplex* alpha,
                                                             const hipComplex* x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const hipComplex* y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             hipComplex*       AP,
                                                             int64_t           lda,
                                                             hipblasStride     strideA,
                                                             int64_t           batchCount);

// Same parameter list as hipblasZgeruStridedBatched, but m, n, incx, incy, lda, and
// batchCount are int64_t. The strides keep the hipblasStride type.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgeruStridedBatched_64(hipblasHandle_t         handle,
                                                             int64_t                 m,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* alpha,
                                                             const hipDoubleComplex* x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             const hipDoubleComplex* y,
                                                             int64_t                 incy,
                                                             hipblasStride           stridey,
                                                             hipDoubleComplex*       AP,
                                                             int64_t                 lda,
                                                             hipblasStride           strideA,
                                                             int64_t                 batchCount);

// Same parameter list as hipblasZgercStridedBatched, but m, n, incx, incy, lda, and
// batchCount are int64_t. The strides keep the hipblasStride type.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgercStridedBatched_64(hipblasHandle_t         handle,
                                                             int64_t                 m,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* alpha,
                                                             const hipDoubleComplex* x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             const hipDoubleComplex* y,
                                                             int64_t                 incy,
                                                             hipblasStride           stridey,
                                                             hipDoubleComplex*       AP,
                                                             int64_t                 lda,
                                                             hipblasStride           strideA,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The hbmv functions perform the matrix-vector operations:

        y := alpha*A*x + beta*y

    where ``alpha`` and ``beta`` are scalars, ``x`` and ``y`` are ``n``-element vectors, and ``A`` is an
    ``n`` by ``n`` Hermitian band matrix with ``k`` super-diagonals.

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : ``c`` and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is being supplied.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is being supplied.
    @param[in]
    n         [int]
              the order of the matrix A.
    @param[in]
    k         [int]
              the number of super-diagonals of the matrix A. Must be >= 0.
    @param[in]
    alpha     device pointer or host pointer to scalar alpha.
    @param[in]
    AP        device pointer storing matrix A. Of dimension (lda, n).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The leading (k + 1) by n part of A must contain the upper
                triangular band part of the Hermitian matrix, with the leading
                diagonal in row (k + 1), the first super-diagonal on the RHS
                of row k, and so forth.
                The top left k by k triangle of A will not be referenced.  
                    Ex (upper, lda = n = 4, k = 1):  
                    A ->                            Represented matrix  
                    (0,0) (5,9) (6,8) (7,7) ->       (1, 0) (5, 9) (0, 0) (0, 0)  
                    (1,0) (2,0) (3,0) (4,0) ->      (5,-9) (2, 0) (6, 8) (0, 0)  
                    (0,0) (0,0) (0,0) (0,0) ->      (0, 0) (6,-8) (3, 0) (7, 7)  
                    (0,0) (0,0) (0,0) (0,0) ->      (0, 0) (0, 0) (7,-7) (4, 0)  
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The leading (k + 1) by n part of A must contain the lower
                triangular band part of the Hermitian matrix, with the leading
                diagonal in row (1), the first sub-diagonal on the LHS of
                row 2, and so forth.
                The bottom right k by k triangle of A will not be referenced.  
                    Ex (lower, lda = 2, n = 4, k = 1):  
                    A ->                              Represented matrix  
                    (1,0) (2,0) (3,0) (4,0) ->        (1, 0) (5,-9) (0, 0) (0, 0)  
                    (5,9) (6,8) (7,7) (0,0) ->        (5, 9) (2, 0) (6,-8) (0, 0)  
                    ->                               (0, 0) (6, 8) (3, 0) (7,-7)  
                    ->                                (0, 0) (0, 0) (7, 7) (4, 0)  
              - As a Hermitian matrix, the imaginary part of the main diagonal
              of A will not be referenced and is assumed to be == 0.
    @param[in]
    lda       [int]
              specifies the leading dimension of A. Must be >= k + 1.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device pointer storing vector y.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.

    ********************************************************************/

// hipComplex hbmv: y := alpha*A*x + beta*y, where A is an n-by-n Hermitian band matrix with
// k super-diagonals supplied in band storage through AP/lda. Sizes and increments are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasChbmv(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            int               k,
                                            const hipComplex* alpha,
                                            const hipComplex* AP,
                                            int               lda,
                                            const hipComplex* x,
                                            int               incx,
                                            const hipComplex* beta,
                                            hipComplex*       y,
                                            int               incy);

// hipDoubleComplex hbmv: y := alpha*A*x + beta*y, where A is an n-by-n Hermitian band matrix
// with k super-diagonals supplied in band storage through AP/lda. Sizes and increments are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhbmv(hipblasHandle_t         handle,
                                            hipblasFillMode_t       uplo,
                                            int                     n,
                                            int                     k,
                                            const hipDoubleComplex* alpha,
                                            const hipDoubleComplex* AP,
                                            int                     lda,
                                            const hipDoubleComplex* x,
                                            int                     incx,
                                            const hipDoubleComplex* beta,
                                            hipDoubleComplex*       y,
                                            int                     incy);

//64-bit interface
// Same parameter list as hipblasChbmv, but n, k, lda, incx, and incy are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasChbmv_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               int64_t           k,
                                               const hipComplex* alpha,
                                               const hipComplex* AP,
                                               int64_t           lda,
                                               const hipComplex* x,
                                               int64_t           incx,
                                               const hipComplex* beta,
                                               hipComplex*       y,
                                               int64_t           incy);

// Same parameter list as hipblasZhbmv, but n, k, lda, incx, and incy are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhbmv_64(hipblasHandle_t         handle,
                                               hipblasFillMode_t       uplo,
                                               int64_t                 n,
                                               int64_t                 k,
                                               const hipDoubleComplex* alpha,
                                               const hipDoubleComplex* AP,
                                               int64_t                 lda,
                                               const hipDoubleComplex* x,
                                               int64_t                 incx,
                                               const hipDoubleComplex* beta,
                                               hipDoubleComplex*       y,
                                               int64_t                 incy);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The hbmvBatched functions perform the matrix-vector operation:

        y_i := alpha*A_i*x_i + beta*y_i

    where ``alpha`` and ``beta`` are scalars, ``x_i`` and ``y_i`` are ``n``-element vectors, and ``A_i`` is an
    ``n`` by ``n`` Hermitian band matrix with ``k`` super-diagonals, for each batch in ``i`` = [1, ``batchCount``].

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is being supplied.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is being supplied.
    @param[in]
    n         [int]
              the order of each matrix A_i.
    @param[in]
    k         [int]
              the number of super-diagonals of each matrix A_i. Must be >= 0.
    @param[in]
    alpha     device pointer or host pointer to scalar alpha.
    @param[in]
    AP        device array of device pointers storing each matrix A_i of dimension (lda, n).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The leading (k + 1) by n part of each A_i must contain the upper
                triangular band part of the Hermitian matrix, with the leading
                diagonal in row (k + 1), the first super-diagonal on the RHS
                of row k, and so forth.
                The top left k by k triangle of each A_i will not be referenced.  
                    Ex (upper, lda = n = 4, k = 1):  
                    A   ->                           Represented matrix  
                    (0,0) (5,9) (6,8) (7,7)  ->      (1, 0) (5, 9) (0, 0) (0, 0)  
                    (1,0) (2,0) (3,0) (4,0)  ->      (5,-9) (2, 0) (6, 8) (0, 0)  
                    (0,0) (0,0) (0,0) (0,0)  ->      (0, 0) (6,-8) (3, 0) (7, 7)  
                    (0,0) (0,0) (0,0) (0,0)  ->      (0, 0) (0, 0) (7,-7) (4, 0)  
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The leading (k + 1) by n part of each A_i must contain the lower
                triangular band part of the Hermitian matrix, with the leading
                diagonal in row (1), the first sub-diagonal on the LHS of
                row 2, and so forth.
                The bottom right k by k triangle of each A_i will not be referenced.  
                    Ex (lower, lda = 2, n = 4, k = 1):  
                    A   ->                             Represented matrix  
                    (1,0) (2,0) (3,0) (4,0)  ->        (1, 0) (5,-9) (0, 0) (0, 0)  
                    (5,9) (6,8) (7,7) (0,0)  ->        (5, 9) (2, 0) (6,-8) (0, 0)  
                    ->                                (0, 0) (6, 8) (3, 0) (7,-7)  
                    ->                                (0, 0) (0, 0) (7, 7) (4, 0)
              - As a Hermitian matrix, the imaginary part of the main diagonal
              of each A_i will not be referenced and is assumed to be == 0.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i. Must be >= k + 1.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device array of device pointers storing each vector y_i.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// hipComplex batched hbmv: y_i := alpha*A_i*x_i + beta*y_i for each of batchCount instances.
// AP, x, and y are arrays of per-instance pointers; alpha and beta are shared by all instances.
HIPBLAS_EXPORT hipblasStatus_t hipblasChbmvBatched(hipblasHandle_t         handle,
                                                   hipblasFillMode_t       uplo,
                                                   int                     n,
                                                   int                     k,
                                                   const hipComplex*       alpha,
                                                   const hipComplex* const AP[],
                                                   int                     lda,
                                                   const hipComplex* const x[],
                                                   int                     incx,
                                                   const hipComplex*       beta,
                                                   hipComplex* const       y[],
                                                   int                     incy,
                                                   int                     batchCount);

// hipDoubleComplex batched hbmv: y_i := alpha*A_i*x_i + beta*y_i for each of batchCount
// instances. AP, x, and y are arrays of per-instance pointers; alpha and beta are shared.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhbmvBatched(hipblasHandle_t               handle,
                                                   hipblasFillMode_t             uplo,
                                                   int                           n,
                                                   int                           k,
                                                   const hipDoubleComplex*       alpha,
                                                   const hipDoubleComplex* const AP[],
                                                   int                           lda,
                                                   const hipDoubleComplex* const x[],
                                                   int                           incx,
                                                   const hipDoubleComplex*       beta,
                                                   hipDoubleComplex* const       y[],
                                                   int                           incy,
                                                   int                           batchCount);

//64-bit interface
// Same parameter list as hipblasChbmvBatched, but n, k, lda, incx, incy, and batchCount
// are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasChbmvBatched_64(hipblasHandle_t         handle,
                                                      hipblasFillMode_t       uplo,
                                                      int64_t                 n,
                                                      int64_t                 k,
                                                      const hipComplex*       alpha,
                                                      const hipComplex* const AP[],
                                                      int64_t                 lda,
                                                      const hipComplex* const x[],
                                                      int64_t                 incx,
                                                      const hipComplex*       beta,
                                                      hipComplex* const       y[],
                                                      int64_t                 incy,
                                                      int64_t                 batchCount);

// Same parameter list as hipblasZhbmvBatched, but n, k, lda, incx, incy, and batchCount
// are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhbmvBatched_64(hipblasHandle_t               handle,
                                                      hipblasFillMode_t             uplo,
                                                      int64_t                       n,
                                                      int64_t                       k,
                                                      const hipDoubleComplex*       alpha,
                                                      const hipDoubleComplex* const AP[],
                                                      int64_t                       lda,
                                                      const hipDoubleComplex* const x[],
                                                      int64_t                       incx,
                                                      const hipDoubleComplex*       beta,
                                                      hipDoubleComplex* const       y[],
                                                      int64_t                       incy,
                                                      int64_t                       batchCount);

//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The hbmvStridedBatched functions perform the matrix-vector operation:

        y_i := alpha*A_i*x_i + beta*y_i

    where ``alpha`` and ``beta`` are scalars, ``x_i`` and ``y_i`` are ``n``-element vectors, and ``A_i`` is an
    ``n`` by ``n`` Hermitian band matrix with ``k`` super-diagonals, for each batch in ``i`` = [1, ``batchCount``].

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is being supplied.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is being supplied.
    @param[in]
    n         [int]
              the order of each matrix A_i.
    @param[in]
    k         [int]
              the number of super-diagonals of each matrix A_i. Must be >= 0.
    @param[in]
    alpha     device pointer or host pointer to scalar alpha.
    @param[in]
    AP        device array pointing to the first matrix A_1. Each A_i is of dimension (lda, n).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The leading (k + 1) by n part of each A_i must contain the upper
                triangular band part of the Hermitian matrix, with the leading
                diagonal in row (k + 1), the first super-diagonal on the RHS
                of row k, and so forth.
                The top left k by k triangle of each A_i will not be referenced.  
                    Ex (upper, lda = n = 4, k = 1):  
                    A   ->                           Represented matrix  
                    (0,0) (5,9) (6,8) (7,7)  ->      (1, 0) (5, 9) (0, 0) (0, 0)  
                    (1,0) (2,0) (3,0) (4,0)  ->      (5,-9) (2, 0) (6, 8) (0, 0)  
                    (0,0) (0,0) (0,0) (0,0) ->       (0, 0) (6,-8) (3, 0) (7, 7)  
                    (0,0) (0,0) (0,0) (0,0)  ->      (0, 0) (0, 0) (7,-7) (4, 0)
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The leading (k + 1) by n part of each A_i must contain the lower
                triangular band part of the Hermitian matrix, with the leading
                diagonal in row (1), the first sub-diagonal on the LHS of
                row 2, and so forth.
                The bottom right k by k triangle of each A_i will not be referenced.    
                    Ex (lower, lda = 2, n = 4, k = 1):  
                    A                               Represented matrix  
                    (1,0) (2,0) (3,0) (4,0)   ->       (1, 0) (5,-9) (0, 0) (0, 0)  
                    (5,9) (6,8) (7,7) (0,0)  ->        (5, 9) (2, 0) (6,-8) (0, 0)  
                    ->                               (0, 0) (6, 8) (3, 0) (7,-7)  
                    ->                              (0, 0) (0, 0) (7, 7) (4, 0)  
              - As a Hermitian matrix, the imaginary part of the main diagonal
              of each A_i will not be referenced and is assumed to be == 0.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i. Must be >= k + 1.
    @param[in]
    strideA  [hipblasStride]
              stride from the start of one matrix (A_i) to the next one (A_i+1).
    @param[in]
    x         device array pointing to the first vector x_1.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    stridex  [hipblasStride]
              stride from the start of one vector (x_i) to the next one (x_i+1).
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device array pointing to the first vector y_1.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[in]
    stridey  [hipblasStride]
              stride from the start of one vector (y_i) to the next one (y_i+1).
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// hipComplex strided-batched hbmv: y_i := alpha*A_i*x_i + beta*y_i for batchCount instances.
// AP, x, and y each point at the first instance; strideA, stridex, and stridey give the
// distance from one instance to the next.
HIPBLAS_EXPORT hipblasStatus_t hipblasChbmvStridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          int               k,
                                                          const hipComplex* alpha,
                                                          const hipComplex* AP,
                                                          int               lda,
                                                          hipblasStride     strideA,
                                                          const hipComplex* x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const hipComplex* beta,
                                                          hipComplex*       y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          int               batchCount);

// hipDoubleComplex strided-batched hbmv: y_i := alpha*A_i*x_i + beta*y_i for batchCount
// instances. AP, x, and y each point at the first instance; strideA, stridex, and stridey
// give the distance from one instance to the next.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhbmvStridedBatched(hipblasHandle_t         handle,
                                                          hipblasFillMode_t       uplo,
                                                          int                     n,
                                                          int                     k,
                                                          const hipDoubleComplex* alpha,
                                                          const hipDoubleComplex* AP,
                                                          int                     lda,
                                                          hipblasStride           strideA,
                                                          const hipDoubleComplex* x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          const hipDoubleComplex* beta,
                                                          hipDoubleComplex*       y,
                                                          int                     incy,
                                                          hipblasStride           stridey,
                                                          int                     batchCount);

//64-bit interface
// 64-bit hbmvStridedBatched for hipComplex data: n, k, lda, incx, incy and
// batchCount are int64_t; strideA, stridex and stridey remain hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasChbmvStridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             int64_t           k,
                                                             const hipComplex* alpha,
                                                             const hipComplex* AP,
                                                             int64_t           lda,
                                                             hipblasStride     strideA,
                                                             const hipComplex* x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const hipComplex* beta,
                                                             hipComplex*       y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             int64_t           batchCount);

// 64-bit hbmvStridedBatched for hipDoubleComplex data: n, k, lda, incx, incy and
// batchCount are int64_t; strideA, stridex and stridey remain hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhbmvStridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasFillMode_t       uplo,
                                                             int64_t                 n,
                                                             int64_t                 k,
                                                             const hipDoubleComplex* alpha,
                                                             const hipDoubleComplex* AP,
                                                             int64_t                 lda,
                                                             hipblasStride           strideA,
                                                             const hipDoubleComplex* x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             const hipDoubleComplex* beta,
                                                             hipDoubleComplex*       y,
                                                             int64_t                 incy,
                                                             hipblasStride           stridey,
                                                             int64_t                 batchCount);

//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The hemv functions perform one of the matrix-vector operations:

        y := alpha*A*x + beta*y

    where ``alpha`` and ``beta`` are scalars, ``x`` and ``y`` are ``n``-element vectors, and ``A`` is an
    ``n`` by ``n`` Hermitian matrix.

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : ``c`` and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              - HIPBLAS_FILL_MODE_UPPER: the upper triangular part of the Hermitian matrix A is supplied.
              - HIPBLAS_FILL_MODE_LOWER: the lower triangular part of the Hermitian matrix A is supplied.
    @param[in]
    n         [int]
              the order of the matrix A.
    @param[in]
    alpha     device pointer or host pointer to scalar alpha.
    @param[in]
    AP        device pointer storing matrix A. Of dimension (lda, n).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular part of A must contain
                the upper triangular part of a Hermitian matrix. The lower
                triangular part of A will not be referenced.
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular part of A must contain
                the lower triangular part of a Hermitian matrix. The upper
                triangular part of A will not be referenced.
              - As a Hermitian matrix, the imaginary part of the main diagonal
              of A will not be referenced and is assumed to be == 0.
    @param[in]
    lda       [int]
              specifies the leading dimension of A. Must be >= max(1, n).
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device pointer storing vector y.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.

    ********************************************************************/

// hemv (y := alpha*A*x + beta*y) for hipComplex data; n, lda, incx and incy are `int`.
HIPBLAS_EXPORT hipblasStatus_t hipblasChemv(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            const hipComplex* alpha,
                                            const hipComplex* AP,
                                            int               lda,
                                            const hipComplex* x,
                                            int               incx,
                                            const hipComplex* beta,
                                            hipComplex*       y,
                                            int               incy);

// hemv (y := alpha*A*x + beta*y) for hipDoubleComplex data; n, lda, incx and incy are `int`.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhemv(hipblasHandle_t         handle,
                                            hipblasFillMode_t       uplo,
                                            int                     n,
                                            const hipDoubleComplex* alpha,
                                            const hipDoubleComplex* AP,
                                            int                     lda,
                                            const hipDoubleComplex* x,
                                            int                     incx,
                                            const hipDoubleComplex* beta,
                                            hipDoubleComplex*       y,
                                            int                     incy);

//64-bit interface
// 64-bit hemv for hipComplex data: n, lda, incx and incy are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasChemv_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               const hipComplex* alpha,
                                               const hipComplex* AP,
                                               int64_t           lda,
                                               const hipComplex* x,
                                               int64_t           incx,
                                               const hipComplex* beta,
                                               hipComplex*       y,
                                               int64_t           incy);

// 64-bit hemv for hipDoubleComplex data: n, lda, incx and incy are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhemv_64(hipblasHandle_t         handle,
                                               hipblasFillMode_t       uplo,
                                               int64_t                 n,
                                               const hipDoubleComplex* alpha,
                                               const hipDoubleComplex* AP,
                                               int64_t                 lda,
                                               const hipDoubleComplex* x,
                                               int64_t                 incx,
                                               const hipDoubleComplex* beta,
                                               hipDoubleComplex*       y,
                                               int64_t                 incy);

//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The hemvBatched functions perform one of the matrix-vector operations:

        y_i := alpha*A_i*x_i + beta*y_i

    where ``alpha`` and ``beta`` are scalars, ``x_i`` and ``y_i`` are ``n``-element vectors, and ``A_i`` is an
    ``n`` by ``n`` Hermitian matrix, for each batch in ``i`` = [1, ``batchCount``].

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              - HIPBLAS_FILL_MODE_UPPER: the upper triangular part of the Hermitian matrix A is supplied.
              - HIPBLAS_FILL_MODE_LOWER: the lower triangular part of the Hermitian matrix A is supplied.
    @param[in]
    n         [int]
              the order of each matrix A_i.
    @param[in]
    alpha     device pointer or host pointer to scalar alpha.
    @param[in]
    AP        device array of device pointers storing each matrix A_i of dimension (lda, n).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular part of each A_i must contain
                the upper triangular part of a Hermitian matrix. The lower
                triangular part of each A_i will not be referenced.
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular part of each A_i must contain
                the lower triangular part of a Hermitian matrix. The upper
                triangular part of each A_i will not be referenced.
              - As a Hermitian matrix, the imaginary part of the main diagonal
              of each A_i will not be referenced and is assumed to be == 0.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i. Must be >= max(1, n).
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device array of device pointers storing each vector y_i.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// hemvBatched for hipComplex data. AP, x and y are arrays of per-instance pointers;
// n, lda, incx, incy and batchCount are `int`.
HIPBLAS_EXPORT hipblasStatus_t hipblasChemvBatched(hipblasHandle_t         handle,
                                                   hipblasFillMode_t       uplo,
                                                   int                     n,
                                                   const hipComplex*       alpha,
                                                   const hipComplex* const AP[],
                                                   int                     lda,
                                                   const hipComplex* const x[],
                                                   int                     incx,
                                                   const hipComplex*       beta,
                                                   hipComplex* const       y[],
                                                   int                     incy,
                                                   int                     batchCount);

// hemvBatched for hipDoubleComplex data. AP, x and y are arrays of per-instance pointers;
// n, lda, incx, incy and batchCount are `int`.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhemvBatched(hipblasHandle_t               handle,
                                                   hipblasFillMode_t             uplo,
                                                   int                           n,
                                                   const hipDoubleComplex*       alpha,
                                                   const hipDoubleComplex* const AP[],
                                                   int                           lda,
                                                   const hipDoubleComplex* const x[],
                                                   int                           incx,
                                                   const hipDoubleComplex*       beta,
                                                   hipDoubleComplex* const       y[],
                                                   int                           incy,
                                                   int                           batchCount);

//64-bit interface
// 64-bit hemvBatched for hipComplex data: n, lda, incx, incy and batchCount are int64_t.
// AP, x and y are arrays of per-instance pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasChemvBatched_64(hipblasHandle_t         handle,
                                                      hipblasFillMode_t       uplo,
                                                      int64_t                 n,
                                                      const hipComplex*       alpha,
                                                      const hipComplex* const AP[],
                                                      int64_t                 lda,
                                                      const hipComplex* const x[],
                                                      int64_t                 incx,
                                                      const hipComplex*       beta,
                                                      hipComplex* const       y[],
                                                      int64_t                 incy,
                                                      int64_t                 batchCount);

// 64-bit hemvBatched for hipDoubleComplex data: n, lda, incx, incy and batchCount are int64_t.
// AP, x and y are arrays of per-instance pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhemvBatched_64(hipblasHandle_t               handle,
                                                      hipblasFillMode_t             uplo,
                                                      int64_t                       n,
                                                      const hipDoubleComplex*       alpha,
                                                      const hipDoubleComplex* const AP[],
                                                      int64_t                       lda,
                                                      const hipDoubleComplex* const x[],
                                                      int64_t                       incx,
                                                      const hipDoubleComplex*       beta,
                                                      hipDoubleComplex* const       y[],
                                                      int64_t                       incy,
                                                      int64_t                       batchCount);

//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The hemvStridedBatched functions perform one of the matrix-vector operations:

        y_i := alpha*A_i*x_i + beta*y_i

    where ``alpha`` and ``beta`` are scalars, ``x_i`` and ``y_i`` are ``n``-element vectors, and ``A_i`` is an
    ``n`` by ``n`` Hermitian matrix, for each batch in ``i`` = [1, ``batchCount``].

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              - HIPBLAS_FILL_MODE_UPPER: the upper triangular part of the Hermitian matrix A is supplied.
              - HIPBLAS_FILL_MODE_LOWER: the lower triangular part of the Hermitian matrix A is supplied.
    @param[in]
    n         [int]
              the order of each matrix A_i.
    @param[in]
    alpha     device pointer or host pointer to scalar alpha.
    @param[in]
    AP        device pointer to the first matrix (A_1) of the batch. Each A_i is of dimension (lda, n).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular part of each A_i must contain
                the upper triangular part of a Hermitian matrix. The lower
                triangular part of each A_i will not be referenced.
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular part of each A_i must contain
                the lower triangular part of a Hermitian matrix. The upper
                triangular part of each A_i will not be referenced.
              - As a Hermitian matrix, the imaginary part of the main diagonal
              of each A_i will not be referenced and is assumed to be == 0.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i. Must be >= max(1, n).
    @param[in]
    strideA    [hipblasStride]
                stride from the start of one matrix (A_i) to the next one (A_i+1).

    @param[in]
    x         device pointer to the first vector (x_1) of the batch.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    stridex  [hipblasStride]
              stride from the start of one vector (x_i) to the next one (x_i+1).
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device pointer to the first vector (y_1) of the batch.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[in]
    stridey  [hipblasStride]
              stride from the start of one vector (y_i) to the next one (y_i+1).
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// hemvStridedBatched for hipComplex data. AP, x and y are single pointers; consecutive
// batch instances are separated by strideA, stridex and stridey. n, lda, incx, incy and
// batchCount are `int`.
HIPBLAS_EXPORT hipblasStatus_t hipblasChemvStridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          const hipComplex* alpha,
                                                          const hipComplex* AP,
                                                          int               lda,
                                                          hipblasStride     strideA,
                                                          const hipComplex* x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const hipComplex* beta,
                                                          hipComplex*       y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          int               batchCount);

// hemvStridedBatched for hipDoubleComplex data. AP, x and y are single pointers; consecutive
// batch instances are separated by strideA, stridex and stridey. n, lda, incx, incy and
// batchCount are `int`.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhemvStridedBatched(hipblasHandle_t         handle,
                                                          hipblasFillMode_t       uplo,
                                                          int                     n,
                                                          const hipDoubleComplex* alpha,
                                                          const hipDoubleComplex* AP,
                                                          int                     lda,
                                                          hipblasStride           strideA,
                                                          const hipDoubleComplex* x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          const hipDoubleComplex* beta,
                                                          hipDoubleComplex*       y,
                                                          int                     incy,
                                                          hipblasStride           stridey,
                                                          int                     batchCount);

//64-bit interface
// 64-bit hemvStridedBatched for hipComplex data: n, lda, incx, incy and batchCount are
// int64_t; strideA, stridex and stridey remain hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasChemvStridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             const hipComplex* alpha,
                                                             const hipComplex* AP,
                                                             int64_t           lda,
                                                             hipblasStride     strideA,
                                                             const hipComplex* x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const hipComplex* beta,
                                                             hipComplex*       y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             int64_t           batchCount);

// 64-bit hemvStridedBatched for hipDoubleComplex data: n, lda, incx, incy and batchCount are
// int64_t; strideA, stridex and stridey remain hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhemvStridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasFillMode_t       uplo,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* alpha,
                                                             const hipDoubleComplex* AP,
                                                             int64_t                 lda,
                                                             hipblasStride           strideA,
                                                             const hipDoubleComplex* x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             const hipDoubleComplex* beta,
                                                             hipDoubleComplex*       y,
                                                             int64_t                 incy,
                                                             hipblasStride           stridey,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The her functions perform the matrix-vector operations:

        A := A + alpha*x*x**H

    where ``alpha`` is a real scalar, ``x`` is a vector, and ``A`` is an
    ``n`` by ``n`` Hermitian matrix.

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : ``c`` and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in A.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in A.
    @param[in]
    n         [int]
              the number of rows and columns of matrix A. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[inout]
    AP        device pointer storing the specified triangular portion of
              the Hermitian matrix A. Of size (lda * n).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of the Hermitian matrix A is supplied. The lower
                triangular portion will not be touched.
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of the Hermitian matrix A is supplied. The upper
                triangular portion will not be touched.
              - Note that the imaginary parts of the diagonal elements are not accessed and are assumed
                to be 0.
    @param[in]
    lda       [int]
              specifies the leading dimension of A. Must be at least max(1, n).
    ********************************************************************/

// her (A := A + alpha*x*x**H) for hipComplex data; alpha is a real `float` scalar,
// and n, incx and lda are `int`.
HIPBLAS_EXPORT hipblasStatus_t hipblasCher(hipblasHandle_t   handle,
                                           hipblasFillMode_t uplo,
                                           int               n,
                                           const float*      alpha,
                                           const hipComplex* x,
                                           int               incx,
                                           hipComplex*       AP,
                                           int               lda);

// her (A := A + alpha*x*x**H) for hipDoubleComplex data; alpha is a real `double` scalar,
// and n, incx and lda are `int`.
HIPBLAS_EXPORT hipblasStatus_t hipblasZher(hipblasHandle_t         handle,
                                           hipblasFillMode_t       uplo,
                                           int                     n,
                                           const double*           alpha,
                                           const hipDoubleComplex* x,
                                           int                     incx,
                                           hipDoubleComplex*       AP,
                                           int                     lda);
//64-bit interface
// 64-bit her for hipComplex data: n, incx and lda are int64_t; alpha is a real `float` scalar.
HIPBLAS_EXPORT hipblasStatus_t hipblasCher_64(hipblasHandle_t   handle,
                                              hipblasFillMode_t uplo,
                                              int64_t           n,
                                              const float*      alpha,
                                              const hipComplex* x,
                                              int64_t           incx,
                                              hipComplex*       AP,
                                              int64_t           lda);

// 64-bit her for hipDoubleComplex data: n, incx and lda are int64_t; alpha is a real `double` scalar.
HIPBLAS_EXPORT hipblasStatus_t hipblasZher_64(hipblasHandle_t         handle,
                                              hipblasFillMode_t       uplo,
                                              int64_t                 n,
                                              const double*           alpha,
                                              const hipDoubleComplex* x,
                                              int64_t                 incx,
                                              hipDoubleComplex*       AP,
                                              int64_t                 lda);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The herBatched functions perform the matrix-vector operations:

        A_i := A_i + alpha*x_i*x_i**H

    where ``alpha`` is a real scalar, ``x_i`` is a vector, and ``A_i`` is an
    ``n`` by ``n`` Hermitian matrix, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in A.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in A.
    @param[in]
    n         [int]
              the number of rows and columns of each matrix A_i. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[inout]
    AP        device array of device pointers storing the specified triangular portion of
              each Hermitian matrix A_i, each of size (lda * n). Array is of at least size batchCount.
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of each Hermitian matrix A_i is supplied. The lower triangular portion
                of each A_i will not be touched.
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of each Hermitian matrix A_i is supplied. The upper triangular portion
                of each A_i will not be touched.
              - Note that the imaginary parts of the diagonal elements are not accessed and are assumed
                to be 0.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i. Must be at least max(1, n).
    @param[in]
    batchCount [int]
                number of instances in the batch.
    ********************************************************************/

// herBatched for hipComplex data. x and AP are arrays of per-instance pointers; alpha is a
// real `float` scalar; n, incx, lda and batchCount are `int`.
HIPBLAS_EXPORT hipblasStatus_t hipblasCherBatched(hipblasHandle_t         handle,
                                                  hipblasFillMode_t       uplo,
                                                  int                     n,
                                                  const float*            alpha,
                                                  const hipComplex* const x[],
                                                  int                     incx,
                                                  hipComplex* const       AP[],
                                                  int                     lda,
                                                  int                     batchCount);

// herBatched for hipDoubleComplex data. x and AP are arrays of per-instance pointers; alpha
// is a real `double` scalar; n, incx, lda and batchCount are `int`.
HIPBLAS_EXPORT hipblasStatus_t hipblasZherBatched(hipblasHandle_t               handle,
                                                  hipblasFillMode_t             uplo,
                                                  int                           n,
                                                  const double*                 alpha,
                                                  const hipDoubleComplex* const x[],
                                                  int                           incx,
                                                  hipDoubleComplex* const       AP[],
                                                  int                           lda,
                                                  int                           batchCount);

//64-bit interface
HIPBLAS_EXPORT hipblasStatus_t hipblasCherBatched_64(hipblasHandle_t         handle,
                                                     hipblasFillMode_t       uplo,
                                                     int64_t                 n,
                                                     const float*            alpha,
                                                     const hipComplex* const x[],
                                                     int64_t                 incx,
                                                     hipComplex* const       AP[],
                                                     int64_t                 lda,
                                                     int64_t                 batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZherBatched_64(hipblasHandle_t               handle,
                                                     hipblasFillMode_t             uplo,
                                                     int64_t                       n,
                                                     const double*                 alpha,
                                                     const hipDoubleComplex* const x[],
                                                     int64_t                       incx,
                                                     hipDoubleComplex* const       AP[],
                                                     int64_t                       lda,
                                                     int64_t                       batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The herStridedBatched functions perform the matrix-vector operations:

        A_i := A_i + alpha*x_i*x_i**H

    where ``alpha`` is a real scalar, ``x_i`` is a vector, and ``A_i`` is an
    ``n`` by ``n`` Hermitian matrix, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in A.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in A.
    @param[in]
    n         [int]
              the number of rows and columns of each matrix A_i. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer pointing to the first vector (x_1).
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    stridex  [hipblasStride]
              stride from the start of one vector (x_i) to the next one (x_i+1).
    @param[inout]
    AP        device pointer pointing to the first matrix (A_1). Stores the specified triangular
              portion of each Hermitian matrix A_i.
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of each Hermitian matrix A_i is supplied. The lower triangular
                portion of each A_i will not be touched.
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of each Hermitian matrix A_i is supplied. The upper triangular
                portion of each A_i will not be touched.
              - Note that the imaginary parts of the diagonal elements are not accessed and are assumed
                to be 0.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i. Must be at least max(1, n).
    @param[in]
    strideA    [hipblasStride]
                stride from the start of one matrix (A_i) to the next (A_i+1).
    @param[in]
    batchCount [int]
                number of instances in the batch.
    ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasCherStridedBatched(hipblasHandle_t   handle,
                                                         hipblasFillMode_t uplo,
                                                         int               n,
                                                         const float*      alpha,
                                                         const hipComplex* x,
                                                         int               incx,
                                                         hipblasStride     stridex,
                                                         hipComplex*       AP,
                                                         int               lda,
                                                         hipblasStride     strideA,
                                                         int               batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZherStridedBatched(hipblasHandle_t         handle,
                                                         hipblasFillMode_t       uplo,
                                                         int                     n,
                                                         const double*           alpha,
                                                         const hipDoubleComplex* x,
                                                         int                     incx,
                                                         hipblasStride           stridex,
                                                         hipDoubleComplex*       AP,
                                                         int                     lda,
                                                         hipblasStride           strideA,
                                                         int                     batchCount);
// 64-bit interface: n, incx, lda, and batchCount are int64_t; stridex and strideA stay hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasCherStridedBatched_64(hipblasHandle_t   handle,
                                                            hipblasFillMode_t uplo,
                                                            int64_t           n,
                                                            const float*      alpha,
                                                            const hipComplex* x,
                                                            int64_t           incx,
                                                            hipblasStride     stridex,
                                                            hipComplex*       AP,
                                                            int64_t           lda,
                                                            hipblasStride     strideA,
                                                            int64_t           batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZherStridedBatched_64(hipblasHandle_t         handle,
                                                            hipblasFillMode_t       uplo,
                                                            int64_t                 n,
                                                            const double*           alpha,
                                                            const hipDoubleComplex* x,
                                                            int64_t                 incx,
                                                            hipblasStride           stridex,
                                                            hipDoubleComplex*       AP,
                                                            int64_t                 lda,
                                                            hipblasStride           strideA,
                                                            int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The her2 functions perform the matrix-vector operations:

        A := A + alpha*x*y**H + conj(alpha)*y*x**H

    where ``alpha`` is a complex scalar, ``x`` and ``y`` are vectors, and ``A`` is an
    ``n`` by ``n`` Hermitian matrix.

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : ``c`` and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied.
    @param[in]
    n         [int]
              the number of rows and columns of matrix A. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    y         device pointer storing vector y.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.
    @param[inout]
    AP        device pointer storing the specified triangular portion of
              the Hermitian matrix A. Of size (lda, n).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of the Hermitian matrix A is supplied. The lower triangular
                portion of A will not be touched.
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of the Hermitian matrix A is supplied. The upper triangular
                portion of A will not be touched.
              - Note that the imaginary parts of the diagonal elements are not accessed and are assumed
                to be 0.
    @param[in]
    lda       [int]
              specifies the leading dimension of A. Must be at least max(1, n).
    ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasCher2(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            const hipComplex* alpha,
                                            const hipComplex* x,
                                            int               incx,
                                            const hipComplex* y,
                                            int               incy,
                                            hipComplex*       AP,
                                            int               lda);

HIPBLAS_EXPORT hipblasStatus_t hipblasZher2(hipblasHandle_t         handle,
                                            hipblasFillMode_t       uplo,
                                            int                     n,
                                            const hipDoubleComplex* alpha,
                                            const hipDoubleComplex* x,
                                            int                     incx,
                                            const hipDoubleComplex* y,
                                            int                     incy,
                                            hipDoubleComplex*       AP,
                                            int                     lda);

// 64-bit interface: same parameter list, with n, incx, incy, and lda declared as int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCher2_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               const hipComplex* alpha,
                                               const hipComplex* x,
                                               int64_t           incx,
                                               const hipComplex* y,
                                               int64_t           incy,
                                               hipComplex*       AP,
                                               int64_t           lda);

HIPBLAS_EXPORT hipblasStatus_t hipblasZher2_64(hipblasHandle_t         handle,
                                               hipblasFillMode_t       uplo,
                                               int64_t                 n,
                                               const hipDoubleComplex* alpha,
                                               const hipDoubleComplex* x,
                                               int64_t                 incx,
                                               const hipDoubleComplex* y,
                                               int64_t                 incy,
                                               hipDoubleComplex*       AP,
                                               int64_t                 lda);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The her2Batched functions perform the matrix-vector operations:

        A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H

    where ``alpha`` is a complex scalar, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``n`` by ``n`` Hermitian matrix for each batch in ``i`` = [1, ``batchCount``].

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied.
    @param[in]
    n         [int]
              the number of rows and columns of each matrix A_i. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    y         device array of device pointers storing each vector y_i.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[inout]
    AP        device array of device pointers storing the specified triangular portion of
              each Hermitian matrix A_i of size (lda, n).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of each Hermitian matrix A_i is supplied. The lower triangular
                portion of each A_i will not be touched.
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of each Hermitian matrix A_i is supplied. The upper triangular
                portion of each A_i will not be touched.
              - Note that the imaginary parts of the diagonal elements are not accessed and are assumed
                to be 0.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i. Must be at least max(1, n).
    @param[in]
    batchCount [int]
                number of instances in the batch.
    ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasCher2Batched(hipblasHandle_t         handle,
                                                   hipblasFillMode_t       uplo,
                                                   int                     n,
                                                   const hipComplex*       alpha,
                                                   const hipComplex* const x[],
                                                   int                     incx,
                                                   const hipComplex* const y[],
                                                   int                     incy,
                                                   hipComplex* const       AP[],
                                                   int                     lda,
                                                   int                     batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZher2Batched(hipblasHandle_t               handle,
                                                   hipblasFillMode_t             uplo,
                                                   int                           n,
                                                   const hipDoubleComplex*       alpha,
                                                   const hipDoubleComplex* const x[],
                                                   int                           incx,
                                                   const hipDoubleComplex* const y[],
                                                   int                           incy,
                                                   hipDoubleComplex* const       AP[],
                                                   int                           lda,
                                                   int                           batchCount);
// 64-bit interface: same parameter list, with n, incx, incy, lda, and batchCount declared as int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCher2Batched_64(hipblasHandle_t         handle,
                                                      hipblasFillMode_t       uplo,
                                                      int64_t                 n,
                                                      const hipComplex*       alpha,
                                                      const hipComplex* const x[],
                                                      int64_t                 incx,
                                                      const hipComplex* const y[],
                                                      int64_t                 incy,
                                                      hipComplex* const       AP[],
                                                      int64_t                 lda,
                                                      int64_t                 batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZher2Batched_64(hipblasHandle_t               handle,
                                                      hipblasFillMode_t             uplo,
                                                      int64_t                       n,
                                                      const hipDoubleComplex*       alpha,
                                                      const hipDoubleComplex* const x[],
                                                      int64_t                       incx,
                                                      const hipDoubleComplex* const y[],
                                                      int64_t                       incy,
                                                      hipDoubleComplex* const       AP[],
                                                      int64_t                       lda,
                                                      int64_t                       batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The her2StridedBatched functions perform the matrix-vector operations:

        A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H

    where ``alpha`` is a complex scalar, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``n`` by ``n`` Hermitian matrix for each batch in ``i`` = [1, ``batchCount``].

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied.
    @param[in]
    n         [int]
              the number of rows and columns of each matrix A_i. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer pointing to the first vector x_1.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    stridex  [hipblasStride]
              specifies the stride between the beginning of one vector (x_i) and the next (x_i+1).
    @param[in]
    y         device pointer pointing to the first vector y_1.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[in]
    stridey  [hipblasStride]
              specifies the stride between the beginning of one vector (y_i) and the next (y_i+1).
    @param[inout]
    AP        device pointer pointing to the first matrix (A_1). Stores the specified triangular portion of
              each Hermitian matrix A_i.
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of each Hermitian matrix A_i is supplied. The lower triangular
                portion of each A_i will not be touched.
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of each Hermitian matrix A_i is supplied. The upper triangular
                portion of each A_i will not be touched.
              - Note that the imaginary parts of the diagonal elements are not accessed and are assumed
                to be 0.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i. Must be at least max(1, n).
    @param[in]
    strideA  [hipblasStride]
              specifies the stride between the beginning of one matrix (A_i) and the next (A_i+1).
    @param[in]
    batchCount [int]
                number of instances in the batch.
    ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasCher2StridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          const hipComplex* alpha,
                                                          const hipComplex* x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const hipComplex* y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          hipComplex*       AP,
                                                          int               lda,
                                                          hipblasStride     strideA,
                                                          int               batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZher2StridedBatched(hipblasHandle_t         handle,
                                                          hipblasFillMode_t       uplo,
                                                          int                     n,
                                                          const hipDoubleComplex* alpha,
                                                          const hipDoubleComplex* x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          const hipDoubleComplex* y,
                                                          int                     incy,
                                                          hipblasStride           stridey,
                                                          hipDoubleComplex*       AP,
                                                          int                     lda,
                                                          hipblasStride           strideA,
                                                          int                     batchCount);

// 64-bit interface: n, incx, incy, lda, and batchCount are int64_t; the strides stay hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasCher2StridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             const hipComplex* alpha,
                                                             const hipComplex* x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const hipComplex* y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             hipComplex*       AP,
                                                             int64_t           lda,
                                                             hipblasStride     strideA,
                                                             int64_t           batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZher2StridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasFillMode_t       uplo,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* alpha,
                                                             const hipDoubleComplex* x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             const hipDoubleComplex* y,
                                                             int64_t                 incy,
                                                             hipblasStride           stridey,
                                                             hipDoubleComplex*       AP,
                                                             int64_t                 lda,
                                                             hipblasStride           strideA,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The hpmv functions perform the matrix-vector operation:

        y := alpha*A*x + beta*y

    where ``alpha`` and ``beta`` are scalars, ``x`` and ``y`` are ``n``-element vectors and ``A`` is an
    ``n`` by ``n`` Hermitian matrix, supplied in packed form (see description below).

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : ``c`` and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              - HIPBLAS_FILL_MODE_UPPER: the upper triangular part of the Hermitian matrix A is supplied in AP.
              - HIPBLAS_FILL_MODE_LOWER: the lower triangular part of the Hermitian matrix A is supplied in AP.
    @param[in]
    n         [int]
              the order of the matrix A. Must be >= 0.
    @param[in]
    alpha     device pointer or host pointer to scalar alpha.
    @param[in]
    AP        device pointer storing the packed version of the specified triangular portion of
              the Hermitian matrix A. Of at least size ((n * (n + 1)) / 2).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of the Hermitian matrix A is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(0,1)  
                AP(2) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3)  
                        (1, 0) (2, 1) (3, 2)  
                        (2,-1) (4, 0) (5,-1)    -> [(1,0), (2,1), (4,0), (3,2), (5,-1), (6,0)]  
                        (3,-2) (5, 1) (6, 0)  
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of the Hermitian matrix A is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(1,0)  
                AP(2) = A(2,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3)  
                        (1, 0) (2, 1) (3, 2)  
                        (2,-1) (4, 0) (5,-1)    -> [(1,0), (2,-1), (3,-2), (4,0), (5,1), (6,0)]  
                        (3,-2) (5, 1) (6, 0)  
              - Note that the imaginary parts of the diagonal elements are not accessed and are assumed
              to be 0.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device pointer storing vector y.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.

    ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasChpmv(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            const hipComplex* alpha,
                                            const hipComplex* AP,
                                            const hipComplex* x,
                                            int               incx,
                                            const hipComplex* beta,
                                            hipComplex*       y,
                                            int               incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasZhpmv(hipblasHandle_t         handle,
                                            hipblasFillMode_t       uplo,
                                            int                     n,
                                            const hipDoubleComplex* alpha,
                                            const hipDoubleComplex* AP,
                                            const hipDoubleComplex* x,
                                            int                     incx,
                                            const hipDoubleComplex* beta,
                                            hipDoubleComplex*       y,
                                            int                     incy);

//64-bit interface
// Same argument list as hipblasChpmv, with n, incx and incy declared as int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasChpmv_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               const hipComplex* alpha,
                                               const hipComplex* AP,
                                               const hipComplex* x,
                                               int64_t           incx,
                                               const hipComplex* beta,
                                               hipComplex*       y,
                                               int64_t           incy);

// Same argument list as hipblasZhpmv, with n, incx and incy declared as int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhpmv_64(hipblasHandle_t         handle,
                                               hipblasFillMode_t       uplo,
                                               int64_t                 n,
                                               const hipDoubleComplex* alpha,
                                               const hipDoubleComplex* AP,
                                               const hipDoubleComplex* x,
                                               int64_t                 incx,
                                               const hipDoubleComplex* beta,
                                               hipDoubleComplex*       y,
                                               int64_t                 incy);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The hpmvBatched functions perform the matrix-vector operation:

        y_i := alpha*A_i*x_i + beta*y_i

    where ``alpha`` and ``beta`` are scalars, ``x_i`` and ``y_i`` are ``n``-element vectors, and ``A_i`` is an
    ``n`` by ``n`` Hermitian matrix, supplied in packed form (see description below),
    for each batch in ``i`` = [1, ``batchCount``].

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              - HIPBLAS_FILL_MODE_UPPER: the upper triangular part of each Hermitian matrix A_i is supplied in AP.
              - HIPBLAS_FILL_MODE_LOWER: the lower triangular part of each Hermitian matrix A_i is supplied in AP.
    @param[in]
    n         [int]
              the order of each matrix A_i.
    @param[in]
    alpha     device pointer or host pointer to scalar alpha.
    @param[in]
    AP      device array of device pointers storing the packed version of the specified triangular
            portion of each Hermitian matrix A_i. Each A_i is of at least size ((n * (n + 1)) / 2).
            - if uplo == HIPBLAS_FILL_MODE_UPPER:
            The upper triangular portion of each Hermitian matrix A_i is supplied.
            The matrix is compacted so that each AP_i contains the triangular portion column-by-column
            so that:  
            AP(0) = A(0,0)  
            AP(1) = A(0,1)  
            AP(2) = A(1,1), and so forth.  
                Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3)  
                    (1, 0) (2, 1) (3, 2)  
                    (2,-1) (4, 0) (5,-1)    -> [(1,0), (2,1), (4,0), (3,2), (5,-1), (6,0)]  
                    (3,-2) (5, 1) (6, 0)  
            - if uplo == HIPBLAS_FILL_MODE_LOWER:
            The lower triangular portion of each Hermitian matrix A_i is supplied.
            The matrix is compacted so that each AP_i contains the triangular portion column-by-column
            so that:  
            AP(0) = A(0,0)  
            AP(1) = A(1,0)  
            AP(2) = A(2,0), and so forth.  
                Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3)  
                    (1, 0) (2, 1) (3, 2)  
                    (2,-1) (4, 0) (5,-1)    -> [(1,0), (2,-1), (3,-2), (4,0), (5,1), (6,0)]  
                    (3,-2) (5, 1) (6, 0)  
            - Note that the imaginary parts of the diagonal elements are not accessed and are assumed
              to be 0.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device array of device pointers storing each vector y_i.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// hpmvBatched with hipComplex data: AP, x and y are arrays of per-batch pointers
// (AP and x read-only, y writable). n, incx, incy and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasChpmvBatched(hipblasHandle_t         handle,
                                                   hipblasFillMode_t       uplo,
                                                   int                     n,
                                                   const hipComplex*       alpha,
                                                   const hipComplex* const AP[],
                                                   const hipComplex* const x[],
                                                   int                     incx,
                                                   const hipComplex*       beta,
                                                   hipComplex* const       y[],
                                                   int                     incy,
                                                   int                     batchCount);

// hpmvBatched with hipDoubleComplex data: AP, x and y are arrays of per-batch pointers
// (AP and x read-only, y writable). n, incx, incy and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhpmvBatched(hipblasHandle_t               handle,
                                                   hipblasFillMode_t             uplo,
                                                   int                           n,
                                                   const hipDoubleComplex*       alpha,
                                                   const hipDoubleComplex* const AP[],
                                                   const hipDoubleComplex* const x[],
                                                   int                           incx,
                                                   const hipDoubleComplex*       beta,
                                                   hipDoubleComplex* const       y[],
                                                   int                           incy,
                                                   int                           batchCount);

//64-bit interface
// Same argument list as hipblasChpmvBatched, with n, incx, incy and batchCount
// declared as int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasChpmvBatched_64(hipblasHandle_t         handle,
                                                      hipblasFillMode_t       uplo,
                                                      int64_t                 n,
                                                      const hipComplex*       alpha,
                                                      const hipComplex* const AP[],
                                                      const hipComplex* const x[],
                                                      int64_t                 incx,
                                                      const hipComplex*       beta,
                                                      hipComplex* const       y[],
                                                      int64_t                 incy,
                                                      int64_t                 batchCount);

// Same argument list as hipblasZhpmvBatched, with n, incx, incy and batchCount
// declared as int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhpmvBatched_64(hipblasHandle_t               handle,
                                                      hipblasFillMode_t             uplo,
                                                      int64_t                       n,
                                                      const hipDoubleComplex*       alpha,
                                                      const hipDoubleComplex* const AP[],
                                                      const hipDoubleComplex* const x[],
                                                      int64_t                       incx,
                                                      const hipDoubleComplex*       beta,
                                                      hipDoubleComplex* const       y[],
                                                      int64_t                       incy,
                                                      int64_t                       batchCount);

//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The hpmvStridedBatched functions perform the matrix-vector operation:

        y_i := alpha*A_i*x_i + beta*y_i

    where ``alpha`` and ``beta`` are scalars, ``x_i`` and ``y_i`` are ``n``-element vectors, and ``A_i`` is an
    ``n`` by ``n`` Hermitian matrix, supplied in packed form (see description below),
    for each batch in ``i`` = [1, ``batchCount``].

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              - HIPBLAS_FILL_MODE_UPPER: the upper triangular part of each Hermitian matrix A_i is supplied in AP.
              - HIPBLAS_FILL_MODE_LOWER: the lower triangular part of each Hermitian matrix A_i is supplied in AP.
    @param[in]
    n         [int]
              the order of each matrix A_i.
    @param[in]
    alpha     device pointer or host pointer to scalar alpha.
    @param[in]
    AP        device pointer pointing to the beginning of the first matrix (AP_1). Stores the packed
              version of the specified triangular portion of each Hermitian matrix AP_i of size ((n * (n + 1)) / 2).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of each Hermitian matrix A_i is supplied.
                The matrix is compacted so that each AP_i contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(0,1)  
                AP(2) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3)  
                        (1, 0) (2, 1) (3, 2)  
                        (2,-1) (4, 0) (5,-1)    -> [(1,0), (2,1), (4,0), (3,2), (5,-1), (6,0)]  
                        (3,-2) (5, 1) (6, 0)  
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of each Hermitian matrix A_i is supplied.
                The matrix is compacted so that each AP_i contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(1,0)  
                AP(2) = A(2,0), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3)  
                        (1, 0) (2, 1) (3, 2)  
                        (2,-1) (4, 0) (5,-1)    -> [(1,0), (2,-1), (3,-2), (4,0), (5,1), (6,0)]  
                        (3,-2) (5, 1) (6, 0)  
              - Note that the imaginary parts of the diagonal elements are not accessed and are assumed
                to be 0.
    @param[in]
    strideA  [hipblasStride]
              stride from the start of one matrix (AP_i) to the next one (AP_i+1).
    @param[in]
    x         device pointer pointing to the beginning of the first vector (x_1).
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    stridex  [hipblasStride]
              stride from the start of one vector (x_i) to the next one (x_i+1).
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device pointer pointing to the beginning of the first vector (y_1).
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[in]
    stridey  [hipblasStride]
              stride from the start of one vector (y_i) to the next one (y_i+1).
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// hpmvStridedBatched with hipComplex data: AP, x and y each point at the first batch
// instance and are paired with a hipblasStride (strideA, stridex, stridey).
// n, incx, incy and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasChpmvStridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          const hipComplex* alpha,
                                                          const hipComplex* AP,
                                                          hipblasStride     strideA,
                                                          const hipComplex* x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const hipComplex* beta,
                                                          hipComplex*       y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          int               batchCount);

// hpmvStridedBatched with hipDoubleComplex data: AP, x and y each point at the first batch
// instance and are paired with a hipblasStride (strideA, stridex, stridey).
// n, incx, incy and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhpmvStridedBatched(hipblasHandle_t         handle,
                                                          hipblasFillMode_t       uplo,
                                                          int                     n,
                                                          const hipDoubleComplex* alpha,
                                                          const hipDoubleComplex* AP,
                                                          hipblasStride           strideA,
                                                          const hipDoubleComplex* x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          const hipDoubleComplex* beta,
                                                          hipDoubleComplex*       y,
                                                          int                     incy,
                                                          hipblasStride           stridey,
                                                          int                     batchCount);

//64-bit interface
// Same argument list as hipblasChpmvStridedBatched, with n, incx, incy and batchCount
// declared as int64_t; the stride parameters keep the hipblasStride type.
HIPBLAS_EXPORT hipblasStatus_t hipblasChpmvStridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             const hipComplex* alpha,
                                                             const hipComplex* AP,
                                                             hipblasStride     strideA,
                                                             const hipComplex* x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const hipComplex* beta,
                                                             hipComplex*       y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             int64_t           batchCount);

// Same argument list as hipblasZhpmvStridedBatched, with n, incx, incy and batchCount
// declared as int64_t; the stride parameters keep the hipblasStride type.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhpmvStridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasFillMode_t       uplo,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* alpha,
                                                             const hipDoubleComplex* AP,
                                                             hipblasStride           strideA,
                                                             const hipDoubleComplex* x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             const hipDoubleComplex* beta,
                                                             hipDoubleComplex*       y,
                                                             int64_t                 incy,
                                                             hipblasStride           stridey,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The hpr functions perform the matrix-vector operations:

        A := A + alpha*x*x**H

    where ``alpha`` is a real scalar, ``x`` is a vector, and ``A`` is an
    ``n`` by ``n`` Hermitian matrix, supplied in packed form.

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : ``c`` and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in AP.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in AP.
    @param[in]
    n         [int]
              the number of rows and columns of matrix A. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[inout]
    AP        device pointer storing the packed version of the specified triangular portion of
              the Hermitian matrix A. Of at least size ((n * (n + 1)) / 2).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of the Hermitian matrix A is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(0,1)  
                AP(2) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3)  
                        (1, 0) (2, 1) (4,9)  
                        (2,-1) (3, 0) (5,3)  -> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)]  
                        (4,-9) (5,-3) (6,0)  
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of the Hermitian matrix A is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(1,0)  
                AP(2) = A(2,0), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3)  
                        (1, 0) (2, 1) (4,9)  
                        (2,-1) (3, 0) (5,3)  -> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)]  
                        (4,-9) (5,-3) (6,0)  
              - Note that the imaginary parts of the diagonal elements are not accessed and are assumed
                to be 0.
    ********************************************************************/

// hpr with hipComplex x and packed matrix AP; alpha is a real scalar passed as float.
// x is read-only and AP is the writable (in/out) operand. n and incx are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasChpr(hipblasHandle_t   handle,
                                           hipblasFillMode_t uplo,
                                           int               n,
                                           const float*      alpha,
                                           const hipComplex* x,
                                           int               incx,
                                           hipComplex*       AP);

// hpr with hipDoubleComplex x and packed matrix AP; alpha is a real scalar passed as double.
// x is read-only and AP is the writable (in/out) operand. n and incx are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhpr(hipblasHandle_t         handle,
                                           hipblasFillMode_t       uplo,
                                           int                     n,
                                           const double*           alpha,
                                           const hipDoubleComplex* x,
                                           int                     incx,
                                           hipDoubleComplex*       AP);

//64-bit interface
// Same argument list as hipblasChpr, with n and incx declared as int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasChpr_64(hipblasHandle_t   handle,
                                              hipblasFillMode_t uplo,
                                              int64_t           n,
                                              const float*      alpha,
                                              const hipComplex* x,
                                              int64_t           incx,
                                              hipComplex*       AP);

// Same argument list as hipblasZhpr, with n and incx declared as int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhpr_64(hipblasHandle_t         handle,
                                              hipblasFillMode_t       uplo,
                                              int64_t                 n,
                                              const double*           alpha,
                                              const hipDoubleComplex* x,
                                              int64_t                 incx,
                                              hipDoubleComplex*       AP);

//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The hprBatched functions perform the matrix-vector operations:

        A_i := A_i + alpha*x_i*x_i**H

    where ``alpha`` is a real scalar, ``x_i`` is a vector, and ``A_i`` is an
    ``n`` by ``n`` Hermitian matrix, supplied in packed form, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP.
    @param[in]
    n         [int]
              the number of rows and columns of each matrix A_i. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[inout]
    AP        device array of device pointers storing the packed version of the specified triangular portion of
              each Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batchCount.
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of each Hermitian matrix A_i is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(0,1)  
                AP(2) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3)  
                        (1, 0) (2, 1) (4,9)  
                        (2,-1) (3, 0) (5,3)  -> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)]  
                        (4,-9) (5,-3) (6,0)  
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of each Hermitian matrix A_i is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(1,0)  
                AP(2) = A(2,0), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3)  
                        (1, 0) (2, 1) (4,9)  
                        (2,-1) (3, 0) (5,3)  -> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)]  
                        (4,-9) (5,-3) (6,0)  
              - Note that the imaginary parts of the diagonal elements are not accessed and are assumed
                to be 0.
    @param[in]
    batchCount [int]
                number of instances in the batch.
    ********************************************************************/

// hprBatched with hipComplex data and a real float alpha: x and AP are arrays of
// per-batch pointers (x read-only, AP writable). n, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasChprBatched(hipblasHandle_t         handle,
                                                  hipblasFillMode_t       uplo,
                                                  int                     n,
                                                  const float*            alpha,
                                                  const hipComplex* const x[],
                                                  int                     incx,
                                                  hipComplex* const       AP[],
                                                  int                     batchCount);

// hprBatched with hipDoubleComplex data and a real double alpha: x and AP are arrays of
// per-batch pointers (x read-only, AP writable). n, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhprBatched(hipblasHandle_t               handle,
                                                  hipblasFillMode_t             uplo,
                                                  int                           n,
                                                  const double*                 alpha,
                                                  const hipDoubleComplex* const x[],
                                                  int                           incx,
                                                  hipDoubleComplex* const       AP[],
                                                  int                           batchCount);

//64-bit interface
// Same argument list as hipblasChprBatched, with n, incx and batchCount declared as int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasChprBatched_64(hipblasHandle_t         handle,
                                                     hipblasFillMode_t       uplo,
                                                     int64_t                 n,
                                                     const float*            alpha,
                                                     const hipComplex* const x[],
                                                     int64_t                 incx,
                                                     hipComplex* const       AP[],
                                                     int64_t                 batchCount);

// Same argument list as hipblasZhprBatched, with n, incx and batchCount declared as int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhprBatched_64(hipblasHandle_t               handle,
                                                     hipblasFillMode_t             uplo,
                                                     int64_t                       n,
                                                     const double*                 alpha,
                                                     const hipDoubleComplex* const x[],
                                                     int64_t                       incx,
                                                     hipDoubleComplex* const       AP[],
                                                     int64_t                       batchCount);
//! @}

/*! @{
    \brief  <b> BLAS Level 2 API </b>

    \details
    The hprStridedBatched functions perform the matrix-vector operations:

        A_i := A_i + alpha*x_i*x_i**H

    where ``alpha`` is a real scalar, ``x_i`` is a vector, and ``A_i`` is an
    ``n`` by ``n`` Hermitian matrix, supplied in packed form, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP.
    @param[in]
    n         [int]
              the number of rows and columns of each matrix A_i. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer pointing to the first vector (x_1).
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    stridex  [hipblasStride]
              stride from the start of one vector (x_i) to the next one (x_i+1).
    @param[inout]
    AP        device pointer storing the packed version of the specified triangular portion of
              each Hermitian matrix A_i. Points to the first matrix (A_1).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of each Hermitian matrix A_i is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(0,1)  
                AP(2) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3)  
                        (1, 0) (2, 1) (4,9)  
                        (2,-1) (3, 0) (5,3)  -> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)]  
                        (4,-9) (5,-3) (6,0)  
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of each Hermitian matrix A_i is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(1,0)  
                AP(2) = A(2,0), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3)  
                        (1, 0) (2, 1) (4,9)  
                        (2,-1) (3, 0) (5,3)  -> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)]  
                        (4,-9) (5,-3) (6,0)  
              - Note that the imaginary parts of the diagonal elements are not accessed and are assumed
                to be 0.
    @param[in]
    strideA   [hipblasStride]
                stride from the start of one (A_i) to the next (A_i+1).
    @param[in]
    batchCount [int]
                number of instances in the batch.
    ********************************************************************/

// hprStridedBatched with hipComplex data and a real float alpha: x and AP each point at
// the first batch instance and are paired with a hipblasStride (stridex, strideA).
// x is read-only, AP is writable. n, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasChprStridedBatched(hipblasHandle_t   handle,
                                                         hipblasFillMode_t uplo,
                                                         int               n,
                                                         const float*      alpha,
                                                         const hipComplex* x,
                                                         int               incx,
                                                         hipblasStride     stridex,
                                                         hipComplex*       AP,
                                                         hipblasStride     strideA,
                                                         int               batchCount);

// hprStridedBatched with hipDoubleComplex data and a real double alpha: x and AP each point
// at the first batch instance and are paired with a hipblasStride (stridex, strideA).
// x is read-only, AP is writable. n, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhprStridedBatched(hipblasHandle_t         handle,
                                                         hipblasFillMode_t       uplo,
                                                         int                     n,
                                                         const double*           alpha,
                                                         const hipDoubleComplex* x,
                                                         int                     incx,
                                                         hipblasStride           stridex,
                                                         hipDoubleComplex*       AP,
                                                         hipblasStride           strideA,
                                                         int                     batchCount);

//64-bit interface
// hipComplex variant with int64_t n, incx, and batchCount; alpha points to a real (float)
// scalar. The stride parameters keep the hipblasStride type.
HIPBLAS_EXPORT hipblasStatus_t hipblasChprStridedBatched_64(hipblasHandle_t   handle,
                                                            hipblasFillMode_t uplo,
                                                            int64_t           n,
                                                            const float*      alpha,
                                                            const hipComplex* x,
                                                            int64_t           incx,
                                                            hipblasStride     stridex,
                                                            hipComplex*       AP,
                                                            hipblasStride     strideA,
                                                            int64_t           batchCount);

// hipDoubleComplex variant with int64_t n, incx, and batchCount; alpha points to a real
// (double) scalar. The stride parameters keep the hipblasStride type.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhprStridedBatched_64(hipblasHandle_t         handle,
                                                            hipblasFillMode_t       uplo,
                                                            int64_t                 n,
                                                            const double*           alpha,
                                                            const hipDoubleComplex* x,
                                                            int64_t                 incx,
                                                            hipblasStride           stridex,
                                                            hipDoubleComplex*       AP,
                                                            hipblasStride           strideA,
                                                            int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The hpr2 functions perform the matrix-vector operations:

        A := A + alpha*x*y**H + conj(alpha)*y*x**H

    where ``alpha`` is a complex scalar, ``x`` and ``y`` are vectors, and ``A`` is an
    ``n`` by ``n`` Hermitian matrix, supplied in packed form.

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : ``c`` and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in AP.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in AP.
    @param[in]
    n         [int]
              the number of rows and columns of matrix A. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    y         device pointer storing vector y.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.
    @param[inout]
    AP        device pointer storing the packed version of the specified triangular portion of
              the Hermitian matrix A. Of at least size ((n * (n + 1)) / 2).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of the Hermitian matrix A is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(0,1)  
                AP(2) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3)  
                        (1, 0) (2, 1) (4,9)  
                        (2,-1) (3, 0) (5,3)  -> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)]  
                        (4,-9) (5,-3) (6,0)  
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of the Hermitian matrix A is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(1,0)  
                AP(2) = A(2,0), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3)  
                        (1, 0) (2, 1) (4,9)  
                        (2,-1) (3, 0) (5,3)  -> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)]  
                        (4,-9) (5,-3) (6,0)  
              - Note that the imaginary parts of the diagonal elements are not accessed and are assumed
                to be 0.
    ********************************************************************/

// hipComplex variant: alpha, x, y, and AP are all hipComplex; n, incx, and incy are int.
// x and y are read-only (const); AP is updated in place.
HIPBLAS_EXPORT hipblasStatus_t hipblasChpr2(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            const hipComplex* alpha,
                                            const hipComplex* x,
                                            int               incx,
                                            const hipComplex* y,
                                            int               incy,
                                            hipComplex*       AP);

// hipDoubleComplex variant: alpha, x, y, and AP are all hipDoubleComplex; n, incx, and incy
// are int. x and y are read-only (const); AP is updated in place.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhpr2(hipblasHandle_t         handle,
                                            hipblasFillMode_t       uplo,
                                            int                     n,
                                            const hipDoubleComplex* alpha,
                                            const hipDoubleComplex* x,
                                            int                     incx,
                                            const hipDoubleComplex* y,
                                            int                     incy,
                                            hipDoubleComplex*       AP);

//64-bit interface
// hipComplex variant with int64_t n, incx, and incy; pointer parameters match hipblasChpr2.
HIPBLAS_EXPORT hipblasStatus_t hipblasChpr2_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               const hipComplex* alpha,
                                               const hipComplex* x,
                                               int64_t           incx,
                                               const hipComplex* y,
                                               int64_t           incy,
                                               hipComplex*       AP);

// hipDoubleComplex variant with int64_t n, incx, and incy; pointer parameters match
// hipblasZhpr2.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhpr2_64(hipblasHandle_t         handle,
                                               hipblasFillMode_t       uplo,
                                               int64_t                 n,
                                               const hipDoubleComplex* alpha,
                                               const hipDoubleComplex* x,
                                               int64_t                 incx,
                                               const hipDoubleComplex* y,
                                               int64_t                 incy,
                                               hipDoubleComplex*       AP);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The hpr2Batched functions perform the matrix-vector operations:

        A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H

    where ``alpha`` is a complex scalar, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``n`` by ``n`` Hermitian matrix, supplied in packed form, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP.
    @param[in]
    n         [int]
              the number of rows and columns of each matrix A_i. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    y         device array of device pointers storing each vector y_i.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[inout]
    AP        device array of device pointers storing the packed version of the specified triangular portion of
              each Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batchCount.
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of each Hermitian matrix A_i is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(0,1)  
                AP(2) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3)  
                        (1, 0) (2, 1) (4,9)  
                        (2,-1) (3, 0) (5,3)  -> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)]  
                        (4,-9) (5,-3) (6,0)  
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of each Hermitian matrix A_i is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(1,0)  
                AP(2) = A(2,0), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3)  
                        (1, 0) (2, 1) (4,9)  
                        (2,-1) (3, 0) (5,3)  -> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)]  
                        (4,-9) (5,-3) (6,0)  
              - Note that the imaginary parts of the diagonal elements are not accessed and are assumed
                to be 0.
    @param[in]
    batchCount [int]
                number of instances in the batch.
    ********************************************************************/

// hipComplex variant: x, y, and AP are arrays of per-instance pointers, while a single alpha
// is shared by the whole batch. n, incx, incy, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasChpr2Batched(hipblasHandle_t         handle,
                                                   hipblasFillMode_t       uplo,
                                                   int                     n,
                                                   const hipComplex*       alpha,
                                                   const hipComplex* const x[],
                                                   int                     incx,
                                                   const hipComplex* const y[],
                                                   int                     incy,
                                                   hipComplex* const       AP[],
                                                   int                     batchCount);

// hipDoubleComplex variant: x, y, and AP are arrays of per-instance pointers, while a single
// alpha is shared by the whole batch. n, incx, incy, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhpr2Batched(hipblasHandle_t               handle,
                                                   hipblasFillMode_t             uplo,
                                                   int                           n,
                                                   const hipDoubleComplex*       alpha,
                                                   const hipDoubleComplex* const x[],
                                                   int                           incx,
                                                   const hipDoubleComplex* const y[],
                                                   int                           incy,
                                                   hipDoubleComplex* const       AP[],
                                                   int                           batchCount);

//64-bit interface
// hipComplex variant with int64_t n, incx, incy, and batchCount; pointer parameters match
// hipblasChpr2Batched.
HIPBLAS_EXPORT hipblasStatus_t hipblasChpr2Batched_64(hipblasHandle_t         handle,
                                                      hipblasFillMode_t       uplo,
                                                      int64_t                 n,
                                                      const hipComplex*       alpha,
                                                      const hipComplex* const x[],
                                                      int64_t                 incx,
                                                      const hipComplex* const y[],
                                                      int64_t                 incy,
                                                      hipComplex* const       AP[],
                                                      int64_t                 batchCount);

// hipDoubleComplex variant with int64_t n, incx, incy, and batchCount; pointer parameters
// match hipblasZhpr2Batched.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhpr2Batched_64(hipblasHandle_t               handle,
                                                      hipblasFillMode_t             uplo,
                                                      int64_t                       n,
                                                      const hipDoubleComplex*       alpha,
                                                      const hipDoubleComplex* const x[],
                                                      int64_t                       incx,
                                                      const hipDoubleComplex* const y[],
                                                      int64_t                       incy,
                                                      hipDoubleComplex* const       AP[],
                                                      int64_t                       batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The hpr2StridedBatched functions perform the matrix-vector operations:

        A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H

    where ``alpha`` is a complex scalar, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``n`` by ``n`` Hermitian matrix, supplied in packed form, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP.
    @param[in]
    n         [int]
              the number of rows and columns of each matrix A_i. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer pointing to the first vector (x_1).
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    stridex  [hipblasStride]
              stride from the start of one vector (x_i) to the next one (x_i+1).
    @param[in]
    y         device pointer pointing to the first vector (y_1).
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[in]
    stridey  [hipblasStride]
              stride from the start of one vector (y_i) to the next one (y_i+1).
    @param[inout]
    AP        device pointer storing the packed version of the specified triangular portion of
              each Hermitian matrix A_i. Points to the first matrix (A_1).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of each Hermitian matrix A_i is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(0,1)  
                AP(2) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_UPPER; n = 3)  
                        (1, 0) (2, 1) (4,9)  
                        (2,-1) (3, 0) (5,3)  -> [(1,0), (2,1), (3,0), (4,9), (5,3), (6,0)]  
                        (4,-9) (5,-3) (6,0)  
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of each Hermitian matrix A_i is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(1,0)  
                AP(2) = A(2,0), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_LOWER; n = 3)  
                        (1, 0) (2, 1) (4,9)  
                        (2,-1) (3, 0) (5,3)  -> [(1,0), (2,-1), (4,-9), (3,0), (5,-3), (6,0)]  
                        (4,-9) (5,-3) (6,0)  
              - Note that the imaginary parts of the diagonal elements are not accessed and are assumed
                to be 0.
    @param[in]
    strideA    [hipblasStride]
                stride from the start of one (A_i) to the next (A_i+1).
    @param[in]
    batchCount [int]
                number of instances in the batch.
    ********************************************************************/

// hipComplex variant: x, y, and AP are single base pointers, each paired with a hipblasStride
// (stridex, stridey, strideA). n, incx, incy, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasChpr2StridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          const hipComplex* alpha,
                                                          const hipComplex* x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const hipComplex* y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          hipComplex*       AP,
                                                          hipblasStride     strideA,
                                                          int               batchCount);

// hipDoubleComplex variant: x, y, and AP are single base pointers, each paired with a
// hipblasStride (stridex, stridey, strideA). n, incx, incy, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhpr2StridedBatched(hipblasHandle_t         handle,
                                                          hipblasFillMode_t       uplo,
                                                          int                     n,
                                                          const hipDoubleComplex* alpha,
                                                          const hipDoubleComplex* x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          const hipDoubleComplex* y,
                                                          int                     incy,
                                                          hipblasStride           stridey,
                                                          hipDoubleComplex*       AP,
                                                          hipblasStride           strideA,
                                                          int                     batchCount);

//64-bit interface
// hipComplex variant with int64_t n, incx, incy, and batchCount. The stride parameters keep
// the hipblasStride type used by hipblasChpr2StridedBatched.
HIPBLAS_EXPORT hipblasStatus_t hipblasChpr2StridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             const hipComplex* alpha,
                                                             const hipComplex* x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const hipComplex* y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             hipComplex*       AP,
                                                             hipblasStride     strideA,
                                                             int64_t           batchCount);

// hipDoubleComplex variant with int64_t n, incx, incy, and batchCount. The stride parameters
// keep the hipblasStride type used by hipblasZhpr2StridedBatched.
HIPBLAS_EXPORT hipblasStatus_t hipblasZhpr2StridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasFillMode_t       uplo,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* alpha,
                                                             const hipDoubleComplex* x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             const hipDoubleComplex* y,
                                                             int64_t                 incy,
                                                             hipblasStride           stridey,
                                                             hipDoubleComplex*       AP,
                                                             hipblasStride           strideA,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The sbmv functions perform the matrix-vector operation:

        y := alpha*A*x + beta*y,

    where ``alpha`` and ``beta`` are scalars, ``x`` and ``y`` are ``n``-element vectors, and
    ``A`` should contain an upper or lower triangular ``n`` by ``n`` symmetric banded matrix.

    - Supported precisions in rocBLAS : ``s`` and ``d``.
    - Supported precisions in cuBLAS  : ``s`` and ``d``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - If HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced.
              - If HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced.
    @param[in]
    n         [int]
              the number of rows and columns of matrix A.
    @param[in]
    k         [int]
              specifies the number of sub- and super-diagonals.
    @param[in]
    alpha
              specifies the scalar alpha.
    @param[in]
    AP         pointer storing matrix A on the GPU.
    @param[in]
    lda       [int]
              specifies the leading dimension of matrix A.
    @param[in]
    x         pointer storing vector x on the GPU.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    beta      specifies the scalar beta.
    @param[inout]
    y         pointer storing vector y on the GPU.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.

    ********************************************************************/

// float variant: alpha, AP, x, beta, and y are float; n, k, lda, incx, and incy are int.
// AP and x are read-only (const); y is the only non-const data pointer.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsbmv(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            int               k,
                                            const float*      alpha,
                                            const float*      AP,
                                            int               lda,
                                            const float*      x,
                                            int               incx,
                                            const float*      beta,
                                            float*            y,
                                            int               incy);

// double variant: alpha, AP, x, beta, and y are double; n, k, lda, incx, and incy are int.
// AP and x are read-only (const); y is the only non-const data pointer.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsbmv(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            int               k,
                                            const double*     alpha,
                                            const double*     AP,
                                            int               lda,
                                            const double*     x,
                                            int               incx,
                                            const double*     beta,
                                            double*           y,
                                            int               incy);

// 64-bit interface
// float variant with int64_t n, k, lda, incx, and incy; pointer parameters match hipblasSsbmv.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsbmv_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               int64_t           k,
                                               const float*      alpha,
                                               const float*      AP,
                                               int64_t           lda,
                                               const float*      x,
                                               int64_t           incx,
                                               const float*      beta,
                                               float*            y,
                                               int64_t           incy);

// double variant with int64_t n, k, lda, incx, and incy; pointer parameters match
// hipblasDsbmv.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsbmv_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               int64_t           k,
                                               const double*     alpha,
                                               const double*     AP,
                                               int64_t           lda,
                                               const double*     x,
                                               int64_t           incx,
                                               const double*     beta,
                                               double*           y,
                                               int64_t           incy);

//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The sbmvBatched functions perform the matrix-vector operation:

        y_i := alpha*A_i*x_i + beta*y_i,

    where ``(A_i, x_i, y_i)`` is the ``i``-th instance of the batch,
    ``alpha`` and ``beta`` are scalars, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``n`` by ``n`` symmetric banded matrix, for ``i`` = 1, ..., ``batchCount``.
    Each ``A_i`` should contain an upper or lower triangular ``n`` by ``n`` symmetric banded matrix.

    - Supported precisions in rocBLAS : ``s`` and ``d``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - If HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced.
              - If HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced.
    @param[in]
    n         [int]
              number of rows and columns of each matrix A_i.
    @param[in]
    k         [int]
              specifies the number of sub- and super-diagonals.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    AP         device array of device pointers storing each matrix A_i.
    @param[in]
    lda       [int]
              specifies the leading dimension of each matrix A_i.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each vector x_i.
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device array of device pointers storing each vector y_i.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each vector y_i.
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// float variant: AP, x, and y are arrays of per-instance pointers, while alpha and beta are
// single scalars shared by the batch. n, k, lda, incx, incy, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsbmvBatched(hipblasHandle_t    handle,
                                                   hipblasFillMode_t  uplo,
                                                   int                n,
                                                   int                k,
                                                   const float*       alpha,
                                                   const float* const AP[],
                                                   int                lda,
                                                   const float* const x[],
                                                   int                incx,
                                                   const float*       beta,
                                                   float* const       y[],
                                                   int                incy,
                                                   int                batchCount);

// double variant: AP, x, and y are arrays of per-instance pointers, while alpha and beta are
// single scalars shared by the batch. n, k, lda, incx, incy, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsbmvBatched(hipblasHandle_t     handle,
                                                   hipblasFillMode_t   uplo,
                                                   int                 n,
                                                   int                 k,
                                                   const double*       alpha,
                                                   const double* const AP[],
                                                   int                 lda,
                                                   const double* const x[],
                                                   int                 incx,
                                                   const double*       beta,
                                                   double* const       y[],
                                                   int                 incy,
                                                   int                 batchCount);

// 64-bit interface
HIPBLAS_EXPORT hipblasStatus_t hipblasSsbmvBatched_64(hipblasHandle_t    handle,
                                                      hipblasFillMode_t  uplo,
                                                      int64_t            n,
                                                      int64_t            k,
                                                      const float*       alpha,
                                                      const float* const AP[],
                                                      int64_t            lda,
                                                      const float* const x[],
                                                      int64_t            incx,
                                                      const float*       beta,
                                                      float* const       y[],
                                                      int64_t            incy,
                                                      int64_t            batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDsbmvBatched_64(hipblasHandle_t     handle,
                                                      hipblasFillMode_t   uplo,
                                                      int64_t             n,
                                                      int64_t             k,
                                                      const double*       alpha,
                                                      const double* const AP[],
                                                      int64_t             lda,
                                                      const double* const x[],
                                                      int64_t             incx,
                                                      const double*       beta,
                                                      double* const       y[],
                                                      int64_t             incy,
                                                      int64_t             batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The sbmvStridedBatched functions perform the matrix-vector operation:

        y_i := alpha*A_i*x_i + beta*y_i,

    where ``(A_i, x_i, y_i)`` is the ``i``-th instance of the batch,
    ``alpha`` and ``beta`` are scalars, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``n`` by ``n`` symmetric banded matrix, for ``i`` = 1, ..., ``batchCount``.
    Each ``A_i`` should contain an upper or lower triangular ``n`` by ``n`` symmetric banded matrix.

    The ``_64`` variants take ``int64_t`` instead of ``int`` for ``n``, ``k``, ``lda``,
    ``incx``, ``incy``, and ``batchCount``.

    - Supported precisions in rocBLAS : ``s`` and ``d``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - If HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced.
              - If HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced.
    @param[in]
    n         [int]
              number of rows and columns of each matrix A_i.
    @param[in]
    k         [int]
              specifies the number of sub- and super-diagonals.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    AP        device pointer to the first matrix A_1 on the GPU.
    @param[in]
    lda       [int]
              specifies the leading dimension of each matrix A_i.
    @param[in]
    strideA     [hipblasStride]
                stride from the start of one matrix (A_i) to the next one (A_i+1).
    @param[in]
    x         device pointer to the first vector x_1 on the GPU.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each vector x_i.
    @param[in]
    stridex     [hipblasStride]
                stride from the start of one vector (x_i) to the next one (x_i+1).
                There are no restrictions placed on stridex. However, the user should
                ensure that stridex is of an appropriate size.
                This typically means stridex >= n * incx. stridex should be non zero.
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device pointer to the first vector y_1 on the GPU.
              Each y_i is read (scaled by beta) and overwritten with the result.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each vector y_i.
    @param[in]
    stridey     [hipblasStride]
                stride from the start of one vector (y_i) to the next one (y_i+1).
                There are no restrictions placed on stridey. However, the user should
                ensure that stridey is of an appropriate size.
                This typically means stridey >= n * incy. stridey should be non zero.
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasSsbmvStridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          int               k,
                                                          const float*      alpha,
                                                          const float*      AP,
                                                          int               lda,
                                                          hipblasStride     strideA,
                                                          const float*      x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const float*      beta,
                                                          float*            y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          int               batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDsbmvStridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          int               k,
                                                          const double*     alpha,
                                                          const double*     AP,
                                                          int               lda,
                                                          hipblasStride     strideA,
                                                          const double*     x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const double*     beta,
                                                          double*           y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          int               batchCount);

// 64-bit interface: the strides remain hipblasStride; the int parameters become int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsbmvStridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             int64_t           k,
                                                             const float*      alpha,
                                                             const float*      AP,
                                                             int64_t           lda,
                                                             hipblasStride     strideA,
                                                             const float*      x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const float*      beta,
                                                             float*            y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             int64_t           batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDsbmvStridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             int64_t           k,
                                                             const double*     alpha,
                                                             const double*     AP,
                                                             int64_t           lda,
                                                             hipblasStride     strideA,
                                                             const double*     x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const double*     beta,
                                                             double*           y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             int64_t           batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The spmv functions perform the matrix-vector operation:

        y := alpha*A*x + beta*y,

    where ``alpha`` and ``beta`` are scalars, ``x`` and ``y`` are ``n``-element vectors, and
    ``A`` should contain an upper or lower triangular ``n`` by ``n`` packed symmetric matrix.

    The ``_64`` variants take ``int64_t`` instead of ``int`` for ``n``, ``incx``, and ``incy``.

    - Supported precisions in rocBLAS : ``s`` and ``d``.
    - Supported precisions in cuBLAS  : ``s`` and ``d``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - If HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced.
              - If HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced.
    @param[in]
    n         [int]
              number of rows and columns of matrix A.
    @param[in]
    alpha
              pointer to the scalar alpha.
    @param[in]
    AP        pointer storing the packed matrix A on the GPU.
    @param[in]
    x         pointer storing vector x on the GPU.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    beta      pointer to the scalar beta.
    @param[inout]
    y         pointer storing vector y on the GPU.
              y is read (scaled by beta) and overwritten with the result.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.

    ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasSspmv(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            const float*      alpha,
                                            const float*      AP,
                                            const float*      x,
                                            int               incx,
                                            const float*      beta,
                                            float*            y,
                                            int               incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasDspmv(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            const double*     alpha,
                                            const double*     AP,
                                            const double*     x,
                                            int               incx,
                                            const double*     beta,
                                            double*           y,
                                            int               incy);

// 64-bit interface: n, incx, and incy are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSspmv_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               const float*      alpha,
                                               const float*      AP,
                                               const float*      x,
                                               int64_t           incx,
                                               const float*      beta,
                                               float*            y,
                                               int64_t           incy);

HIPBLAS_EXPORT hipblasStatus_t hipblasDspmv_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               const double*     alpha,
                                               const double*     AP,
                                               const double*     x,
                                               int64_t           incx,
                                               const double*     beta,
                                               double*           y,
                                               int64_t           incy);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The spmvBatched functions perform the matrix-vector operation:

        y_i := alpha*A_i*x_i + beta*y_i,

    where ``(A_i, x_i, y_i)`` is the ``i``-th instance of the batch,
    ``alpha`` and ``beta`` are scalars, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``n`` by ``n`` symmetric matrix, for ``i`` = 1, ..., ``batchCount``.
    Each ``A_i`` should contain an upper or lower triangular ``n`` by ``n`` packed symmetric matrix.

    The ``_64`` variants take ``int64_t`` instead of ``int`` for ``n``, ``incx``, ``incy``,
    and ``batchCount``.

    - Supported precisions in rocBLAS : ``s`` and ``d``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - If HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced.
              - If HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced.
    @param[in]
    n         [int]
              number of rows and columns of each matrix A_i.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    AP        device array of device pointers storing each matrix A_i.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each vector x_i.
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device array of device pointers storing each vector y_i.
              Each y_i is read (scaled by beta) and overwritten with the result.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each vector y_i.
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasSspmvBatched(hipblasHandle_t    handle,
                                                   hipblasFillMode_t  uplo,
                                                   int                n,
                                                   const float*       alpha,
                                                   const float* const AP[],
                                                   const float* const x[],
                                                   int                incx,
                                                   const float*       beta,
                                                   float* const       y[],
                                                   int                incy,
                                                   int                batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDspmvBatched(hipblasHandle_t     handle,
                                                   hipblasFillMode_t   uplo,
                                                   int                 n,
                                                   const double*       alpha,
                                                   const double* const AP[],
                                                   const double* const x[],
                                                   int                 incx,
                                                   const double*       beta,
                                                   double* const       y[],
                                                   int                 incy,
                                                   int                 batchCount);

// 64-bit interface: n, incx, incy, and batchCount are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSspmvBatched_64(hipblasHandle_t    handle,
                                                      hipblasFillMode_t  uplo,
                                                      int64_t            n,
                                                      const float*       alpha,
                                                      const float* const AP[],
                                                      const float* const x[],
                                                      int64_t            incx,
                                                      const float*       beta,
                                                      float* const       y[],
                                                      int64_t            incy,
                                                      int64_t            batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDspmvBatched_64(hipblasHandle_t     handle,
                                                      hipblasFillMode_t   uplo,
                                                      int64_t             n,
                                                      const double*       alpha,
                                                      const double* const AP[],
                                                      const double* const x[],
                                                      int64_t             incx,
                                                      const double*       beta,
                                                      double* const       y[],
                                                      int64_t             incy,
                                                      int64_t             batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The spmvStridedBatched functions perform the matrix-vector operation:

        y_i := alpha*A_i*x_i + beta*y_i,

    where ``(A_i, x_i, y_i)`` is the ``i``-th instance of the batch,
    ``alpha`` and ``beta`` are scalars, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``n`` by ``n`` symmetric matrix, for ``i`` = 1, ..., ``batchCount``.
    Each ``A_i`` should contain an upper or lower triangular ``n`` by ``n`` packed symmetric matrix.

    The ``_64`` variants take ``int64_t`` instead of ``int`` for ``n``, ``incx``, ``incy``,
    and ``batchCount``.

    - Supported precisions in rocBLAS : ``s`` and ``d``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - If HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced.
              - If HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced.
    @param[in]
    n         [int]
              number of rows and columns of each matrix A_i.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    AP        device pointer to the first matrix A_1 on the GPU.
    @param[in]
    strideA     [hipblasStride]
                stride from the start of one matrix (A_i) to the next one (A_i+1).
    @param[in]
    x         device pointer to the first vector x_1 on the GPU.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each vector x_i.
    @param[in]
    stridex     [hipblasStride]
                stride from the start of one vector (x_i) to the next one (x_i+1).
                There are no restrictions placed on stridex. However, the user should
                take care to ensure that stridex is of an appropriate size.
                This typically means stridex >= n * incx. stridex should be non zero.
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device pointer to the first vector y_1 on the GPU.
              Each y_i is read (scaled by beta) and overwritten with the result.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each vector y_i.
    @param[in]
    stridey     [hipblasStride]
                stride from the start of one vector (y_i) to the next one (y_i+1).
                There are no restrictions placed on stridey. However, the user should
                take care to ensure that stridey is of an appropriate size.
                This typically means stridey >= n * incy. stridey should be non zero.
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasSspmvStridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          const float*      alpha,
                                                          const float*      AP,
                                                          hipblasStride     strideA,
                                                          const float*      x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const float*      beta,
                                                          float*            y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          int               batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDspmvStridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          const double*     alpha,
                                                          const double*     AP,
                                                          hipblasStride     strideA,
                                                          const double*     x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const double*     beta,
                                                          double*           y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          int               batchCount);

// 64-bit interface: the strides remain hipblasStride; the int parameters become int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasSspmvStridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             const float*      alpha,
                                                             const float*      AP,
                                                             hipblasStride     strideA,
                                                             const float*      x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const float*      beta,
                                                             float*            y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             int64_t           batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDspmvStridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             const double*     alpha,
                                                             const double*     AP,
                                                             hipblasStride     strideA,
                                                             const double*     x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const double*     beta,
                                                             double*           y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             int64_t           batchCount);
//! @}

/*! @{
    \brief  <b> BLAS Level 2 API </b>

    \details
    The spr functions perform the matrix-vector operations:

        A := A + alpha*x*x**T

    where ``alpha`` is a scalar, ``x`` is a vector, and ``A`` is an
    ``n`` by ``n`` symmetric matrix, supplied in packed form.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in AP.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in AP.
    @param[in]
    n         [int]
              the number of rows and columns of matrix A. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[inout]
    AP        device pointer storing the packed version of the specified triangular portion of
              the symmetric matrix A. Of at least size ((n * (n + 1)) / 2).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of the symmetric matrix A is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(0,1)  
                AP(2) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4)  
                        1 2 4 7  
                        2 3 5 8   -> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]  
                        4 5 6 9  
                        7 8 9 0  
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of the symmetric matrix A is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(1,0)  
                AP(2) = A(2,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4)  
                        1 2 3 4  
                        2 5 6 7    -> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]  
                        3 6 8 9  
                        4 7 9 0  
    ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasSspr(hipblasHandle_t   handle,
                                           hipblasFillMode_t uplo,
                                           int               n,
                                           const float*      alpha,
                                           const float*      x,
                                           int               incx,
                                           float*            AP);

HIPBLAS_EXPORT hipblasStatus_t hipblasDspr(hipblasHandle_t   handle,
                                           hipblasFillMode_t uplo,
                                           int               n,
                                           const double*     alpha,
                                           const double*     x,
                                           int               incx,
                                           double*           AP);

HIPBLAS_EXPORT hipblasStatus_t hipblasCspr(hipblasHandle_t   handle,
                                           hipblasFillMode_t uplo,
                                           int               n,
                                           const hipComplex* alpha,
                                           const hipComplex* x,
                                           int               incx,
                                           hipComplex*       AP);

HIPBLAS_EXPORT hipblasStatus_t hipblasZspr(hipblasHandle_t         handle,
                                           hipblasFillMode_t       uplo,
                                           int                     n,
                                           const hipDoubleComplex* alpha,
                                           const hipDoubleComplex* x,
                                           int                     incx,
                                           hipDoubleComplex*       AP);

// 64-bit interface
// spr, float variant with int64_t n and incx; other parameters match hipblasSspr.
HIPBLAS_EXPORT hipblasStatus_t hipblasSspr_64(hipblasHandle_t   handle,
                                              hipblasFillMode_t uplo,
                                              int64_t           n,
                                              const float*      alpha,
                                              const float*      x,
                                              int64_t           incx,
                                              float*            AP);

// spr, double variant with int64_t n and incx; other parameters match hipblasDspr.
HIPBLAS_EXPORT hipblasStatus_t hipblasDspr_64(hipblasHandle_t   handle,
                                              hipblasFillMode_t uplo,
                                              int64_t           n,
                                              const double*     alpha,
                                              const double*     x,
                                              int64_t           incx,
                                              double*           AP);

// spr, hipComplex variant with int64_t n and incx; other parameters match hipblasCspr.
HIPBLAS_EXPORT hipblasStatus_t hipblasCspr_64(hipblasHandle_t   handle,
                                              hipblasFillMode_t uplo,
                                              int64_t           n,
                                              const hipComplex* alpha,
                                              const hipComplex* x,
                                              int64_t           incx,
                                              hipComplex*       AP);

// spr, hipDoubleComplex variant with int64_t n and incx; other parameters match hipblasZspr.
HIPBLAS_EXPORT hipblasStatus_t hipblasZspr_64(hipblasHandle_t         handle,
                                              hipblasFillMode_t       uplo,
                                              int64_t                 n,
                                              const hipDoubleComplex* alpha,
                                              const hipDoubleComplex* x,
                                              int64_t                 incx,
                                              hipDoubleComplex*       AP);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The sprBatched functions perform the matrix-vector operations:

        A_i := A_i + alpha*x_i*x_i**T

    where ``alpha`` is a scalar, ``x_i`` is a vector, and ``A_i`` is an
    ``n`` by ``n`` symmetric matrix, supplied in packed form, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP.
    @param[in]
    n         [int]
              the number of rows and columns of each matrix A_i. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[inout]
    AP        device array of device pointers storing the packed version of the specified triangular portion of
              each symmetric matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batchCount.
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of each symmetric matrix A_i is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(0,1)  
                AP(2) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4)  
                        1 2 4 7  
                        2 3 5 8   -> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]  
                        4 5 6 9  
                        7 8 9 0  
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of each symmetric matrix A_i is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(1,0)  
                AP(2) = A(2,0), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4)  
                        1 2 3 4  
                        2 5 6 7    -> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]  
                        3 6 8 9  
                        4 7 9 0  
    @param[in]
    batchCount [int]
                number of instances in the batch.
    ********************************************************************/

// sprBatched, float variant: x and AP are arrays of per-instance pointers;
// n, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsprBatched(hipblasHandle_t    handle,
                                                  hipblasFillMode_t  uplo,
                                                  int                n,
                                                  const float*       alpha,
                                                  const float* const x[],
                                                  int                incx,
                                                  float* const       AP[],
                                                  int                batchCount);

// sprBatched, double variant: x and AP are arrays of per-instance pointers;
// n, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsprBatched(hipblasHandle_t     handle,
                                                  hipblasFillMode_t   uplo,
                                                  int                 n,
                                                  const double*       alpha,
                                                  const double* const x[],
                                                  int                 incx,
                                                  double* const       AP[],
                                                  int                 batchCount);

// sprBatched, hipComplex variant: x and AP are arrays of per-instance pointers;
// n, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsprBatched(hipblasHandle_t         handle,
                                                  hipblasFillMode_t       uplo,
                                                  int                     n,
                                                  const hipComplex*       alpha,
                                                  const hipComplex* const x[],
                                                  int                     incx,
                                                  hipComplex* const       AP[],
                                                  int                     batchCount);

// sprBatched, hipDoubleComplex variant: x and AP are arrays of per-instance pointers;
// n, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsprBatched(hipblasHandle_t               handle,
                                                  hipblasFillMode_t             uplo,
                                                  int                           n,
                                                  const hipDoubleComplex*       alpha,
                                                  const hipDoubleComplex* const x[],
                                                  int                           incx,
                                                  hipDoubleComplex* const       AP[],
                                                  int                           batchCount);

// 64-bit interface
// sprBatched, float variant with int64_t n, incx and batchCount;
// x and AP remain arrays of per-instance pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsprBatched_64(hipblasHandle_t    handle,
                                                     hipblasFillMode_t  uplo,
                                                     int64_t            n,
                                                     const float*       alpha,
                                                     const float* const x[],
                                                     int64_t            incx,
                                                     float* const       AP[],
                                                     int64_t            batchCount);

// sprBatched, double variant with int64_t n, incx and batchCount;
// x and AP remain arrays of per-instance pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsprBatched_64(hipblasHandle_t     handle,
                                                     hipblasFillMode_t   uplo,
                                                     int64_t             n,
                                                     const double*       alpha,
                                                     const double* const x[],
                                                     int64_t             incx,
                                                     double* const       AP[],
                                                     int64_t             batchCount);

// sprBatched, hipComplex variant with int64_t n, incx and batchCount;
// x and AP remain arrays of per-instance pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsprBatched_64(hipblasHandle_t         handle,
                                                     hipblasFillMode_t       uplo,
                                                     int64_t                 n,
                                                     const hipComplex*       alpha,
                                                     const hipComplex* const x[],
                                                     int64_t                 incx,
                                                     hipComplex* const       AP[],
                                                     int64_t                 batchCount);

// sprBatched, hipDoubleComplex variant with int64_t n, incx and batchCount;
// x and AP remain arrays of per-instance pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsprBatched_64(hipblasHandle_t               handle,
                                                     hipblasFillMode_t             uplo,
                                                     int64_t                       n,
                                                     const hipDoubleComplex*       alpha,
                                                     const hipDoubleComplex* const x[],
                                                     int64_t                       incx,
                                                     hipDoubleComplex* const       AP[],
                                                     int64_t                       batchCount);
//! @}

/*! @{
    \brief  <b> BLAS Level 2 API </b>

    \details
    The sprStridedBatched functions perform the matrix-vector operations:

        A_i := A_i + alpha*x_i*x_i**T

    where ``alpha`` is a scalar, ``x_i`` is a vector, and ``A_i`` is an
    ``n`` by ``n`` symmetric matrix, supplied in packed form, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP.
    @param[in]
    n         [int]
              the number of rows and columns of each matrix A_i. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer pointing to the first vector (x_1).
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    stridex  [hipblasStride]
              stride from the start of one vector (x_i) to the next one (x_i+1).
    @param[inout]
    AP        device pointer storing the packed version of the specified triangular portion of
              each symmetric matrix A_i. Points to the first A_1.
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of each symmetric matrix A_i is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(0,1)  
                AP(2) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4)  
                        1 2 4 7  
                        2 3 5 8   -> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]  
                        4 5 6 9  
                        7 8 9 0  
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of each symmetric matrix A_i is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(1,0)  
                AP(2) = A(2,0), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4)  
                        1 2 3 4  
                        2 5 6 7    -> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]  
                        3 6 8 9  
                        4 7 9 0  
    @param[in]
    strideA    [hipblasStride]
                stride from the start of one matrix (A_i) to the next one (A_i+1).
    @param[in]
    batchCount [int]
                number of instances in the batch.
    ********************************************************************/

// sprStridedBatched, float variant: x and AP point at the first instance and
// stridex/strideA step to the next; n, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsprStridedBatched(hipblasHandle_t   handle,
                                                         hipblasFillMode_t uplo,
                                                         int               n,
                                                         const float*      alpha,
                                                         const float*      x,
                                                         int               incx,
                                                         hipblasStride     stridex,
                                                         float*            AP,
                                                         hipblasStride     strideA,
                                                         int               batchCount);

// sprStridedBatched, double variant: x and AP point at the first instance and
// stridex/strideA step to the next; n, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsprStridedBatched(hipblasHandle_t   handle,
                                                         hipblasFillMode_t uplo,
                                                         int               n,
                                                         const double*     alpha,
                                                         const double*     x,
                                                         int               incx,
                                                         hipblasStride     stridex,
                                                         double*           AP,
                                                         hipblasStride     strideA,
                                                         int               batchCount);

// sprStridedBatched, hipComplex variant: x and AP point at the first instance and
// stridex/strideA step to the next; n, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsprStridedBatched(hipblasHandle_t   handle,
                                                         hipblasFillMode_t uplo,
                                                         int               n,
                                                         const hipComplex* alpha,
                                                         const hipComplex* x,
                                                         int               incx,
                                                         hipblasStride     stridex,
                                                         hipComplex*       AP,
                                                         hipblasStride     strideA,
                                                         int               batchCount);

// sprStridedBatched, hipDoubleComplex variant: x and AP point at the first instance and
// stridex/strideA step to the next; n, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsprStridedBatched(hipblasHandle_t         handle,
                                                         hipblasFillMode_t       uplo,
                                                         int                     n,
                                                         const hipDoubleComplex* alpha,
                                                         const hipDoubleComplex* x,
                                                         int                     incx,
                                                         hipblasStride           stridex,
                                                         hipDoubleComplex*       AP,
                                                         hipblasStride           strideA,
                                                         int                     batchCount);

// 64-bit interface
// sprStridedBatched, float variant with int64_t n, incx and batchCount;
// stridex and strideA stay hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsprStridedBatched_64(hipblasHandle_t   handle,
                                                            hipblasFillMode_t uplo,
                                                            int64_t           n,
                                                            const float*      alpha,
                                                            const float*      x,
                                                            int64_t           incx,
                                                            hipblasStride     stridex,
                                                            float*            AP,
                                                            hipblasStride     strideA,
                                                            int64_t           batchCount);

// sprStridedBatched, double variant with int64_t n, incx and batchCount;
// stridex and strideA stay hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsprStridedBatched_64(hipblasHandle_t   handle,
                                                            hipblasFillMode_t uplo,
                                                            int64_t           n,
                                                            const double*     alpha,
                                                            const double*     x,
                                                            int64_t           incx,
                                                            hipblasStride     stridex,
                                                            double*           AP,
                                                            hipblasStride     strideA,
                                                            int64_t           batchCount);

// sprStridedBatched, hipComplex variant with int64_t n, incx and batchCount;
// stridex and strideA stay hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsprStridedBatched_64(hipblasHandle_t   handle,
                                                            hipblasFillMode_t uplo,
                                                            int64_t           n,
                                                            const hipComplex* alpha,
                                                            const hipComplex* x,
                                                            int64_t           incx,
                                                            hipblasStride     stridex,
                                                            hipComplex*       AP,
                                                            hipblasStride     strideA,
                                                            int64_t           batchCount);

// sprStridedBatched, hipDoubleComplex variant with int64_t n, incx and batchCount;
// stridex and strideA stay hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsprStridedBatched_64(hipblasHandle_t         handle,
                                                            hipblasFillMode_t       uplo,
                                                            int64_t                 n,
                                                            const hipDoubleComplex* alpha,
                                                            const hipDoubleComplex* x,
                                                            int64_t                 incx,
                                                            hipblasStride           stridex,
                                                            hipDoubleComplex*       AP,
                                                            hipblasStride           strideA,
                                                            int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The spr2 functions perform the matrix-vector operation:

        A := A + alpha*x*y**T + alpha*y*x**T

    where ``alpha`` is a scalar, ``x`` and ``y`` are vectors, and ``A`` is an
    ``n`` by ``n`` symmetric matrix, supplied in packed form.

    - Supported precisions in rocBLAS : ``s`` and ``d``.
    - Supported precisions in cuBLAS  : ``s`` and ``d``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of A is supplied in AP.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of A is supplied in AP.
    @param[in]
    n         [int]
              the number of rows and columns of matrix A. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    y         device pointer storing vector y.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.
    @param[inout]
    AP        device pointer storing the packed version of the specified triangular portion of
              the symmetric matrix A. Of at least size ((n * (n + 1)) / 2).
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of the symmetric matrix A is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(0,1)  
                AP(2) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4)  
                        1 2 4 7  
                        2 3 5 8   -> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]  
                        4 5 6 9  
                        7 8 9 0  
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of the symmetric matrix A is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(1,0)  
                AP(n) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4)  
                        1 2 3 4  
                        2 5 6 7    -> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]  
                        3 6 8 9  
                        4 7 9 0  
    ********************************************************************/

// spr2, float variant: alpha, x, y and AP are float; n, incx and incy are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSspr2(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            const float*      alpha,
                                            const float*      x,
                                            int               incx,
                                            const float*      y,
                                            int               incy,
                                            float*            AP);

// spr2, double variant: alpha, x, y and AP are double; n, incx and incy are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDspr2(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            const double*     alpha,
                                            const double*     x,
                                            int               incx,
                                            const double*     y,
                                            int               incy,
                                            double*           AP);

// 64-bit interface
// spr2, float variant with int64_t n, incx and incy; other parameters match hipblasSspr2.
HIPBLAS_EXPORT hipblasStatus_t hipblasSspr2_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               const float*      alpha,
                                               const float*      x,
                                               int64_t           incx,
                                               const float*      y,
                                               int64_t           incy,
                                               float*            AP);

// spr2, double variant with int64_t n, incx and incy; other parameters match hipblasDspr2.
HIPBLAS_EXPORT hipblasStatus_t hipblasDspr2_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               const double*     alpha,
                                               const double*     x,
                                               int64_t           incx,
                                               const double*     y,
                                               int64_t           incy,
                                               double*           AP);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The spr2Batched functions perform the matrix-vector operation:

        A_i := A_i + alpha*x_i*y_i**T + alpha*y_i*x_i**T

    where ``alpha`` is a scalar, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``n`` by ``n`` symmetric matrix, supplied in packed form, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s`` and ``d``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP.
    @param[in]
    n         [int]
              the number of rows and columns of each matrix A_i. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    y         device array of device pointers storing each vector y_i.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[inout]
    AP        device array of device pointers storing the packed version of the specified triangular portion of
              each symmetric matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batchCount.
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of each symmetric matrix A_i is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(0,1)  
                AP(2) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4)  
                        1 2 4 7  
                        2 3 5 8   -> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]  
                        4 5 6 9  
                        7 8 9 0  
              - if uplo == HIPBLAS_FILL_MODE_LOWER:  
                The lower triangular portion of each symmetric matrix A_i is supplied.  
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(1,0)  
                AP(n) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4)  
                        1 2 3 4  
                        2 5 6 7    -> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]  
                        3 6 8 9  
                        4 7 9 0  
    @param[in]
    batchCount [int]
                number of instances in the batch.
    ********************************************************************/

// spr2Batched, float variant: x, y and AP are arrays of per-instance pointers;
// n, incx, incy and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSspr2Batched(hipblasHandle_t    handle,
                                                   hipblasFillMode_t  uplo,
                                                   int                n,
                                                   const float*       alpha,
                                                   const float* const x[],
                                                   int                incx,
                                                   const float* const y[],
                                                   int                incy,
                                                   float* const       AP[],
                                                   int                batchCount);

// spr2Batched, double variant: x, y and AP are arrays of per-instance pointers;
// n, incx, incy and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDspr2Batched(hipblasHandle_t     handle,
                                                   hipblasFillMode_t   uplo,
                                                   int                 n,
                                                   const double*       alpha,
                                                   const double* const x[],
                                                   int                 incx,
                                                   const double* const y[],
                                                   int                 incy,
                                                   double* const       AP[],
                                                   int                 batchCount);

// 64-bit interface
// spr2Batched, float variant with int64_t n, incx, incy and batchCount;
// x, y and AP remain arrays of per-instance pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasSspr2Batched_64(hipblasHandle_t    handle,
                                                      hipblasFillMode_t  uplo,
                                                      int64_t            n,
                                                      const float*       alpha,
                                                      const float* const x[],
                                                      int64_t            incx,
                                                      const float* const y[],
                                                      int64_t            incy,
                                                      float* const       AP[],
                                                      int64_t            batchCount);

// spr2Batched, double variant with int64_t n, incx, incy and batchCount;
// x, y and AP remain arrays of per-instance pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasDspr2Batched_64(hipblasHandle_t     handle,
                                                      hipblasFillMode_t   uplo,
                                                      int64_t             n,
                                                      const double*       alpha,
                                                      const double* const x[],
                                                      int64_t             incx,
                                                      const double* const y[],
                                                      int64_t             incy,
                                                      double* const       AP[],
                                                      int64_t             batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The spr2StridedBatched functions perform the matrix-vector operation:

        A_i := A_i + alpha*x_i*y_i**T + alpha*y_i*x_i**T

    where ``alpha`` is a scalar, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``n`` by ``n`` symmetric matrix, supplied in packed form, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s`` and ``d``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - HIPBLAS_FILL_MODE_UPPER: The upper triangular part of each A_i is supplied in AP.
              - HIPBLAS_FILL_MODE_LOWER: The lower triangular part of each A_i is supplied in AP.
    @param[in]
    n         [int]
              the number of rows and columns of each matrix A_i. Must be at least 0.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer pointing to the first vector (x_1).
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    stridex  [hipblasStride]
              stride from the start of one vector (x_i) to the next one (x_i+1).
    @param[in]
    y         device pointer pointing to the first vector (y_1).
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[in]
    stridey  [hipblasStride]
              stride from the start of one vector (y_i) to the next one (y_i+1).
    @param[inout]
    AP        device pointer storing the packed version of the specified triangular portion of
              each symmetric matrix A_i. Points to the first A_1.
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The upper triangular portion of each symmetric matrix A_i is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(0,1)  
                AP(2) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_UPPER; n = 4)  
                        1 2 4 7  
                        2 3 5 8   -> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]  
                        4 5 6 9  
                        7 8 9 0  
              - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The lower triangular portion of each symmetric matrix A_i is supplied.
                The matrix is compacted so that AP contains the triangular portion column-by-column
                so that:  
                AP(0) = A(0,0)  
                AP(1) = A(1,0)  
                AP(n) = A(1,1), and so forth.  
                    Ex: (HIPBLAS_FILL_MODE_LOWER; n = 4)  
                        1 2 3 4  
                        2 5 6 7    -> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]  
                        3 6 8 9  
                        4 7 9 0  
    @param[in]
    strideA   [hipblasStride]
                stride from the start of one (A_i) to the next (A_i+1).
    @param[in]
    batchCount [int]
                number of instances in the batch.
    ********************************************************************/

// float variant of spr2StridedBatched: x, y, and AP are single pointers, each paired with
// a hipblasStride (stridex, stridey, strideA); n, incx, incy, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSspr2StridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          const float*      alpha,
                                                          const float*      x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const float*      y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          float*            AP,
                                                          hipblasStride     strideA,
                                                          int               batchCount);

// double variant of spr2StridedBatched: x, y, and AP are single pointers, each paired with
// a hipblasStride (stridex, stridey, strideA); n, incx, incy, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDspr2StridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          const double*     alpha,
                                                          const double*     x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const double*     y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          double*           AP,
                                                          hipblasStride     strideA,
                                                          int               batchCount);

// 64-bit interface
// float variant of the 64-bit spr2StridedBatched interface: n, incx, incy, and batchCount
// are int64_t; stridex, stridey, and strideA remain hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasSspr2StridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             const float*      alpha,
                                                             const float*      x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const float*      y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             float*            AP,
                                                             hipblasStride     strideA,
                                                             int64_t           batchCount);

// double variant of the 64-bit spr2StridedBatched interface: n, incx, incy, and batchCount
// are int64_t; stridex, stridey, and strideA remain hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasDspr2StridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             const double*     alpha,
                                                             const double*     x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const double*     y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             double*           AP,
                                                             hipblasStride     strideA,
                                                             int64_t           batchCount);
//! @}

/*! @{
    \brief  <b> BLAS Level 2 API </b>

    \details
    The symv functions perform the matrix-vector operation:

        y := alpha*A*x + beta*y,

    where ``alpha`` and ``beta`` are scalars, ``x`` and ``y`` are ``n``-element vectors, and
    ``A`` should contain an upper or lower triangular ``n`` by ``n`` symmetric matrix.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - If HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced.
              - If HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced.
    @param[in]
    n         [int]
              the number of rows and columns of matrix A.
    @param[in]
    alpha
              specifies the scalar alpha.
    @param[in]
    AP         pointer storing matrix A on the GPU.
    @param[in]
    lda       [int]
              specifies the leading dimension of A.
    @param[in]
    x         pointer storing vector x on the GPU.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    beta      specifies the scalar beta.
    @param[inout]
    y         pointer storing vector y on the GPU.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.

    ********************************************************************/

// float variant of symv (y := alpha*A*x + beta*y); n, lda, incx, and incy are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsymv(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            const float*      alpha,
                                            const float*      AP,
                                            int               lda,
                                            const float*      x,
                                            int               incx,
                                            const float*      beta,
                                            float*            y,
                                            int               incy);

// double variant of symv (y := alpha*A*x + beta*y); n, lda, incx, and incy are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsymv(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            const double*     alpha,
                                            const double*     AP,
                                            int               lda,
                                            const double*     x,
                                            int               incx,
                                            const double*     beta,
                                            double*           y,
                                            int               incy);

// hipComplex variant of symv (y := alpha*A*x + beta*y); n, lda, incx, and incy are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsymv(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            const hipComplex* alpha,
                                            const hipComplex* AP,
                                            int               lda,
                                            const hipComplex* x,
                                            int               incx,
                                            const hipComplex* beta,
                                            hipComplex*       y,
                                            int               incy);

// hipDoubleComplex variant of symv (y := alpha*A*x + beta*y); n, lda, incx, and incy are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsymv(hipblasHandle_t         handle,
                                            hipblasFillMode_t       uplo,
                                            int                     n,
                                            const hipDoubleComplex* alpha,
                                            const hipDoubleComplex* AP,
                                            int                     lda,
                                            const hipDoubleComplex* x,
                                            int                     incx,
                                            const hipDoubleComplex* beta,
                                            hipDoubleComplex*       y,
                                            int                     incy);

// 64-bit interface
// float variant of the 64-bit symv interface: n, lda, incx, and incy are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsymv_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               const float*      alpha,
                                               const float*      AP,
                                               int64_t           lda,
                                               const float*      x,
                                               int64_t           incx,
                                               const float*      beta,
                                               float*            y,
                                               int64_t           incy);

// double variant of the 64-bit symv interface: n, lda, incx, and incy are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsymv_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               const double*     alpha,
                                               const double*     AP,
                                               int64_t           lda,
                                               const double*     x,
                                               int64_t           incx,
                                               const double*     beta,
                                               double*           y,
                                               int64_t           incy);

// hipComplex variant of the 64-bit symv interface: n, lda, incx, and incy are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsymv_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               const hipComplex* alpha,
                                               const hipComplex* AP,
                                               int64_t           lda,
                                               const hipComplex* x,
                                               int64_t           incx,
                                               const hipComplex* beta,
                                               hipComplex*       y,
                                               int64_t           incy);

// hipDoubleComplex variant of the 64-bit symv interface: n, lda, incx, and incy are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsymv_64(hipblasHandle_t         handle,
                                               hipblasFillMode_t       uplo,
                                               int64_t                 n,
                                               const hipDoubleComplex* alpha,
                                               const hipDoubleComplex* AP,
                                               int64_t                 lda,
                                               const hipDoubleComplex* x,
                                               int64_t                 incx,
                                               const hipDoubleComplex* beta,
                                               hipDoubleComplex*       y,
                                               int64_t                 incy);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    symvBatched performs the matrix-vector operation:

        y_i := alpha*A_i*x_i + beta*y_i,

    where ``(A_i, x_i, y_i)`` is the ``i``-th instance of the batch,
    ``alpha`` and ``beta`` are scalars, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``n`` by ``n`` symmetric matrix, for ``i`` = 1, ..., ``batchCount``.
    Each ``A_i`` should contain an upper or lower triangular symmetric matrix.
    The opposing triangular part of each ``A_i`` is not referenced.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - If HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced.
              - If HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced.
    @param[in]
    n         [int]
              number of rows and columns of each matrix A_i.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    AP        device array of device pointers storing each matrix A_i.
    @param[in]
    lda       [int]
              specifies the leading dimension of each matrix A_i.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each vector x_i.
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         device array of device pointers storing each vector y_i.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each vector y_i.
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// float variant of symvBatched: AP, x, and y are passed as arrays of pointers;
// n, lda, incx, incy, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsymvBatched(hipblasHandle_t    handle,
                                                   hipblasFillMode_t  uplo,
                                                   int                n,
                                                   const float*       alpha,
                                                   const float* const AP[],
                                                   int                lda,
                                                   const float* const x[],
                                                   int                incx,
                                                   const float*       beta,
                                                   float* const       y[],
                                                   int                incy,
                                                   int                batchCount);

// double variant of symvBatched: AP, x, and y are passed as arrays of pointers;
// n, lda, incx, incy, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsymvBatched(hipblasHandle_t     handle,
                                                   hipblasFillMode_t   uplo,
                                                   int                 n,
                                                   const double*       alpha,
                                                   const double* const AP[],
                                                   int                 lda,
                                                   const double* const x[],
                                                   int                 incx,
                                                   const double*       beta,
                                                   double* const       y[],
                                                   int                 incy,
                                                   int                 batchCount);

// hipComplex variant of symvBatched: AP, x, and y are passed as arrays of pointers;
// n, lda, incx, incy, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsymvBatched(hipblasHandle_t         handle,
                                                   hipblasFillMode_t       uplo,
                                                   int                     n,
                                                   const hipComplex*       alpha,
                                                   const hipComplex* const AP[],
                                                   int                     lda,
                                                   const hipComplex* const x[],
                                                   int                     incx,
                                                   const hipComplex*       beta,
                                                   hipComplex* const       y[],
                                                   int                     incy,
                                                   int                     batchCount);

// hipDoubleComplex variant of symvBatched: AP, x, and y are passed as arrays of pointers;
// n, lda, incx, incy, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsymvBatched(hipblasHandle_t               handle,
                                                   hipblasFillMode_t             uplo,
                                                   int                           n,
                                                   const hipDoubleComplex*       alpha,
                                                   const hipDoubleComplex* const AP[],
                                                   int                           lda,
                                                   const hipDoubleComplex* const x[],
                                                   int                           incx,
                                                   const hipDoubleComplex*       beta,
                                                   hipDoubleComplex* const       y[],
                                                   int                           incy,
                                                   int                           batchCount);

// 64-bit interface
// float variant of the 64-bit symvBatched interface: n, lda, incx, incy, and batchCount
// are int64_t; AP, x, and y are passed as arrays of pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsymvBatched_64(hipblasHandle_t    handle,
                                                      hipblasFillMode_t  uplo,
                                                      int64_t            n,
                                                      const float*       alpha,
                                                      const float* const AP[],
                                                      int64_t            lda,
                                                      const float* const x[],
                                                      int64_t            incx,
                                                      const float*       beta,
                                                      float* const       y[],
                                                      int64_t            incy,
                                                      int64_t            batchCount);

// double variant of the 64-bit symvBatched interface: n, lda, incx, incy, and batchCount
// are int64_t; AP, x, and y are passed as arrays of pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsymvBatched_64(hipblasHandle_t     handle,
                                                      hipblasFillMode_t   uplo,
                                                      int64_t             n,
                                                      const double*       alpha,
                                                      const double* const AP[],
                                                      int64_t             lda,
                                                      const double* const x[],
                                                      int64_t             incx,
                                                      const double*       beta,
                                                      double* const       y[],
                                                      int64_t             incy,
                                                      int64_t             batchCount);

// hipComplex variant of the 64-bit symvBatched interface: n, lda, incx, incy, and
// batchCount are int64_t; AP, x, and y are passed as arrays of pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsymvBatched_64(hipblasHandle_t         handle,
                                                      hipblasFillMode_t       uplo,
                                                      int64_t                 n,
                                                      const hipComplex*       alpha,
                                                      const hipComplex* const AP[],
                                                      int64_t                 lda,
                                                      const hipComplex* const x[],
                                                      int64_t                 incx,
                                                      const hipComplex*       beta,
                                                      hipComplex* const       y[],
                                                      int64_t                 incy,
                                                      int64_t                 batchCount);

// hipDoubleComplex variant of the 64-bit symvBatched interface: n, lda, incx, incy, and
// batchCount are int64_t; AP, x, and y are passed as arrays of pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsymvBatched_64(hipblasHandle_t               handle,
                                                      hipblasFillMode_t             uplo,
                                                      int64_t                       n,
                                                      const hipDoubleComplex*       alpha,
                                                      const hipDoubleComplex* const AP[],
                                                      int64_t                       lda,
                                                      const hipDoubleComplex* const x[],
                                                      int64_t                       incx,
                                                      const hipDoubleComplex*       beta,
                                                      hipDoubleComplex* const       y[],
                                                      int64_t                       incy,
                                                      int64_t                       batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The symvStridedBatched functions perform the matrix-vector operation:

        y_i := alpha*A_i*x_i + beta*y_i,

    where ``(A_i, x_i, y_i)`` is the ``i``-th instance of the batch,
    ``alpha`` and ``beta`` are scalars, ``x_i`` and ``y_i`` are vectors, and ``A_i`` is an
    ``n`` by ``n`` symmetric matrix, for ``i`` = 1, ..., ``batchCount``.
    Each ``A_i`` should contain an upper or lower triangular symmetric matrix.
    The opposing triangular part of each ``A_i`` is not referenced.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - If HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced.
              - If HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced.
    @param[in]
    n         [int]
              number of rows and columns of each matrix A_i.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    AP         Device pointer to the first matrix A_1 on the GPU.
    @param[in]
    lda       [int]
              specifies the leading dimension of each matrix A_i.
    @param[in]
    strideA     [hipblasStride]
                stride from the start of one matrix (A_i) to the next one (A_i+1).
    @param[in]
    x         Device pointer to the first vector x_1 on the GPU.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each vector x_i.
    @param[in]
    stridex     [hipblasStride]
                stride from the start of one vector (x_i) to the next one (x_i+1).
                There are no restrictions placed on stridex. However, the user should
                take care to ensure that stridex is of an appropriate size.
                This typically means stridex >= n * incx. stridex should be non zero.
    @param[in]
    beta      device pointer or host pointer to scalar beta.
    @param[inout]
    y         Device pointer to the first vector y_1 on the GPU.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each vector y_i.
    @param[in]
    stridey     [hipblasStride]
                stride from the start of one vector (y_i) to the next one (y_i+1).
                There are no restrictions placed on stridey. However, the user should
                take care to ensure that stridey is of an appropriate size.
                This typically means stridey >= n * incy. stridey should be non zero.
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// float variant of symvStridedBatched: AP, x, and y are single pointers, each paired with
// a hipblasStride (strideA, stridex, stridey); n, lda, incx, incy, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsymvStridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          const float*      alpha,
                                                          const float*      AP,
                                                          int               lda,
                                                          hipblasStride     strideA,
                                                          const float*      x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const float*      beta,
                                                          float*            y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          int               batchCount);

// double variant of symvStridedBatched: AP, x, and y are single pointers, each paired with
// a hipblasStride (strideA, stridex, stridey); n, lda, incx, incy, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsymvStridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          const double*     alpha,
                                                          const double*     AP,
                                                          int               lda,
                                                          hipblasStride     strideA,
                                                          const double*     x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const double*     beta,
                                                          double*           y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          int               batchCount);

// hipComplex variant of symvStridedBatched: AP, x, and y are single pointers, each paired
// with a hipblasStride (strideA, stridex, stridey); n, lda, incx, incy, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsymvStridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          const hipComplex* alpha,
                                                          const hipComplex* AP,
                                                          int               lda,
                                                          hipblasStride     strideA,
                                                          const hipComplex* x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const hipComplex* beta,
                                                          hipComplex*       y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          int               batchCount);

// hipDoubleComplex variant of symvStridedBatched: AP, x, and y are single pointers, each
// paired with a hipblasStride (strideA, stridex, stridey); n, lda, incx, incy, and
// batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsymvStridedBatched(hipblasHandle_t         handle,
                                                          hipblasFillMode_t       uplo,
                                                          int                     n,
                                                          const hipDoubleComplex* alpha,
                                                          const hipDoubleComplex* AP,
                                                          int                     lda,
                                                          hipblasStride           strideA,
                                                          const hipDoubleComplex* x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          const hipDoubleComplex* beta,
                                                          hipDoubleComplex*       y,
                                                          int                     incy,
                                                          hipblasStride           stridey,
                                                          int                     batchCount);

// 64-bit interface: n, lda, incx, incy, and batchCount are int64_t; the
// per-batch strides (strideA, stridex, stridey) stay hipblasStride.
// symvStridedBatched_64, single-precision real variant (float operands).
// AP and x are const inputs; y is the only non-const buffer in the signature.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsymvStridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             const float*      alpha,
                                                             const float*      AP,
                                                             int64_t           lda,
                                                             hipblasStride     strideA,
                                                             const float*      x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const float*      beta,
                                                             float*            y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             int64_t           batchCount);

// symvStridedBatched_64, double-precision real variant (double operands).
// 64-bit interface: n, lda, incx, incy, and batchCount are int64_t; the
// per-batch strides are hipblasStride.
// AP and x are const inputs; y is the only non-const buffer in the signature.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsymvStridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             const double*     alpha,
                                                             const double*     AP,
                                                             int64_t           lda,
                                                             hipblasStride     strideA,
                                                             const double*     x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const double*     beta,
                                                             double*           y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             int64_t           batchCount);

// symvStridedBatched_64, single-precision complex variant (hipComplex operands).
// 64-bit interface: n, lda, incx, incy, and batchCount are int64_t; the
// per-batch strides are hipblasStride.
// AP and x are const inputs; y is the only non-const buffer in the signature.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsymvStridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             const hipComplex* alpha,
                                                             const hipComplex* AP,
                                                             int64_t           lda,
                                                             hipblasStride     strideA,
                                                             const hipComplex* x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const hipComplex* beta,
                                                             hipComplex*       y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             int64_t           batchCount);

// symvStridedBatched_64, double-precision complex variant (hipDoubleComplex operands).
// 64-bit interface: same parameter order as hipblasZsymvStridedBatched, with
// n, lda, incx, incy, and batchCount widened from int to int64_t; the
// per-batch strides are hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsymvStridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasFillMode_t       uplo,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* alpha,
                                                             const hipDoubleComplex* AP,
                                                             int64_t                 lda,
                                                             hipblasStride           strideA,
                                                             const hipDoubleComplex* x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             const hipDoubleComplex* beta,
                                                             hipDoubleComplex*       y,
                                                             int64_t                 incy,
                                                             hipblasStride           stridey,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The syr functions perform the matrix-vector operations:

        A := A + alpha*x*x**T

    where ``alpha`` is a scalar, ``x`` is a vector, and ``A`` is an
    ``n`` by ``n`` symmetric matrix.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - If HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced.
              - If HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced.

    @param[in]
    n         [int]
              the number of rows and columns of matrix A.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[inout]
    AP         device pointer storing matrix A.
    @param[in]
    lda       [int]
              specifies the leading dimension of A.

    ********************************************************************/

// syr, single-precision real variant (float): A := A + alpha*x*x**T.
// AP is the n-by-n symmetric matrix A, updated in place; only the triangle
// selected by uplo is referenced. x is a const input vector with increment incx.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsyr(hipblasHandle_t   handle,
                                           hipblasFillMode_t uplo,
                                           int               n,
                                           const float*      alpha,
                                           const float*      x,
                                           int               incx,
                                           float*            AP,
                                           int               lda);

// syr, double-precision real variant (double): A := A + alpha*x*x**T.
// AP is the n-by-n symmetric matrix A, updated in place; only the triangle
// selected by uplo is referenced. x is a const input vector with increment incx.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsyr(hipblasHandle_t   handle,
                                           hipblasFillMode_t uplo,
                                           int               n,
                                           const double*     alpha,
                                           const double*     x,
                                           int               incx,
                                           double*           AP,
                                           int               lda);

// syr, single-precision complex variant (hipComplex): A := A + alpha*x*x**T.
// AP is the n-by-n symmetric matrix A, updated in place; only the triangle
// selected by uplo is referenced. x is a const input vector with increment incx.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsyr(hipblasHandle_t   handle,
                                           hipblasFillMode_t uplo,
                                           int               n,
                                           const hipComplex* alpha,
                                           const hipComplex* x,
                                           int               incx,
                                           hipComplex*       AP,
                                           int               lda);

// syr, double-precision complex variant (hipDoubleComplex): A := A + alpha*x*x**T.
// AP is the n-by-n symmetric matrix A, updated in place; only the triangle
// selected by uplo is referenced. x is a const input vector with increment incx.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsyr(hipblasHandle_t         handle,
                                           hipblasFillMode_t       uplo,
                                           int                     n,
                                           const hipDoubleComplex* alpha,
                                           const hipDoubleComplex* x,
                                           int                     incx,
                                           hipDoubleComplex*       AP,
                                           int                     lda);

// 64-bit interface: same parameter order as the 32-bit syr declarations, with
// n, incx, and lda widened from int to int64_t.
// syr_64, single-precision real variant (float): A := A + alpha*x*x**T.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsyr_64(hipblasHandle_t   handle,
                                              hipblasFillMode_t uplo,
                                              int64_t           n,
                                              const float*      alpha,
                                              const float*      x,
                                              int64_t           incx,
                                              float*            AP,
                                              int64_t           lda);

// syr_64, double-precision real variant (double): A := A + alpha*x*x**T.
// 64-bit interface: n, incx, and lda are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsyr_64(hipblasHandle_t   handle,
                                              hipblasFillMode_t uplo,
                                              int64_t           n,
                                              const double*     alpha,
                                              const double*     x,
                                              int64_t           incx,
                                              double*           AP,
                                              int64_t           lda);

// syr_64, single-precision complex variant (hipComplex): A := A + alpha*x*x**T.
// 64-bit interface: n, incx, and lda are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsyr_64(hipblasHandle_t   handle,
                                              hipblasFillMode_t uplo,
                                              int64_t           n,
                                              const hipComplex* alpha,
                                              const hipComplex* x,
                                              int64_t           incx,
                                              hipComplex*       AP,
                                              int64_t           lda);

// syr_64, double-precision complex variant (hipDoubleComplex): A := A + alpha*x*x**T.
// 64-bit interface: n, incx, and lda are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsyr_64(hipblasHandle_t         handle,
                                              hipblasFillMode_t       uplo,
                                              int64_t                 n,
                                              const hipDoubleComplex* alpha,
                                              const hipDoubleComplex* x,
                                              int64_t                 incx,
                                              hipDoubleComplex*       AP,
                                              int64_t                 lda);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The syrBatched functions perform a batch of matrix-vector operations:

        A[i] := A[i] + alpha*x[i]*x[i]**T

    where ``alpha`` is a scalar, ``x`` is an array of vectors, and ``A`` is an array of
    ``n`` by ``n`` symmetric matrices, for ``i`` = 1 , ... , ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - If HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced.
              - If HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced.
    @param[in]
    n         [int]
              the number of rows and columns of matrix A.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[inout]
    AP         device array of device pointers storing each matrix A_i.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i.
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// syrBatched, single-precision real variant (float):
// A[i] := A[i] + alpha*x[i]*x[i]**T for each of the batchCount instances.
// x and AP are device arrays of device pointers, one entry per instance;
// a single alpha, incx, and lda apply to every instance.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrBatched(hipblasHandle_t    handle,
                                                  hipblasFillMode_t  uplo,
                                                  int                n,
                                                  const float*       alpha,
                                                  const float* const x[],
                                                  int                incx,
                                                  float* const       AP[],
                                                  int                lda,
                                                  int                batchCount);

// syrBatched, double-precision real variant (double):
// A[i] := A[i] + alpha*x[i]*x[i]**T for each of the batchCount instances.
// x and AP are device arrays of device pointers, one entry per instance;
// a single alpha, incx, and lda apply to every instance.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsyrBatched(hipblasHandle_t     handle,
                                                  hipblasFillMode_t   uplo,
                                                  int                 n,
                                                  const double*       alpha,
                                                  const double* const x[],
                                                  int                 incx,
                                                  double* const       AP[],
                                                  int                 lda,
                                                  int                 batchCount);

// syrBatched, single-precision complex variant (hipComplex):
// A[i] := A[i] + alpha*x[i]*x[i]**T for each of the batchCount instances.
// x and AP are device arrays of device pointers, one entry per instance;
// a single alpha, incx, and lda apply to every instance.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsyrBatched(hipblasHandle_t         handle,
                                                  hipblasFillMode_t       uplo,
                                                  int                     n,
                                                  const hipComplex*       alpha,
                                                  const hipComplex* const x[],
                                                  int                     incx,
                                                  hipComplex* const       AP[],
                                                  int                     lda,
                                                  int                     batchCount);

// syrBatched, double-precision complex variant (hipDoubleComplex):
// A[i] := A[i] + alpha*x[i]*x[i]**T for each of the batchCount instances.
// x and AP are device arrays of device pointers, one entry per instance;
// a single alpha, incx, and lda apply to every instance.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsyrBatched(hipblasHandle_t               handle,
                                                  hipblasFillMode_t             uplo,
                                                  int                           n,
                                                  const hipDoubleComplex*       alpha,
                                                  const hipDoubleComplex* const x[],
                                                  int                           incx,
                                                  hipDoubleComplex* const       AP[],
                                                  int                           lda,
                                                  int                           batchCount);

// 64-bit interface: same parameter order as the 32-bit syrBatched declarations,
// with n, incx, lda, and batchCount widened from int to int64_t.
// syrBatched_64, single-precision real variant (float):
// A[i] := A[i] + alpha*x[i]*x[i]**T for each of the batchCount instances.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrBatched_64(hipblasHandle_t    handle,
                                                     hipblasFillMode_t  uplo,
                                                     int64_t            n,
                                                     const float*       alpha,
                                                     const float* const x[],
                                                     int64_t            incx,
                                                     float* const       AP[],
                                                     int64_t            lda,
                                                     int64_t            batchCount);

// syrBatched_64, double-precision real variant (double):
// A[i] := A[i] + alpha*x[i]*x[i]**T for each of the batchCount instances.
// 64-bit interface: n, incx, lda, and batchCount are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsyrBatched_64(hipblasHandle_t     handle,
                                                     hipblasFillMode_t   uplo,
                                                     int64_t             n,
                                                     const double*       alpha,
                                                     const double* const x[],
                                                     int64_t             incx,
                                                     double* const       AP[],
                                                     int64_t             lda,
                                                     int64_t             batchCount);

// syrBatched_64, single-precision complex variant (hipComplex):
// A[i] := A[i] + alpha*x[i]*x[i]**T for each of the batchCount instances.
// 64-bit interface: n, incx, lda, and batchCount are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsyrBatched_64(hipblasHandle_t         handle,
                                                     hipblasFillMode_t       uplo,
                                                     int64_t                 n,
                                                     const hipComplex*       alpha,
                                                     const hipComplex* const x[],
                                                     int64_t                 incx,
                                                     hipComplex* const       AP[],
                                                     int64_t                 lda,
                                                     int64_t                 batchCount);

// syrBatched_64, double-precision complex variant (hipDoubleComplex):
// A[i] := A[i] + alpha*x[i]*x[i]**T for each of the batchCount instances.
// 64-bit interface: n, incx, lda, and batchCount are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsyrBatched_64(hipblasHandle_t               handle,
                                                     hipblasFillMode_t             uplo,
                                                     int64_t                       n,
                                                     const hipDoubleComplex*       alpha,
                                                     const hipDoubleComplex* const x[],
                                                     int64_t                       incx,
                                                     hipDoubleComplex* const       AP[],
                                                     int64_t                       lda,
                                                     int64_t                       batchCount);
//! @}

/*! @{
    \brief  <b> BLAS Level 2 API </b>

    \details
    The syrStridedBatched functions perform a batch of matrix-vector operations:

        A[i] := A[i] + alpha*x[i]*x[i]**T

    where ``alpha`` is a scalar, ``x`` is a pointer to an array of vectors, and ``A`` is an array of
    ``n`` by ``n`` symmetric matrices, for ``i`` = 1 , ... , ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - If HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced.
              - If HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced.
    @param[in]
    n         [int]
              the number of rows and columns of each matrix A.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer to the first vector x_1.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    stridex   [hipblasStride]
              specifies the pointer increment between vectors (x_i) and (x_i+1).
    @param[inout]
    AP         device pointer to the first matrix A_1.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i.
    @param[in]
    strideA   [hipblasStride]
              stride from the start of one matrix (A_i) to the next one (A_i+1).
    @param[in]
    batchCount [int]
              number of instances in the batch.

    ********************************************************************/

// syrStridedBatched, single-precision real variant (float):
// A[i] := A[i] + alpha*x[i]*x[i]**T for each of the batchCount instances.
// x and AP point to the first vector and first matrix of the batch; consecutive
// instances are separated by stridex and strideA respectively.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrStridedBatched(hipblasHandle_t   handle,
                                                         hipblasFillMode_t uplo,
                                                         int               n,
                                                         const float*      alpha,
                                                         const float*      x,
                                                         int               incx,
                                                         hipblasStride     stridex,
                                                         float*            AP,
                                                         int               lda,
                                                         hipblasStride     strideA,
                                                         int               batchCount);

// syrStridedBatched, double-precision real variant (double):
// A[i] := A[i] + alpha*x[i]*x[i]**T for each of the batchCount instances.
// x and AP point to the first vector and first matrix of the batch; consecutive
// instances are separated by stridex and strideA respectively.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsyrStridedBatched(hipblasHandle_t   handle,
                                                         hipblasFillMode_t uplo,
                                                         int               n,
                                                         const double*     alpha,
                                                         const double*     x,
                                                         int               incx,
                                                         hipblasStride     stridex,
                                                         double*           AP,
                                                         int               lda,
                                                         hipblasStride     strideA,
                                                         int               batchCount);

// syrStridedBatched, single-precision complex variant (hipComplex):
// A[i] := A[i] + alpha*x[i]*x[i]**T for each of the batchCount instances.
// x and AP point to the first vector and first matrix of the batch; consecutive
// instances are separated by stridex and strideA respectively.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsyrStridedBatched(hipblasHandle_t   handle,
                                                         hipblasFillMode_t uplo,
                                                         int               n,
                                                         const hipComplex* alpha,
                                                         const hipComplex* x,
                                                         int               incx,
                                                         hipblasStride     stridex,
                                                         hipComplex*       AP,
                                                         int               lda,
                                                         hipblasStride     strideA,
                                                         int               batchCount);

// syrStridedBatched, double-precision complex variant (hipDoubleComplex):
// A[i] := A[i] + alpha*x[i]*x[i]**T for each of the batchCount instances.
// x and AP point to the first vector and first matrix of the batch; consecutive
// instances are separated by stridex and strideA respectively.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsyrStridedBatched(hipblasHandle_t         handle,
                                                         hipblasFillMode_t       uplo,
                                                         int                     n,
                                                         const hipDoubleComplex* alpha,
                                                         const hipDoubleComplex* x,
                                                         int                     incx,
                                                         hipblasStride           stridex,
                                                         hipDoubleComplex*       AP,
                                                         int                     lda,
                                                         hipblasStride           strideA,
                                                         int                     batchCount);

// 64-bit interface: same parameter order as the 32-bit syrStridedBatched
// declarations, with n, incx, lda, and batchCount widened from int to int64_t;
// stridex and strideA stay hipblasStride.
// syrStridedBatched_64, single-precision real variant (float):
// A[i] := A[i] + alpha*x[i]*x[i]**T for each of the batchCount instances.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrStridedBatched_64(hipblasHandle_t   handle,
                                                            hipblasFillMode_t uplo,
                                                            int64_t           n,
                                                            const float*      alpha,
                                                            const float*      x,
                                                            int64_t           incx,
                                                            hipblasStride     stridex,
                                                            float*            AP,
                                                            int64_t           lda,
                                                            hipblasStride     strideA,
                                                            int64_t           batchCount);

// syrStridedBatched_64, double-precision real variant (double):
// A[i] := A[i] + alpha*x[i]*x[i]**T for each of the batchCount instances.
// 64-bit interface: n, incx, lda, and batchCount are int64_t; stridex and
// strideA are hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsyrStridedBatched_64(hipblasHandle_t   handle,
                                                            hipblasFillMode_t uplo,
                                                            int64_t           n,
                                                            const double*     alpha,
                                                            const double*     x,
                                                            int64_t           incx,
                                                            hipblasStride     stridex,
                                                            double*           AP,
                                                            int64_t           lda,
                                                            hipblasStride     strideA,
                                                            int64_t           batchCount);

// syrStridedBatched_64, single-precision complex variant (hipComplex):
// A[i] := A[i] + alpha*x[i]*x[i]**T for each of the batchCount instances.
// 64-bit interface: n, incx, lda, and batchCount are int64_t; stridex and
// strideA are hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsyrStridedBatched_64(hipblasHandle_t   handle,
                                                            hipblasFillMode_t uplo,
                                                            int64_t           n,
                                                            const hipComplex* alpha,
                                                            const hipComplex* x,
                                                            int64_t           incx,
                                                            hipblasStride     stridex,
                                                            hipComplex*       AP,
                                                            int64_t           lda,
                                                            hipblasStride     strideA,
                                                            int64_t           batchCount);

// syrStridedBatched_64, double-precision complex variant (hipDoubleComplex):
// A[i] := A[i] + alpha*x[i]*x[i]**T for each of the batchCount instances.
// 64-bit interface: n, incx, lda, and batchCount are int64_t; stridex and
// strideA are hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsyrStridedBatched_64(hipblasHandle_t         handle,
                                                            hipblasFillMode_t       uplo,
                                                            int64_t                 n,
                                                            const hipDoubleComplex* alpha,
                                                            const hipDoubleComplex* x,
                                                            int64_t                 incx,
                                                            hipblasStride           stridex,
                                                            hipDoubleComplex*       AP,
                                                            int64_t                 lda,
                                                            hipblasStride           strideA,
                                                            int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The syr2 functions perform the matrix-vector operations:

        A := A + alpha*x*y**T + alpha*y*x**T

    where ``alpha`` is a scalar, ``x`` and ``y`` are vectors, and ``A`` is an
    ``n`` by ``n`` symmetric matrix.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - If HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced.
              - If HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced.

    @param[in]
    n         [int]
              the number of rows and columns of matrix A.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.
    @param[in]
    y         device pointer storing vector y.
    @param[in]
    incy      [int]
              specifies the increment for the elements of y.
    @param[inout]
    AP         device pointer storing matrix A.
    @param[in]
    lda       [int]
              specifies the leading dimension of A.

    ********************************************************************/

// syr2, single-precision real variant (float):
// A := A + alpha*x*y**T + alpha*y*x**T.
// AP is the n-by-n symmetric matrix A, updated in place; only the triangle
// selected by uplo is referenced. x and y are const input vectors.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsyr2(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            const float*      alpha,
                                            const float*      x,
                                            int               incx,
                                            const float*      y,
                                            int               incy,
                                            float*            AP,
                                            int               lda);

// syr2, double-precision real variant (double):
// A := A + alpha*x*y**T + alpha*y*x**T.
// AP is the n-by-n symmetric matrix A, updated in place; only the triangle
// selected by uplo is referenced. x and y are const input vectors.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsyr2(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            const double*     alpha,
                                            const double*     x,
                                            int               incx,
                                            const double*     y,
                                            int               incy,
                                            double*           AP,
                                            int               lda);

// syr2, single-precision complex variant (hipComplex):
// A := A + alpha*x*y**T + alpha*y*x**T.
// AP is the n-by-n symmetric matrix A, updated in place; only the triangle
// selected by uplo is referenced. x and y are const input vectors.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsyr2(hipblasHandle_t   handle,
                                            hipblasFillMode_t uplo,
                                            int               n,
                                            const hipComplex* alpha,
                                            const hipComplex* x,
                                            int               incx,
                                            const hipComplex* y,
                                            int               incy,
                                            hipComplex*       AP,
                                            int               lda);

// syr2, double-precision complex variant (hipDoubleComplex):
// A := A + alpha*x*y**T + alpha*y*x**T.
// AP is the n-by-n symmetric matrix A, updated in place; only the triangle
// selected by uplo is referenced. x and y are const input vectors.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsyr2(hipblasHandle_t         handle,
                                            hipblasFillMode_t       uplo,
                                            int                     n,
                                            const hipDoubleComplex* alpha,
                                            const hipDoubleComplex* x,
                                            int                     incx,
                                            const hipDoubleComplex* y,
                                            int                     incy,
                                            hipDoubleComplex*       AP,
                                            int                     lda);

// 64-bit interface
// syr2 for float data, 64-bit interface: n, incx, incy, and lda are passed
// as int64_t. AP is the only non-const pointer.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsyr2_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               const float*      alpha,
                                               const float*      x,
                                               int64_t           incx,
                                               const float*      y,
                                               int64_t           incy,
                                               float*            AP,
                                               int64_t           lda);

// syr2 for double data, 64-bit interface: n, incx, incy, and lda are passed
// as int64_t. AP is the only non-const pointer.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsyr2_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               const double*     alpha,
                                               const double*     x,
                                               int64_t           incx,
                                               const double*     y,
                                               int64_t           incy,
                                               double*           AP,
                                               int64_t           lda);

// syr2 for hipComplex data, 64-bit interface: n, incx, incy, and lda are
// passed as int64_t. AP is the only non-const pointer.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsyr2_64(hipblasHandle_t   handle,
                                               hipblasFillMode_t uplo,
                                               int64_t           n,
                                               const hipComplex* alpha,
                                               const hipComplex* x,
                                               int64_t           incx,
                                               const hipComplex* y,
                                               int64_t           incy,
                                               hipComplex*       AP,
                                               int64_t           lda);

// syr2 for hipDoubleComplex data, 64-bit interface: n, incx, incy, and lda
// are passed as int64_t. AP is the only non-const pointer.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsyr2_64(hipblasHandle_t         handle,
                                               hipblasFillMode_t       uplo,
                                               int64_t                 n,
                                               const hipDoubleComplex* alpha,
                                               const hipDoubleComplex* x,
                                               int64_t                 incx,
                                               const hipDoubleComplex* y,
                                               int64_t                 incy,
                                               hipDoubleComplex*       AP,
                                               int64_t                 lda);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The syr2Batched functions perform a batch of matrix-vector operations:

        A[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T

    where ``alpha`` is a scalar, ``x[i]`` and ``y[i]`` are vectors, and ``A[i]`` is an
    ``n`` by ``n`` symmetric matrix, for ``i`` = 1 , ... , ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - If HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced.
              - If HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced.
    @param[in]
    n         [int]
              the number of rows and columns of matrix A.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    y         device array of device pointers storing each vector y_i.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[inout]
    AP         device array of device pointers storing each matrix A_i.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i.
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// syr2Batched for float data: x, y, and AP are arrays holding one pointer per
// batch instance. n, incx, incy, lda, and batchCount are passed as int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsyr2Batched(hipblasHandle_t    handle,
                                                   hipblasFillMode_t  uplo,
                                                   int                n,
                                                   const float*       alpha,
                                                   const float* const x[],
                                                   int                incx,
                                                   const float* const y[],
                                                   int                incy,
                                                   float* const       AP[],
                                                   int                lda,
                                                   int                batchCount);

// syr2Batched for double data: x, y, and AP are arrays holding one pointer per
// batch instance. n, incx, incy, lda, and batchCount are passed as int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsyr2Batched(hipblasHandle_t     handle,
                                                   hipblasFillMode_t   uplo,
                                                   int                 n,
                                                   const double*       alpha,
                                                   const double* const x[],
                                                   int                 incx,
                                                   const double* const y[],
                                                   int                 incy,
                                                   double* const       AP[],
                                                   int                 lda,
                                                   int                 batchCount);

// syr2Batched for hipComplex data: x, y, and AP are arrays holding one pointer
// per batch instance. n, incx, incy, lda, and batchCount are passed as int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsyr2Batched(hipblasHandle_t         handle,
                                                   hipblasFillMode_t       uplo,
                                                   int                     n,
                                                   const hipComplex*       alpha,
                                                   const hipComplex* const x[],
                                                   int                     incx,
                                                   const hipComplex* const y[],
                                                   int                     incy,
                                                   hipComplex* const       AP[],
                                                   int                     lda,
                                                   int                     batchCount);

// syr2Batched for hipDoubleComplex data: x, y, and AP are arrays holding one
// pointer per batch instance. n, incx, incy, lda, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsyr2Batched(hipblasHandle_t               handle,
                                                   hipblasFillMode_t             uplo,
                                                   int                           n,
                                                   const hipDoubleComplex*       alpha,
                                                   const hipDoubleComplex* const x[],
                                                   int                           incx,
                                                   const hipDoubleComplex* const y[],
                                                   int                           incy,
                                                   hipDoubleComplex* const       AP[],
                                                   int                           lda,
                                                   int                           batchCount);

// 64-bit interface
// syr2Batched for float data, 64-bit interface: n, incx, incy, lda, and
// batchCount are int64_t. x, y, and AP hold one pointer per batch instance.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsyr2Batched_64(hipblasHandle_t    handle,
                                                      hipblasFillMode_t  uplo,
                                                      int64_t            n,
                                                      const float*       alpha,
                                                      const float* const x[],
                                                      int64_t            incx,
                                                      const float* const y[],
                                                      int64_t            incy,
                                                      float* const       AP[],
                                                      int64_t            lda,
                                                      int64_t            batchCount);

// syr2Batched for double data, 64-bit interface: n, incx, incy, lda, and
// batchCount are int64_t. x, y, and AP hold one pointer per batch instance.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsyr2Batched_64(hipblasHandle_t     handle,
                                                      hipblasFillMode_t   uplo,
                                                      int64_t             n,
                                                      const double*       alpha,
                                                      const double* const x[],
                                                      int64_t             incx,
                                                      const double* const y[],
                                                      int64_t             incy,
                                                      double* const       AP[],
                                                      int64_t             lda,
                                                      int64_t             batchCount);

// syr2Batched for hipComplex data, 64-bit interface: n, incx, incy, lda, and
// batchCount are int64_t. x, y, and AP hold one pointer per batch instance.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsyr2Batched_64(hipblasHandle_t         handle,
                                                      hipblasFillMode_t       uplo,
                                                      int64_t                 n,
                                                      const hipComplex*       alpha,
                                                      const hipComplex* const x[],
                                                      int64_t                 incx,
                                                      const hipComplex* const y[],
                                                      int64_t                 incy,
                                                      hipComplex* const       AP[],
                                                      int64_t                 lda,
                                                      int64_t                 batchCount);

// syr2Batched for hipDoubleComplex data, 64-bit interface: n, incx, incy, lda,
// and batchCount are int64_t. x, y, and AP hold one pointer per batch instance.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsyr2Batched_64(hipblasHandle_t               handle,
                                                      hipblasFillMode_t             uplo,
                                                      int64_t                       n,
                                                      const hipDoubleComplex*       alpha,
                                                      const hipDoubleComplex* const x[],
                                                      int64_t                       incx,
                                                      const hipDoubleComplex* const y[],
                                                      int64_t                       incy,
                                                      hipDoubleComplex* const       AP[],
                                                      int64_t                       lda,
                                                      int64_t                       batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The syr2StridedBatched functions perform a batch of matrix-vector operations:

        A[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T

    where ``alpha`` is a scalar, ``x[i]`` and ``y[i]`` are vectors, and ``A[i]`` is an
    ``n`` by ``n`` symmetric matrix, for ``i`` = 1 , ... , ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              specifies either upper (HIPBLAS_FILL_MODE_UPPER) or lower (HIPBLAS_FILL_MODE_LOWER):
              - If HIPBLAS_FILL_MODE_UPPER, the lower part of A is not referenced.
              - If HIPBLAS_FILL_MODE_LOWER, the upper part of A is not referenced.
    @param[in]
    n         [int]
              the number of rows and columns of each matrix A.
    @param[in]
    alpha
              device pointer or host pointer to scalar alpha.
    @param[in]
    x         device pointer to the first vector x_1.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    stridex   [hipblasStride]
              specifies the pointer increment between vectors (x_i) and (x_i+1).
    @param[in]
    y         device pointer to the first vector y_1.
    @param[in]
    incy      [int]
              specifies the increment for the elements of each y_i.
    @param[in]
    stridey   [hipblasStride]
              specifies the pointer increment between vectors (y_i) and (y_i+1).
    @param[inout]
    AP         device pointer to the first matrix A_1.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i.
    @param[in]
    strideA   [hipblasStride]
              stride from the start of one matrix (A_i) to the next one (A_i+1).
    @param[in]
    batchCount [int]
              number of instances in the batch.

    ********************************************************************/

// syr2StridedBatched for float data: x, y, and AP point to the first instance;
// stridex, stridey, and strideA (hipblasStride) give the step to the next one.
// n, incx, incy, lda, and batchCount are passed as int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsyr2StridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          const float*      alpha,
                                                          const float*      x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const float*      y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          float*            AP,
                                                          int               lda,
                                                          hipblasStride     strideA,
                                                          int               batchCount);

// syr2StridedBatched for double data: x, y, and AP point to the first instance;
// stridex, stridey, and strideA (hipblasStride) give the step to the next one.
// n, incx, incy, lda, and batchCount are passed as int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsyr2StridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          const double*     alpha,
                                                          const double*     x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const double*     y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          double*           AP,
                                                          int               lda,
                                                          hipblasStride     strideA,
                                                          int               batchCount);

// syr2StridedBatched for hipComplex data: x, y, and AP point to the first
// instance; stridex, stridey, and strideA (hipblasStride) give the step to the
// next one. n, incx, incy, lda, and batchCount are passed as int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsyr2StridedBatched(hipblasHandle_t   handle,
                                                          hipblasFillMode_t uplo,
                                                          int               n,
                                                          const hipComplex* alpha,
                                                          const hipComplex* x,
                                                          int               incx,
                                                          hipblasStride     stridex,
                                                          const hipComplex* y,
                                                          int               incy,
                                                          hipblasStride     stridey,
                                                          hipComplex*       AP,
                                                          int               lda,
                                                          hipblasStride     strideA,
                                                          int               batchCount);

// syr2StridedBatched for hipDoubleComplex data: x, y, and AP point to the first
// instance; stridex, stridey, and strideA (hipblasStride) give the step to the
// next one. n, incx, incy, lda, and batchCount are passed as int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsyr2StridedBatched(hipblasHandle_t         handle,
                                                          hipblasFillMode_t       uplo,
                                                          int                     n,
                                                          const hipDoubleComplex* alpha,
                                                          const hipDoubleComplex* x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          const hipDoubleComplex* y,
                                                          int                     incy,
                                                          hipblasStride           stridey,
                                                          hipDoubleComplex*       AP,
                                                          int                     lda,
                                                          hipblasStride           strideA,
                                                          int                     batchCount);

// 64-bit interface
// syr2StridedBatched for float data, 64-bit interface: n, incx, incy, lda, and
// batchCount are int64_t; stridex, stridey, and strideA remain hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsyr2StridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             const float*      alpha,
                                                             const float*      x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const float*      y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             float*            AP,
                                                             int64_t           lda,
                                                             hipblasStride     strideA,
                                                             int64_t           batchCount);

// syr2StridedBatched for double data, 64-bit interface: n, incx, incy, lda, and
// batchCount are int64_t; stridex, stridey, and strideA remain hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsyr2StridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             const double*     alpha,
                                                             const double*     x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const double*     y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             double*           AP,
                                                             int64_t           lda,
                                                             hipblasStride     strideA,
                                                             int64_t           batchCount);

// syr2StridedBatched for hipComplex data, 64-bit interface: n, incx, incy, lda,
// and batchCount are int64_t; stridex, stridey, and strideA remain hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsyr2StridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           n,
                                                             const hipComplex* alpha,
                                                             const hipComplex* x,
                                                             int64_t           incx,
                                                             hipblasStride     stridex,
                                                             const hipComplex* y,
                                                             int64_t           incy,
                                                             hipblasStride     stridey,
                                                             hipComplex*       AP,
                                                             int64_t           lda,
                                                             hipblasStride     strideA,
                                                             int64_t           batchCount);

// syr2StridedBatched for hipDoubleComplex data, 64-bit interface: n, incx, incy,
// lda, and batchCount are int64_t; the three strides remain hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsyr2StridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasFillMode_t       uplo,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* alpha,
                                                             const hipDoubleComplex* x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             const hipDoubleComplex* y,
                                                             int64_t                 incy,
                                                             hipblasStride           stridey,
                                                             hipDoubleComplex*       AP,
                                                             int64_t                 lda,
                                                             hipblasStride           strideA,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The tbmv functions perform one of the matrix-vector operations:

        x := A*x      or
        x := A**T*x   or
        x := A**H*x,

    where ``x`` is a vector and ``A`` is a banded ``n`` by ``n`` matrix (see description below).

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              - HIPBLAS_FILL_MODE_UPPER: A is an upper banded triangular matrix.
              - HIPBLAS_FILL_MODE_LOWER: A is a lower banded triangular matrix.
    @param[in]
    transA     [hipblasOperation_t]
              indicates whether matrix A is transposed (conjugated) or not.
    @param[in]
    diag      [hipblasDiagType_t]
              - HIPBLAS_DIAG_UNIT: The main diagonal of A is assumed to consist of only
                                     1's and is not referenced.
              - HIPBLAS_DIAG_NON_UNIT: No assumptions are made of A's main diagonal.
    @param[in]
    n         [int]
              the number of rows and columns of the matrix represented by A.
    @param[in]
    k         [int]
              - if uplo == HIPBLAS_FILL_MODE_UPPER, k specifies the number of super-diagonals
                of the matrix A.
              - if uplo == HIPBLAS_FILL_MODE_LOWER, k specifies the number of sub-diagonals
                of the matrix A.
              - k must satisfy k > 0 && k < lda.
    @param[in]
    AP         device pointer storing banded triangular matrix A.
               - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The matrix represented is an upper banded triangular matrix
                with the main diagonal and k super-diagonals. Everything
                else can be assumed to be 0.  
                The matrix is compacted so that the main diagonal resides on the k'th
                row, the first super diagonal resides on the RHS of the k-1'th row, and so forth,
                with the k'th diagonal on the RHS of the 0'th row.  
                   Ex: (HIPBLAS_FILL_MODE_UPPER; n = 5; k = 2)  
                      1 6 9 0 0     ->       0 0 9 8 7  
                      0 2 7 8 0     ->       0 6 7 8 9  
                      0 0 3 8 7     ->       1 2 3 4 5  
                      0 0 0 4 9     ->       0 0 0 0 0  
                      0 0 0 0 5     ->       0 0 0 0 0  
               - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The matrix represented is a lower banded triangular matrix
                with the main diagonal and k sub-diagonals. Everything else can be
                assumed to be 0.  
                The matrix is compacted so that the main diagonal resides on the 0'th row,
                working up to the k'th diagonal residing on the LHS of the k'th row.  
                   Ex: (HIPBLAS_FILL_MODE_LOWER; n = 5; k = 2)  
                      1 0 0 0 0     ->      1 2 3 4 5  
                      6 2 0 0 0     ->      6 7 8 9 0  
                      9 7 3 0 0     ->      9 8 7 0 0  
                      0 8 8 4 0     ->      0 0 0 0 0  
                      0 0 7 9 5     ->      0 0 0 0 0  
    @param[in]
    lda       [int]
              specifies the leading dimension of A. lda must satisfy lda > k.
    @param[inout]
    x         device pointer storing vector x.
    @param[in]
    incx      [int]
              specifies the increment for the elements of x.

    ********************************************************************/

// tbmv for float data: x is replaced by A*x, A**T*x, or A**H*x, where AP holds
// the banded triangular matrix A (read-only). n, k, lda, and incx are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasStbmv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            int                k,
                                            const float*       AP,
                                            int                lda,
                                            float*             x,
                                            int                incx);

// tbmv for double data: x is replaced by A*x, A**T*x, or A**H*x, where AP holds
// the banded triangular matrix A (read-only). n, k, lda, and incx are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtbmv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            int                k,
                                            const double*      AP,
                                            int                lda,
                                            double*            x,
                                            int                incx);

// tbmv for hipComplex data: x is replaced by A*x, A**T*x, or A**H*x, where AP
// holds the banded triangular matrix A (read-only). n, k, lda, and incx are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtbmv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            int                k,
                                            const hipComplex*  AP,
                                            int                lda,
                                            hipComplex*        x,
                                            int                incx);

// tbmv for hipDoubleComplex data: x is replaced by A*x, A**T*x, or A**H*x, where
// AP holds the banded triangular matrix A (read-only). n, k, lda, incx are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtbmv(hipblasHandle_t         handle,
                                            hipblasFillMode_t       uplo,
                                            hipblasOperation_t      transA,
                                            hipblasDiagType_t       diag,
                                            int                     n,
                                            int                     k,
                                            const hipDoubleComplex* AP,
                                            int                     lda,
                                            hipDoubleComplex*       x,
                                            int                     incx);

// 64-bit interface
// tbmv for float data, 64-bit interface: n, k, lda, and incx are passed as
// int64_t. AP is read-only; x is the in/out vector.
HIPBLAS_EXPORT hipblasStatus_t hipblasStbmv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               int64_t            k,
                                               const float*       AP,
                                               int64_t            lda,
                                               float*             x,
                                               int64_t            incx);

// tbmv for double data, 64-bit interface: n, k, lda, and incx are passed as
// int64_t. AP is read-only; x is the in/out vector.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtbmv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               int64_t            k,
                                               const double*      AP,
                                               int64_t            lda,
                                               double*            x,
                                               int64_t            incx);

// tbmv for hipComplex data, 64-bit interface: n, k, lda, and incx are passed
// as int64_t. AP is read-only; x is the in/out vector.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtbmv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               int64_t            k,
                                               const hipComplex*  AP,
                                               int64_t            lda,
                                               hipComplex*        x,
                                               int64_t            incx);

// tbmv for hipDoubleComplex data, 64-bit interface: n, k, lda, and incx are
// passed as int64_t. AP is read-only; x is the in/out vector.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtbmv_64(hipblasHandle_t         handle,
                                               hipblasFillMode_t       uplo,
                                               hipblasOperation_t      transA,
                                               hipblasDiagType_t       diag,
                                               int64_t                 n,
                                               int64_t                 k,
                                               const hipDoubleComplex* AP,
                                               int64_t                 lda,
                                               hipDoubleComplex*       x,
                                               int64_t                 incx);

//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The tbmvBatched functions perform one of the matrix-vector operations:

        x_i := A_i*x_i      or
        x_i := A_i**T*x_i   or
        x_i := A_i**H*x_i,

    where ``(A_i, x_i)`` is the ``i``-th instance of the batch,
    ``x_i`` is a vector, and ``A_i`` is an ``n`` by ``n`` matrix, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              - HIPBLAS_FILL_MODE_UPPER: each A_i is an upper banded triangular matrix.
              - HIPBLAS_FILL_MODE_LOWER: each A_i is a  lower banded triangular matrix.
    @param[in]
    transA     [hipblasOperation_t]
              indicates whether each matrix A_i is transposed (conjugated) or not.
    @param[in]
    diag      [hipblasDiagType_t]
              - HIPBLAS_DIAG_UNIT: The main diagonal of each A_i is assumed to consist of only
                                     1's and is not referenced.
              - HIPBLAS_DIAG_NON_UNIT: No assumptions are made of each A_i's main diagonal.
    @param[in]
    n         [int]
              the number of rows and columns of the matrix represented by each A_i.
    @param[in]
    k         [int]
              - if uplo == HIPBLAS_FILL_MODE_UPPER, k specifies the number of super-diagonals
              of each matrix A_i.
              - if uplo == HIPBLAS_FILL_MODE_LOWER, k specifies the number of sub-diagonals
              of each matrix A_i.
              - k must satisfy k > 0 && k < lda.
    @param[in]
    AP         device array of device pointers storing each banded triangular matrix A_i.
               - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The matrix represented is an upper banded triangular matrix
                with the main diagonal and k super-diagonals. Everything
                else can be assumed to be 0.  
                The matrix is compacted so that the main diagonal resides on the k'th
                row, the first super diagonal resides on the RHS of the k-1'th row, and so forth,
                with the k'th diagonal on the RHS of the 0'th row.  
                   Ex: (HIPBLAS_FILL_MODE_UPPER; n = 5; k = 2)  
                      1 6 9 0 0        ->      0 0 9 8 7  
                      0 2 7 8 0        ->      0 6 7 8 9  
                      0 0 3 8 7        ->      1 2 3 4 5  
                      0 0 0 4 9        ->      0 0 0 0 0  
                      0 0 0 0 5        ->      0 0 0 0 0  
               - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The matrix represented is a lower banded triangular matrix
                with the main diagonal and k sub-diagonals. Everything else can be
                assumed to be 0.  
                The matrix is compacted so that the main diagonal resides on the 0'th row,
                working up to the k'th diagonal residing on the LHS of the k'th row.  
                   Ex: (HIPBLAS_FILL_MODE_LOWER; n = 5; k = 2)  
                      1 0 0 0 0        ->      1 2 3 4 5  
                      6 2 0 0 0        ->      6 7 8 9 0  
                      9 7 3 0 0        ->      9 8 7 0 0  
                      0 8 8 4 0        ->      0 0 0 0 0  
                      0 0 7 9 5        ->      0 0 0 0 0  
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i. lda must satisfy lda > k.
    @param[inout]
    x         device array of device pointers storing each vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// ``s`` precision (float) tbmvBatched, 32-bit interface (int sizes, increment and batchCount).
// AP and x are arrays of per-instance pointers: data behind AP is const, data behind x is writable.
HIPBLAS_EXPORT hipblasStatus_t hipblasStbmvBatched(hipblasHandle_t    handle,
                                                   hipblasFillMode_t  uplo,
                                                   hipblasOperation_t transA,
                                                   hipblasDiagType_t  diag,
                                                   int                n,
                                                   int                k,
                                                   const float* const AP[],
                                                   int                lda,
                                                   float* const       x[],
                                                   int                incx,
                                                   int                batchCount);

// ``d`` precision (double) tbmvBatched, 32-bit interface (int sizes, increment and batchCount).
// AP and x are arrays of per-instance pointers: data behind AP is const, data behind x is writable.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtbmvBatched(hipblasHandle_t     handle,
                                                   hipblasFillMode_t   uplo,
                                                   hipblasOperation_t  transA,
                                                   hipblasDiagType_t   diag,
                                                   int                 n,
                                                   int                 k,
                                                   const double* const AP[],
                                                   int                 lda,
                                                   double* const       x[],
                                                   int                 incx,
                                                   int                 batchCount);

// ``c`` precision (hipComplex) tbmvBatched, 32-bit interface (int sizes, increment and batchCount).
// AP and x are arrays of per-instance pointers: data behind AP is const, data behind x is writable.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtbmvBatched(hipblasHandle_t         handle,
                                                   hipblasFillMode_t       uplo,
                                                   hipblasOperation_t      transA,
                                                   hipblasDiagType_t       diag,
                                                   int                     n,
                                                   int                     k,
                                                   const hipComplex* const AP[],
                                                   int                     lda,
                                                   hipComplex* const       x[],
                                                   int                     incx,
                                                   int                     batchCount);

// ``z`` precision (hipDoubleComplex) tbmvBatched, 32-bit interface (int sizes, increment and batchCount).
// AP and x are arrays of per-instance pointers: data behind AP is const, data behind x is writable.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtbmvBatched(hipblasHandle_t               handle,
                                                   hipblasFillMode_t             uplo,
                                                   hipblasOperation_t            transA,
                                                   hipblasDiagType_t             diag,
                                                   int                           n,
                                                   int                           k,
                                                   const hipDoubleComplex* const AP[],
                                                   int                           lda,
                                                   hipDoubleComplex* const       x[],
                                                   int                           incx,
                                                   int                           batchCount);

// 64-bit interface
// ``s`` precision (float) tbmvBatched, 64-bit interface: n, k, lda, incx and batchCount are int64_t.
// AP and x are arrays of per-instance pointers: data behind AP is const, data behind x is writable.
HIPBLAS_EXPORT hipblasStatus_t hipblasStbmvBatched_64(hipblasHandle_t    handle,
                                                      hipblasFillMode_t  uplo,
                                                      hipblasOperation_t transA,
                                                      hipblasDiagType_t  diag,
                                                      int64_t            n,
                                                      int64_t            k,
                                                      const float* const AP[],
                                                      int64_t            lda,
                                                      float* const       x[],
                                                      int64_t            incx,
                                                      int64_t            batchCount);

// ``d`` precision (double) tbmvBatched, 64-bit interface: n, k, lda, incx and batchCount are int64_t.
// AP and x are arrays of per-instance pointers: data behind AP is const, data behind x is writable.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtbmvBatched_64(hipblasHandle_t     handle,
                                                      hipblasFillMode_t   uplo,
                                                      hipblasOperation_t  transA,
                                                      hipblasDiagType_t   diag,
                                                      int64_t             n,
                                                      int64_t             k,
                                                      const double* const AP[],
                                                      int64_t             lda,
                                                      double* const       x[],
                                                      int64_t             incx,
                                                      int64_t             batchCount);

// ``c`` precision (hipComplex) tbmvBatched, 64-bit interface: n, k, lda, incx and batchCount are int64_t.
// AP and x are arrays of per-instance pointers: data behind AP is const, data behind x is writable.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtbmvBatched_64(hipblasHandle_t         handle,
                                                      hipblasFillMode_t       uplo,
                                                      hipblasOperation_t      transA,
                                                      hipblasDiagType_t       diag,
                                                      int64_t                 n,
                                                      int64_t                 k,
                                                      const hipComplex* const AP[],
                                                      int64_t                 lda,
                                                      hipComplex* const       x[],
                                                      int64_t                 incx,
                                                      int64_t                 batchCount);

// ``z`` precision (hipDoubleComplex) tbmvBatched, 64-bit interface: n, k, lda, incx and batchCount are int64_t.
// AP and x are arrays of per-instance pointers: data behind AP is const, data behind x is writable.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtbmvBatched_64(hipblasHandle_t               handle,
                                                      hipblasFillMode_t             uplo,
                                                      hipblasOperation_t            transA,
                                                      hipblasDiagType_t             diag,
                                                      int64_t                       n,
                                                      int64_t                       k,
                                                      const hipDoubleComplex* const AP[],
                                                      int64_t                       lda,
                                                      hipDoubleComplex* const       x[],
                                                      int64_t                       incx,
                                                      int64_t                       batchCount);

//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The tbmvStridedBatched functions perform one of the matrix-vector operations:

        x_i := A_i*x_i      or
        x_i := A_i**T*x_i   or
        x_i := A_i**H*x_i,

    where ``(A_i, x_i)`` is the ``i``-th instance of the batch,
    ``x_i`` is a vector, and ``A_i`` is an ``n`` by ``n`` matrix, for ``i`` = 1, ..., ``batchCount``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    uplo      [hipblasFillMode_t]
              - HIPBLAS_FILL_MODE_UPPER: each A_i is an upper banded triangular matrix.
              - HIPBLAS_FILL_MODE_LOWER: each A_i is a lower banded triangular matrix.
    @param[in]
    transA     [hipblasOperation_t]
              indicates whether each matrix A_i is transposed (conjugated) or not.
    @param[in]
    diag      [hipblasDiagType_t]
              - HIPBLAS_DIAG_UNIT: The main diagonal of each A_i is assumed to consist of only
                                     1's and is not referenced.
              - HIPBLAS_DIAG_NON_UNIT: No assumptions are made of each A_i's main diagonal.
    @param[in]
    n         [int]
              the number of rows and columns of the matrix represented by each A_i.
    @param[in]
    k         [int]
              - if uplo == HIPBLAS_FILL_MODE_UPPER, k specifies the number of super-diagonals
              of each matrix A_i.
              - if uplo == HIPBLAS_FILL_MODE_LOWER, k specifies the number of sub-diagonals
              of each matrix A_i.
              - k must satisfy k > 0 && k < lda.
    @param[in]
    AP         device pointer to the first matrix (A_1) of the batch. Stores each banded triangular matrix A_i.
              - if uplo == HIPBLAS_FILL_MODE_UPPER:
                The matrix represented is an upper banded triangular matrix
                with the main diagonal and k super-diagonals. Everything
                else can be assumed to be 0.  
                The matrix is compacted so that the main diagonal resides on the k'th
                row, the first super diagonal resides on the RHS of the k-1'th row, and so forth,
                with the k'th diagonal on the RHS of the 0'th row.  
                   Ex: (HIPBLAS_FILL_MODE_UPPER; n = 5; k = 2)  
                      1 6 9 0 0     ->         0 0 9 8 7  
                      0 2 7 8 0     ->         0 6 7 8 9  
                      0 0 3 8 7     ->         1 2 3 4 5  
                      0 0 0 4 9     ->         0 0 0 0 0  
                      0 0 0 0 5     ->         0 0 0 0 0  
               - if uplo == HIPBLAS_FILL_MODE_LOWER:
                The matrix represented is a lower banded triangular matrix
                with the main diagonal and k sub-diagonals. Everything else can be
                assumed to be 0.  
                The matrix is compacted so that the main diagonal resides on the 0'th row,
                working up to the k'th diagonal residing on the LHS of the k'th row.  
                   Ex: (HIPBLAS_FILL_MODE_LOWER; n = 5; k = 2)  
                      1 0 0 0 0     ->         1 2 3 4 5  
                      6 2 0 0 0     ->         6 7 8 9 0  
                      9 7 3 0 0     ->         9 8 7 0 0  
                      0 8 8 4 0     ->         0 0 0 0 0  
                      0 0 7 9 5     ->         0 0 0 0 0  
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i. lda must satisfy lda > k.
    @param[in]
    strideA  [hipblasStride]
              stride from the start of one A_i matrix to the next A_(i + 1).
    @param[inout]
    x         device pointer to the first vector (x_1) of the batch.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    stridex  [hipblasStride]
              stride from the start of one x_i vector to the next x_(i + 1).
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// ``s`` precision (float) tbmvStridedBatched, 32-bit interface (int sizes, increment and batchCount).
// AP and x are single base pointers; strideA and stridex (hipblasStride) step between batch instances.
HIPBLAS_EXPORT hipblasStatus_t hipblasStbmvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          int                k,
                                                          const float*       AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          float*             x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

// ``d`` precision (double) tbmvStridedBatched, 32-bit interface (int sizes, increment and batchCount).
// AP and x are single base pointers; strideA and stridex (hipblasStride) step between batch instances.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtbmvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          int                k,
                                                          const double*      AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          double*            x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

// ``c`` precision (hipComplex) tbmvStridedBatched, 32-bit interface (int sizes, increment and batchCount).
// AP and x are single base pointers; strideA and stridex (hipblasStride) step between batch instances.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtbmvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          int                k,
                                                          const hipComplex*  AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          hipComplex*        x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

// ``z`` precision (hipDoubleComplex) tbmvStridedBatched, 32-bit interface (int sizes, increment and batchCount).
// AP and x are single base pointers; strideA and stridex (hipblasStride) step between batch instances.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtbmvStridedBatched(hipblasHandle_t         handle,
                                                          hipblasFillMode_t       uplo,
                                                          hipblasOperation_t      transA,
                                                          hipblasDiagType_t       diag,
                                                          int                     n,
                                                          int                     k,
                                                          const hipDoubleComplex* AP,
                                                          int                     lda,
                                                          hipblasStride           strideA,
                                                          hipDoubleComplex*       x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          int                     batchCount);

// 64-bit interface
// ``s`` precision (float) tbmvStridedBatched, 64-bit interface: n, k, lda, incx and batchCount are
// int64_t; strideA and stridex keep the hipblasStride type used by the 32-bit declaration.
HIPBLAS_EXPORT hipblasStatus_t hipblasStbmvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             int64_t            k,
                                                             const float*       AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             float*             x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

// ``d`` precision (double) tbmvStridedBatched, 64-bit interface: n, k, lda, incx and batchCount are
// int64_t; strideA and stridex keep the hipblasStride type used by the 32-bit declaration.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtbmvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             int64_t            k,
                                                             const double*      AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             double*            x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

// ``c`` precision (hipComplex) tbmvStridedBatched, 64-bit interface: n, k, lda, incx and batchCount
// are int64_t; strideA and stridex keep the hipblasStride type used by the 32-bit declaration.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtbmvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             int64_t            k,
                                                             const hipComplex*  AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             hipComplex*        x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

// ``z`` precision (hipDoubleComplex) tbmvStridedBatched, 64-bit interface: n, k, lda, incx and
// batchCount are int64_t; strideA and stridex keep the hipblasStride type used by the 32-bit declaration.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtbmvStridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasFillMode_t       uplo,
                                                             hipblasOperation_t      transA,
                                                             hipblasDiagType_t       diag,
                                                             int64_t                 n,
                                                             int64_t                 k,
                                                             const hipDoubleComplex* AP,
                                                             int64_t                 lda,
                                                             hipblasStride           strideA,
                                                             hipDoubleComplex*       x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The tbsv functions solve:

         A*x = b or A**T*x = b or A**H*x = b,

    where ``x`` and ``b`` are vectors and ``A`` is a banded triangular matrix.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  A is an upper banded triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  A is a lower banded triangular matrix.

    @param[in]
    transA     [hipblasOperation_t]
               - HIPBLAS_OP_N: Solves A*x = b
               - HIPBLAS_OP_T: Solves A**T*x = b
               - HIPBLAS_OP_C: Solves A**H*x = b

    @param[in]
    diag    [hipblasDiagType_t]
            - HIPBLAS_DIAG_UNIT:     A is assumed to be unit triangular (that is, the diagonal elements
                                       of A are not used in computations).
            - HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular.

    @param[in]
    n         [int]
              n specifies the number of rows of b. n >= 0.
    @param[in]
    k         [int]
              - if(uplo == HIPBLAS_FILL_MODE_UPPER),
                k specifies the number of super-diagonals of A.
              - if(uplo == HIPBLAS_FILL_MODE_LOWER),
                k specifies the number of sub-diagonals of A.
              - k >= 0.

    @param[in]
    AP         device pointer storing the matrix A in banded format.

    @param[in]
    lda       [int]
              specifies the leading dimension of A.
              lda >= (k + 1).

    @param[inout]
    x         device pointer storing input vector b. Overwritten by the output vector x.

    @param[in]
    incx      [int]
              specifies the increment for the elements of x.

    ********************************************************************/

// ``s`` precision (float) tbsv, 32-bit interface (int n, k, lda and incx).
// AP is read-only; x holds b on input and is overwritten by the solution vector.
HIPBLAS_EXPORT hipblasStatus_t hipblasStbsv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            int                k,
                                            const float*       AP,
                                            int                lda,
                                            float*             x,
                                            int                incx);

// ``d`` precision (double) tbsv, 32-bit interface (int n, k, lda and incx).
// AP is read-only; x holds b on input and is overwritten by the solution vector.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtbsv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            int                k,
                                            const double*      AP,
                                            int                lda,
                                            double*            x,
                                            int                incx);

// ``c`` precision (hipComplex) tbsv, 32-bit interface (int n, k, lda and incx).
// AP is read-only; x holds b on input and is overwritten by the solution vector.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtbsv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            int                k,
                                            const hipComplex*  AP,
                                            int                lda,
                                            hipComplex*        x,
                                            int                incx);

// ``z`` precision (hipDoubleComplex) tbsv, 32-bit interface (int n, k, lda and incx).
// AP is read-only; x holds b on input and is overwritten by the solution vector.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtbsv(hipblasHandle_t         handle,
                                            hipblasFillMode_t       uplo,
                                            hipblasOperation_t      transA,
                                            hipblasDiagType_t       diag,
                                            int                     n,
                                            int                     k,
                                            const hipDoubleComplex* AP,
                                            int                     lda,
                                            hipDoubleComplex*       x,
                                            int                     incx);

// 64-bit interface
// ``s`` precision (float) tbsv, 64-bit interface: n, k, lda and incx are int64_t.
// AP is read-only; x holds b on input and is overwritten by the solution vector.
HIPBLAS_EXPORT hipblasStatus_t hipblasStbsv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               int64_t            k,
                                               const float*       AP,
                                               int64_t            lda,
                                               float*             x,
                                               int64_t            incx);

// ``d`` precision (double) tbsv, 64-bit interface: n, k, lda and incx are int64_t.
// AP is read-only; x holds b on input and is overwritten by the solution vector.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtbsv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               int64_t            k,
                                               const double*      AP,
                                               int64_t            lda,
                                               double*            x,
                                               int64_t            incx);

// ``c`` precision (hipComplex) tbsv, 64-bit interface: n, k, lda and incx are int64_t.
// AP is read-only; x holds b on input and is overwritten by the solution vector.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtbsv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               int64_t            k,
                                               const hipComplex*  AP,
                                               int64_t            lda,
                                               hipComplex*        x,
                                               int64_t            incx);

// ``z`` precision (hipDoubleComplex) tbsv, 64-bit interface: n, k, lda and incx are int64_t.
// AP is read-only; x holds b on input and is overwritten by the solution vector.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtbsv_64(hipblasHandle_t         handle,
                                               hipblasFillMode_t       uplo,
                                               hipblasOperation_t      transA,
                                               hipblasDiagType_t       diag,
                                               int64_t                 n,
                                               int64_t                 k,
                                               const hipDoubleComplex* AP,
                                               int64_t                 lda,
                                               hipDoubleComplex*       x,
                                               int64_t                 incx);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The tbsvBatched functions solve:

         A_i*x_i = b_i or A_i**T*x_i = b_i or A_i**H*x_i = b_i,

    where ``x_i`` and ``b_i`` are vectors and ``A_i`` is a banded triangular matrix,
    for ``i`` = [1, ``batchCount``].

    The input vectors ``b_i`` are overwritten by the output vectors ``x_i``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  A_i is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  A_i is a  lower triangular matrix.

    @param[in]
    transA     [hipblasOperation_t]
               - HIPBLAS_OP_N: Solves A_i*x_i = b_i
               - HIPBLAS_OP_T: Solves A_i**T*x_i = b_i
               - HIPBLAS_OP_C: Solves A_i**H*x_i = b_i

    @param[in]
    diag    [hipblasDiagType_t]
            - HIPBLAS_DIAG_UNIT:     each A_i is assumed to be unit triangular (that is, the diagonal elements
                                       of each A_i are not used in computations).
            - HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular.

    @param[in]
    n         [int]
              n specifies the number of rows of each b_i. n >= 0.
    @param[in]
    k         [int]
              - if(uplo == HIPBLAS_FILL_MODE_UPPER),
                k specifies the number of super-diagonals of each A_i.
              - if(uplo == HIPBLAS_FILL_MODE_LOWER),
                k specifies the number of sub-diagonals of each A_i.
              - k >= 0.

    @param[in]
    AP         device vector of device pointers storing each matrix A_i in banded format.

    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i.
              lda >= (k + 1).

    @param[inout]
    x         device vector of device pointers storing each input vector b_i. Overwritten by each output
              vector x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// float variant of tbsvBatched: AP and x are arrays of per-batch pointers;
// n, k, lda, incx, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasStbsvBatched(hipblasHandle_t    handle,
                                                   hipblasFillMode_t  uplo,
                                                   hipblasOperation_t transA,
                                                   hipblasDiagType_t  diag,
                                                   int                n,
                                                   int                k,
                                                   const float* const AP[],
                                                   int                lda,
                                                   float* const       x[],
                                                   int                incx,
                                                   int                batchCount);

// double variant of tbsvBatched: AP and x are arrays of per-batch pointers;
// n, k, lda, incx, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtbsvBatched(hipblasHandle_t     handle,
                                                   hipblasFillMode_t   uplo,
                                                   hipblasOperation_t  transA,
                                                   hipblasDiagType_t   diag,
                                                   int                 n,
                                                   int                 k,
                                                   const double* const AP[],
                                                   int                 lda,
                                                   double* const       x[],
                                                   int                 incx,
                                                   int                 batchCount);

// hipComplex variant of tbsvBatched: AP and x are arrays of per-batch pointers;
// n, k, lda, incx, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtbsvBatched(hipblasHandle_t         handle,
                                                   hipblasFillMode_t       uplo,
                                                   hipblasOperation_t      transA,
                                                   hipblasDiagType_t       diag,
                                                   int                     n,
                                                   int                     k,
                                                   const hipComplex* const AP[],
                                                   int                     lda,
                                                   hipComplex* const       x[],
                                                   int                     incx,
                                                   int                     batchCount);

// hipDoubleComplex variant of tbsvBatched: AP and x are arrays of per-batch pointers;
// n, k, lda, incx, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtbsvBatched(hipblasHandle_t               handle,
                                                   hipblasFillMode_t             uplo,
                                                   hipblasOperation_t            transA,
                                                   hipblasDiagType_t             diag,
                                                   int                           n,
                                                   int                           k,
                                                   const hipDoubleComplex* const AP[],
                                                   int                           lda,
                                                   hipDoubleComplex* const       x[],
                                                   int                           incx,
                                                   int                           batchCount);

// 64-bit interface
// float variant of tbsvBatched with int64_t n, k, lda, incx, and batchCount.
HIPBLAS_EXPORT hipblasStatus_t hipblasStbsvBatched_64(hipblasHandle_t    handle,
                                                      hipblasFillMode_t  uplo,
                                                      hipblasOperation_t transA,
                                                      hipblasDiagType_t  diag,
                                                      int64_t            n,
                                                      int64_t            k,
                                                      const float* const AP[],
                                                      int64_t            lda,
                                                      float* const       x[],
                                                      int64_t            incx,
                                                      int64_t            batchCount);

// double variant of tbsvBatched with int64_t n, k, lda, incx, and batchCount.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtbsvBatched_64(hipblasHandle_t     handle,
                                                      hipblasFillMode_t   uplo,
                                                      hipblasOperation_t  transA,
                                                      hipblasDiagType_t   diag,
                                                      int64_t             n,
                                                      int64_t             k,
                                                      const double* const AP[],
                                                      int64_t             lda,
                                                      double* const       x[],
                                                      int64_t             incx,
                                                      int64_t             batchCount);

// hipComplex variant of tbsvBatched with int64_t n, k, lda, incx, and batchCount.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtbsvBatched_64(hipblasHandle_t         handle,
                                                      hipblasFillMode_t       uplo,
                                                      hipblasOperation_t      transA,
                                                      hipblasDiagType_t       diag,
                                                      int64_t                 n,
                                                      int64_t                 k,
                                                      const hipComplex* const AP[],
                                                      int64_t                 lda,
                                                      hipComplex* const       x[],
                                                      int64_t                 incx,
                                                      int64_t                 batchCount);

// hipDoubleComplex variant of tbsvBatched with int64_t n, k, lda, incx, and batchCount.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtbsvBatched_64(hipblasHandle_t               handle,
                                                      hipblasFillMode_t             uplo,
                                                      hipblasOperation_t            transA,
                                                      hipblasDiagType_t             diag,
                                                      int64_t                       n,
                                                      int64_t                       k,
                                                      const hipDoubleComplex* const AP[],
                                                      int64_t                       lda,
                                                      hipDoubleComplex* const       x[],
                                                      int64_t                       incx,
                                                      int64_t                       batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The tbsvStridedBatched functions solve:

         A_i*x_i = b_i or A_i**T*x_i = b_i or A_i**H*x_i = b_i,

    where ``x_i`` and ``b_i`` are vectors and ``A_i`` is a banded triangular matrix,
    for ``i`` = [1, ``batchCount``].

    The input vectors ``b_i`` are overwritten by the output vectors ``x_i``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  A_i is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  A_i is a lower triangular matrix.

    @param[in]
    transA     [hipblasOperation_t]
               - HIPBLAS_OP_N: Solves A_i*x_i = b_i
               - HIPBLAS_OP_T: Solves A_i**T*x_i = b_i
               - HIPBLAS_OP_C: Solves A_i**H*x_i = b_i

    @param[in]
    diag    [hipblasDiagType_t]
            - HIPBLAS_DIAG_UNIT:     each A_i is assumed to be unit triangular (that is, the diagonal elements
                                       of each A_i are not used in computations).
            - HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular.

    @param[in]
    n         [int]
              n specifies the number of rows of each b_i. n >= 0.
    @param[in]
    k         [int]
              - if(uplo == HIPBLAS_FILL_MODE_UPPER),
                k specifies the number of super-diagonals of each A_i.
              - if(uplo == HIPBLAS_FILL_MODE_LOWER),
                k specifies the number of sub-diagonals of each A_i.
              - k >= 0.

    @param[in]
    AP         device pointer pointing to the first banded matrix A_1.

    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i.
              lda >= (k + 1).
    @param[in]
    strideA  [hipblasStride]
              specifies the distance between the start of one matrix (A_i) and the next (A_i+1).

    @param[inout]
    x         device pointer pointing to the first input vector b_1. Overwritten by the output vectors x_i.
    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    stridex  [hipblasStride]
              specifies the distance between the start of one vector (x_i) and the next (x_i+1).
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// float variant of tbsvStridedBatched: AP and x are single base pointers, with
// strideA and stridex (hipblasStride) passed alongside; n, k, lda, incx, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasStbsvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          int                k,
                                                          const float*       AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          float*             x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

// double variant of tbsvStridedBatched: AP and x are single base pointers, with
// strideA and stridex (hipblasStride) passed alongside; n, k, lda, incx, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtbsvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          int                k,
                                                          const double*      AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          double*            x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

// hipComplex variant of tbsvStridedBatched: AP and x are single base pointers, with
// strideA and stridex (hipblasStride) passed alongside; n, k, lda, incx, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtbsvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          int                k,
                                                          const hipComplex*  AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          hipComplex*        x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

// hipDoubleComplex variant of tbsvStridedBatched: AP and x are single base pointers, with
// strideA and stridex (hipblasStride) passed alongside; n, k, lda, incx, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtbsvStridedBatched(hipblasHandle_t         handle,
                                                          hipblasFillMode_t       uplo,
                                                          hipblasOperation_t      transA,
                                                          hipblasDiagType_t       diag,
                                                          int                     n,
                                                          int                     k,
                                                          const hipDoubleComplex* AP,
                                                          int                     lda,
                                                          hipblasStride           strideA,
                                                          hipDoubleComplex*       x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          int                     batchCount);

// 64-bit interface
// float variant of tbsvStridedBatched with int64_t n, k, lda, incx, and batchCount;
// strideA and stridex remain hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasStbsvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             int64_t            k,
                                                             const float*       AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             float*             x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

// double variant of tbsvStridedBatched with int64_t n, k, lda, incx, and batchCount;
// strideA and stridex remain hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtbsvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             int64_t            k,
                                                             const double*      AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             double*            x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

// hipComplex variant of tbsvStridedBatched with int64_t n, k, lda, incx, and batchCount;
// strideA and stridex remain hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtbsvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             int64_t            k,
                                                             const hipComplex*  AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             hipComplex*        x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

// hipDoubleComplex variant of tbsvStridedBatched with int64_t n, k, lda, incx, and batchCount;
// strideA and stridex remain hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtbsvStridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasFillMode_t       uplo,
                                                             hipblasOperation_t      transA,
                                                             hipblasDiagType_t       diag,
                                                             int64_t                 n,
                                                             int64_t                 k,
                                                             const hipDoubleComplex* AP,
                                                             int64_t                 lda,
                                                             hipblasStride           strideA,
                                                             hipDoubleComplex*       x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The tpmv functions perform one of the matrix-vector operations:

         x = A*x or x = A**T*x or x = A**H*x,

    where ``x`` is an ``n``-element vector and ``A`` is an ``n`` by ``n`` unit, or non-unit, upper or lower triangular matrix, supplied in packed form.

    The vector ``x`` is overwritten.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  A is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  A is a  lower triangular matrix.

    @param[in]
    transA     [hipblasOperation_t]
               - HIPBLAS_OP_N: x = A*x
               - HIPBLAS_OP_T: x = A**T*x
               - HIPBLAS_OP_C: x = A**H*x

    @param[in]
    diag    [hipblasDiagType_t]
            - HIPBLAS_DIAG_UNIT:     A is assumed to be unit triangular.
            - HIPBLAS_DIAG_NON_UNIT:  A is not assumed to be unit triangular.

    @param[in]
    n       [int]
            n specifies the number of rows of A. n >= 0.

    @param[in]
    AP       device pointer storing matrix A in packed form,
            of dimension at least ( n * ( n + 1 ) / 2 ).
          - Before entry with uplo = HIPBLAS_FILL_MODE_UPPER, the array AP
          must contain the upper triangular matrix packed sequentially,
          column by column, so that AP[0] contains a_{0,0}, AP[1] and AP[2] contain
          a_{0,1} and a_{1,1} respectively, and so on.
          - Before entry with uplo = HIPBLAS_FILL_MODE_LOWER, the array AP
          must contain the lower triangular matrix packed sequentially,
          column by column, so that AP[0] contains a_{0,0}, AP[1] and AP[2] contain
          a_{1,0} and a_{2,0} respectively, and so on.
          - Note that when diag = HIPBLAS_DIAG_UNIT, the diagonal elements of A are
          not referenced, but are assumed to be unity.

    @param[inout]
    x       device pointer storing vector x. On exit, x is overwritten with the result.

    @param[in]
    incx    [int]
            specifies the increment for the elements of x. incx must not be zero.

    ********************************************************************/

// float variant of tpmv: AP is read-only, x is written through; n and incx are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasStpmv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            const float*       AP,
                                            float*             x,
                                            int                incx);

// double variant of tpmv: AP is read-only, x is written through; n and incx are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtpmv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            const double*      AP,
                                            double*            x,
                                            int                incx);

// hipComplex variant of tpmv: AP is read-only, x is written through; n and incx are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtpmv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            const hipComplex*  AP,
                                            hipComplex*        x,
                                            int                incx);

// hipDoubleComplex variant of tpmv: AP is read-only, x is written through; n and incx are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtpmv(hipblasHandle_t         handle,
                                            hipblasFillMode_t       uplo,
                                            hipblasOperation_t      transA,
                                            hipblasDiagType_t       diag,
                                            int                     n,
                                            const hipDoubleComplex* AP,
                                            hipDoubleComplex*       x,
                                            int                     incx);

// 64-bit interface
// float variant of tpmv with int64_t n and incx.
HIPBLAS_EXPORT hipblasStatus_t hipblasStpmv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               const float*       AP,
                                               float*             x,
                                               int64_t            incx);

// double variant of tpmv with int64_t n and incx.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtpmv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               const double*      AP,
                                               double*            x,
                                               int64_t            incx);

// hipComplex variant of tpmv with int64_t n and incx.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtpmv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               const hipComplex*  AP,
                                               hipComplex*        x,
                                               int64_t            incx);

// hipDoubleComplex variant of tpmv with int64_t n and incx.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtpmv_64(hipblasHandle_t         handle,
                                               hipblasFillMode_t       uplo,
                                               hipblasOperation_t      transA,
                                               hipblasDiagType_t       diag,
                                               int64_t                 n,
                                               const hipDoubleComplex* AP,
                                               hipDoubleComplex*       x,
                                               int64_t                 incx);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The tpmvBatched functions perform one of the matrix-vector operations:

         x_i = A_i*x_i or x_i = A_i**T*x_i or x_i = A_i**H*x_i, 0 \le i < batchCount

    where ``x_i`` is an ``n``-element vector and ``A_i`` is an ``n`` by ``n`` unit, or non-unit, upper or lower triangular matrix, supplied in packed form.

    The vectors ``x_i`` are overwritten.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  A_i is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  A_i is a  lower triangular matrix.

    @param[in]
    transA     [hipblasOperation_t]
               - HIPBLAS_OP_N: x_i = A_i*x_i
               - HIPBLAS_OP_T: x_i = A_i**T*x_i
               - HIPBLAS_OP_C: x_i = A_i**H*x_i

    @param[in]
    diag    [hipblasDiagType_t]
            - HIPBLAS_DIAG_UNIT:     A_i is assumed to be unit triangular.
            - HIPBLAS_DIAG_NON_UNIT:  A_i is not assumed to be unit triangular.

    @param[in]
    n         [int]
              n specifies the number of rows of matrices A_i. n >= 0.

    @param[in]
    AP         device array of device pointers storing the matrices A_i in packed form,
              each of dimension at least ( n * ( n + 1 ) / 2 ).

    @param[inout]
    x         device array of device pointers storing vectors x_i. Each x_i is overwritten with the result.

    @param[in]
    incx      [int]
              specifies the increment for the elements of vectors x_i.

    @param[in]
    batchCount [int]
              The number of batched matrices/vectors.


    ********************************************************************/

// float variant of tpmvBatched: AP and x are arrays of per-batch pointers;
// n, incx, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasStpmvBatched(hipblasHandle_t    handle,
                                                   hipblasFillMode_t  uplo,
                                                   hipblasOperation_t transA,
                                                   hipblasDiagType_t  diag,
                                                   int                n,
                                                   const float* const AP[],
                                                   float* const       x[],
                                                   int                incx,
                                                   int                batchCount);

// double variant of tpmvBatched: AP and x are arrays of per-batch pointers;
// n, incx, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtpmvBatched(hipblasHandle_t     handle,
                                                   hipblasFillMode_t   uplo,
                                                   hipblasOperation_t  transA,
                                                   hipblasDiagType_t   diag,
                                                   int                 n,
                                                   const double* const AP[],
                                                   double* const       x[],
                                                   int                 incx,
                                                   int                 batchCount);

// hipComplex variant of tpmvBatched: AP and x are arrays of per-batch pointers;
// n, incx, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtpmvBatched(hipblasHandle_t         handle,
                                                   hipblasFillMode_t       uplo,
                                                   hipblasOperation_t      transA,
                                                   hipblasDiagType_t       diag,
                                                   int                     n,
                                                   const hipComplex* const AP[],
                                                   hipComplex* const       x[],
                                                   int                     incx,
                                                   int                     batchCount);

// hipDoubleComplex variant of tpmvBatched: AP and x are arrays of per-batch pointers;
// n, incx, and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtpmvBatched(hipblasHandle_t               handle,
                                                   hipblasFillMode_t             uplo,
                                                   hipblasOperation_t            transA,
                                                   hipblasDiagType_t             diag,
                                                   int                           n,
                                                   const hipDoubleComplex* const AP[],
                                                   hipDoubleComplex* const       x[],
                                                   int                           incx,
                                                   int                           batchCount);

// 64-bit interface
HIPBLAS_EXPORT hipblasStatus_t hipblasStpmvBatched_64(hipblasHandle_t    handle,
                                                      hipblasFillMode_t  uplo,
                                                      hipblasOperation_t transA,
                                                      hipblasDiagType_t  diag,
                                                      int64_t            n,
                                                      const float* const AP[],
                                                      float* const       x[],
                                                      int64_t            incx,
                                                      int64_t            batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDtpmvBatched_64(hipblasHandle_t     handle,
                                                      hipblasFillMode_t   uplo,
                                                      hipblasOperation_t  transA,
                                                      hipblasDiagType_t   diag,
                                                      int64_t             n,
                                                      const double* const AP[],
                                                      double* const       x[],
                                                      int64_t             incx,
                                                      int64_t             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCtpmvBatched_64(hipblasHandle_t         handle,
                                                      hipblasFillMode_t       uplo,
                                                      hipblasOperation_t      transA,
                                                      hipblasDiagType_t       diag,
                                                      int64_t                 n,
                                                      const hipComplex* const AP[],
                                                      hipComplex* const       x[],
                                                      int64_t                 incx,
                                                      int64_t                 batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZtpmvBatched_64(hipblasHandle_t               handle,
                                                      hipblasFillMode_t             uplo,
                                                      hipblasOperation_t            transA,
                                                      hipblasDiagType_t             diag,
                                                      int64_t                       n,
                                                      const hipDoubleComplex* const AP[],
                                                      hipDoubleComplex* const       x[],
                                                      int64_t                       incx,
                                                      int64_t                       batchCount);
//! @}

/*! @{
    \brief  <b> BLAS Level 2 API </b>

    \details
    The tpmvStridedBatched functions perform one of the matrix-vector operations:

         x_i = A_i*x_i or x_i = A_i**T*x_i or x_i = A_i**H*x_i, 0 \le i < batchCount

    where ``x_i`` is an n element vector and ``A_i`` is an ``n`` by ``n`` unit or non-unit, upper or lower triangular matrix
    supplied in packed form, with strides specifying how to retrieve ``$x_i$`` (resp. ``$A_i$``) from ``$x_{i-1}$`` (resp. ``$A_{i-1}$``).

    The vectors ``x_i`` are overwritten.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  A_i is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  A_i is a lower triangular matrix.

    @param[in]
    transA     [hipblasOperation_t]
            - HIPBLAS_OP_N: x_i = A_i*x_i
            - HIPBLAS_OP_T: x_i = A_i**T*x_i
            - HIPBLAS_OP_C: x_i = A_i**H*x_i

    @param[in]
    diag    [hipblasDiagType_t]
            - HIPBLAS_DIAG_UNIT:     A_i is assumed to be unit triangular.
            - HIPBLAS_DIAG_NON_UNIT:  A_i is not assumed to be unit triangular.

    @param[in]
    n         [int]
              n specifies the number of rows of matrices A_i. n >= 0.

    @param[in]
    AP        device pointer to the packed matrix A_0
              of dimension >= (n * (n + 1) / 2).

    @param[in]
    strideA  [hipblasStride]
              stride from the start of one A_i matrix to the next A_{i + 1}.

    @param[inout]
    x         device pointer storing the vector x_0. Each x_i is overwritten with the result.

    @param[in]
    incx      [int]
              specifies the increment for the elements of one vector x.

    @param[in]
    stridex  [hipblasStride]
              stride from the start of one x_i vector to the next x_{i + 1}.

    @param[in]
    batchCount [int]
              The number of batched matrices/vectors.


    ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasStpmvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          const float*       AP,
                                                          hipblasStride      strideA,
                                                          float*             x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDtpmvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          const double*      AP,
                                                          hipblasStride      strideA,
                                                          double*            x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCtpmvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          const hipComplex*  AP,
                                                          hipblasStride      strideA,
                                                          hipComplex*        x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZtpmvStridedBatched(hipblasHandle_t         handle,
                                                          hipblasFillMode_t       uplo,
                                                          hipblasOperation_t      transA,
                                                          hipblasDiagType_t       diag,
                                                          int                     n,
                                                          const hipDoubleComplex* AP,
                                                          hipblasStride           strideA,
                                                          hipDoubleComplex*       x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          int                     batchCount);

// 64-bit interface: the _64 variants take n, incx, and batchCount as int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasStpmvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             const float*       AP,
                                                             hipblasStride      strideA,
                                                             float*             x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDtpmvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             const double*      AP,
                                                             hipblasStride      strideA,
                                                             double*            x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCtpmvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             const hipComplex*  AP,
                                                             hipblasStride      strideA,
                                                             hipComplex*        x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZtpmvStridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasFillMode_t       uplo,
                                                             hipblasOperation_t      transA,
                                                             hipblasDiagType_t       diag,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* AP,
                                                             hipblasStride           strideA,
                                                             hipDoubleComplex*       x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The tpsv functions solve one of the following systems:

         A*x = b, A**T*x = b, or A**H*x = b,

    where ``x`` and ``b`` are vectors and ``A`` is a triangular matrix stored in the packed format.

    The input vector ``b`` is overwritten by the output vector ``x``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  A is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  A is a lower triangular matrix.

    @param[in]
    transA  [hipblasOperation_t]
            - HIPBLAS_OP_N: Solves A*x = b
            - HIPBLAS_OP_T: Solves A**T*x = b
            - HIPBLAS_OP_C: Solves A**H*x = b

    @param[in]
    diag    [hipblasDiagType_t]
            - HIPBLAS_DIAG_UNIT:     A is assumed to be unit triangular (that is, the diagonal elements
                                       of A are not used in computations).
            - HIPBLAS_DIAG_NON_UNIT: A is not assumed to be unit triangular.

    @param[in]
    n         [int]
              n specifies the number of rows of b. n >= 0.

    @param[in]
    AP        device pointer storing the packed version of matrix A
              of dimension >= (n * (n + 1) / 2).

    @param[inout]
    x         device pointer storing vector b on input, overwritten by x on output.

    @param[in]
    incx      [int]
              specifies the increment for the elements of x.

    ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasStpsv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            const float*       AP,
                                            float*             x,
                                            int                incx);

HIPBLAS_EXPORT hipblasStatus_t hipblasDtpsv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            const double*      AP,
                                            double*            x,
                                            int                incx);

HIPBLAS_EXPORT hipblasStatus_t hipblasCtpsv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            const hipComplex*  AP,
                                            hipComplex*        x,
                                            int                incx);

HIPBLAS_EXPORT hipblasStatus_t hipblasZtpsv(hipblasHandle_t         handle,
                                            hipblasFillMode_t       uplo,
                                            hipblasOperation_t      transA,
                                            hipblasDiagType_t       diag,
                                            int                     n,
                                            const hipDoubleComplex* AP,
                                            hipDoubleComplex*       x,
                                            int                     incx);

// 64-bit interface: the _64 variants take n and incx as int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasStpsv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               const float*       AP,
                                               float*             x,
                                               int64_t            incx);

HIPBLAS_EXPORT hipblasStatus_t hipblasDtpsv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               const double*      AP,
                                               double*            x,
                                               int64_t            incx);

HIPBLAS_EXPORT hipblasStatus_t hipblasCtpsv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               const hipComplex*  AP,
                                               hipComplex*        x,
                                               int64_t            incx);

HIPBLAS_EXPORT hipblasStatus_t hipblasZtpsv_64(hipblasHandle_t         handle,
                                               hipblasFillMode_t       uplo,
                                               hipblasOperation_t      transA,
                                               hipblasDiagType_t       diag,
                                               int64_t                 n,
                                               const hipDoubleComplex* AP,
                                               hipDoubleComplex*       x,
                                               int64_t                 incx);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The tpsvBatched functions solve one of the following systems:

         A_i*x_i = b_i, A_i**T*x_i = b_i, or A_i**H*x_i = b_i,

    where ``x_i`` and ``b_i`` are vectors and ``A_i`` is a triangular matrix stored in the packed format,
    for ``i`` in [1, ``batchCount``].

    The input vectors ``b_i`` are overwritten by the output vectors ``x_i``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  each A_i is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  each A_i is a  lower triangular matrix.

    @param[in]
    transA  [hipblasOperation_t]
            - HIPBLAS_OP_N: Solves A_i*x_i = b_i
            - HIPBLAS_OP_T: Solves A_i**T*x_i = b_i
            - HIPBLAS_OP_C: Solves A_i**H*x_i = b_i

    @param[in]
    diag    [hipblasDiagType_t]
            - HIPBLAS_DIAG_UNIT:     each A_i is assumed to be unit triangular (that is, the diagonal elements
                                       of each A_i are not used in computations).
            - HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular.

    @param[in]
    n         [int]
              n specifies the number of rows of each b_i. n >= 0.

    @param[in]
    AP        device array of device pointers storing the packed versions of each matrix A_i
              of dimension >= (n * (n + 1) / 2).

    @param[inout]
    x         device array of device pointers storing each input vector b_i, overwritten by x_i on output.

    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.

    @param[in]
    batchCount [int]
                specifies the number of instances in the batch.

    ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasStpsvBatched(hipblasHandle_t    handle,
                                                   hipblasFillMode_t  uplo,
                                                   hipblasOperation_t transA,
                                                   hipblasDiagType_t  diag,
                                                   int                n,
                                                   const float* const AP[],
                                                   float* const       x[],
                                                   int                incx,
                                                   int                batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDtpsvBatched(hipblasHandle_t     handle,
                                                   hipblasFillMode_t   uplo,
                                                   hipblasOperation_t  transA,
                                                   hipblasDiagType_t   diag,
                                                   int                 n,
                                                   const double* const AP[],
                                                   double* const       x[],
                                                   int                 incx,
                                                   int                 batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCtpsvBatched(hipblasHandle_t         handle,
                                                   hipblasFillMode_t       uplo,
                                                   hipblasOperation_t      transA,
                                                   hipblasDiagType_t       diag,
                                                   int                     n,
                                                   const hipComplex* const AP[],
                                                   hipComplex* const       x[],
                                                   int                     incx,
                                                   int                     batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZtpsvBatched(hipblasHandle_t               handle,
                                                   hipblasFillMode_t             uplo,
                                                   hipblasOperation_t            transA,
                                                   hipblasDiagType_t             diag,
                                                   int                           n,
                                                   const hipDoubleComplex* const AP[],
                                                   hipDoubleComplex* const       x[],
                                                   int                           incx,
                                                   int                           batchCount);

// 64-bit interface: the _64 variants take n, incx, and batchCount as int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasStpsvBatched_64(hipblasHandle_t    handle,
                                                      hipblasFillMode_t  uplo,
                                                      hipblasOperation_t transA,
                                                      hipblasDiagType_t  diag,
                                                      int64_t            n,
                                                      const float* const AP[],
                                                      float* const       x[],
                                                      int64_t            incx,
                                                      int64_t            batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDtpsvBatched_64(hipblasHandle_t     handle,
                                                      hipblasFillMode_t   uplo,
                                                      hipblasOperation_t  transA,
                                                      hipblasDiagType_t   diag,
                                                      int64_t             n,
                                                      const double* const AP[],
                                                      double* const       x[],
                                                      int64_t             incx,
                                                      int64_t             batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCtpsvBatched_64(hipblasHandle_t         handle,
                                                      hipblasFillMode_t       uplo,
                                                      hipblasOperation_t      transA,
                                                      hipblasDiagType_t       diag,
                                                      int64_t                 n,
                                                      const hipComplex* const AP[],
                                                      hipComplex* const       x[],
                                                      int64_t                 incx,
                                                      int64_t                 batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZtpsvBatched_64(hipblasHandle_t               handle,
                                                      hipblasFillMode_t             uplo,
                                                      hipblasOperation_t            transA,
                                                      hipblasDiagType_t             diag,
                                                      int64_t                       n,
                                                      const hipDoubleComplex* const AP[],
                                                      hipDoubleComplex* const       x[],
                                                      int64_t                       incx,
                                                      int64_t                       batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The tpsvStridedBatched functions solve:

         A_i*x_i = b_i or A_i**T*x_i = b_i, or A_i**H*x_i = b_i,

    where ``x_i`` and ``b_i`` are vectors and ``A_i`` is a triangular matrix stored in the packed format,
    for ``i`` in [1, ``batchCount``].

    The input vectors ``b_i`` are overwritten by the output vectors ``x_i``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  each A_i is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  each A_i is a  lower triangular matrix.

    @param[in]
    transA  [hipblasOperation_t]
            - HIPBLAS_OP_N: Solves A*x = b
            - HIPBLAS_OP_T: Solves A**T*x = b
            - HIPBLAS_OP_C: Solves A**H*x = b

    @param[in]
    diag    [hipblasDiagType_t]
            - HIPBLAS_DIAG_UNIT:     each A_i is assumed to be unit triangular (that is, the diagonal elements
                                       of each A_i are not used in computations).
            - HIPBLAS_DIAG_NON_UNIT: each A_i is not assumed to be unit triangular.

    @param[in]
    n         [int]
              n specifies the number of rows of each b_i. n >= 0.

    @param[in]
    AP        device pointer pointing to the first packed matrix A_1
              of dimension >= (n * (n + 1) / 2).

    @param[in]
    strideA  [hipblasStride]
              stride from the beginning of one packed matrix (AP_i) to the next (AP_i+1).

    @param[inout]
    x         device pointer pointing to the first input vector b_1. Overwritten by each x_i on output.

    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.
    @param[in]
    stridex  [hipblasStride]
              stride from the beginning of one vector (x_i) to the next (x_i+1).
    @param[in]
    batchCount [int]
                specifies the number of instances in the batch.

    ********************************************************************/

HIPBLAS_EXPORT hipblasStatus_t hipblasStpsvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          const float*       AP,
                                                          hipblasStride      strideA,
                                                          float*             x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasDtpsvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          const double*      AP,
                                                          hipblasStride      strideA,
                                                          double*            x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasCtpsvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          const hipComplex*  AP,
                                                          hipblasStride      strideA,
                                                          hipComplex*        x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZtpsvStridedBatched(hipblasHandle_t         handle,
                                                          hipblasFillMode_t       uplo,
                                                          hipblasOperation_t      transA,
                                                          hipblasDiagType_t       diag,
                                                          int                     n,
                                                          const hipDoubleComplex* AP,
                                                          hipblasStride           strideA,
                                                          hipDoubleComplex*       x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          int                     batchCount);

// 64-bit interface
// tpsvStridedBatched, 'S' variant of the 64-bit interface: float data, with
// n, incx and batchCount taken as int64_t (strides remain hipblasStride).
HIPBLAS_EXPORT hipblasStatus_t hipblasStpsvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             const float*       AP,
                                                             hipblasStride      strideA,
                                                             float*             x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

// tpsvStridedBatched, 'D' variant of the 64-bit interface: double data, with
// n, incx and batchCount taken as int64_t (strides remain hipblasStride).
HIPBLAS_EXPORT hipblasStatus_t hipblasDtpsvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             const double*      AP,
                                                             hipblasStride      strideA,
                                                             double*            x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

// tpsvStridedBatched, 'C' variant of the 64-bit interface: hipComplex data,
// with n, incx and batchCount taken as int64_t (strides remain hipblasStride).
HIPBLAS_EXPORT hipblasStatus_t hipblasCtpsvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             const hipComplex*  AP,
                                                             hipblasStride      strideA,
                                                             hipComplex*        x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

// tpsvStridedBatched, 'Z' variant of the 64-bit interface: hipDoubleComplex
// data, with n, incx and batchCount taken as int64_t (strides remain
// hipblasStride).
HIPBLAS_EXPORT hipblasStatus_t hipblasZtpsvStridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasFillMode_t       uplo,
                                                             hipblasOperation_t      transA,
                                                             hipblasDiagType_t       diag,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* AP,
                                                             hipblasStride           strideA,
                                                             hipDoubleComplex*       x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The trmv functions perform one of the matrix-vector operations:

         x = A*x or x = A**T*x or x = A**H*x,

    where ``x`` is an ``n``-element vector and ``A`` is an ``n`` by ``n`` unit, or non-unit, upper or lower triangular matrix.

    The vector ``x`` is overwritten.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  A is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  A is a lower triangular matrix.

    @param[in]
    transA     [hipblasOperation_t]
               - HIPBLAS_OP_N: x = A*x.
               - HIPBLAS_OP_T: x = A**T*x.
               - HIPBLAS_OP_C: x = A**H*x.

    @param[in]
    diag    [hipblasDiagType_t]
            - HIPBLAS_DIAG_UNIT:     A is assumed to be unit triangular.
            - HIPBLAS_DIAG_NON_UNIT:  A is not assumed to be unit triangular.

    @param[in]
    n         [int]
              n specifies the number of rows of A. n >= 0.

    @param[in]
    AP        device pointer storing matrix A,
              of dimension ( lda, n ).

    @param[in]
    lda       [int]
              specifies the leading dimension of A.
              lda >= max( 1, n ).

    @param[inout]
    x         device pointer storing vector x. On exit, x is overwritten with the result.

    @param[in]
    incx      [int]
              specifies the increment for the elements of x.

    ********************************************************************/

// trmv, 'S' variant: float matrix A (read-only, passed as AP) and float
// vector x, which is overwritten. n, lda and incx are plain int.
HIPBLAS_EXPORT hipblasStatus_t hipblasStrmv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            const float*       AP,
                                            int                lda,
                                            float*             x,
                                            int                incx);

// trmv, 'D' variant: double matrix A (read-only, passed as AP) and double
// vector x, which is overwritten. n, lda and incx are plain int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtrmv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            const double*      AP,
                                            int                lda,
                                            double*            x,
                                            int                incx);

// trmv, 'C' variant: hipComplex matrix A (read-only, passed as AP) and
// hipComplex vector x, which is overwritten. n, lda and incx are plain int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtrmv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            const hipComplex*  AP,
                                            int                lda,
                                            hipComplex*        x,
                                            int                incx);

// trmv, 'Z' variant: hipDoubleComplex matrix A (read-only, passed as AP) and
// hipDoubleComplex vector x, which is overwritten. n, lda and incx are plain
// int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtrmv(hipblasHandle_t         handle,
                                            hipblasFillMode_t       uplo,
                                            hipblasOperation_t      transA,
                                            hipblasDiagType_t       diag,
                                            int                     n,
                                            const hipDoubleComplex* AP,
                                            int                     lda,
                                            hipDoubleComplex*       x,
                                            int                     incx);

// 64-bit interface
// trmv, 'S' variant of the 64-bit interface: same argument list as
// hipblasStrmv, but n, lda and incx are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasStrmv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               const float*       AP,
                                               int64_t            lda,
                                               float*             x,
                                               int64_t            incx);

// trmv, 'D' variant of the 64-bit interface: same argument list as
// hipblasDtrmv, but n, lda and incx are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtrmv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               const double*      AP,
                                               int64_t            lda,
                                               double*            x,
                                               int64_t            incx);

// trmv, 'C' variant of the 64-bit interface: same argument list as
// hipblasCtrmv, but n, lda and incx are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtrmv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               const hipComplex*  AP,
                                               int64_t            lda,
                                               hipComplex*        x,
                                               int64_t            incx);

// trmv, 'Z' variant of the 64-bit interface: same argument list as
// hipblasZtrmv, but n, lda and incx are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtrmv_64(hipblasHandle_t         handle,
                                               hipblasFillMode_t       uplo,
                                               hipblasOperation_t      transA,
                                               hipblasDiagType_t       diag,
                                               int64_t                 n,
                                               const hipDoubleComplex* AP,
                                               int64_t                 lda,
                                               hipDoubleComplex*       x,
                                               int64_t                 incx);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The trmvBatched functions perform one of the matrix-vector operations:

         x_i = A_i*x_i or x_i = A_i**T*x_i or x_i = A_i**H*x_i, 0 \le i < batchCount

    where ``x_i`` is an ``n``-element vector and ``A_i`` is an ``n`` by ``n`` (unit, or non-unit, upper or lower triangular) matrix.

    The vectors ``x_i`` are overwritten.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  A_i is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  A_i is a lower triangular matrix.

    @param[in]
    transA     [hipblasOperation_t]
               - HIPBLAS_OP_N: x_i = A_i*x_i.
               - HIPBLAS_OP_T: x_i = A_i**T*x_i.
               - HIPBLAS_OP_C: x_i = A_i**H*x_i.

    @param[in]
    diag    [hipblasDiagType_t]
            - HIPBLAS_DIAG_UNIT:     A_i is assumed to be unit triangular.
            - HIPBLAS_DIAG_NON_UNIT:  A_i is not assumed to be unit triangular.

    @param[in]
    n         [int]
              n specifies the number of rows of matrices A_i. n >= 0.

    @param[in]
    AP        device array of device pointers storing each matrix A_i,
              of dimension ( lda, n ).

    @param[in]
    lda       [int]
              specifies the leading dimension of A_i.
              lda >= max( 1, n ).

    @param[inout]
    x         device array of device pointers storing each vector x_i.
              On exit, each x_i is overwritten with the result.

    @param[in]
    incx      [int]
              specifies the increment for the elements of vectors x_i.

    @param[in]
    batchCount [int]
              The number of batched matrices/vectors.


    ********************************************************************/

// trmvBatched, 'S' variant: AP and x are arrays of per-batch float pointers
// (one A_i / x_i each). n, lda, incx and batchCount are plain int.
HIPBLAS_EXPORT hipblasStatus_t hipblasStrmvBatched(hipblasHandle_t    handle,
                                                   hipblasFillMode_t  uplo,
                                                   hipblasOperation_t transA,
                                                   hipblasDiagType_t  diag,
                                                   int                n,
                                                   const float* const AP[],
                                                   int                lda,
                                                   float* const       x[],
                                                   int                incx,
                                                   int                batchCount);

// trmvBatched, 'D' variant: AP and x are arrays of per-batch double pointers
// (one A_i / x_i each). n, lda, incx and batchCount are plain int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtrmvBatched(hipblasHandle_t     handle,
                                                   hipblasFillMode_t   uplo,
                                                   hipblasOperation_t  transA,
                                                   hipblasDiagType_t   diag,
                                                   int                 n,
                                                   const double* const AP[],
                                                   int                 lda,
                                                   double* const       x[],
                                                   int                 incx,
                                                   int                 batchCount);

// trmvBatched, 'C' variant: AP and x are arrays of per-batch hipComplex
// pointers (one A_i / x_i each). n, lda, incx and batchCount are plain int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtrmvBatched(hipblasHandle_t         handle,
                                                   hipblasFillMode_t       uplo,
                                                   hipblasOperation_t      transA,
                                                   hipblasDiagType_t       diag,
                                                   int                     n,
                                                   const hipComplex* const AP[],
                                                   int                     lda,
                                                   hipComplex* const       x[],
                                                   int                     incx,
                                                   int                     batchCount);

// trmvBatched, 'Z' variant: AP and x are arrays of per-batch hipDoubleComplex
// pointers (one A_i / x_i each). n, lda, incx and batchCount are plain int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtrmvBatched(hipblasHandle_t               handle,
                                                   hipblasFillMode_t             uplo,
                                                   hipblasOperation_t            transA,
                                                   hipblasDiagType_t             diag,
                                                   int                           n,
                                                   const hipDoubleComplex* const AP[],
                                                   int                           lda,
                                                   hipDoubleComplex* const       x[],
                                                   int                           incx,
                                                   int                           batchCount);
// 64-bit interface
// trmvBatched, 'S' variant of the 64-bit interface: same argument list as
// hipblasStrmvBatched, but n, lda, incx and batchCount are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasStrmvBatched_64(hipblasHandle_t    handle,
                                                      hipblasFillMode_t  uplo,
                                                      hipblasOperation_t transA,
                                                      hipblasDiagType_t  diag,
                                                      int64_t            n,
                                                      const float* const AP[],
                                                      int64_t            lda,
                                                      float* const       x[],
                                                      int64_t            incx,
                                                      int64_t            batchCount);

// trmvBatched, 'D' variant of the 64-bit interface: same argument list as
// hipblasDtrmvBatched, but n, lda, incx and batchCount are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtrmvBatched_64(hipblasHandle_t     handle,
                                                      hipblasFillMode_t   uplo,
                                                      hipblasOperation_t  transA,
                                                      hipblasDiagType_t   diag,
                                                      int64_t             n,
                                                      const double* const AP[],
                                                      int64_t             lda,
                                                      double* const       x[],
                                                      int64_t             incx,
                                                      int64_t             batchCount);

// trmvBatched, 'C' variant of the 64-bit interface: same argument list as
// hipblasCtrmvBatched, but n, lda, incx and batchCount are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtrmvBatched_64(hipblasHandle_t         handle,
                                                      hipblasFillMode_t       uplo,
                                                      hipblasOperation_t      transA,
                                                      hipblasDiagType_t       diag,
                                                      int64_t                 n,
                                                      const hipComplex* const AP[],
                                                      int64_t                 lda,
                                                      hipComplex* const       x[],
                                                      int64_t                 incx,
                                                      int64_t                 batchCount);

// trmvBatched, 'Z' variant of the 64-bit interface: same argument list as
// hipblasZtrmvBatched, but n, lda, incx and batchCount are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtrmvBatched_64(hipblasHandle_t               handle,
                                                      hipblasFillMode_t             uplo,
                                                      hipblasOperation_t            transA,
                                                      hipblasDiagType_t             diag,
                                                      int64_t                       n,
                                                      const hipDoubleComplex* const AP[],
                                                      int64_t                       lda,
                                                      hipDoubleComplex* const       x[],
                                                      int64_t                       incx,
                                                      int64_t                       batchCount);
//! @}

/*! @{
    \brief  <b> BLAS Level 2 API </b>

    \details
    The trmvStridedBatched functions perform one of the matrix-vector operations:

         x_i = A_i*x_i or x_i = A_i**T*x_i or x_i = A_i**H*x_i, 0 \le i < batchCount

    where ``x_i`` is an ``n``-element vector and ``A_i`` is an ``n`` by ``n`` (unit, or non-unit, upper or lower triangular) matrix,
    with strides specifying how to retrieve ``$x_i$`` (resp. ``$A_i$``) from ``$x_{i-1}$`` (resp. ``$A_{i-1}$``).

    The vectors ``x_i`` are overwritten.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  A_i is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  A_i is a lower triangular matrix.

    @param[in]
    transA     [hipblasOperation_t]
               - HIPBLAS_OP_N: x_i = A_i*x_i.
               - HIPBLAS_OP_T: x_i = A_i**T*x_i.
               - HIPBLAS_OP_C: x_i = A_i**H*x_i.

    @param[in]
    diag    [hipblasDiagType_t]
            - HIPBLAS_DIAG_UNIT:     A_i is assumed to be unit triangular.
            - HIPBLAS_DIAG_NON_UNIT:  A_i is not assumed to be unit triangular.

    @param[in]
    n         [int]
              n specifies the number of rows of matrices A_i. n >= 0.

    @param[in]
    AP        device pointer of the matrix A_0,
              of dimension ( lda, n ).

    @param[in]
    lda       [int]
              specifies the leading dimension of A_i.
              lda >= max( 1, n ).

    @param[in]
    strideA  [hipblasStride]
              stride from the start of one A_i matrix to the next A_{i + 1}.

    @param[inout]
    x         device pointer storing the vector x_0. On exit, each x_i is overwritten with the result.

    @param[in]
    incx      [int]
              specifies the increment for the elements of each vector x_i.

    @param[in]
    stridex  [hipblasStride]
              stride from the start of one x_i vector to the next x_{i + 1}.

    @param[in]
    batchCount [int]
              The number of batched matrices/vectors.


    ********************************************************************/

// trmvStridedBatched, 'S' variant: float data; AP and x point at A_0 and x_0,
// with later batches located via strideA/stridex. n, lda, incx and batchCount
// are plain int.
HIPBLAS_EXPORT hipblasStatus_t hipblasStrmvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          const float*       AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          float*             x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

// trmvStridedBatched, 'D' variant: double data; AP and x point at A_0 and
// x_0, with later batches located via strideA/stridex. n, lda, incx and
// batchCount are plain int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtrmvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          const double*      AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          double*            x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

// trmvStridedBatched, 'C' variant: hipComplex data; AP and x point at A_0 and
// x_0, with later batches located via strideA/stridex. n, lda, incx and
// batchCount are plain int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtrmvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          const hipComplex*  AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          hipComplex*        x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

// trmvStridedBatched, 'Z' variant: hipDoubleComplex data; AP and x point at
// A_0 and x_0, with later batches located via strideA/stridex. n, lda, incx
// and batchCount are plain int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtrmvStridedBatched(hipblasHandle_t         handle,
                                                          hipblasFillMode_t       uplo,
                                                          hipblasOperation_t      transA,
                                                          hipblasDiagType_t       diag,
                                                          int                     n,
                                                          const hipDoubleComplex* AP,
                                                          int                     lda,
                                                          hipblasStride           strideA,
                                                          hipDoubleComplex*       x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          int                     batchCount);

// 64-bit interface
// trmvStridedBatched, 'S' variant of the 64-bit interface: same argument list
// as hipblasStrmvStridedBatched, but n, lda, incx and batchCount are int64_t
// (strides remain hipblasStride).
HIPBLAS_EXPORT hipblasStatus_t hipblasStrmvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             const float*       AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             float*             x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

// trmvStridedBatched, 'D' variant of the 64-bit interface: same argument list
// as hipblasDtrmvStridedBatched, but n, lda, incx and batchCount are int64_t
// (strides remain hipblasStride).
HIPBLAS_EXPORT hipblasStatus_t hipblasDtrmvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             const double*      AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             double*            x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

// trmvStridedBatched, 'C' variant of the 64-bit interface: same argument list
// as hipblasCtrmvStridedBatched, but n, lda, incx and batchCount are int64_t
// (strides remain hipblasStride).
HIPBLAS_EXPORT hipblasStatus_t hipblasCtrmvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             const hipComplex*  AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             hipComplex*        x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

// trmvStridedBatched, 'Z' variant of the 64-bit interface: same argument list
// as hipblasZtrmvStridedBatched, but n, lda, incx and batchCount are int64_t
// (strides remain hipblasStride).
HIPBLAS_EXPORT hipblasStatus_t hipblasZtrmvStridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasFillMode_t       uplo,
                                                             hipblasOperation_t      transA,
                                                             hipblasDiagType_t       diag,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* AP,
                                                             int64_t                 lda,
                                                             hipblasStride           strideA,
                                                             hipDoubleComplex*       x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The trsv functions solve:

         A*x = b or A**T*x = b or A**H*x = b,

    where ``x`` and ``b`` are vectors and ``A`` is a triangular matrix.

    The solution vector ``x`` overwrites ``b``: the argument ``x`` holds ``b`` on entry and the solution on exit.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  A is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  A is a lower triangular matrix.

    @param[in]
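    transA     [hipblasOperation_t]
               - HIPBLAS_OP_N: Solves A*x = b.
               - HIPBLAS_OP_T: Solves A**T*x = b.
               - HIPBLAS_OP_C: Solves A**H*x = b.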

    @param[in]
    diag    [hipblasDiagType_t]
            - HIPBLAS_DIAG_UNIT:     A is assumed to be unit triangular.
            - HIPBLAS_DIAG_NON_UNIT:  A is not assumed to be unit triangular.

    @param[in]
    n         [int]
              n specifies the number of rows of b. n >= 0.

    @param[in]
    AP        device pointer storing matrix A,
              of dimension ( lda, n ).

    @param[in]
    lda       [int]
              specifies the leading dimension of A.
              lda >= max( 1, n ).

    @param[inout]
    x         device pointer storing vector b on entry; overwritten with the solution vector x on exit.

    @param[in]
    incx      [int]
              specifies the increment for the elements of x.

    ********************************************************************/

// trsv, 'S' variant: float triangular matrix A (read-only, passed as AP) and
// float vector x, which is overwritten. n, lda and incx are plain int.
HIPBLAS_EXPORT hipblasStatus_t hipblasStrsv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            const float*       AP,
                                            int                lda,
                                            float*             x,
                                            int                incx);

// trsv, 'D' variant: double triangular matrix A (read-only, passed as AP) and
// double vector x, which is overwritten. n, lda and incx are plain int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            const double*      AP,
                                            int                lda,
                                            double*            x,
                                            int                incx);

// trsv, 'C' variant: hipComplex triangular matrix A (read-only, passed as AP)
// and hipComplex vector x, which is overwritten. n, lda and incx are plain
// int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsv(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            hipblasDiagType_t  diag,
                                            int                n,
                                            const hipComplex*  AP,
                                            int                lda,
                                            hipComplex*        x,
                                            int                incx);

// trsv, 'Z' variant: hipDoubleComplex triangular matrix A (read-only, passed
// as AP) and hipDoubleComplex vector x, which is overwritten. n, lda and incx
// are plain int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsv(hipblasHandle_t         handle,
                                            hipblasFillMode_t       uplo,
                                            hipblasOperation_t      transA,
                                            hipblasDiagType_t       diag,
                                            int                     n,
                                            const hipDoubleComplex* AP,
                                            int                     lda,
                                            hipDoubleComplex*       x,
                                            int                     incx);
// 64-bit interface
// trsv for float data with int64_t n, lda and incx: AP is a read-only
// const float* and x is a writable float*.
HIPBLAS_EXPORT hipblasStatus_t hipblasStrsv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               const float*       AP,
                                               int64_t            lda,
                                               float*             x,
                                               int64_t            incx);

// trsv for double data with int64_t n, lda and incx (hipblasDtrsv takes the
// same parameters as int).
HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               const double*      AP,
                                               int64_t            lda,
                                               double*            x,
                                               int64_t            incx);

// trsv for hipComplex data with int64_t n, lda and incx (hipblasCtrsv takes
// the same parameters as int).
HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsv_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               hipblasDiagType_t  diag,
                                               int64_t            n,
                                               const hipComplex*  AP,
                                               int64_t            lda,
                                               hipComplex*        x,
                                               int64_t            incx);

// trsv for hipDoubleComplex data with int64_t n, lda and incx (hipblasZtrsv
// takes the same parameters as int).
HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsv_64(hipblasHandle_t         handle,
                                               hipblasFillMode_t       uplo,
                                               hipblasOperation_t      transA,
                                               hipblasDiagType_t       diag,
                                               int64_t                 n,
                                               const hipDoubleComplex* AP,
                                               int64_t                 lda,
                                               hipDoubleComplex*       x,
                                               int64_t                 incx);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The trsvBatched functions solve:

         A_i*x_i = b_i, A_i**T*x_i = b_i, or A_i**H*x_i = b_i,

    where ``(A_i, x_i, b_i)`` is the ``i``-th instance of the batch,
    ``x_i`` and ``b_i`` are vectors, and ``A_i`` is an
    ``n`` by ``n`` triangular matrix.

    Each solution vector ``x_i`` overwrites the corresponding right-hand side ``b_i``, which is passed in through ``x``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  A is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  A is a lower triangular matrix.

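    @param[in]
    transA     [hipblasOperation_t]
            - HIPBLAS_OP_N: Solves A_i*x_i = b_i.
            - HIPBLAS_OP_T: Solves A_i**T*x_i = b_i.
            - HIPBLAS_OP_C: Solves A_i**H*x_i = b_i.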

    @param[in]
    diag    [hipblasDiagType_t]
            - HIPBLAS_DIAG_UNIT:     A is assumed to be unit triangular.
            - HIPBLAS_DIAG_NON_UNIT:  A is not assumed to be unit triangular.

    @param[in]
    n         [int]
              n specifies the number of rows of each b_i. n >= 0.

    @param[in]
    AP         device array of device pointers storing each matrix A_i.

    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i.
              lda >= max(1, n).

    @param[in, out]
    x         device array of device pointers storing each vector x_i.

    @param[in]
    incx      [int]
              specifies the increment for the elements of x.

    @param[in]
    batchCount [int]
                number of instances in the batch

    ********************************************************************/

// trsvBatched for float data: AP and x are arrays of per-instance pointers
// (const float* const AP[], float* const x[]); n, lda, incx and batchCount
// are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasStrsvBatched(hipblasHandle_t    handle,
                                                   hipblasFillMode_t  uplo,
                                                   hipblasOperation_t transA,
                                                   hipblasDiagType_t  diag,
                                                   int                n,
                                                   const float* const AP[],
                                                   int                lda,
                                                   float* const       x[],
                                                   int                incx,
                                                   int                batchCount);

// trsvBatched for double data: AP and x are arrays of per-instance pointers
// (const double* const AP[], double* const x[]); n, lda, incx and batchCount
// are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsvBatched(hipblasHandle_t     handle,
                                                   hipblasFillMode_t   uplo,
                                                   hipblasOperation_t  transA,
                                                   hipblasDiagType_t   diag,
                                                   int                 n,
                                                   const double* const AP[],
                                                   int                 lda,
                                                   double* const       x[],
                                                   int                 incx,
                                                   int                 batchCount);

// trsvBatched for hipComplex data: AP and x are arrays of per-instance
// pointers (const hipComplex* const AP[], hipComplex* const x[]); n, lda,
// incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsvBatched(hipblasHandle_t         handle,
                                                   hipblasFillMode_t       uplo,
                                                   hipblasOperation_t      transA,
                                                   hipblasDiagType_t       diag,
                                                   int                     n,
                                                   const hipComplex* const AP[],
                                                   int                     lda,
                                                   hipComplex* const       x[],
                                                   int                     incx,
                                                   int                     batchCount);

// trsvBatched for hipDoubleComplex data: AP and x are arrays of per-instance
// pointers (const hipDoubleComplex* const AP[], hipDoubleComplex* const x[]);
// n, lda, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsvBatched(hipblasHandle_t               handle,
                                                   hipblasFillMode_t             uplo,
                                                   hipblasOperation_t            transA,
                                                   hipblasDiagType_t             diag,
                                                   int                           n,
                                                   const hipDoubleComplex* const AP[],
                                                   int                           lda,
                                                   hipDoubleComplex* const       x[],
                                                   int                           incx,
                                                   int                           batchCount);

// 64-bit interface
// trsvBatched for float data with int64_t n, lda, incx and batchCount
// (hipblasStrsvBatched takes the same parameters as int).
HIPBLAS_EXPORT hipblasStatus_t hipblasStrsvBatched_64(hipblasHandle_t    handle,
                                                      hipblasFillMode_t  uplo,
                                                      hipblasOperation_t transA,
                                                      hipblasDiagType_t  diag,
                                                      int64_t            n,
                                                      const float* const AP[],
                                                      int64_t            lda,
                                                      float* const       x[],
                                                      int64_t            incx,
                                                      int64_t            batchCount);

// trsvBatched for double data with int64_t n, lda, incx and batchCount
// (hipblasDtrsvBatched takes the same parameters as int).
HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsvBatched_64(hipblasHandle_t     handle,
                                                      hipblasFillMode_t   uplo,
                                                      hipblasOperation_t  transA,
                                                      hipblasDiagType_t   diag,
                                                      int64_t             n,
                                                      const double* const AP[],
                                                      int64_t             lda,
                                                      double* const       x[],
                                                      int64_t             incx,
                                                      int64_t             batchCount);

// trsvBatched for hipComplex data with int64_t n, lda, incx and batchCount
// (hipblasCtrsvBatched takes the same parameters as int).
HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsvBatched_64(hipblasHandle_t         handle,
                                                      hipblasFillMode_t       uplo,
                                                      hipblasOperation_t      transA,
                                                      hipblasDiagType_t       diag,
                                                      int64_t                 n,
                                                      const hipComplex* const AP[],
                                                      int64_t                 lda,
                                                      hipComplex* const       x[],
                                                      int64_t                 incx,
                                                      int64_t                 batchCount);

// trsvBatched for hipDoubleComplex data with int64_t n, lda, incx and
// batchCount (hipblasZtrsvBatched takes the same parameters as int).
HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsvBatched_64(hipblasHandle_t               handle,
                                                      hipblasFillMode_t             uplo,
                                                      hipblasOperation_t            transA,
                                                      hipblasDiagType_t             diag,
                                                      int64_t                       n,
                                                      const hipDoubleComplex* const AP[],
                                                      int64_t                       lda,
                                                      hipDoubleComplex* const       x[],
                                                      int64_t                       incx,
                                                      int64_t                       batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 2 API </b>

    \details
    The trsvStridedBatched functions solve:

         A_i*x_i = b_i, A_i**T*x_i = b_i, or A_i**H*x_i = b_i,

    where ``(A_i, x_i, b_i)`` is the ``i``-th instance of the batch,
    ``x_i`` and ``b_i`` are vectors, and ``A_i`` is an ``n`` by ``n`` triangular matrix, for ``i`` = 1, ..., ``batchCount``.

    Each solution vector ``x_i`` overwrites the corresponding right-hand side ``b_i``, which is passed in through ``x``.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  A is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  A is a lower triangular matrix.

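    @param[in]
    transA     [hipblasOperation_t]
            - HIPBLAS_OP_N: Solves A_i*x_i = b_i.
            - HIPBLAS_OP_T: Solves A_i**T*x_i = b_i.
            - HIPBLAS_OP_C: Solves A_i**H*x_i = b_i.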

    @param[in]
    diag    [hipblasDiagType_t]
            - HIPBLAS_DIAG_UNIT:     A is assumed to be unit triangular.
            - HIPBLAS_DIAG_NON_UNIT:  A is not assumed to be unit triangular.

    @param[in]
    n         [int]
              n specifies the number of rows of each b_i. n >= 0.

    @param[in]
    AP         device pointer to the first matrix (A_1) in the batch, of dimension ( lda, n ).

    @param[in]
    strideA  [hipblasStride]
              stride from the start of one A_i matrix to the next A_(i + 1).

    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i.
              lda >= max( 1, n ).

    @param[in, out]
    x         device pointer to the first vector (x_1) in the batch.

    @param[in]
    stridex [hipblasStride]
             stride from the start of one x_i vector to the next x_(i + 1).

    @param[in]
    incx      [int]
              specifies the increment for the elements of each x_i.

    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// trsvStridedBatched for float data: AP and x are single base pointers, and
// strideA / stridex (hipblasStride) give the distance from one batch instance
// to the next; n, lda, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasStrsvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          const float*       AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          float*             x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

// trsvStridedBatched for double data: AP and x are single base pointers, and
// strideA / stridex (hipblasStride) give the distance from one batch instance
// to the next; n, lda, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          const double*      AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          double*            x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

// trsvStridedBatched for hipComplex data: AP and x are single base pointers,
// and strideA / stridex (hipblasStride) give the distance from one batch
// instance to the next; n, lda, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsvStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          hipblasDiagType_t  diag,
                                                          int                n,
                                                          const hipComplex*  AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          hipComplex*        x,
                                                          int                incx,
                                                          hipblasStride      stridex,
                                                          int                batchCount);

// trsvStridedBatched for hipDoubleComplex data: AP and x are single base
// pointers, and strideA / stridex (hipblasStride) give the distance from one
// batch instance to the next; n, lda, incx and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsvStridedBatched(hipblasHandle_t         handle,
                                                          hipblasFillMode_t       uplo,
                                                          hipblasOperation_t      transA,
                                                          hipblasDiagType_t       diag,
                                                          int                     n,
                                                          const hipDoubleComplex* AP,
                                                          int                     lda,
                                                          hipblasStride           strideA,
                                                          hipDoubleComplex*       x,
                                                          int                     incx,
                                                          hipblasStride           stridex,
                                                          int                     batchCount);

// 64-bit interface
// trsvStridedBatched for float data with int64_t n, lda, incx and batchCount;
// strideA and stridex remain hipblasStride, as in hipblasStrsvStridedBatched.
HIPBLAS_EXPORT hipblasStatus_t hipblasStrsvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             const float*       AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             float*             x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

// trsvStridedBatched for double data with int64_t n, lda, incx and batchCount;
// strideA and stridex remain hipblasStride, as in hipblasDtrsvStridedBatched.
HIPBLAS_EXPORT hipblasStatus_t hipblasDtrsvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             const double*      AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             double*            x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

// trsvStridedBatched for hipComplex data with int64_t n, lda, incx and
// batchCount; strideA and stridex remain hipblasStride, as in
// hipblasCtrsvStridedBatched.
HIPBLAS_EXPORT hipblasStatus_t hipblasCtrsvStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             hipblasDiagType_t  diag,
                                                             int64_t            n,
                                                             const hipComplex*  AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             hipComplex*        x,
                                                             int64_t            incx,
                                                             hipblasStride      stridex,
                                                             int64_t            batchCount);

// trsvStridedBatched for hipDoubleComplex data with int64_t n, lda, incx and
// batchCount; strideA and stridex remain hipblasStride, as in
// hipblasZtrsvStridedBatched.
HIPBLAS_EXPORT hipblasStatus_t hipblasZtrsvStridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasFillMode_t       uplo,
                                                             hipblasOperation_t      transA,
                                                             hipblasDiagType_t       diag,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* AP,
                                                             int64_t                 lda,
                                                             hipblasStride           strideA,
                                                             hipDoubleComplex*       x,
                                                             int64_t                 incx,
                                                             hipblasStride           stridex,
                                                             int64_t                 batchCount);
//! @}

/*
 * ===========================================================================
 *    level 3 BLAS
 * ===========================================================================
 */

/*! @{
    \brief <b> BLAS Level 3 API </b>

    \details
    The gemm functions perform one of the matrix-matrix operations:

        C = alpha*op( A )*op( B ) + beta*C,

    where op( X ) is one of:

        op( X ) = X      or
        op( X ) = X**T   or
        op( X ) = X**H,

    ``alpha`` and ``beta`` are scalars, and ``A``, ``B``, and ``C`` are matrices, with
    ``op( A )`` an ``m`` by ``k`` matrix, ``op( B )`` a ``k`` by ``n`` matrix, and ``C`` an ``m`` by ``n`` matrix.

    - Supported precisions in rocBLAS : ``h``, ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``h``, ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    transA    [hipblasOperation_t]
              specifies the form of op( A ).
    @param[in]
    transB    [hipblasOperation_t]
              specifies the form of op( B ).
    @param[in]
    m         [int]
              number of rows of matrices op( A ) and C.
    @param[in]
    n         [int]
              number of columns of matrices op( B ) and C.
    @param[in]
    k         [int]
              number of columns of matrix op( A ) and number of rows of matrix op( B ).
    @param[in]
    alpha     device pointer or host pointer specifying the scalar alpha.
    @param[in]
    AP         device pointer storing matrix A.
    @param[in]
    lda       [int]
              specifies the leading dimension of A.
    @param[in]
    BP         device pointer storing matrix B.
    @param[in]
    ldb       [int]
              specifies the leading dimension of B.
    @param[in]
    beta      device pointer or host pointer specifying the scalar beta.
    @param[in, out]
    CP         device pointer storing matrix C on the GPU.
    @param[in]
    ldc       [int]
              specifies the leading dimension of C.

    ********************************************************************/

// gemm for hipblasHalf data: alpha, AP, BP and beta are read-only (const),
// CP is the writable in/out matrix; m, n, k and the leading dimensions are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasHgemm(hipblasHandle_t    handle,
                                            hipblasOperation_t transA,
                                            hipblasOperation_t transB,
                                            int                m,
                                            int                n,
                                            int                k,
                                            const hipblasHalf* alpha,
                                            const hipblasHalf* AP,
                                            int                lda,
                                            const hipblasHalf* BP,
                                            int                ldb,
                                            const hipblasHalf* beta,
                                            hipblasHalf*       CP,
                                            int                ldc);

// gemm for float data: alpha, AP, BP and beta are read-only (const), CP is
// the writable in/out matrix; m, n, k and the leading dimensions are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgemm(hipblasHandle_t    handle,
                                            hipblasOperation_t transA,
                                            hipblasOperation_t transB,
                                            int                m,
                                            int                n,
                                            int                k,
                                            const float*       alpha,
                                            const float*       AP,
                                            int                lda,
                                            const float*       BP,
                                            int                ldb,
                                            const float*       beta,
                                            float*             CP,
                                            int                ldc);

// gemm for double data: alpha, AP, BP and beta are read-only (const), CP is
// the writable in/out matrix; m, n, k and the leading dimensions are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgemm(hipblasHandle_t    handle,
                                            hipblasOperation_t transA,
                                            hipblasOperation_t transB,
                                            int                m,
                                            int                n,
                                            int                k,
                                            const double*      alpha,
                                            const double*      AP,
                                            int                lda,
                                            const double*      BP,
                                            int                ldb,
                                            const double*      beta,
                                            double*            CP,
                                            int                ldc);

// gemm for hipComplex data: alpha, AP, BP and beta are read-only (const),
// CP is the writable in/out matrix; m, n, k and the leading dimensions are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgemm(hipblasHandle_t    handle,
                                            hipblasOperation_t transA,
                                            hipblasOperation_t transB,
                                            int                m,
                                            int                n,
                                            int                k,
                                            const hipComplex*  alpha,
                                            const hipComplex*  AP,
                                            int                lda,
                                            const hipComplex*  BP,
                                            int                ldb,
                                            const hipComplex*  beta,
                                            hipComplex*        CP,
                                            int                ldc);

// gemm for hipDoubleComplex data: alpha, AP, BP and beta are read-only
// (const), CP is the writable in/out matrix; m, n, k and the leading
// dimensions are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgemm(hipblasHandle_t         handle,
                                            hipblasOperation_t      transA,
                                            hipblasOperation_t      transB,
                                            int                     m,
                                            int                     n,
                                            int                     k,
                                            const hipDoubleComplex* alpha,
                                            const hipDoubleComplex* AP,
                                            int                     lda,
                                            const hipDoubleComplex* BP,
                                            int                     ldb,
                                            const hipDoubleComplex* beta,
                                            hipDoubleComplex*       CP,
                                            int                     ldc);

// 64-bit interface
// gemm for hipblasHalf data with int64_t m, n, k, lda, ldb and ldc
// (hipblasHgemm takes the same parameters as int).
HIPBLAS_EXPORT hipblasStatus_t hipblasHgemm_64(hipblasHandle_t    handle,
                                               hipblasOperation_t transA,
                                               hipblasOperation_t transB,
                                               int64_t            m,
                                               int64_t            n,
                                               int64_t            k,
                                               const hipblasHalf* alpha,
                                               const hipblasHalf* AP,
                                               int64_t            lda,
                                               const hipblasHalf* BP,
                                               int64_t            ldb,
                                               const hipblasHalf* beta,
                                               hipblasHalf*       CP,
                                               int64_t            ldc);

// gemm for float data with int64_t m, n, k, lda, ldb and ldc (hipblasSgemm
// takes the same parameters as int).
HIPBLAS_EXPORT hipblasStatus_t hipblasSgemm_64(hipblasHandle_t    handle,
                                               hipblasOperation_t transA,
                                               hipblasOperation_t transB,
                                               int64_t            m,
                                               int64_t            n,
                                               int64_t            k,
                                               const float*       alpha,
                                               const float*       AP,
                                               int64_t            lda,
                                               const float*       BP,
                                               int64_t            ldb,
                                               const float*       beta,
                                               float*             CP,
                                               int64_t            ldc);

// gemm for double data with int64_t m, n, k, lda, ldb and ldc (hipblasDgemm
// takes the same parameters as int).
HIPBLAS_EXPORT hipblasStatus_t hipblasDgemm_64(hipblasHandle_t    handle,
                                               hipblasOperation_t transA,
                                               hipblasOperation_t transB,
                                               int64_t            m,
                                               int64_t            n,
                                               int64_t            k,
                                               const double*      alpha,
                                               const double*      AP,
                                               int64_t            lda,
                                               const double*      BP,
                                               int64_t            ldb,
                                               const double*      beta,
                                               double*            CP,
                                               int64_t            ldc);

// gemm for hipComplex data with int64_t m, n, k, lda, ldb and ldc
// (hipblasCgemm takes the same parameters as int).
HIPBLAS_EXPORT hipblasStatus_t hipblasCgemm_64(hipblasHandle_t    handle,
                                               hipblasOperation_t transA,
                                               hipblasOperation_t transB,
                                               int64_t            m,
                                               int64_t            n,
                                               int64_t            k,
                                               const hipComplex*  alpha,
                                               const hipComplex*  AP,
                                               int64_t            lda,
                                               const hipComplex*  BP,
                                               int64_t            ldb,
                                               const hipComplex*  beta,
                                               hipComplex*        CP,
                                               int64_t            ldc);

// gemm for hipDoubleComplex data with int64_t m, n, k, lda, ldb and ldc
// (hipblasZgemm takes the same parameters as int).
HIPBLAS_EXPORT hipblasStatus_t hipblasZgemm_64(hipblasHandle_t         handle,
                                               hipblasOperation_t      transA,
                                               hipblasOperation_t      transB,
                                               int64_t                 m,
                                               int64_t                 n,
                                               int64_t                 k,
                                               const hipDoubleComplex* alpha,
                                               const hipDoubleComplex* AP,
                                               int64_t                 lda,
                                               const hipDoubleComplex* BP,
                                               int64_t                 ldb,
                                               const hipDoubleComplex* beta,
                                               hipDoubleComplex*       CP,
                                               int64_t                 ldc);
//! @}

/*! @{
    \brief <b> BLAS Level 3 API </b>

    \details
    The gemmBatched functions perform one of the batched matrix-matrix operations:

         C_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batchCount.

    where ``op( X )`` is one of:

         op( X ) = X      or
         op( X ) = X**T   or
         op( X ) = X**H,

    ``alpha`` and ``beta`` are scalars, and ``A``, ``B``, and ``C`` are batched matrices, with
    ``op( A )`` an ``m`` by ``k`` by ``batchCount`` batched matrix,
    ``op( B )`` a ``k`` by ``n`` by ``batchCount`` batched matrix, and
    ``C`` an ``m`` by ``n`` by ``batchCount`` batched matrix.

    - Supported precisions in rocBLAS : ``h``, ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``h``, ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    transA    [hipblasOperation_t]
              specifies the form of op( A ).
    @param[in]
    transB    [hipblasOperation_t]
              specifies the form of op( B ).
    @param[in]
    m         [int]
              matrix dimension m.
    @param[in]
    n         [int]
              matrix dimension n.
    @param[in]
    k         [int]
              matrix dimension k.
    @param[in]
    alpha     device pointer or host pointer specifying the scalar alpha.
    @param[in]
    AP         device array of device pointers storing each matrix A_i.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i.
    @param[in]
    BP         device array of device pointers storing each matrix B_i.
    @param[in]
    ldb       [int]
              specifies the leading dimension of each B_i.
    @param[in]
    beta      device pointer or host pointer specifying the scalar beta.
    @param[in, out]
    CP         device array of device pointers storing each matrix C_i.
    @param[in]
    ldc       [int]
              specifies the leading dimension of each C_i.
    @param[in]
    batchCount
              [int]
              number of gemm operations in the batch.
     ********************************************************************/

// hipblasHalf batched gemm: AP, BP and CP are arrays holding one matrix pointer
// per batch entry; dimensions, leading dimensions and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasHgemmBatched(hipblasHandle_t          handle,
                                                   hipblasOperation_t       transA,
                                                   hipblasOperation_t       transB,
                                                   int                      m,
                                                   int                      n,
                                                   int                      k,
                                                   const hipblasHalf*       alpha,
                                                   const hipblasHalf* const AP[],
                                                   int                      lda,
                                                   const hipblasHalf* const BP[],
                                                   int                      ldb,
                                                   const hipblasHalf*       beta,
                                                   hipblasHalf* const       CP[],
                                                   int                      ldc,
                                                   int                      batchCount);

// float batched gemm: AP, BP and CP are arrays holding one matrix pointer
// per batch entry; dimensions, leading dimensions and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgemmBatched(hipblasHandle_t    handle,
                                                   hipblasOperation_t transA,
                                                   hipblasOperation_t transB,
                                                   int                m,
                                                   int                n,
                                                   int                k,
                                                   const float*       alpha,
                                                   const float* const AP[],
                                                   int                lda,
                                                   const float* const BP[],
                                                   int                ldb,
                                                   const float*       beta,
                                                   float* const       CP[],
                                                   int                ldc,
                                                   int                batchCount);

// double batched gemm: AP, BP and CP are arrays holding one matrix pointer
// per batch entry; dimensions, leading dimensions and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgemmBatched(hipblasHandle_t     handle,
                                                   hipblasOperation_t  transA,
                                                   hipblasOperation_t  transB,
                                                   int                 m,
                                                   int                 n,
                                                   int                 k,
                                                   const double*       alpha,
                                                   const double* const AP[],
                                                   int                 lda,
                                                   const double* const BP[],
                                                   int                 ldb,
                                                   const double*       beta,
                                                   double* const       CP[],
                                                   int                 ldc,
                                                   int                 batchCount);

// hipComplex batched gemm: AP, BP and CP are arrays holding one matrix pointer
// per batch entry; dimensions, leading dimensions and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgemmBatched(hipblasHandle_t         handle,
                                                   hipblasOperation_t      transA,
                                                   hipblasOperation_t      transB,
                                                   int                     m,
                                                   int                     n,
                                                   int                     k,
                                                   const hipComplex*       alpha,
                                                   const hipComplex* const AP[],
                                                   int                     lda,
                                                   const hipComplex* const BP[],
                                                   int                     ldb,
                                                   const hipComplex*       beta,
                                                   hipComplex* const       CP[],
                                                   int                     ldc,
                                                   int                     batchCount);

// hipDoubleComplex batched gemm: AP, BP and CP are arrays holding one matrix pointer
// per batch entry; dimensions, leading dimensions and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmBatched(hipblasHandle_t               handle,
                                                   hipblasOperation_t            transA,
                                                   hipblasOperation_t            transB,
                                                   int                           m,
                                                   int                           n,
                                                   int                           k,
                                                   const hipDoubleComplex*       alpha,
                                                   const hipDoubleComplex* const AP[],
                                                   int                           lda,
                                                   const hipDoubleComplex* const BP[],
                                                   int                           ldb,
                                                   const hipDoubleComplex*       beta,
                                                   hipDoubleComplex* const       CP[],
                                                   int                           ldc,
                                                   int                           batchCount);

// 64-bit interface
// hipblasHalf batched gemm, 64-bit interface: same parameter list as
// hipblasHgemmBatched, with dimensions, leading dimensions and batchCount as int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasHgemmBatched_64(hipblasHandle_t          handle,
                                                      hipblasOperation_t       transA,
                                                      hipblasOperation_t       transB,
                                                      int64_t                  m,
                                                      int64_t                  n,
                                                      int64_t                  k,
                                                      const hipblasHalf*       alpha,
                                                      const hipblasHalf* const AP[],
                                                      int64_t                  lda,
                                                      const hipblasHalf* const BP[],
                                                      int64_t                  ldb,
                                                      const hipblasHalf*       beta,
                                                      hipblasHalf* const       CP[],
                                                      int64_t                  ldc,
                                                      int64_t                  batchCount);

// float batched gemm, 64-bit interface: same parameter list as
// hipblasSgemmBatched, with dimensions, leading dimensions and batchCount as int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgemmBatched_64(hipblasHandle_t    handle,
                                                      hipblasOperation_t transA,
                                                      hipblasOperation_t transB,
                                                      int64_t            m,
                                                      int64_t            n,
                                                      int64_t            k,
                                                      const float*       alpha,
                                                      const float* const AP[],
                                                      int64_t            lda,
                                                      const float* const BP[],
                                                      int64_t            ldb,
                                                      const float*       beta,
                                                      float* const       CP[],
                                                      int64_t            ldc,
                                                      int64_t            batchCount);

// double batched gemm, 64-bit interface: same parameter list as
// hipblasDgemmBatched, with dimensions, leading dimensions and batchCount as int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgemmBatched_64(hipblasHandle_t     handle,
                                                      hipblasOperation_t  transA,
                                                      hipblasOperation_t  transB,
                                                      int64_t             m,
                                                      int64_t             n,
                                                      int64_t             k,
                                                      const double*       alpha,
                                                      const double* const AP[],
                                                      int64_t             lda,
                                                      const double* const BP[],
                                                      int64_t             ldb,
                                                      const double*       beta,
                                                      double* const       CP[],
                                                      int64_t             ldc,
                                                      int64_t             batchCount);

// hipComplex batched gemm, 64-bit interface: same parameter list as
// hipblasCgemmBatched, with dimensions, leading dimensions and batchCount as int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgemmBatched_64(hipblasHandle_t         handle,
                                                      hipblasOperation_t      transA,
                                                      hipblasOperation_t      transB,
                                                      int64_t                 m,
                                                      int64_t                 n,
                                                      int64_t                 k,
                                                      const hipComplex*       alpha,
                                                      const hipComplex* const AP[],
                                                      int64_t                 lda,
                                                      const hipComplex* const BP[],
                                                      int64_t                 ldb,
                                                      const hipComplex*       beta,
                                                      hipComplex* const       CP[],
                                                      int64_t                 ldc,
                                                      int64_t                 batchCount);

// hipDoubleComplex batched gemm, 64-bit interface: same parameter list as
// hipblasZgemmBatched, with dimensions, leading dimensions and batchCount as int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmBatched_64(hipblasHandle_t               handle,
                                                      hipblasOperation_t            transA,
                                                      hipblasOperation_t            transB,
                                                      int64_t                       m,
                                                      int64_t                       n,
                                                      int64_t                       k,
                                                      const hipDoubleComplex*       alpha,
                                                      const hipDoubleComplex* const AP[],
                                                      int64_t                       lda,
                                                      const hipDoubleComplex* const BP[],
                                                      int64_t                       ldb,
                                                      const hipDoubleComplex*       beta,
                                                      hipDoubleComplex* const       CP[],
                                                      int64_t                       ldc,
                                                      int64_t                       batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 3 API </b>

    \details
    The gemmStridedBatched functions perform one of the strided batched matrix-matrix operations:

        C_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batchCount

    where ``op( X )`` is one of:

        op( X ) = X      or
        op( X ) = X**T   or
        op( X ) = X**H,

    ``alpha`` and ``beta`` are scalars, and ``A``, ``B``, and ``C`` are strided batched matrices, with
    ``op( A )`` an ``m`` by ``k`` by ``batchCount`` strided_batched matrix,
    ``op( B )`` a ``k`` by ``n`` by ``batchCount`` strided_batched matrix, and
    ``C`` an ``m`` by ``n`` by ``batchCount`` strided_batched matrix.

    - Supported precisions in rocBLAS : ``h``, ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``h``, ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.
    @param[in]
    transA    [hipblasOperation_t]
              specifies the form of op( A ).
    @param[in]
    transB    [hipblasOperation_t]
              specifies the form of op( B ).
    @param[in]
    m         [int]
              matrix dimension m.
    @param[in]
    n         [int]
              matrix dimension n.
    @param[in]
    k         [int]
              matrix dimension k.
    @param[in]
    alpha     device pointer or host pointer specifying the scalar alpha.
    @param[in]
    AP         device pointer pointing to the first matrix A_1.
    @param[in]
    lda       [int]
              specifies the leading dimension of each A_i.
    @param[in]
    strideA  [hipblasStride]
              stride from the start of one A_i matrix to the next A_(i + 1).
    @param[in]
    BP         device pointer pointing to the first matrix B_1.
    @param[in]
    ldb       [int]
              specifies the leading dimension of each B_i.
    @param[in]
    strideB  [hipblasStride]
              stride from the start of one B_i matrix to the next B_(i + 1).
    @param[in]
    beta      device pointer or host pointer specifying the scalar beta.
    @param[in, out]
    CP         device pointer pointing to the first matrix C_1.
    @param[in]
    ldc       [int]
              specifies the leading dimension of each C_i.
    @param[in]
    strideC  [hipblasStride]
              stride from the start of one C_i matrix to the next C_(i + 1).
    @param[in]
    batchCount
              [int]
              number of gemm operations in the batch.

    ********************************************************************/

// hipblasHalf strided batched gemm: AP, BP and CP point to the first matrix of each
// operand and strideA/strideB/strideC give the stride from one matrix to the next;
// dimensions, leading dimensions and batchCount are int.
// NOTE(review): the strides are declared long long here while the parameter
// documentation lists them as hipblasStride - confirm the two types agree.
HIPBLAS_EXPORT hipblasStatus_t hipblasHgemmStridedBatched(hipblasHandle_t    handle,
                                                          hipblasOperation_t transA,
                                                          hipblasOperation_t transB,
                                                          int                m,
                                                          int                n,
                                                          int                k,
                                                          const hipblasHalf* alpha,
                                                          const hipblasHalf* AP,
                                                          int                lda,
                                                          long long          strideA,
                                                          const hipblasHalf* BP,
                                                          int                ldb,
                                                          long long          strideB,
                                                          const hipblasHalf* beta,
                                                          hipblasHalf*       CP,
                                                          int                ldc,
                                                          long long          strideC,
                                                          int                batchCount);

// float strided batched gemm: AP, BP and CP point to the first matrix of each
// operand and strideA/strideB/strideC (long long) give the stride from one matrix
// to the next; dimensions, leading dimensions and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgemmStridedBatched(hipblasHandle_t    handle,
                                                          hipblasOperation_t transA,
                                                          hipblasOperation_t transB,
                                                          int                m,
                                                          int                n,
                                                          int                k,
                                                          const float*       alpha,
                                                          const float*       AP,
                                                          int                lda,
                                                          long long          strideA,
                                                          const float*       BP,
                                                          int                ldb,
                                                          long long          strideB,
                                                          const float*       beta,
                                                          float*             CP,
                                                          int                ldc,
                                                          long long          strideC,
                                                          int                batchCount);

// double strided batched gemm: AP, BP and CP point to the first matrix of each
// operand and strideA/strideB/strideC (long long) give the stride from one matrix
// to the next; dimensions, leading dimensions and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgemmStridedBatched(hipblasHandle_t    handle,
                                                          hipblasOperation_t transA,
                                                          hipblasOperation_t transB,
                                                          int                m,
                                                          int                n,
                                                          int                k,
                                                          const double*      alpha,
                                                          const double*      AP,
                                                          int                lda,
                                                          long long          strideA,
                                                          const double*      BP,
                                                          int                ldb,
                                                          long long          strideB,
                                                          const double*      beta,
                                                          double*            CP,
                                                          int                ldc,
                                                          long long          strideC,
                                                          int                batchCount);

// hipComplex strided batched gemm: AP, BP and CP point to the first matrix of each
// operand and strideA/strideB/strideC (long long) give the stride from one matrix
// to the next; dimensions, leading dimensions and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgemmStridedBatched(hipblasHandle_t    handle,
                                                          hipblasOperation_t transA,
                                                          hipblasOperation_t transB,
                                                          int                m,
                                                          int                n,
                                                          int                k,
                                                          const hipComplex*  alpha,
                                                          const hipComplex*  AP,
                                                          int                lda,
                                                          long long          strideA,
                                                          const hipComplex*  BP,
                                                          int                ldb,
                                                          long long          strideB,
                                                          const hipComplex*  beta,
                                                          hipComplex*        CP,
                                                          int                ldc,
                                                          long long          strideC,
                                                          int                batchCount);

// hipDoubleComplex strided batched gemm: AP, BP and CP point to the first matrix of
// each operand and strideA/strideB/strideC (long long) give the stride from one
// matrix to the next; dimensions, leading dimensions and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmStridedBatched(hipblasHandle_t         handle,
                                                          hipblasOperation_t      transA,
                                                          hipblasOperation_t      transB,
                                                          int                     m,
                                                          int                     n,
                                                          int                     k,
                                                          const hipDoubleComplex* alpha,
                                                          const hipDoubleComplex* AP,
                                                          int                     lda,
                                                          long long               strideA,
                                                          const hipDoubleComplex* BP,
                                                          int                     ldb,
                                                          long long               strideB,
                                                          const hipDoubleComplex* beta,
                                                          hipDoubleComplex*       CP,
                                                          int                     ldc,
                                                          long long               strideC,
                                                          int                     batchCount);

// 64-bit interface
// hipblasHalf strided batched gemm, 64-bit interface: dimensions, leading dimensions
// and batchCount are int64_t; strideA/strideB/strideC remain long long.
// NOTE(review): the parameter documentation lists the strides as hipblasStride -
// confirm that type agrees with the long long used here.
HIPBLAS_EXPORT hipblasStatus_t hipblasHgemmStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasOperation_t transA,
                                                             hipblasOperation_t transB,
                                                             int64_t            m,
                                                             int64_t            n,
                                                             int64_t            k,
                                                             const hipblasHalf* alpha,
                                                             const hipblasHalf* AP,
                                                             int64_t            lda,
                                                             long long          strideA,
                                                             const hipblasHalf* BP,
                                                             int64_t            ldb,
                                                             long long          strideB,
                                                             const hipblasHalf* beta,
                                                             hipblasHalf*       CP,
                                                             int64_t            ldc,
                                                             long long          strideC,
                                                             int64_t            batchCount);

// float strided batched gemm, 64-bit interface: dimensions, leading dimensions
// and batchCount are int64_t; strideA/strideB/strideC remain long long.
HIPBLAS_EXPORT hipblasStatus_t hipblasSgemmStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasOperation_t transA,
                                                             hipblasOperation_t transB,
                                                             int64_t            m,
                                                             int64_t            n,
                                                             int64_t            k,
                                                             const float*       alpha,
                                                             const float*       AP,
                                                             int64_t            lda,
                                                             long long          strideA,
                                                             const float*       BP,
                                                             int64_t            ldb,
                                                             long long          strideB,
                                                             const float*       beta,
                                                             float*             CP,
                                                             int64_t            ldc,
                                                             long long          strideC,
                                                             int64_t            batchCount);

// double strided batched gemm, 64-bit interface: dimensions, leading dimensions
// and batchCount are int64_t; strideA/strideB/strideC remain long long.
HIPBLAS_EXPORT hipblasStatus_t hipblasDgemmStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasOperation_t transA,
                                                             hipblasOperation_t transB,
                                                             int64_t            m,
                                                             int64_t            n,
                                                             int64_t            k,
                                                             const double*      alpha,
                                                             const double*      AP,
                                                             int64_t            lda,
                                                             long long          strideA,
                                                             const double*      BP,
                                                             int64_t            ldb,
                                                             long long          strideB,
                                                             const double*      beta,
                                                             double*            CP,
                                                             int64_t            ldc,
                                                             long long          strideC,
                                                             int64_t            batchCount);

// hipComplex strided batched gemm, 64-bit interface: dimensions, leading dimensions
// and batchCount are int64_t; strideA/strideB/strideC remain long long.
HIPBLAS_EXPORT hipblasStatus_t hipblasCgemmStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasOperation_t transA,
                                                             hipblasOperation_t transB,
                                                             int64_t            m,
                                                             int64_t            n,
                                                             int64_t            k,
                                                             const hipComplex*  alpha,
                                                             const hipComplex*  AP,
                                                             int64_t            lda,
                                                             long long          strideA,
                                                             const hipComplex*  BP,
                                                             int64_t            ldb,
                                                             long long          strideB,
                                                             const hipComplex*  beta,
                                                             hipComplex*        CP,
                                                             int64_t            ldc,
                                                             long long          strideC,
                                                             int64_t            batchCount);

// hipDoubleComplex strided batched gemm, 64-bit interface: dimensions, leading
// dimensions and batchCount are int64_t; strideA/strideB/strideC remain long long.
HIPBLAS_EXPORT hipblasStatus_t hipblasZgemmStridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasOperation_t      transA,
                                                             hipblasOperation_t      transB,
                                                             int64_t                 m,
                                                             int64_t                 n,
                                                             int64_t                 k,
                                                             const hipDoubleComplex* alpha,
                                                             const hipDoubleComplex* AP,
                                                             int64_t                 lda,
                                                             long long               strideA,
                                                             const hipDoubleComplex* BP,
                                                             int64_t                 ldb,
                                                             long long               strideB,
                                                             const hipDoubleComplex* beta,
                                                             hipDoubleComplex*       CP,
                                                             int64_t                 ldc,
                                                             long long               strideC,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 3 API </b>

    \details

    The herk functions perform one of the matrix-matrix operations for a Hermitian rank-k update:

        C := alpha*op( A )*op( A )^H + beta*C

    where ``alpha`` and ``beta`` are scalars, ``op(A)`` is an ``n`` by ``k`` matrix, and
    ``C`` is an ``n`` x ``n`` Hermitian matrix stored as either upper or lower.

        op( A ) = A,  and A is n by k if transA == HIPBLAS_OP_N
        op( A ) = A^H and A is k by n if transA == HIPBLAS_OP_C

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : ``c`` and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  C is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  C is a  lower triangular matrix.

    @param[in]
    transA  [hipblasOperation_t]
            - HIPBLAS_OP_C:  op(A) = A^H
            - HIPBLAS_OP_N:  op(A) = A

    @param[in]
    n       [int]
            n specifies the number of rows and columns of C. n >= 0.

    @param[in]
    k       [int]
            k specifies the number of columns of op(A). k >= 0.

    @param[in]
    alpha
            alpha specifies the scalar alpha. When alpha is
            zero, then A is not referenced and A does not need to be set before
            entry.

    @param[in]
    AP       pointer storing matrix A on the GPU.
             Matrix dimension is ( lda, k ) when transA = HIPBLAS_OP_N. Otherwise, (lda, n).
             A is a general matrix; the upper/lower selection made by uplo applies to C only.

    @param[in]
    lda     [int]
            lda specifies the first dimension of A.
            If transA = HIPBLAS_OP_N,  lda >= max( 1, n ).
            Otherwise, lda >= max( 1, k ).

    @param[in]
    beta
            beta specifies the scalar beta. When beta is
            zero, then C does not need to be set before entry.

    @param[inout]
    CP       pointer storing matrix C on the GPU.
            The imaginary components of the diagonal elements are not used but are set to zero, except for quick return.

    @param[in]
    ldc    [int]
           ldc specifies the first dimension of C. ldc >= max( 1, n ).

    ********************************************************************/

// hipComplex variant of herk: AP is read-only (const) and CP is writable, while the
// alpha and beta scalars are real (const float*). n, k, lda and ldc are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCherk(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            int                n,
                                            int                k,
                                            const float*       alpha,
                                            const hipComplex*  AP,
                                            int                lda,
                                            const float*       beta,
                                            hipComplex*        CP,
                                            int                ldc);

// hipDoubleComplex variant of herk: AP is read-only (const) and CP is writable, while
// the alpha and beta scalars are real (const double*). n, k, lda and ldc are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZherk(hipblasHandle_t         handle,
                                            hipblasFillMode_t       uplo,
                                            hipblasOperation_t      transA,
                                            int                     n,
                                            int                     k,
                                            const double*           alpha,
                                            const hipDoubleComplex* AP,
                                            int                     lda,
                                            const double*           beta,
                                            hipDoubleComplex*       CP,
                                            int                     ldc);

// 64-bit interface
// hipComplex variant with the same parameter order as hipblasCherk; n, k, lda and
// ldc are int64_t instead of int. alpha and beta remain real (const float*).
HIPBLAS_EXPORT hipblasStatus_t hipblasCherk_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               int64_t            n,
                                               int64_t            k,
                                               const float*       alpha,
                                               const hipComplex*  AP,
                                               int64_t            lda,
                                               const float*       beta,
                                               hipComplex*        CP,
                                               int64_t            ldc);

// 64-bit interface, hipDoubleComplex variant: same parameter order as hipblasZherk;
// n, k, lda and ldc are int64_t instead of int. alpha and beta remain real
// (const double*).
HIPBLAS_EXPORT hipblasStatus_t hipblasZherk_64(hipblasHandle_t         handle,
                                               hipblasFillMode_t       uplo,
                                               hipblasOperation_t      transA,
                                               int64_t                 n,
                                               int64_t                 k,
                                               const double*           alpha,
                                               const hipDoubleComplex* AP,
                                               int64_t                 lda,
                                               const double*           beta,
                                               hipDoubleComplex*       CP,
                                               int64_t                 ldc);
//! @}

/*! @{
    \brief <b> BLAS Level 3 API </b>

    \details

    The herkBatched functions perform a batch of the matrix-matrix operations for a Hermitian rank-k update:

        C_i := alpha*op( A_i )*op( A_i )^H + beta*C_i

    where ``alpha`` and ``beta`` are scalars, ``op(A_i)`` is an ``n`` by ``k`` matrix, and
    ``C_i`` is an ``n`` x ``n`` Hermitian matrix stored as either upper or lower.

        op( A_i ) = A_i, and A_i is n by k if transA == HIPBLAS_OP_N
        op( A_i ) = A_i^H and A_i is k by n if transA == HIPBLAS_OP_C

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  C_i is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  C_i is a  lower triangular matrix.

    @param[in]
    transA  [hipblasOperation_t]
            - HIPBLAS_OP_C: op(A) = A^H
            - HIPBLAS_OP_N: op(A) = A

    @param[in]
    n       [int]
            n specifies the number of rows and columns of C_i. n >= 0.

    @param[in]
    k       [int]
            k specifies the number of columns of op(A). k >= 0.

    @param[in]
    alpha
            alpha specifies the scalar alpha. When alpha is
            zero, then A is not referenced and A does not need to be set before
            entry.

    @param[in]
    AP       device array of device pointers storing each matrix A_i of dimension (lda, k)
             when transA is HIPBLAS_OP_N. Otherwise, of dimension (lda, n).

    @param[in]
    lda     [int]
            lda specifies the first dimension of A_i.
            If transA = HIPBLAS_OP_N,  lda >= max( 1, n ).
            Otherwise, lda >= max( 1, k ).

    @param[in]
    beta
            beta specifies the scalar beta. When beta is
            zero, then C does not need to be set before entry.

    @param[inout]
    CP      device array of device pointers storing each matrix C_i on the GPU.
            The imaginary components of the diagonal elements are not used but are set to zero, except for quick return.

    @param[in]
    ldc    [int]
           ldc specifies the first dimension of C. ldc >= max( 1, n ).
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// hipComplex variant of herkBatched. AP and CP are arrays of per-matrix pointers:
// both pointer arrays are const-qualified, the A_i data is const, and only the data
// reached through CP is writable. alpha and beta are real (const float*); n, k,
// lda, ldc and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCherkBatched(hipblasHandle_t         handle,
                                                   hipblasFillMode_t       uplo,
                                                   hipblasOperation_t      transA,
                                                   int                     n,
                                                   int                     k,
                                                   const float*            alpha,
                                                   const hipComplex* const AP[],
                                                   int                     lda,
                                                   const float*            beta,
                                                   hipComplex* const       CP[],
                                                   int                     ldc,
                                                   int                     batchCount);

// hipDoubleComplex variant of herkBatched. AP and CP are arrays of per-matrix
// pointers: both pointer arrays are const-qualified, the A_i data is const, and only
// the data reached through CP is writable. alpha and beta are real (const double*);
// n, k, lda, ldc and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZherkBatched(hipblasHandle_t               handle,
                                                   hipblasFillMode_t             uplo,
                                                   hipblasOperation_t            transA,
                                                   int                           n,
                                                   int                           k,
                                                   const double*                 alpha,
                                                   const hipDoubleComplex* const AP[],
                                                   int                           lda,
                                                   const double*                 beta,
                                                   hipDoubleComplex* const       CP[],
                                                   int                           ldc,
                                                   int                           batchCount);

// 64-bit interface
// hipComplex variant with the same parameter order as hipblasCherkBatched; n, k,
// lda, ldc and batchCount are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCherkBatched_64(hipblasHandle_t         handle,
                                                      hipblasFillMode_t       uplo,
                                                      hipblasOperation_t      transA,
                                                      int64_t                 n,
                                                      int64_t                 k,
                                                      const float*            alpha,
                                                      const hipComplex* const AP[],
                                                      int64_t                 lda,
                                                      const float*            beta,
                                                      hipComplex* const       CP[],
                                                      int64_t                 ldc,
                                                      int64_t                 batchCount);

// 64-bit interface, hipDoubleComplex variant: same parameter order as
// hipblasZherkBatched; n, k, lda, ldc and batchCount are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZherkBatched_64(hipblasHandle_t               handle,
                                                      hipblasFillMode_t             uplo,
                                                      hipblasOperation_t            transA,
                                                      int64_t                       n,
                                                      int64_t                       k,
                                                      const double*                 alpha,
                                                      const hipDoubleComplex* const AP[],
                                                      int64_t                       lda,
                                                      const double*                 beta,
                                                      hipDoubleComplex* const       CP[],
                                                      int64_t                       ldc,
                                                      int64_t                       batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 3 API </b>

    \details

    The herkStridedBatched functions perform a batch of the matrix-matrix operations for a Hermitian rank-k update:

        C_i := alpha*op( A_i )*op( A_i )^H + beta*C_i

    where ``alpha`` and ``beta`` are scalars, ``op(A_i)`` is an ``n`` by ``k`` matrix, and
    ``C_i`` is an ``n`` by ``n`` Hermitian matrix stored as either upper or lower.

        op( A_i ) = A_i, and A_i is n by k if transA == HIPBLAS_OP_N
        op( A_i ) = A_i^H and A_i is k by n if transA == HIPBLAS_OP_C

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  C_i is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  C_i is a  lower triangular matrix.

    @param[in]
    transA  [hipblasOperation_t]
            - HIPBLAS_OP_C: op(A) = A^H
            - HIPBLAS_OP_N: op(A) = A

    @param[in]
    n       [int]
            n specifies the number of rows and columns of C_i. n >= 0.

    @param[in]
    k       [int]
            k specifies the number of columns of op(A). k >= 0.

    @param[in]
    alpha
            alpha specifies the scalar alpha. When alpha is
            zero, then A is not referenced and A does not need to be set before
            entry.

    @param[in]
    AP      Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)
            when transA is HIPBLAS_OP_N. Otherwise, of dimension (lda, n).

    @param[in]
    lda     [int]
            lda specifies the first dimension of A_i.
            If transA = HIPBLAS_OP_N,  lda >= max( 1, n ).
            Otherwise, lda >= max( 1, k ).

    @param[in]
    strideA  [hipblasStride]
              stride from the start of one matrix (A_i) to the next one (A_i+1).

    @param[in]
    beta
            beta specifies the scalar beta. When beta is
            zero, then C does not need to be set before entry.

    @param[inout]
    CP      Device pointer to the first matrix C_1 on the GPU.
            The imaginary components of the diagonal elements are not used but are set to zero, except for quick return.

    @param[in]
    ldc    [int]
           ldc specifies the first dimension of C. ldc >= max( 1, n ).

    @param[in]
    strideC  [hipblasStride]
              stride from the start of one matrix (C_i) to the next one (C_i+1).

    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// hipComplex variant of herkStridedBatched: AP (const) and CP (writable) are single
// base pointers, with strideA/strideC passed by value as hipblasStride. alpha and
// beta are real (const float*); n, k, lda, ldc and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCherkStridedBatched(hipblasHandle_t    handle,
                                                          hipblasFillMode_t  uplo,
                                                          hipblasOperation_t transA,
                                                          int                n,
                                                          int                k,
                                                          const float*       alpha,
                                                          const hipComplex*  AP,
                                                          int                lda,
                                                          hipblasStride      strideA,
                                                          const float*       beta,
                                                          hipComplex*        CP,
                                                          int                ldc,
                                                          hipblasStride      strideC,
                                                          int                batchCount);

// hipDoubleComplex variant of herkStridedBatched: AP (const) and CP (writable) are
// single base pointers, with strideA/strideC passed by value as hipblasStride.
// alpha and beta are real (const double*); n, k, lda, ldc and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZherkStridedBatched(hipblasHandle_t         handle,
                                                          hipblasFillMode_t       uplo,
                                                          hipblasOperation_t      transA,
                                                          int                     n,
                                                          int                     k,
                                                          const double*           alpha,
                                                          const hipDoubleComplex* AP,
                                                          int                     lda,
                                                          hipblasStride           strideA,
                                                          const double*           beta,
                                                          hipDoubleComplex*       CP,
                                                          int                     ldc,
                                                          hipblasStride           strideC,
                                                          int                     batchCount);

// 64-bit interface
// hipComplex variant with the same parameter order as hipblasCherkStridedBatched;
// n, k, lda, ldc and batchCount are int64_t instead of int. The strides keep the
// hipblasStride type.
HIPBLAS_EXPORT hipblasStatus_t hipblasCherkStridedBatched_64(hipblasHandle_t    handle,
                                                             hipblasFillMode_t  uplo,
                                                             hipblasOperation_t transA,
                                                             int64_t            n,
                                                             int64_t            k,
                                                             const float*       alpha,
                                                             const hipComplex*  AP,
                                                             int64_t            lda,
                                                             hipblasStride      strideA,
                                                             const float*       beta,
                                                             hipComplex*        CP,
                                                             int64_t            ldc,
                                                             hipblasStride      strideC,
                                                             int64_t            batchCount);

// 64-bit interface, hipDoubleComplex variant: same parameter order as
// hipblasZherkStridedBatched; n, k, lda, ldc and batchCount are int64_t instead of
// int. The strides keep the hipblasStride type.
HIPBLAS_EXPORT hipblasStatus_t hipblasZherkStridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasFillMode_t       uplo,
                                                             hipblasOperation_t      transA,
                                                             int64_t                 n,
                                                             int64_t                 k,
                                                             const double*           alpha,
                                                             const hipDoubleComplex* AP,
                                                             int64_t                 lda,
                                                             hipblasStride           strideA,
                                                             const double*           beta,
                                                             hipDoubleComplex*       CP,
                                                             int64_t                 ldc,
                                                             hipblasStride           strideC,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 3 API </b>

    \details

    The herkx functions perform one of the matrix-matrix operations for a Hermitian rank-k update:

        C := alpha*op( A )*op( B )^H + beta*C

    where ``alpha`` and ``beta`` are scalars, ``op(A)`` and ``op(B)`` are ``n`` by ``k`` matrices, and
    ``C`` is an ``n`` by ``n`` Hermitian matrix stored as either upper or lower.
    This routine should only be used when the caller can guarantee that the result of ``op( A )*op( B )^H`` will be Hermitian.

        op( A ) = A, op( B ) = B, and A and B are n by k if transA == HIPBLAS_OP_N
        op( A ) = A^H, op( B ) = B^H,  and A and B are k by n if transA == HIPBLAS_OP_C

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : ``c`` and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  C is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  C is a lower triangular matrix.

    @param[in]
    transA  [hipblasOperation_t]
            - HIPBLAS_OP_C:  op( A ) = A^H, op( B ) = B^H
            - HIPBLAS_OP_N:  op( A ) = A, op( B ) = B

    @param[in]
    n       [int]
            n specifies the number of rows and columns of C. n >= 0.

    @param[in]
    k       [int]
            k specifies the number of columns of op(A). k >= 0.

    @param[in]
    alpha
            alpha specifies the scalar alpha. When alpha is
            zero, then A is not referenced and does not need to be set before
            entry.

    @param[in]
    AP      pointer storing matrix A on the GPU.
            Matrix dimension is ( lda, k ) when transA = HIPBLAS_OP_N. Otherwise, (lda, n).
            A is a general matrix; the upper/lower selection made by uplo applies to C only.

    @param[in]
    lda     [int]
            lda specifies the first dimension of A.
            If transA = HIPBLAS_OP_N,  lda >= max( 1, n ).
            Otherwise, lda >= max( 1, k ).
    @param[in]
    BP      pointer storing matrix B on the GPU.
            Matrix dimension is ( ldb, k ) when transA = HIPBLAS_OP_N. Otherwise, (ldb, n).
            B is a general matrix; the upper/lower selection made by uplo applies to C only.

    @param[in]
    ldb     [int]
            ldb specifies the first dimension of B.
            If transA = HIPBLAS_OP_N,  ldb >= max( 1, n ).
            Otherwise, ldb >= max( 1, k ).
    @param[in]
    beta
            beta specifies the scalar beta. When beta is
            zero, then C does not need to be set before entry.

    @param[inout]
    CP       pointer storing matrix C on the GPU.
            The imaginary components of the diagonal elements are not used but are set to zero, except for quick return.

    @param[in]
    ldc    [int]
           ldc specifies the first dimension of C. ldc >= max( 1, n ).

    ********************************************************************/

// hipComplex variant of herkx: AP and BP are read-only (const) and CP is writable.
// Here alpha is a complex scalar (const hipComplex*) while beta is real
// (const float*). n, k, lda, ldb and ldc are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCherkx(hipblasHandle_t    handle,
                                             hipblasFillMode_t  uplo,
                                             hipblasOperation_t transA,
                                             int                n,
                                             int                k,
                                             const hipComplex*  alpha,
                                             const hipComplex*  AP,
                                             int                lda,
                                             const hipComplex*  BP,
                                             int                ldb,
                                             const float*       beta,
                                             hipComplex*        CP,
                                             int                ldc);

// hipDoubleComplex variant of herkx: AP and BP are read-only (const) and CP is
// writable. Here alpha is a complex scalar (const hipDoubleComplex*) while beta is
// real (const double*). n, k, lda, ldb and ldc are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZherkx(hipblasHandle_t         handle,
                                             hipblasFillMode_t       uplo,
                                             hipblasOperation_t      transA,
                                             int                     n,
                                             int                     k,
                                             const hipDoubleComplex* alpha,
                                             const hipDoubleComplex* AP,
                                             int                     lda,
                                             const hipDoubleComplex* BP,
                                             int                     ldb,
                                             const double*           beta,
                                             hipDoubleComplex*       CP,
                                             int                     ldc);

// 64-bit interface
// hipComplex variant with the same parameter order as hipblasCherkx; n, k, lda, ldb
// and ldc are int64_t instead of int. alpha stays complex and beta stays real.
HIPBLAS_EXPORT hipblasStatus_t hipblasCherkx_64(hipblasHandle_t    handle,
                                                hipblasFillMode_t  uplo,
                                                hipblasOperation_t transA,
                                                int64_t            n,
                                                int64_t            k,
                                                const hipComplex*  alpha,
                                                const hipComplex*  AP,
                                                int64_t            lda,
                                                const hipComplex*  BP,
                                                int64_t            ldb,
                                                const float*       beta,
                                                hipComplex*        CP,
                                                int64_t            ldc);

// 64-bit interface, hipDoubleComplex variant: same parameter order as hipblasZherkx;
// n, k, lda, ldb and ldc are int64_t instead of int. alpha stays complex and beta
// stays real (const double*).
HIPBLAS_EXPORT hipblasStatus_t hipblasZherkx_64(hipblasHandle_t         handle,
                                                hipblasFillMode_t       uplo,
                                                hipblasOperation_t      transA,
                                                int64_t                 n,
                                                int64_t                 k,
                                                const hipDoubleComplex* alpha,
                                                const hipDoubleComplex* AP,
                                                int64_t                 lda,
                                                const hipDoubleComplex* BP,
                                                int64_t                 ldb,
                                                const double*           beta,
                                                hipDoubleComplex*       CP,
                                                int64_t                 ldc);
//! @}

/*! @{
    \brief <b> BLAS Level 3 API </b>

    \details

    The herkxBatched functions perform a batch of the matrix-matrix operations for a Hermitian rank-k update:

        C_i := alpha*op( A_i )*op( B_i )^H + beta*C_i

    where ``alpha`` and ``beta`` are scalars, ``op(A_i)`` and ``op(B_i)`` are ``n`` by ``k`` matrices, and
    ``C_i`` is an ``n`` by ``n`` Hermitian matrix stored as either upper or lower.
    This routine should only be used when the caller can guarantee that the result of ``op( A_i )*op( B_i )^H`` will be Hermitian.

        op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if transA == HIPBLAS_OP_N
        op( A_i ) = A_i^H, op( B_i ) = B_i^H,  and A_i and B_i are k by n if transA == HIPBLAS_OP_C

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  C_i is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  C_i is a lower triangular matrix.

    @param[in]
    transA  [hipblasOperation_t]
            - HIPBLAS_OP_C: op( A_i ) = A_i^H, op( B_i ) = B_i^H
            - HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i

    @param[in]
    n       [int]
            n specifies the number of rows and columns of C_i. n >= 0.

    @param[in]
    k       [int]
            k specifies the number of columns of op(A). k >= 0.

    @param[in]
    alpha
            alpha specifies the scalar alpha. When alpha is
            zero, then A is not referenced and does not need to be set before
            entry.

    @param[in]
    AP      device array of device pointers storing each matrix A_i of dimension (lda, k)
            when transA is HIPBLAS_OP_N. Otherwise, of dimension (lda, n).

    @param[in]
    lda     [int]
            lda specifies the first dimension of A_i.
            If transA = HIPBLAS_OP_N,  lda >= max( 1, n ).
            Otherwise, lda >= max( 1, k ).

    @param[in]
    BP      device array of device pointers storing each matrix B_i of dimension (ldb, k)
            when transA is HIPBLAS_OP_N. Otherwise, of dimension (ldb, n).

    @param[in]
    ldb     [int]
            ldb specifies the first dimension of B_i.
            If transA = HIPBLAS_OP_N,  ldb >= max( 1, n ).
            Otherwise, ldb >= max( 1, k ).

    @param[in]
    beta
            beta specifies the scalar beta. When beta is
            zero, then C does not need to be set before entry.

    @param[inout]
    CP      device array of device pointers storing each matrix C_i on the GPU.
            The imaginary components of the diagonal elements are not used but are set to zero, except for quick return.

    @param[in]
    ldc    [int]
           ldc specifies the first dimension of C. ldc >= max( 1, n ).

    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// hipComplex variant of herkxBatched. AP, BP and CP are arrays of per-matrix
// pointers: all three pointer arrays are const-qualified, the A_i and B_i data is
// const, and only the data reached through CP is writable. alpha is complex
// (const hipComplex*) and beta is real (const float*); n, k, lda, ldb, ldc and
// batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCherkxBatched(hipblasHandle_t         handle,
                                                    hipblasFillMode_t       uplo,
                                                    hipblasOperation_t      transA,
                                                    int                     n,
                                                    int                     k,
                                                    const hipComplex*       alpha,
                                                    const hipComplex* const AP[],
                                                    int                     lda,
                                                    const hipComplex* const BP[],
                                                    int                     ldb,
                                                    const float*            beta,
                                                    hipComplex* const       CP[],
                                                    int                     ldc,
                                                    int                     batchCount);

// hipDoubleComplex variant of herkxBatched. AP, BP and CP are arrays of per-matrix
// pointers: all three pointer arrays are const-qualified, the A_i and B_i data is
// const, and only the data reached through CP is writable. alpha is complex
// (const hipDoubleComplex*) and beta is real (const double*); n, k, lda, ldb, ldc
// and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZherkxBatched(hipblasHandle_t               handle,
                                                    hipblasFillMode_t             uplo,
                                                    hipblasOperation_t            transA,
                                                    int                           n,
                                                    int                           k,
                                                    const hipDoubleComplex*       alpha,
                                                    const hipDoubleComplex* const AP[],
                                                    int                           lda,
                                                    const hipDoubleComplex* const BP[],
                                                    int                           ldb,
                                                    const double*                 beta,
                                                    hipDoubleComplex* const       CP[],
                                                    int                           ldc,
                                                    int                           batchCount);

// 64-bit interface
// hipComplex herkxBatched with int64_t sizes: n, k, lda, ldb, ldc and batchCount are all int64_t.
// AP, BP and CP are arrays of matrix pointers; alpha is complex while beta points to a real float.
HIPBLAS_EXPORT hipblasStatus_t hipblasCherkxBatched_64(hipblasHandle_t         handle,
                                                       hipblasFillMode_t       uplo,
                                                       hipblasOperation_t      transA,
                                                       int64_t                 n,
                                                       int64_t                 k,
                                                       const hipComplex*       alpha,
                                                       const hipComplex* const AP[],
                                                       int64_t                 lda,
                                                       const hipComplex* const BP[],
                                                       int64_t                 ldb,
                                                       const float*            beta,
                                                       hipComplex* const       CP[],
                                                       int64_t                 ldc,
                                                       int64_t                 batchCount);

// hipDoubleComplex herkxBatched with int64_t sizes: n, k, lda, ldb, ldc and batchCount are all int64_t.
// AP, BP and CP are arrays of matrix pointers; alpha is complex while beta points to a real double.
HIPBLAS_EXPORT hipblasStatus_t hipblasZherkxBatched_64(hipblasHandle_t               handle,
                                                       hipblasFillMode_t             uplo,
                                                       hipblasOperation_t            transA,
                                                       int64_t                       n,
                                                       int64_t                       k,
                                                       const hipDoubleComplex*       alpha,
                                                       const hipDoubleComplex* const AP[],
                                                       int64_t                       lda,
                                                       const hipDoubleComplex* const BP[],
                                                       int64_t                       ldb,
                                                       const double*                 beta,
                                                       hipDoubleComplex* const       CP[],
                                                       int64_t                       ldc,
                                                       int64_t                       batchCount);
//! @}

/*! @{
    \brief  <b> BLAS Level 3 API </b>

    \details

    The herkxStridedBatched functions perform a batch of the matrix-matrix operations for a Hermitian rank-k update:

        C_i := alpha*op( A_i )*op( B_i )^H + beta*C_i

    where ``alpha`` and ``beta`` are scalars, ``op(A_i)`` and ``op(B_i)`` are ``n`` by ``k`` matrices, and
    ``C_i`` is an ``n`` by ``n`` Hermitian matrix stored as either upper or lower.
    This routine should only be used when the caller can guarantee that the result of ``op( A_i )*op( B_i )^H`` will be Hermitian.

        op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N
        op( A_i ) = A_i^H, op( B_i ) = B_i^H,  and A_i and B_i are k by n if trans == HIPBLAS_OP_C

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  C_i is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  C_i is a lower triangular matrix.

    @param[in]
    transA  [hipblasOperation_t]
            - HIPBLAS_OP_C: op( A_i ) = A_i^H, op( B_i ) = B_i^H
            - HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i

    @param[in]
    n       [int]
            n specifies the number of rows and columns of C_i. n >= 0.

    @param[in]
    k       [int]
            k specifies the number of columns of op(A). k >= 0.

    @param[in]
    alpha
            alpha specifies the scalar alpha. When alpha is
            zero, then A is not referenced and does not need to be set before
            entry.

    @param[in]
    AP       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)
            when trans is HIPBLAS_OP_N. Otherwise, of dimension (lda, n).

    @param[in]
    lda     [int]
            lda specifies the first dimension of A_i.
            If trans = HIPBLAS_OP_N,  lda >= max( 1, n ).
            Otherwise, lda >= max( 1, k ).

    @param[in]
    strideA  [hipblasStride]
              stride from the start of one matrix (A_i) to the next one (A_i+1).

    @param[in]
    BP      Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)
            when trans is HIPBLAS_OP_N. Otherwise, of dimension (ldb, n).

    @param[in]
    ldb     [int]
            ldb specifies the first dimension of B_i.
            If trans = HIPBLAS_OP_N,  ldb >= max( 1, n ).
            Otherwise, ldb >= max( 1, k ).

    @param[in]
    strideB  [hipblasStride]
              stride from the start of one matrix (B_i) to the next one (B_i+1).

    @param[in]
    beta
            beta specifies the scalar beta. When beta is
            zero, then C does not need to be set before entry.

    @param[inout]
    CP      Device pointer to the first matrix C_1 on the GPU.
            The imaginary components of the diagonal elements are not used but are set to zero, except for quick return.

    @param[in]
    ldc    [int]
           ldc specifies the first dimension of C. ldc >= max( 1, n ).

    @param[in]
    strideC  [hipblasStride]
              stride from the start of one matrix (C_i) to the next one (C_i+1).

    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// hipComplex herkxStridedBatched, int-indexed interface.
// AP, BP and CP each point at the first matrix of a batch; strideA/strideB/strideC give the
// distance from one matrix to the next. alpha is complex while beta points to a real float.
HIPBLAS_EXPORT hipblasStatus_t hipblasCherkxStridedBatched(hipblasHandle_t    handle,
                                                           hipblasFillMode_t  uplo,
                                                           hipblasOperation_t transA,
                                                           int                n,
                                                           int                k,
                                                           const hipComplex*  alpha,
                                                           const hipComplex*  AP,
                                                           int                lda,
                                                           hipblasStride      strideA,
                                                           const hipComplex*  BP,
                                                           int                ldb,
                                                           hipblasStride      strideB,
                                                           const float*       beta,
                                                           hipComplex*        CP,
                                                           int                ldc,
                                                           hipblasStride      strideC,
                                                           int                batchCount);

// hipDoubleComplex herkxStridedBatched, int-indexed interface.
// AP, BP and CP each point at the first matrix of a batch; strideA/strideB/strideC give the
// distance from one matrix to the next. alpha is complex while beta points to a real double.
HIPBLAS_EXPORT hipblasStatus_t hipblasZherkxStridedBatched(hipblasHandle_t         handle,
                                                           hipblasFillMode_t       uplo,
                                                           hipblasOperation_t      transA,
                                                           int                     n,
                                                           int                     k,
                                                           const hipDoubleComplex* alpha,
                                                           const hipDoubleComplex* AP,
                                                           int                     lda,
                                                           hipblasStride           strideA,
                                                           const hipDoubleComplex* BP,
                                                           int                     ldb,
                                                           hipblasStride           strideB,
                                                           const double*           beta,
                                                           hipDoubleComplex*       CP,
                                                           int                     ldc,
                                                           hipblasStride           strideC,
                                                           int                     batchCount);

// 64-bit interface
// hipComplex herkxStridedBatched with int64_t sizes: n, k, lda, ldb, ldc and batchCount are
// int64_t, while the strides keep the hipblasStride type. beta points to a real float.
HIPBLAS_EXPORT hipblasStatus_t hipblasCherkxStridedBatched_64(hipblasHandle_t    handle,
                                                              hipblasFillMode_t  uplo,
                                                              hipblasOperation_t transA,
                                                              int64_t            n,
                                                              int64_t            k,
                                                              const hipComplex*  alpha,
                                                              const hipComplex*  AP,
                                                              int64_t            lda,
                                                              hipblasStride      strideA,
                                                              const hipComplex*  BP,
                                                              int64_t            ldb,
                                                              hipblasStride      strideB,
                                                              const float*       beta,
                                                              hipComplex*        CP,
                                                              int64_t            ldc,
                                                              hipblasStride      strideC,
                                                              int64_t            batchCount);

// hipDoubleComplex herkxStridedBatched with int64_t sizes: n, k, lda, ldb, ldc and batchCount
// are int64_t, while the strides keep the hipblasStride type. beta points to a real double.
HIPBLAS_EXPORT hipblasStatus_t hipblasZherkxStridedBatched_64(hipblasHandle_t         handle,
                                                              hipblasFillMode_t       uplo,
                                                              hipblasOperation_t      transA,
                                                              int64_t                 n,
                                                              int64_t                 k,
                                                              const hipDoubleComplex* alpha,
                                                              const hipDoubleComplex* AP,
                                                              int64_t                 lda,
                                                              hipblasStride           strideA,
                                                              const hipDoubleComplex* BP,
                                                              int64_t                 ldb,
                                                              hipblasStride           strideB,
                                                              const double*           beta,
                                                              hipDoubleComplex*       CP,
                                                              int64_t                 ldc,
                                                              hipblasStride           strideC,
                                                              int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 3 API </b>

    \details

    The her2k functions perform one of the matrix-matrix operations for a Hermitian rank-2k update:

        C := alpha*op( A )*op( B )^H + conj(alpha)*op( B )*op( A )^H + beta*C

    where ``alpha`` and ``beta`` are scalars, ``op(A)`` and ``op(B)`` are ``n`` by ``k`` matrices, and
    ``C`` is an ``n`` by ``n`` Hermitian matrix stored as either upper or lower.

        op( A ) = A, op( B ) = B, and A and B are n by k if trans == HIPBLAS_OP_N
        op( A ) = A^H, op( B ) = B^H,  and A and B are k by n if trans == HIPBLAS_OP_C

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : ``c`` and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  C is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  C is a lower triangular matrix.

    @param[in]
    transA  [hipblasOperation_t]
            - HIPBLAS_OP_C:  op( A ) = A^H, op( B ) = B^H
            - HIPBLAS_OP_N:  op( A ) = A, op( B ) = B

    @param[in]
    n       [int]
            n specifies the number of rows and columns of C. n >= 0.

    @param[in]
    k       [int]
            k specifies the number of columns of op(A). k >= 0.

    @param[in]
    alpha
            alpha specifies the scalar alpha. When alpha is
            zero, then A is not referenced and does not need to be set before
            entry.

    @param[in]
    AP       pointer storing matrix A on the GPU.
            Matrix dimension is ( lda, k ) when trans = HIPBLAS_OP_N. Otherwise, (lda, n).
            A is a general (non-triangular) matrix; uplo selects the stored triangle of C only.

    @param[in]
    lda     [int]
            lda specifies the first dimension of A.
            If trans = HIPBLAS_OP_N,  lda >= max( 1, n ).
            Otherwise, lda >= max( 1, k ).
    @param[in]
    BP      pointer storing matrix B on the GPU.
            Matrix dimension is ( ldb, k ) when trans = HIPBLAS_OP_N. Otherwise, (ldb, n).
            B is a general (non-triangular) matrix; uplo selects the stored triangle of C only.

    @param[in]
    ldb     [int]
            ldb specifies the first dimension of B.
            If trans = HIPBLAS_OP_N,  ldb >= max( 1, n ).
            Otherwise, ldb >= max( 1, k ).
    @param[in]
    beta
            beta specifies the scalar beta. When beta is
            zero, then C does not need to be set before entry.

    @param[inout]
    CP      pointer storing matrix C on the GPU.
            The imaginary components of the diagonal elements are not used but are set to zero, except for quick return.

    @param[in]
    ldc    [int]
           ldc specifies the first dimension of C. ldc >= max( 1, n ).

    ********************************************************************/

// hipComplex her2k, int-indexed interface.
// AP and BP are read-only inputs and CP is the matrix that is updated; alpha is complex while
// beta points to a real float.
HIPBLAS_EXPORT hipblasStatus_t hipblasCher2k(hipblasHandle_t    handle,
                                             hipblasFillMode_t  uplo,
                                             hipblasOperation_t transA,
                                             int                n,
                                             int                k,
                                             const hipComplex*  alpha,
                                             const hipComplex*  AP,
                                             int                lda,
                                             const hipComplex*  BP,
                                             int                ldb,
                                             const float*       beta,
                                             hipComplex*        CP,
                                             int                ldc);

// hipDoubleComplex her2k, int-indexed interface.
// AP and BP are read-only inputs and CP is the matrix that is updated; alpha is complex while
// beta points to a real double.
HIPBLAS_EXPORT hipblasStatus_t hipblasZher2k(hipblasHandle_t         handle,
                                             hipblasFillMode_t       uplo,
                                             hipblasOperation_t      transA,
                                             int                     n,
                                             int                     k,
                                             const hipDoubleComplex* alpha,
                                             const hipDoubleComplex* AP,
                                             int                     lda,
                                             const hipDoubleComplex* BP,
                                             int                     ldb,
                                             const double*           beta,
                                             hipDoubleComplex*       CP,
                                             int                     ldc);

// 64-bit interface
// hipComplex her2k with int64_t sizes: n, k, lda, ldb and ldc are int64_t.
// alpha is complex while beta points to a real float.
HIPBLAS_EXPORT hipblasStatus_t hipblasCher2k_64(hipblasHandle_t    handle,
                                                hipblasFillMode_t  uplo,
                                                hipblasOperation_t transA,
                                                int64_t            n,
                                                int64_t            k,
                                                const hipComplex*  alpha,
                                                const hipComplex*  AP,
                                                int64_t            lda,
                                                const hipComplex*  BP,
                                                int64_t            ldb,
                                                const float*       beta,
                                                hipComplex*        CP,
                                                int64_t            ldc);

// hipDoubleComplex her2k with int64_t sizes: n, k, lda, ldb and ldc are int64_t.
// alpha is complex while beta points to a real double.
HIPBLAS_EXPORT hipblasStatus_t hipblasZher2k_64(hipblasHandle_t         handle,
                                                hipblasFillMode_t       uplo,
                                                hipblasOperation_t      transA,
                                                int64_t                 n,
                                                int64_t                 k,
                                                const hipDoubleComplex* alpha,
                                                const hipDoubleComplex* AP,
                                                int64_t                 lda,
                                                const hipDoubleComplex* BP,
                                                int64_t                 ldb,
                                                const double*           beta,
                                                hipDoubleComplex*       CP,
                                                int64_t                 ldc);
//! @}

/*! @{
    \brief <b> BLAS Level 3 API </b>

    \details

    The her2kBatched functions perform a batch of the matrix-matrix operations for a Hermitian rank-2k update:

        C_i := alpha*op( A_i )*op( B_i )^H + conj(alpha)*op( B_i )*op( A_i )^H + beta*C_i

    where ``alpha`` and ``beta`` are scalars, ``op(A_i)`` and ``op(B_i)`` are ``n`` by ``k`` matrices, and
    ``C_i`` is an ``n`` by ``n`` Hermitian matrix stored as either upper or lower.

        op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N
        op( A_i ) = A_i^H, op( B_i ) = B_i^H,  and A_i and B_i are k by n if trans == HIPBLAS_OP_C

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  C_i is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  C_i is a lower triangular matrix.

    @param[in]
    transA  [hipblasOperation_t]
            - HIPBLAS_OP_C: op( A_i ) = A_i^H, op( B_i ) = B_i^H
            - HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i

    @param[in]
    n       [int]
            n specifies the number of rows and columns of C_i. n >= 0.

    @param[in]
    k       [int]
            k specifies the number of columns of op(A). k >= 0.

    @param[in]
    alpha
            alpha specifies the scalar alpha. When alpha is
            zero, then A is not referenced and does not need to be set before
            entry.

    @param[in]
    AP      device array of device pointers storing each matrix A_i of dimension (lda, k)
            when trans is HIPBLAS_OP_N. Otherwise, of dimension (lda, n).

    @param[in]
    lda     [int]
            lda specifies the first dimension of A_i.
            If trans = HIPBLAS_OP_N,  lda >= max( 1, n ).
            Otherwise, lda >= max( 1, k ).
    @param[in]
    BP      device array of device pointers storing each matrix B_i of dimension (ldb, k)
            when trans is HIPBLAS_OP_N. Otherwise, of dimension (ldb, n).

    @param[in]
    ldb     [int]
            ldb specifies the first dimension of B_i.
            If trans = HIPBLAS_OP_N,  ldb >= max( 1, n ).
            Otherwise, ldb >= max( 1, k ).
    @param[in]
    beta
            beta specifies the scalar beta. When beta is
            zero, then C does not need to be set before entry.

    @param[inout]
    CP      device array of device pointers storing each matrix C_i on the GPU.
            The imaginary components of the diagonal elements are not used but are set to zero, except for quick return.

    @param[in]
    ldc    [int]
           ldc specifies the first dimension of C. ldc >= max( 1, n ).
    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// hipComplex her2kBatched, int-indexed interface.
// AP, BP and CP are arrays of matrix pointers, one entry per batch instance; alpha is complex
// while beta points to a real float.
HIPBLAS_EXPORT hipblasStatus_t hipblasCher2kBatched(hipblasHandle_t         handle,
                                                    hipblasFillMode_t       uplo,
                                                    hipblasOperation_t      transA,
                                                    int                     n,
                                                    int                     k,
                                                    const hipComplex*       alpha,
                                                    const hipComplex* const AP[],
                                                    int                     lda,
                                                    const hipComplex* const BP[],
                                                    int                     ldb,
                                                    const float*            beta,
                                                    hipComplex* const       CP[],
                                                    int                     ldc,
                                                    int                     batchCount);

// hipDoubleComplex her2kBatched, int-indexed interface.
// AP, BP and CP are arrays of matrix pointers, one entry per batch instance; alpha is complex
// while beta points to a real double.
HIPBLAS_EXPORT hipblasStatus_t hipblasZher2kBatched(hipblasHandle_t               handle,
                                                    hipblasFillMode_t             uplo,
                                                    hipblasOperation_t            transA,
                                                    int                           n,
                                                    int                           k,
                                                    const hipDoubleComplex*       alpha,
                                                    const hipDoubleComplex* const AP[],
                                                    int                           lda,
                                                    const hipDoubleComplex* const BP[],
                                                    int                           ldb,
                                                    const double*                 beta,
                                                    hipDoubleComplex* const       CP[],
                                                    int                           ldc,
                                                    int                           batchCount);

// 64-bit interface
// hipComplex her2kBatched with int64_t sizes: n, k, lda, ldb, ldc and batchCount are int64_t.
// AP, BP and CP are arrays of matrix pointers; alpha is complex while beta points to a real float.
HIPBLAS_EXPORT hipblasStatus_t hipblasCher2kBatched_64(hipblasHandle_t         handle,
                                                       hipblasFillMode_t       uplo,
                                                       hipblasOperation_t      transA,
                                                       int64_t                 n,
                                                       int64_t                 k,
                                                       const hipComplex*       alpha,
                                                       const hipComplex* const AP[],
                                                       int64_t                 lda,
                                                       const hipComplex* const BP[],
                                                       int64_t                 ldb,
                                                       const float*            beta,
                                                       hipComplex* const       CP[],
                                                       int64_t                 ldc,
                                                       int64_t                 batchCount);

// hipDoubleComplex her2kBatched with int64_t sizes: n, k, lda, ldb, ldc and batchCount are int64_t.
// AP, BP and CP are arrays of matrix pointers; alpha is complex while beta points to a real double.
HIPBLAS_EXPORT hipblasStatus_t hipblasZher2kBatched_64(hipblasHandle_t               handle,
                                                       hipblasFillMode_t             uplo,
                                                       hipblasOperation_t            transA,
                                                       int64_t                       n,
                                                       int64_t                       k,
                                                       const hipDoubleComplex*       alpha,
                                                       const hipDoubleComplex* const AP[],
                                                       int64_t                       lda,
                                                       const hipDoubleComplex* const BP[],
                                                       int64_t                       ldb,
                                                       const double*                 beta,
                                                       hipDoubleComplex* const       CP[],
                                                       int64_t                       ldc,
                                                       int64_t                       batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 3 API </b>

    \details

    The her2kStridedBatched functions perform a batch of the matrix-matrix operations for a Hermitian rank-2k update:

        C_i := alpha*op( A_i )*op( B_i )^H + conj(alpha)*op( B_i )*op( A_i )^H + beta*C_i

    where ``alpha`` and ``beta`` are scalars, ``op(A_i)`` and ``op(B_i)`` are ``n`` by ``k`` matrices, and
    ``C_i`` is an ``n`` by ``n`` Hermitian matrix stored as either upper or lower.

        op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == HIPBLAS_OP_N
        op( A_i ) = A_i^H, op( B_i ) = B_i^H,  and A_i and B_i are k by n if trans == HIPBLAS_OP_C

    - Supported precisions in rocBLAS : ``c`` and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  C_i is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  C_i is a lower triangular matrix.

    @param[in]
    transA  [hipblasOperation_t]
            - HIPBLAS_OP_C: op( A_i ) = A_i^H, op( B_i ) = B_i^H
            - HIPBLAS_OP_N: op( A_i ) = A_i, op( B_i ) = B_i

    @param[in]
    n       [int]
            n specifies the number of rows and columns of C_i. n >= 0.

    @param[in]
    k       [int]
            k specifies the number of columns of op(A). k >= 0.

    @param[in]
    alpha
            alpha specifies the scalar alpha. When alpha is
            zero, then A is not referenced and does not need to be set before
            entry.

    @param[in]
    AP      Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)
            when trans is HIPBLAS_OP_N. Otherwise, of dimension (lda, n).

    @param[in]
    lda     [int]
            lda specifies the first dimension of A_i.
            If trans = HIPBLAS_OP_N,  lda >= max( 1, n ).
            Otherwise, lda >= max( 1, k ).

    @param[in]
    strideA  [hipblasStride]
              stride from the start of one matrix (A_i) to the next one (A_i+1).

    @param[in]
    BP      Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)
            when trans is HIPBLAS_OP_N. Otherwise, of dimension (ldb, n).

    @param[in]
    ldb     [int]
            ldb specifies the first dimension of B_i.
            If trans = HIPBLAS_OP_N,  ldb >= max( 1, n ).
            Otherwise, ldb >= max( 1, k ).

    @param[in]
    strideB  [hipblasStride]
              stride from the start of one matrix (B_i) to the next one (B_i+1).

    @param[in]
    beta
            beta specifies the scalar beta. When beta is
            zero, then C does not need to be set before entry.

    @param[inout]
    CP      Device pointer to the first matrix C_1 on the GPU.
            The imaginary components of the diagonal elements are not used but are set to zero, except for quick return.

    @param[in]
    ldc    [int]
           ldc specifies the first dimension of C. ldc >= max( 1, n ).

    @param[in]
    strideC  [hipblasStride]
              stride from the start of one matrix (C_i) to the next one (C_i+1).

    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// hipComplex her2kStridedBatched, int-indexed interface.
// AP, BP and CP each point at the first matrix of a batch; strideA/strideB/strideC give the
// distance from one matrix to the next. alpha is complex while beta points to a real float.
HIPBLAS_EXPORT hipblasStatus_t hipblasCher2kStridedBatched(hipblasHandle_t    handle,
                                                           hipblasFillMode_t  uplo,
                                                           hipblasOperation_t transA,
                                                           int                n,
                                                           int                k,
                                                           const hipComplex*  alpha,
                                                           const hipComplex*  AP,
                                                           int                lda,
                                                           hipblasStride      strideA,
                                                           const hipComplex*  BP,
                                                           int                ldb,
                                                           hipblasStride      strideB,
                                                           const float*       beta,
                                                           hipComplex*        CP,
                                                           int                ldc,
                                                           hipblasStride      strideC,
                                                           int                batchCount);

// hipDoubleComplex her2kStridedBatched, int-indexed interface.
// AP, BP and CP each point at the first matrix of a batch; strideA/strideB/strideC give the
// distance from one matrix to the next. alpha is complex while beta points to a real double.
HIPBLAS_EXPORT hipblasStatus_t hipblasZher2kStridedBatched(hipblasHandle_t         handle,
                                                           hipblasFillMode_t       uplo,
                                                           hipblasOperation_t      transA,
                                                           int                     n,
                                                           int                     k,
                                                           const hipDoubleComplex* alpha,
                                                           const hipDoubleComplex* AP,
                                                           int                     lda,
                                                           hipblasStride           strideA,
                                                           const hipDoubleComplex* BP,
                                                           int                     ldb,
                                                           hipblasStride           strideB,
                                                           const double*           beta,
                                                           hipDoubleComplex*       CP,
                                                           int                     ldc,
                                                           hipblasStride           strideC,
                                                           int                     batchCount);

// 64-bit interface
// Single-precision complex (hipComplex) variant; n, k, lda, ldb, ldc and batchCount are int64_t.
// Strides stay hipblasStride. alpha is complex, beta is a real float.
HIPBLAS_EXPORT hipblasStatus_t hipblasCher2kStridedBatched_64(hipblasHandle_t    handle,
                                                              hipblasFillMode_t  uplo,
                                                              hipblasOperation_t transA,
                                                              int64_t            n,
                                                              int64_t            k,
                                                              const hipComplex*  alpha,
                                                              const hipComplex*  AP,
                                                              int64_t            lda,
                                                              hipblasStride      strideA,
                                                              const hipComplex*  BP,
                                                              int64_t            ldb,
                                                              hipblasStride      strideB,
                                                              const float*       beta,
                                                              hipComplex*        CP,
                                                              int64_t            ldc,
                                                              hipblasStride      strideC,
                                                              int64_t            batchCount);

// Double-precision complex (hipDoubleComplex) variant; n, k, lda, ldb, ldc and batchCount are
// int64_t. Strides stay hipblasStride. alpha is complex, beta is a real double.
HIPBLAS_EXPORT hipblasStatus_t hipblasZher2kStridedBatched_64(hipblasHandle_t         handle,
                                                              hipblasFillMode_t       uplo,
                                                              hipblasOperation_t      transA,
                                                              int64_t                 n,
                                                              int64_t                 k,
                                                              const hipDoubleComplex* alpha,
                                                              const hipDoubleComplex* AP,
                                                              int64_t                 lda,
                                                              hipblasStride           strideA,
                                                              const hipDoubleComplex* BP,
                                                              int64_t                 ldb,
                                                              hipblasStride           strideB,
                                                              const double*           beta,
                                                              hipDoubleComplex*       CP,
                                                              int64_t                 ldc,
                                                              hipblasStride           strideC,
                                                              int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 3 API </b>

    \details

    The symm functions perform one of the matrix-matrix operations:

        C := alpha*A*B + beta*C if side == HIPBLAS_SIDE_LEFT,
        C := alpha*B*A + beta*C if side == HIPBLAS_SIDE_RIGHT,

    where ``alpha`` and ``beta`` are scalars, ``B`` and ``C`` are ``m`` by ``n`` matrices, and
    ``A`` is a symmetric matrix stored as either upper or lower.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    side  [hipblasSideMode_t]
            - HIPBLAS_SIDE_LEFT:      C := alpha*A*B + beta*C
            - HIPBLAS_SIDE_RIGHT:     C := alpha*B*A + beta*C

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  Only the upper triangular part of A is accessed.
            - HIPBLAS_FILL_MODE_LOWER:  Only the lower triangular part of A is accessed.

    @param[in]
    m       [int]
            m specifies the number of rows of B and C. m >= 0.

    @param[in]
    n       [int]
            n specifies the number of columns of B and C. n >= 0.

    @param[in]
    alpha
            alpha specifies the scalar alpha. When alpha is
            zero, then A and B are not referenced.

    @param[in]
    AP      pointer storing matrix A on the GPU.
            A is m by m if side == HIPBLAS_SIDE_LEFT.
            A is n by n if side == HIPBLAS_SIDE_RIGHT.
            Only the upper/lower triangular part is accessed.

    @param[in]
    lda     [int]
            lda specifies the first dimension of A.
            If side = HIPBLAS_SIDE_LEFT,  lda >= max( 1, m ).
            Otherwise, lda >= max( 1, n ).

    @param[in]
    BP      pointer storing matrix B on the GPU.
            Matrix dimension is m by n.

    @param[in]
    ldb     [int]
            ldb specifies the first dimension of B. ldb >= max( 1, m ).

    @param[in]
    beta
            beta specifies the scalar beta. When beta is
            zero, then C does not need to be set before entry.

    @param[inout]
    CP      pointer storing matrix C on the GPU.
            Matrix dimension is m by n.

    @param[in]
    ldc    [int]
           ldc specifies the first dimension of C. ldc >= max( 1, m ).

    ********************************************************************/

// Single-precision real (float) variant; sizes and leading dimensions are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsymm(hipblasHandle_t   handle,
                                            hipblasSideMode_t side,
                                            hipblasFillMode_t uplo,
                                            int               m,
                                            int               n,
                                            const float*      alpha,
                                            const float*      AP,
                                            int               lda,
                                            const float*      BP,
                                            int               ldb,
                                            const float*      beta,
                                            float*            CP,
                                            int               ldc);

// Double-precision real (double) variant; sizes and leading dimensions are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsymm(hipblasHandle_t   handle,
                                            hipblasSideMode_t side,
                                            hipblasFillMode_t uplo,
                                            int               m,
                                            int               n,
                                            const double*     alpha,
                                            const double*     AP,
                                            int               lda,
                                            const double*     BP,
                                            int               ldb,
                                            const double*     beta,
                                            double*           CP,
                                            int               ldc);

// Single-precision complex (hipComplex) variant; alpha and beta are both complex scalars.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsymm(hipblasHandle_t   handle,
                                            hipblasSideMode_t side,
                                            hipblasFillMode_t uplo,
                                            int               m,
                                            int               n,
                                            const hipComplex* alpha,
                                            const hipComplex* AP,
                                            int               lda,
                                            const hipComplex* BP,
                                            int               ldb,
                                            const hipComplex* beta,
                                            hipComplex*       CP,
                                            int               ldc);

// Double-precision complex (hipDoubleComplex) variant; alpha and beta are both complex scalars.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsymm(hipblasHandle_t         handle,
                                            hipblasSideMode_t       side,
                                            hipblasFillMode_t       uplo,
                                            int                     m,
                                            int                     n,
                                            const hipDoubleComplex* alpha,
                                            const hipDoubleComplex* AP,
                                            int                     lda,
                                            const hipDoubleComplex* BP,
                                            int                     ldb,
                                            const hipDoubleComplex* beta,
                                            hipDoubleComplex*       CP,
                                            int                     ldc);

// 64-bit interface
// Single-precision real (float) variant; m, n, lda, ldb and ldc are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsymm_64(hipblasHandle_t   handle,
                                               hipblasSideMode_t side,
                                               hipblasFillMode_t uplo,
                                               int64_t           m,
                                               int64_t           n,
                                               const float*      alpha,
                                               const float*      AP,
                                               int64_t           lda,
                                               const float*      BP,
                                               int64_t           ldb,
                                               const float*      beta,
                                               float*            CP,
                                               int64_t           ldc);

// Double-precision real (double) variant; m, n, lda, ldb and ldc are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsymm_64(hipblasHandle_t   handle,
                                               hipblasSideMode_t side,
                                               hipblasFillMode_t uplo,
                                               int64_t           m,
                                               int64_t           n,
                                               const double*     alpha,
                                               const double*     AP,
                                               int64_t           lda,
                                               const double*     BP,
                                               int64_t           ldb,
                                               const double*     beta,
                                               double*           CP,
                                               int64_t           ldc);

// Single-precision complex (hipComplex) variant; m, n, lda, ldb and ldc are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsymm_64(hipblasHandle_t   handle,
                                               hipblasSideMode_t side,
                                               hipblasFillMode_t uplo,
                                               int64_t           m,
                                               int64_t           n,
                                               const hipComplex* alpha,
                                               const hipComplex* AP,
                                               int64_t           lda,
                                               const hipComplex* BP,
                                               int64_t           ldb,
                                               const hipComplex* beta,
                                               hipComplex*       CP,
                                               int64_t           ldc);

// Double-precision complex (hipDoubleComplex) variant; m, n, lda, ldb and ldc are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsymm_64(hipblasHandle_t         handle,
                                               hipblasSideMode_t       side,
                                               hipblasFillMode_t       uplo,
                                               int64_t                 m,
                                               int64_t                 n,
                                               const hipDoubleComplex* alpha,
                                               const hipDoubleComplex* AP,
                                               int64_t                 lda,
                                               const hipDoubleComplex* BP,
                                               int64_t                 ldb,
                                               const hipDoubleComplex* beta,
                                               hipDoubleComplex*       CP,
                                               int64_t                 ldc);
//! @}

/*! @{
    \brief <b> BLAS Level 3 API </b>

    \details

    The symmBatched functions perform a batch of the matrix-matrix operations:

        C_i := alpha*A_i*B_i + beta*C_i if side == HIPBLAS_SIDE_LEFT,
        C_i := alpha*B_i*A_i + beta*C_i if side == HIPBLAS_SIDE_RIGHT,

    where ``alpha`` and ``beta`` are scalars, ``B_i`` and ``C_i`` are ``m`` by ``n`` matrices, and
    ``A_i`` is a symmetric matrix stored as either upper or lower.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    side  [hipblasSideMode_t]
            - HIPBLAS_SIDE_LEFT:      C_i := alpha*A_i*B_i + beta*C_i
            - HIPBLAS_SIDE_RIGHT:     C_i := alpha*B_i*A_i + beta*C_i

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  Only the upper triangular part of A_i is accessed.
            - HIPBLAS_FILL_MODE_LOWER:  Only the lower triangular part of A_i is accessed.

    @param[in]
    m       [int]
            m specifies the number of rows of B_i and C_i. m >= 0.

    @param[in]
    n       [int]
            n specifies the number of columns of B_i and C_i. n >= 0.

    @param[in]
    alpha
            alpha specifies the scalar alpha. When alpha is
            zero, then A_i and B_i are not referenced.

    @param[in]
    AP      device array of device pointers storing each matrix A_i on the GPU.
            A_i is m by m if side == HIPBLAS_SIDE_LEFT.
            A_i is n by n if side == HIPBLAS_SIDE_RIGHT.
            Only the upper/lower triangular part is accessed.

    @param[in]
    lda     [int]
            lda specifies the first dimension of A_i.
            If side = HIPBLAS_SIDE_LEFT,  lda >= max( 1, m ).
            Otherwise, lda >= max( 1, n ).

    @param[in]
    BP      device array of device pointers storing each matrix B_i on the GPU.
            Matrix dimension is m by n.

    @param[in]
    ldb     [int]
            ldb specifies the first dimension of B_i. ldb >= max( 1, m ).

    @param[in]
    beta
            beta specifies the scalar beta. When beta is
            zero, then C_i does not need to be set before entry.

    @param[inout]
    CP      device array of device pointers storing each matrix C_i on the GPU.
            Matrix dimension is m by n.

    @param[in]
    ldc    [int]
           ldc specifies the first dimension of C_i. ldc >= max( 1, m ).

    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// Single-precision real (float) variant; AP, BP and CP are arrays of per-matrix pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsymmBatched(hipblasHandle_t    handle,
                                                   hipblasSideMode_t  side,
                                                   hipblasFillMode_t  uplo,
                                                   int                m,
                                                   int                n,
                                                   const float*       alpha,
                                                   const float* const AP[],
                                                   int                lda,
                                                   const float* const BP[],
                                                   int                ldb,
                                                   const float*       beta,
                                                   float* const       CP[],
                                                   int                ldc,
                                                   int                batchCount);

// Double-precision real (double) variant; AP, BP and CP are arrays of per-matrix pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsymmBatched(hipblasHandle_t     handle,
                                                   hipblasSideMode_t   side,
                                                   hipblasFillMode_t   uplo,
                                                   int                 m,
                                                   int                 n,
                                                   const double*       alpha,
                                                   const double* const AP[],
                                                   int                 lda,
                                                   const double* const BP[],
                                                   int                 ldb,
                                                   const double*       beta,
                                                   double* const       CP[],
                                                   int                 ldc,
                                                   int                 batchCount);

// Single-precision complex (hipComplex) variant; AP, BP and CP are arrays of per-matrix pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsymmBatched(hipblasHandle_t         handle,
                                                   hipblasSideMode_t       side,
                                                   hipblasFillMode_t       uplo,
                                                   int                     m,
                                                   int                     n,
                                                   const hipComplex*       alpha,
                                                   const hipComplex* const AP[],
                                                   int                     lda,
                                                   const hipComplex* const BP[],
                                                   int                     ldb,
                                                   const hipComplex*       beta,
                                                   hipComplex* const       CP[],
                                                   int                     ldc,
                                                   int                     batchCount);

// Double-precision complex (hipDoubleComplex) variant; AP, BP and CP are arrays of per-matrix
// pointers.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsymmBatched(hipblasHandle_t               handle,
                                                   hipblasSideMode_t             side,
                                                   hipblasFillMode_t             uplo,
                                                   int                           m,
                                                   int                           n,
                                                   const hipDoubleComplex*       alpha,
                                                   const hipDoubleComplex* const AP[],
                                                   int                           lda,
                                                   const hipDoubleComplex* const BP[],
                                                   int                           ldb,
                                                   const hipDoubleComplex*       beta,
                                                   hipDoubleComplex* const       CP[],
                                                   int                           ldc,
                                                   int                           batchCount);

// 64-bit interface
// Single-precision real (float) variant; m, n, lda, ldb, ldc and batchCount are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsymmBatched_64(hipblasHandle_t    handle,
                                                      hipblasSideMode_t  side,
                                                      hipblasFillMode_t  uplo,
                                                      int64_t            m,
                                                      int64_t            n,
                                                      const float*       alpha,
                                                      const float* const AP[],
                                                      int64_t            lda,
                                                      const float* const BP[],
                                                      int64_t            ldb,
                                                      const float*       beta,
                                                      float* const       CP[],
                                                      int64_t            ldc,
                                                      int64_t            batchCount);

// Double-precision real (double) variant; m, n, lda, ldb, ldc and batchCount are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsymmBatched_64(hipblasHandle_t     handle,
                                                      hipblasSideMode_t   side,
                                                      hipblasFillMode_t   uplo,
                                                      int64_t             m,
                                                      int64_t             n,
                                                      const double*       alpha,
                                                      const double* const AP[],
                                                      int64_t             lda,
                                                      const double* const BP[],
                                                      int64_t             ldb,
                                                      const double*       beta,
                                                      double* const       CP[],
                                                      int64_t             ldc,
                                                      int64_t             batchCount);

// Single-precision complex (hipComplex) variant; m, n, lda, ldb, ldc and batchCount are int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsymmBatched_64(hipblasHandle_t         handle,
                                                      hipblasSideMode_t       side,
                                                      hipblasFillMode_t       uplo,
                                                      int64_t                 m,
                                                      int64_t                 n,
                                                      const hipComplex*       alpha,
                                                      const hipComplex* const AP[],
                                                      int64_t                 lda,
                                                      const hipComplex* const BP[],
                                                      int64_t                 ldb,
                                                      const hipComplex*       beta,
                                                      hipComplex* const       CP[],
                                                      int64_t                 ldc,
                                                      int64_t                 batchCount);

// Double-precision complex (hipDoubleComplex) variant; m, n, lda, ldb, ldc and batchCount are
// int64_t.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsymmBatched_64(hipblasHandle_t               handle,
                                                      hipblasSideMode_t             side,
                                                      hipblasFillMode_t             uplo,
                                                      int64_t                       m,
                                                      int64_t                       n,
                                                      const hipDoubleComplex*       alpha,
                                                      const hipDoubleComplex* const AP[],
                                                      int64_t                       lda,
                                                      const hipDoubleComplex* const BP[],
                                                      int64_t                       ldb,
                                                      const hipDoubleComplex*       beta,
                                                      hipDoubleComplex* const       CP[],
                                                      int64_t                       ldc,
                                                      int64_t                       batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 3 API </b>

    \details

    The symmStridedBatched functions perform a batch of the matrix-matrix operations:

        C_i := alpha*A_i*B_i + beta*C_i if side == HIPBLAS_SIDE_LEFT,
        C_i := alpha*B_i*A_i + beta*C_i if side == HIPBLAS_SIDE_RIGHT,

    where ``alpha`` and ``beta`` are scalars, ``B_i`` and ``C_i`` are ``m`` by ``n`` matrices, and
    ``A_i`` is a symmetric matrix stored as either upper or lower.

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    side  [hipblasSideMode_t]
            - HIPBLAS_SIDE_LEFT:      C_i := alpha*A_i*B_i + beta*C_i
            - HIPBLAS_SIDE_RIGHT:     C_i := alpha*B_i*A_i + beta*C_i

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  Only the upper triangular part of A_i is accessed.
            - HIPBLAS_FILL_MODE_LOWER:  Only the lower triangular part of A_i is accessed.

    @param[in]
    m       [int]
            m specifies the number of rows of B_i and C_i. m >= 0.

    @param[in]
    n       [int]
            n specifies the number of columns of B_i and C_i. n >= 0.

    @param[in]
    alpha
            alpha specifies the scalar alpha. When alpha is
            zero, then A_i and B_i are not referenced.

    @param[in]
    AP       device pointer to first matrix A_1.
            A_i is m by m if side == HIPBLAS_SIDE_LEFT.
            A_i is n by n if side == HIPBLAS_SIDE_RIGHT.
            Only the upper/lower triangular part is accessed.

    @param[in]
    lda     [int]
            lda specifies the first dimension of A_i.
            If side = HIPBLAS_SIDE_LEFT,  lda >= max( 1, m ).
            Otherwise, lda >= max( 1, n ).

    @param[in]
    strideA  [hipblasStride]
              stride from the start of one matrix (A_i) to the next one (A_i+1).

    @param[in]
    BP       device pointer to first matrix B_1 of dimension (ldb, n) on the GPU.

    @param[in]
    ldb     [int]
            ldb specifies the first dimension of B_i. ldb >= max( 1, m ).

    @param[in]
    strideB  [hipblasStride]
              stride from the start of one matrix (B_i) to the next one (B_i+1).

    @param[in]
    beta
            beta specifies the scalar beta. When beta is
            zero, then C_i does not need to be set before entry.

    @param[inout]
    CP        device pointer to first matrix C_1 of dimension (ldc, n) on the GPU.

    @param[in]
    ldc    [int]
           ldc specifies the first dimension of C_i. ldc >= max( 1, m ).

    @param[in]
    strideC  [hipblasStride]
              stride from the start of one matrix (C_i) to the next one (C_i+1).

    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// Single-precision real (float) variant; each operand is a base pointer plus a hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsymmStridedBatched(hipblasHandle_t   handle,
                                                          hipblasSideMode_t side,
                                                          hipblasFillMode_t uplo,
                                                          int               m,
                                                          int               n,
                                                          const float*      alpha,
                                                          const float*      AP,
                                                          int               lda,
                                                          hipblasStride     strideA,
                                                          const float*      BP,
                                                          int               ldb,
                                                          hipblasStride     strideB,
                                                          const float*      beta,
                                                          float*            CP,
                                                          int               ldc,
                                                          hipblasStride     strideC,
                                                          int               batchCount);

// Double-precision real (double) variant; each operand is a base pointer plus a hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsymmStridedBatched(hipblasHandle_t   handle,
                                                          hipblasSideMode_t side,
                                                          hipblasFillMode_t uplo,
                                                          int               m,
                                                          int               n,
                                                          const double*     alpha,
                                                          const double*     AP,
                                                          int               lda,
                                                          hipblasStride     strideA,
                                                          const double*     BP,
                                                          int               ldb,
                                                          hipblasStride     strideB,
                                                          const double*     beta,
                                                          double*           CP,
                                                          int               ldc,
                                                          hipblasStride     strideC,
                                                          int               batchCount);

// Single-precision complex (hipComplex) variant; each operand is a base pointer plus a
// hipblasStride. alpha and beta are both complex scalars.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsymmStridedBatched(hipblasHandle_t   handle,
                                                          hipblasSideMode_t side,
                                                          hipblasFillMode_t uplo,
                                                          int               m,
                                                          int               n,
                                                          const hipComplex* alpha,
                                                          const hipComplex* AP,
                                                          int               lda,
                                                          hipblasStride     strideA,
                                                          const hipComplex* BP,
                                                          int               ldb,
                                                          hipblasStride     strideB,
                                                          const hipComplex* beta,
                                                          hipComplex*       CP,
                                                          int               ldc,
                                                          hipblasStride     strideC,
                                                          int               batchCount);

// Double-precision complex (hipDoubleComplex) variant; each operand is a base pointer plus a
// hipblasStride. alpha and beta are both complex scalars.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsymmStridedBatched(hipblasHandle_t         handle,
                                                          hipblasSideMode_t       side,
                                                          hipblasFillMode_t       uplo,
                                                          int                     m,
                                                          int                     n,
                                                          const hipDoubleComplex* alpha,
                                                          const hipDoubleComplex* AP,
                                                          int                     lda,
                                                          hipblasStride           strideA,
                                                          const hipDoubleComplex* BP,
                                                          int                     ldb,
                                                          hipblasStride           strideB,
                                                          const hipDoubleComplex* beta,
                                                          hipDoubleComplex*       CP,
                                                          int                     ldc,
                                                          hipblasStride           strideC,
                                                          int                     batchCount);

// 64-bit interface
// Single-precision real variant: alpha, AP, BP, beta and CP are float.
// m, n, lda, ldb, ldc and batchCount are int64_t; strideA/strideB/strideC are hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsymmStridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasSideMode_t side,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           m,
                                                             int64_t           n,
                                                             const float*      alpha,
                                                             const float*      AP,
                                                             int64_t           lda,
                                                             hipblasStride     strideA,
                                                             const float*      BP,
                                                             int64_t           ldb,
                                                             hipblasStride     strideB,
                                                             const float*      beta,
                                                             float*            CP,
                                                             int64_t           ldc,
                                                             hipblasStride     strideC,
                                                             int64_t           batchCount);

// Double-precision real variant: alpha, AP, BP, beta and CP are double.
// m, n, lda, ldb, ldc and batchCount are int64_t; strideA/strideB/strideC are hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsymmStridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasSideMode_t side,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           m,
                                                             int64_t           n,
                                                             const double*     alpha,
                                                             const double*     AP,
                                                             int64_t           lda,
                                                             hipblasStride     strideA,
                                                             const double*     BP,
                                                             int64_t           ldb,
                                                             hipblasStride     strideB,
                                                             const double*     beta,
                                                             double*           CP,
                                                             int64_t           ldc,
                                                             hipblasStride     strideC,
                                                             int64_t           batchCount);

// Single-precision complex variant: alpha, AP, BP, beta and CP are hipComplex.
// m, n, lda, ldb, ldc and batchCount are int64_t; strideA/strideB/strideC are hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsymmStridedBatched_64(hipblasHandle_t   handle,
                                                             hipblasSideMode_t side,
                                                             hipblasFillMode_t uplo,
                                                             int64_t           m,
                                                             int64_t           n,
                                                             const hipComplex* alpha,
                                                             const hipComplex* AP,
                                                             int64_t           lda,
                                                             hipblasStride     strideA,
                                                             const hipComplex* BP,
                                                             int64_t           ldb,
                                                             hipblasStride     strideB,
                                                             const hipComplex* beta,
                                                             hipComplex*       CP,
                                                             int64_t           ldc,
                                                             hipblasStride     strideC,
                                                             int64_t           batchCount);

// Double-precision complex variant: alpha, AP, BP, beta and CP are hipDoubleComplex.
// m, n, lda, ldb, ldc and batchCount are int64_t; strideA/strideB/strideC are hipblasStride.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsymmStridedBatched_64(hipblasHandle_t         handle,
                                                             hipblasSideMode_t       side,
                                                             hipblasFillMode_t       uplo,
                                                             int64_t                 m,
                                                             int64_t                 n,
                                                             const hipDoubleComplex* alpha,
                                                             const hipDoubleComplex* AP,
                                                             int64_t                 lda,
                                                             hipblasStride           strideA,
                                                             const hipDoubleComplex* BP,
                                                             int64_t                 ldb,
                                                             hipblasStride           strideB,
                                                             const hipDoubleComplex* beta,
                                                             hipDoubleComplex*       CP,
                                                             int64_t                 ldc,
                                                             hipblasStride           strideC,
                                                             int64_t                 batchCount);
//! @}

/*! @{
    \brief <b> BLAS Level 3 API </b>

    \details

    The syrk functions perform one of the matrix-matrix operations for a symmetric rank-k update:

        C := alpha*op( A )*op( A )^T + beta*C

    where ``alpha`` and ``beta`` are scalars, ``op(A)`` is an ``n`` by ``k`` matrix, and
    ``C`` is a symmetric ``n`` by ``n`` matrix stored as either upper or lower.

        op( A ) = A, and A is n by k if transA == HIPBLAS_OP_N
        op( A ) = A^T and A is k by n if transA == HIPBLAS_OP_T

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : ``s``, ``d``, ``c``, and ``z``.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  C is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  C is a lower triangular matrix.

    @param[in]
    transA  [hipblasOperation_t]
            - HIPBLAS_OP_T: op(A) = A^T
            - HIPBLAS_OP_N: op(A) = A
            - HIPBLAS_OP_C: op(A) = A^T (real precisions only)
            - HIPBLAS_OP_C is not supported for complex types. See cherk
              and zherk.

    @param[in]
    n       [int]
            n specifies the number of rows and columns of C. n >= 0.

    @param[in]
    k       [int]
            k specifies the number of columns of op(A). k >= 0.

    @param[in]
    alpha
            alpha specifies the scalar alpha. When alpha is
            zero, then A is not referenced and A does not need to be set before
            entry.

    @param[in]
    AP      pointer storing matrix A on the GPU.
            Matrix dimension is ( lda, k ) when transA = HIPBLAS_OP_N. Otherwise, (lda, n).
            A is a general matrix; uplo selects the triangular part of C only.

    @param[in]
    lda     [int]
            lda specifies the first dimension of A.
            If transA = HIPBLAS_OP_N,  lda >= max( 1, n ).
            Otherwise, lda >= max( 1, k ).

    @param[in]
    beta
            beta specifies the scalar beta. When beta is
            zero, then C does not need to be set before entry.

    @param[inout]
    CP       pointer storing matrix C on the GPU.
             Only the upper/lower triangular part selected by uplo is accessed.

    @param[in]
    ldc    [int]
           ldc specifies the first dimension of C. ldc >= max( 1, n ).

    ********************************************************************/

// Single-precision real syrk: alpha, AP, beta and CP are float; n, k, lda and ldc are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrk(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            int                n,
                                            int                k,
                                            const float*       alpha,
                                            const float*       AP,
                                            int                lda,
                                            const float*       beta,
                                            float*             CP,
                                            int                ldc);

// Double-precision real syrk: alpha, AP, beta and CP are double; n, k, lda and ldc are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsyrk(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            int                n,
                                            int                k,
                                            const double*      alpha,
                                            const double*      AP,
                                            int                lda,
                                            const double*      beta,
                                            double*            CP,
                                            int                ldc);

// Single-precision complex syrk: alpha, AP, beta and CP are hipComplex; n, k, lda and ldc are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsyrk(hipblasHandle_t    handle,
                                            hipblasFillMode_t  uplo,
                                            hipblasOperation_t transA,
                                            int                n,
                                            int                k,
                                            const hipComplex*  alpha,
                                            const hipComplex*  AP,
                                            int                lda,
                                            const hipComplex*  beta,
                                            hipComplex*        CP,
                                            int                ldc);

// Double-precision complex syrk: alpha, AP, beta and CP are hipDoubleComplex; n, k, lda and ldc are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsyrk(hipblasHandle_t         handle,
                                            hipblasFillMode_t       uplo,
                                            hipblasOperation_t      transA,
                                            int                     n,
                                            int                     k,
                                            const hipDoubleComplex* alpha,
                                            const hipDoubleComplex* AP,
                                            int                     lda,
                                            const hipDoubleComplex* beta,
                                            hipDoubleComplex*       CP,
                                            int                     ldc);

// 64-bit interface
// Single-precision real syrk with 64-bit sizes: same parameter order as hipblasSsyrk,
// but n, k, lda and ldc are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrk_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               int64_t            n,
                                               int64_t            k,
                                               const float*       alpha,
                                               const float*       AP,
                                               int64_t            lda,
                                               const float*       beta,
                                               float*             CP,
                                               int64_t            ldc);

// Double-precision real syrk with 64-bit sizes: same parameter order as hipblasDsyrk,
// but n, k, lda and ldc are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsyrk_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               int64_t            n,
                                               int64_t            k,
                                               const double*      alpha,
                                               const double*      AP,
                                               int64_t            lda,
                                               const double*      beta,
                                               double*            CP,
                                               int64_t            ldc);

// Single-precision complex syrk with 64-bit sizes: same parameter order as hipblasCsyrk,
// but n, k, lda and ldc are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsyrk_64(hipblasHandle_t    handle,
                                               hipblasFillMode_t  uplo,
                                               hipblasOperation_t transA,
                                               int64_t            n,
                                               int64_t            k,
                                               const hipComplex*  alpha,
                                               const hipComplex*  AP,
                                               int64_t            lda,
                                               const hipComplex*  beta,
                                               hipComplex*        CP,
                                               int64_t            ldc);

// Double-precision complex syrk with 64-bit sizes: same parameter order as hipblasZsyrk,
// but n, k, lda and ldc are int64_t instead of int.
HIPBLAS_EXPORT hipblasStatus_t hipblasZsyrk_64(hipblasHandle_t         handle,
                                               hipblasFillMode_t       uplo,
                                               hipblasOperation_t      transA,
                                               int64_t                 n,
                                               int64_t                 k,
                                               const hipDoubleComplex* alpha,
                                               const hipDoubleComplex* AP,
                                               int64_t                 lda,
                                               const hipDoubleComplex* beta,
                                               hipDoubleComplex*       CP,
                                               int64_t                 ldc);
//! @}

/*! @{
    \brief <b> BLAS Level 3 API </b>

    \details

    The syrkBatched functions perform a batch of matrix-matrix operations for a symmetric rank-k update:

        C_i := alpha*op( A_i )*op( A_i )^T + beta*C_i

    where ``alpha`` and ``beta`` are scalars, ``op(A_i)`` is an ``n`` by ``k`` matrix, and
    ``C_i`` is a symmetric ``n`` by ``n`` matrix stored as either upper or lower.

        op( A_i ) = A_i, and A_i is n by k if transA == HIPBLAS_OP_N
        op( A_i ) = A_i^T and A_i is k by n if transA == HIPBLAS_OP_T

    - Supported precisions in rocBLAS : ``s``, ``d``, ``c``, and ``z``.
    - Supported precisions in cuBLAS  : No support.

    @param[in]
    handle    [hipblasHandle_t]
              handle to the hipBLAS library context queue.

    @param[in]
    uplo    [hipblasFillMode_t]
            - HIPBLAS_FILL_MODE_UPPER:  C_i is an upper triangular matrix.
            - HIPBLAS_FILL_MODE_LOWER:  C_i is a lower triangular matrix.

    @param[in]
    transA  [hipblasOperation_t]
            - HIPBLAS_OP_T: op(A_i) = A_i^T
            - HIPBLAS_OP_N: op(A_i) = A_i
            - HIPBLAS_OP_C: op(A_i) = A_i^T (real precisions only)
            - HIPBLAS_OP_C is not supported for complex types. See cherk
              and zherk.

    @param[in]
    n       [int]
            n specifies the number of rows and columns of C_i. n >= 0.

    @param[in]
    k       [int]
            k specifies the number of columns of op(A_i). k >= 0.

    @param[in]
    alpha
            alpha specifies the scalar alpha. When alpha is
            zero, then A is not referenced and does not need to be set before
            entry.

    @param[in]
    AP      device array of device pointers storing each matrix A_i of dimension (lda, k)
            when transA is HIPBLAS_OP_N. Otherwise, of dimension (lda, n).

    @param[in]
    lda     [int]
            lda specifies the first dimension of A_i.
            If transA = HIPBLAS_OP_N, lda >= max( 1, n ).
            Otherwise, lda >= max( 1, k ).

    @param[in]
    beta
            beta specifies the scalar beta. When beta is
            zero, then C does not need to be set before entry.

    @param[inout]
    CP       device array of device pointers storing each matrix C_i on the GPU.

    @param[in]
    ldc    [int]
           ldc specifies the first dimension of C_i. ldc >= max( 1, n ).

    @param[in]
    batchCount [int]
                number of instances in the batch.

    ********************************************************************/

// Single-precision real syrkBatched: AP and CP are arrays of per-matrix float pointers,
// alpha and beta are float scalars; n, k, lda, ldc and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasSsyrkBatched(hipblasHandle_t    handle,
                                                   hipblasFillMode_t  uplo,
                                                   hipblasOperation_t transA,
                                                   int                n,
                                                   int                k,
                                                   const float*       alpha,
                                                   const float* const AP[],
                                                   int                lda,
                                                   const float*       beta,
                                                   float* const       CP[],
                                                   int                ldc,
                                                   int                batchCount);

// Double-precision real syrkBatched: AP and CP are arrays of per-matrix double pointers,
// alpha and beta are double scalars; n, k, lda, ldc and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasDsyrkBatched(hipblasHandle_t     handle,
                                                   hipblasFillMode_t   uplo,
                                                   hipblasOperation_t  transA,
                                                   int                 n,
                                                   int                 k,
                                                   const double*       alpha,
                                                   const double* const AP[],
                                                   int                 lda,
                                                   const double*       beta,
                                                   double* const       CP[],
                                                   int                 ldc,
                                                   int                 batchCount);

// Single-precision complex syrkBatched: AP and CP are arrays of per-matrix hipComplex pointers,
// alpha and beta are hipComplex scalars; n, k, lda, ldc and batchCount are int.
HIPBLAS_EXPORT hipblasStatus_t hipblasCsyrkBatched(hipblasHandle_t         handle,
                                                   hipblasFillMode_t       uplo,
                                                   hipblasOperation_t      transA,
                                                   int                     n,
                                                   int                     k,
                                                   const hipComplex*       alpha,
                                                   const hipComplex* const AP[],
                                                   int                     lda,
                                                   const hipComplex*       beta,
                                                   hipComplex* const       CP[],
                                                   int                     ldc,
                                                   int                     batchCount);

HIPBLAS_EXPORT hipblasStatus_t hipblasZsyrkBatched(hipblasHandle_t               handle,
                                                   hipblasFillMode_t             uplo,
                                                   hipblasOperation_t            transA,
                                                   int                           n,
                                                   int                           k,
                                                   const hipDoubleComplex*       alpha,
                                                   const hipDoubleComplex* const AP[],
                                                   int     