| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| #pragma once |
|
|
| #include <cstdio> |
| #include <cstring> |
| #include <vector> |
| #include <array> |
| #include <string> |
| #include <fstream> |
| #include <cstdint> |
| #include <cstdlib> |
|
|
| #include "utils/checker.h" |
| #include "ax_sys_api.h" |
| #include "ax_engine_type.h" |
|
|
|
|
| #define IO_CMM_ALIGN_SIZE 128 |
|
|
| namespace utils { |
| typedef enum { |
| IO_BUFFER_STRATEGY_DEFAULT, |
| IO_BUFFER_STRATEGY_CACHED |
| } IO_BUFFER_STRATEGY_T; |
|
|
| static inline AX_S32 query_model_input_size(const AX_ENGINE_IO_INFO_T* io_info, std::array<int, 2> &input_size, |
| AX_IMG_FORMAT_E &eDtype) { |
| int height = 0; |
| int width = 0; |
| int size = 0; |
| int channel = 0; |
| int data_type_size = 0; |
| auto& input = io_info->pInputs[0]; |
|
|
| switch (input.eLayout) { |
| case AX_ENGINE_TENSOR_LAYOUT_NHWC: |
| height = input.pShape[1]; |
| width = input.pShape[2]; |
| channel = input.pShape[3]; |
| size = input.nSize; |
| break; |
| case AX_ENGINE_TENSOR_LAYOUT_NCHW: |
| channel = input.pShape[1]; |
| height = input.pShape[2]; |
| width = input.pShape[3]; |
| size = input.nSize; |
| break; |
| default: |
| height = input.pShape[1]; |
| width = input.pShape[2]; |
| channel = input.pShape[3]; |
| size = input.nSize; |
| break; |
| } |
|
|
| switch (input.eDataType) { |
| case AX_ENGINE_DT_UINT8: |
| case AX_ENGINE_DT_SINT8: |
| data_type_size = 1; |
| break; |
| case AX_ENGINE_DT_UINT16: |
| case AX_ENGINE_DT_SINT16: |
| data_type_size = 2; |
| break; |
| case AX_ENGINE_DT_FLOAT32: |
| data_type_size = 4; |
| break; |
| case AX_ENGINE_DT_SINT32: |
| case AX_ENGINE_DT_UINT32: |
| data_type_size = 4; |
| break; |
| case AX_ENGINE_DT_FLOAT64: |
| data_type_size = 8; |
| break; |
| default: |
| data_type_size = 1; |
| break; |
| } |
|
|
| if (channel == 0 || height == 0 || width == 0 || size == 0 || data_type_size == 0) { |
| return -1; |
| } |
|
|
| if (input.pExtraMeta) { |
| switch (input.pExtraMeta->eColorSpace) { |
| case AX_ENGINE_CS_BGR: |
| input_size[0] = height; |
| input_size[1] = width; |
| eDtype = AX_FORMAT_BGR888; |
| break; |
| case AX_ENGINE_CS_RGB: |
| input_size[0] = height; |
| input_size[1] = width; |
| eDtype = AX_FORMAT_RGB888; |
| break; |
| case AX_ENGINE_CS_NV12: |
| input_size[0] = height * 2 / 3; |
| input_size[1] = width; |
| eDtype = AX_FORMAT_YUV420_SEMIPLANAR; |
| break; |
| case AX_ENGINE_CS_NV21: |
| input_size[0] = height * 2 / 3; |
| input_size[1] = width; |
| eDtype = AX_FORMAT_YUV420_SEMIPLANAR_VU; |
| break; |
| default: |
| input_size[0] = height * 2 / 3; |
| input_size[1] = width; |
| eDtype = AX_FORMAT_YUV420_SEMIPLANAR; |
| break; |
| } |
| } |
| else { |
| input_size[0] = height * 2 / 3; |
| input_size[1] = width; |
| eDtype = AX_FORMAT_YUV420_SEMIPLANAR; |
| } |
|
|
| ALOGD("eLayout:%d, eDataType:%d, channel:%d, height:%d, width:%d, size:%d, data_type_size:%d", |
| input.eLayout, input.eDataType, channel, input_size[0], input_size[1], size, data_type_size); |
|
|
| return 0; |
| } |
|
|
| static inline void brief_io_info(std::string strModel, const AX_ENGINE_IO_INFO_T* io_info) { |
| auto describe_shape_type = [](AX_ENGINE_TENSOR_LAYOUT_T type) -> const char* { |
| switch (type) { |
| case AX_ENGINE_TENSOR_LAYOUT_NHWC: |
| return "NHWC"; |
| case AX_ENGINE_TENSOR_LAYOUT_NCHW: |
| return "NCHW"; |
| default: |
| return "unknown"; |
| } |
| }; |
| auto describe_data_type = [](AX_ENGINE_DATA_TYPE_T type) -> const char* { |
| switch (type) { |
| case AX_ENGINE_DT_UINT8: |
| return "uint8"; |
| case AX_ENGINE_DT_UINT16: |
| return "uint16"; |
| case AX_ENGINE_DT_FLOAT32: |
| return "float32"; |
| case AX_ENGINE_DT_SINT16: |
| return "sint16"; |
| case AX_ENGINE_DT_SINT8: |
| return "sint8"; |
| case AX_ENGINE_DT_SINT32: |
| return "sint32"; |
| case AX_ENGINE_DT_UINT32: |
| return "uint32"; |
| case AX_ENGINE_DT_FLOAT64: |
| return "float64"; |
| case AX_ENGINE_DT_UINT10_PACKED: |
| return "uint10_packed"; |
| case AX_ENGINE_DT_UINT12_PACKED: |
| return "uint12_packed"; |
| case AX_ENGINE_DT_UINT14_PACKED: |
| return "uint14_packed"; |
| case AX_ENGINE_DT_UINT16_PACKED: |
| return "uint16_packed"; |
| default: |
| return "unknown"; |
| } |
| }; |
| auto describe_memory_type = [](AX_ENGINE_MEMORY_TYPE_T type) -> const char* { |
| switch (type) { |
| case AX_ENGINE_MT_PHYSICAL: |
| return "Physical"; |
| case AX_ENGINE_MT_VIRTUAL: |
| return "Virtual"; |
| default: |
| return "unknown"; |
| } |
| }; |
| auto describe_color_space = [](AX_ENGINE_COLOR_SPACE_T cs) -> const char* { |
| switch (cs) { |
| case AX_ENGINE_CS_FEATUREMAP: |
| return "FeatureMap"; |
| case AX_ENGINE_CS_BGR: |
| return "BGR"; |
| case AX_ENGINE_CS_RGB: |
| return "RGB"; |
| case AX_ENGINE_CS_RGBA: |
| return "RGBA"; |
| case AX_ENGINE_CS_GRAY: |
| return "GRAY"; |
| case AX_ENGINE_CS_NV12: |
| return "NV12"; |
| case AX_ENGINE_CS_NV21: |
| return "NV21"; |
| case AX_ENGINE_CS_YUV444: |
| return "YUV444"; |
| case AX_ENGINE_CS_RAW8: |
| return "RAW8"; |
| case AX_ENGINE_CS_RAW10: |
| return "RAW10"; |
| case AX_ENGINE_CS_RAW12: |
| return "RAW12"; |
| case AX_ENGINE_CS_RAW14: |
| return "RAW14"; |
| case AX_ENGINE_CS_RAW16: |
| return "RAW16"; |
| default: |
| return "unknown"; |
| } |
| }; |
| printf("Model Name: %s\n", strModel.c_str()); |
| printf("Max Batch Size %d\n", io_info->nMaxBatchSize); |
| printf("Support Dynamic Batch? %s\n", io_info->bDynamicBatchSize == AX_TRUE ? "Yes" : "No"); |
|
|
| for (uint32_t i = 0; i < io_info->nInputSize; ++i) { |
| auto& input = io_info->pInputs[i]; |
| printf("Input[%d]: %s\n", i, input.pName); |
| printf(" Shape ["); |
| for (uint32_t j = 0; j < input.nShapeSize; ++j) { |
| printf("%d", (int)input.pShape[j]); |
| if (j + 1 < input.nShapeSize) printf(", "); |
| } |
| printf("] %s %s %s %s\n", describe_shape_type(input.eLayout), describe_data_type(input.eDataType), |
| input.pExtraMeta ? describe_color_space(input.pExtraMeta->eColorSpace) : "", |
| input.nQuantizationValue > 0 ? ("Q=" + std::to_string(input.nQuantizationValue)).c_str() : ""); |
| printf(" Memory %s\n", describe_memory_type(input.eMemoryType)); |
| printf(" Size %u\n", input.nSize); |
| } |
| for (uint32_t i = 0; i < io_info->nOutputSize; ++i) { |
| auto& output = io_info->pOutputs[i]; |
| printf("Output[%d]: %s\n", i, output.pName); |
| printf(" Shape ["); |
| for (uint32_t j = 0; j < output.nShapeSize; ++j) { |
| printf("%d", (int)output.pShape[j]); |
| if (j + 1 < output.nShapeSize) printf(", "); |
| } |
| printf("] %s %s %s\n", describe_shape_type(output.eLayout), describe_data_type(output.eDataType), |
| output.nQuantizationValue > 0 ? ("Q=" + std::to_string(output.nQuantizationValue)).c_str() : ""); |
| printf(" Memory %s\n", describe_memory_type(output.eMemoryType)); |
| printf(" Size %u\n", output.nSize); |
| } |
| } |
|
|
| static inline AX_S32 alloc_engine_buffer(const std::string& token, const std::string& appendix, size_t index, const AX_ENGINE_IOMETA_T* pMeta, AX_ENGINE_IO_BUFFER_T* pBuf, IO_BUFFER_STRATEGY_T eStrategy = IO_BUFFER_STRATEGY_DEFAULT) { |
| AX_S32 ret = -1; |
| if (eStrategy != IO_BUFFER_STRATEGY_DEFAULT && eStrategy != IO_BUFFER_STRATEGY_CACHED) { |
| fprintf(stderr, "strategy %d not supported\n", (int)eStrategy); |
| return -1; |
| } |
| memset(pBuf, 0, sizeof(AX_ENGINE_IO_BUFFER_T)); |
| pBuf->nSize = pMeta->nSize; |
|
|
| const std::string token_name = "skel_" + token + appendix + std::to_string(index); |
|
|
| if (eStrategy == IO_BUFFER_STRATEGY_CACHED) { |
| ret = AX_SYS_MemAllocCached((AX_U64*)&pBuf->phyAddr, &pBuf->pVirAddr, pBuf->nSize, IO_CMM_ALIGN_SIZE, (const AX_S8*)token_name.c_str()); |
| } |
| else { |
| ret = AX_SYS_MemAlloc((AX_U64*)&pBuf->phyAddr, &pBuf->pVirAddr, pBuf->nSize, IO_CMM_ALIGN_SIZE, (const AX_S8*)token_name.c_str()); |
| } |
|
|
| return ret; |
| } |
|
|
| static inline AX_S32 free_engine_buffer(AX_ENGINE_IO_BUFFER_T* pBuf) { |
| if (pBuf->phyAddr == 0) { |
| delete[] reinterpret_cast<uint8_t*>(pBuf->pVirAddr); |
| } |
| else { |
| AX_SYS_MemFree(pBuf->phyAddr, pBuf->pVirAddr); |
| } |
| pBuf->phyAddr = 0; |
| pBuf->pVirAddr = nullptr; |
|
|
| return 0; |
| } |
|
|
| static inline void free_io_index(AX_ENGINE_IO_BUFFER_T* io_buf, size_t index) { |
| AX_ENGINE_IO_BUFFER_T* pBuf = io_buf + index; |
| free_engine_buffer(pBuf); |
| } |
|
|
| static inline void free_io(AX_ENGINE_IO_T &io) { |
| for (size_t j = 0; j < io.nInputSize; ++j) |
| { |
| AX_ENGINE_IO_BUFFER_T *pBuf = io.pInputs + j; |
| AX_SYS_MemFree(pBuf->phyAddr, pBuf->pVirAddr); |
| } |
| for (size_t j = 0; j < io.nOutputSize; ++j) |
| { |
| AX_ENGINE_IO_BUFFER_T *pBuf = io.pOutputs + j; |
| AX_SYS_MemFree(pBuf->phyAddr, pBuf->pVirAddr); |
| } |
| delete[] io.pInputs; |
| delete[] io.pOutputs; |
| } |
|
|
| static inline void free_io(AX_ENGINE_IO_T &io, std::vector<std::vector<AX_ENGINE_IO_BUFFER_T>> &vecOutputBuffer) { |
| if (io.pInputs) { |
| delete[] io.pInputs; |
| io.pInputs = nullptr; |
| } |
|
|
| if (io.pOutputs) { |
| for (size_t index = 0; index < vecOutputBuffer.size(); ++index) { |
| AX_ENGINE_IO_BUFFER_T *pOutputs = &vecOutputBuffer[index][0]; |
| for (size_t j = 0; j < io.nOutputSize; ++j) { |
| free_io_index(pOutputs, j); |
| } |
| } |
|
|
| delete[] io.pOutputs; |
| io.pOutputs = nullptr; |
| } |
| } |
|
|
| static inline int prepare_io(const std::string& token, const AX_ENGINE_IO_INFO_T* info, AX_ENGINE_IO_T &io, IO_BUFFER_STRATEGY_T strategy) { |
| auto ret = 0; |
|
|
| memset(&io, 0, sizeof(io)); |
|
|
| io.pInputs = new AX_ENGINE_IO_BUFFER_T[info->nInputSize]; |
|
|
| if (!io.pInputs) { |
| goto EXIT; |
| } |
|
|
| memset(io.pInputs, 0x00, sizeof(AX_ENGINE_IO_BUFFER_T) * info->nInputSize); |
| io.nInputSize = info->nInputSize; |
| for (AX_U32 i = 0; i < info->nInputSize; ++i) |
| { |
| auto meta = info->pInputs[i]; |
| auto buffer = &io.pInputs[i]; |
| ret = alloc_engine_buffer(token, "_input_", i, &meta, buffer, strategy); |
| if (ret != 0) |
| { |
| free_io_index(io.pInputs, i); |
| return ret; |
| } |
| } |
|
|
| io.pOutputs = new AX_ENGINE_IO_BUFFER_T[info->nOutputSize]; |
|
|
| if (!io.pOutputs) { |
| goto EXIT; |
| } |
|
|
| memset(io.pOutputs, 0x00, sizeof(AX_ENGINE_IO_BUFFER_T) * info->nOutputSize); |
| io.nOutputSize = info->nOutputSize; |
|
|
| for (size_t i = 0; i < info->nOutputSize; ++i) { |
| auto meta = info->pOutputs[i]; |
| auto buffer = &io.pOutputs[i]; |
| ret = alloc_engine_buffer(token, "_output_", i, &meta, buffer, strategy); |
| if (ret != 0) { |
| goto EXIT; |
| } |
| } |
|
|
| EXIT: |
| if (ret != 0) { |
| free_io(io); |
| return -1; |
| } |
|
|
| return 0; |
| } |
|
|
| static inline int prepare_io(const std::string& token, |
| const AX_ENGINE_IO_INFO_T* info, AX_ENGINE_IO_T &io, |
| std::vector<AX_ENGINE_IO_BUFFER_T> &vecOutputBuffer, |
| const IO_BUFFER_STRATEGY_T &strategy) { |
| AX_S32 ret = 0; |
| memset(&io, 0, sizeof(io)); |
|
|
| std::vector<AX_ENGINE_IO_BUFFER_T> outputBuffer; |
|
|
| if (1 != info->nInputSize) { |
| fprintf(stderr, "[ERR]: Only single input was accepted(got %u).\n", info->nInputSize); |
| return -1; |
| } |
|
|
| io.pInputs = new AX_ENGINE_IO_BUFFER_T[info->nInputSize]; |
|
|
| if (!io.pInputs) { |
| goto EXIT; |
| } |
|
|
| memset(io.pInputs, 0x00, sizeof(AX_ENGINE_IO_BUFFER_T) * info->nInputSize); |
| io.nInputSize = info->nInputSize; |
|
|
| for (AX_U32 i = 0; i < info->nInputSize; ++i) |
| { |
| auto meta = info->pInputs[i]; |
| auto buffer = &io.pInputs[i]; |
| ret = alloc_engine_buffer(token, "_input_", i, &meta, buffer, strategy); |
| if (ret != 0) |
| { |
| free_io_index(io.pInputs, i); |
| return ret; |
| } |
| } |
|
|
| io.pOutputs = new AX_ENGINE_IO_BUFFER_T[info->nOutputSize]; |
|
|
| if (!io.pOutputs) { |
| goto EXIT; |
| } |
|
|
| for (size_t i = 0; i < info->nOutputSize; ++i) { |
| auto meta = info->pOutputs[i]; |
| auto buffer = &io.pOutputs[i]; |
| ret = alloc_engine_buffer(token, "_output_", i, &meta, buffer, strategy); |
|
|
| if (ret != 0) { |
| goto EXIT; |
| } |
|
|
| vecOutputBuffer.push_back(*buffer); |
| } |
|
|
| memset(io.pOutputs, 0x00, sizeof(AX_ENGINE_IO_BUFFER_T) * info->nOutputSize); |
| io.nOutputSize = info->nOutputSize; |
|
|
| for (size_t i = 0; i < info->nOutputSize; ++i) { |
| auto buffer = &io.pOutputs[i]; |
| *buffer = vecOutputBuffer[i]; |
| } |
|
|
| EXIT: |
| if (ret != 0) { |
| free_io(io); |
| return -1; |
| } |
|
|
| return 0; |
| } |
|
|
| static inline AX_S32 push_io_output(const AX_ENGINE_IO_INFO_T* info, |
| AX_ENGINE_IO_T& io, |
| std::vector<AX_ENGINE_IO_BUFFER_T> &outputBuffer) { |
| for (size_t i = 0; i < info->nOutputSize; ++i) { |
| auto buffer = &io.pOutputs[i]; |
| *buffer = outputBuffer[i]; |
| } |
|
|
| return 0; |
| } |
|
|
| static inline AX_S32 cache_io_flush(const AX_ENGINE_IO_BUFFER_T *io_buf) { |
| if (io_buf->phyAddr != 0) { |
| AX_SYS_MflushCache(io_buf->phyAddr, io_buf->pVirAddr, io_buf->nSize); |
| } |
|
|
| return 0; |
| } |
|
|
| static inline AX_S32 push_io_input(void* input, int index, AX_ENGINE_IO_T& io) { |
| |
| AX_ENGINE_IO_BUFFER_T* pImg = &io.pInputs[index]; |
| |
| memcpy(pImg->pVirAddr, input, pImg->nSize); |
| cache_io_flush(pImg); |
| return 0; |
| } |
|
|
| static inline AX_S32 push_io_output(void* output, |
| int index, |
| AX_ENGINE_IO_T& io) { |
| AX_ENGINE_IO_BUFFER_T* pImg = &io.pOutputs[index]; |
| cache_io_flush(pImg); |
| memcpy(output, pImg->pVirAddr, pImg->nSize); |
| return 0; |
| } |
|
|
| static inline AX_S32 cpu_copy(AX_U64 nPhyAddrSrc, AX_U64 nPhyAddrDst, AX_U32 nLen) { |
| if (nPhyAddrSrc != 0 && nPhyAddrDst != 0 && nLen > 0) { |
| AX_VOID* pSrcVirAddr = AX_SYS_MmapCache(nPhyAddrSrc, nLen); |
| AX_VOID* pDstVirAddr = AX_SYS_MmapCache(nPhyAddrDst, nLen); |
|
|
| memcpy((AX_VOID*)pDstVirAddr, (AX_VOID*)pSrcVirAddr, nLen); |
|
|
| AX_SYS_Munmap(pSrcVirAddr, nLen); |
| AX_SYS_Munmap(pDstVirAddr, nLen); |
|
|
| return 0; |
| } |
|
|
| return -1; |
| } |
|
|
| static inline AX_S32 inc_io_ref_cnt(const AX_VIDEO_FRAME_T &stFrame) { |
| if (stFrame.u32BlkId[0] > 0) { |
| AX_POOL_IncreaseRefCnt(stFrame.u32BlkId[0]); |
| } |
| if (stFrame.u32BlkId[1] > 0) { |
| AX_POOL_IncreaseRefCnt(stFrame.u32BlkId[1]); |
| } |
| if (stFrame.u32BlkId[2] > 0) { |
| AX_POOL_IncreaseRefCnt(stFrame.u32BlkId[2]); |
| } |
|
|
| return 0; |
| } |
|
|
| static inline AX_S32 dec_io_ref_cnt(const AX_VIDEO_FRAME_T &stFrame) { |
| if (stFrame.u32BlkId[0] > 0) { |
| AX_POOL_DecreaseRefCnt(stFrame.u32BlkId[0]); |
| } |
| if (stFrame.u32BlkId[1] > 0) { |
| AX_POOL_DecreaseRefCnt(stFrame.u32BlkId[1]); |
| } |
| if (stFrame.u32BlkId[2] > 0) { |
| AX_POOL_DecreaseRefCnt(stFrame.u32BlkId[2]); |
| } |
|
|
| return 0; |
| } |
|
|
| static inline bool read_file(const char* path, std::vector<char>& data) { |
| std::fstream fs(path, std::ios::in | std::ios::binary); |
|
|
| if (!fs.is_open()) { |
| return false; |
| } |
|
|
| fs.seekg(std::ios::end); |
| auto fs_end = fs.tellg(); |
| fs.seekg(std::ios::beg); |
| auto fs_beg = fs.tellg(); |
|
|
| auto file_size = static_cast<size_t>(fs_end - fs_beg); |
| auto vector_size = data.size(); |
|
|
| data.reserve(vector_size + file_size); |
| data.insert(data.end(), std::istreambuf_iterator<char>(fs), std::istreambuf_iterator<char>()); |
|
|
| fs.close(); |
|
|
| return true; |
| } |
|
|
| static inline bool read_file(const char* path, AX_VOID **pModelBufferVirAddr, |
| AX_U64 &u64ModelBufferPhyAddr, AX_U32 &nModelBufferSize) { |
| std::fstream fs(path, std::ios::in | std::ios::binary); |
|
|
| if (!fs.is_open()) { |
| return false; |
| } |
|
|
| fs.seekg(0, std::ios::end); |
| int file_size = fs.tellg(); |
| fs.seekg(0, std::ios::beg); |
|
|
| nModelBufferSize = (AX_U32)file_size; |
|
|
| AX_SYS_MemAlloc(&u64ModelBufferPhyAddr, pModelBufferVirAddr, nModelBufferSize, 0x100, (AX_S8 *)"SKEL-CV"); |
|
|
| if (!pModelBufferVirAddr || (u64ModelBufferPhyAddr == 0)) { |
| return false; |
| } |
|
|
| fs.read((AX_CHAR *)*pModelBufferVirAddr, nModelBufferSize); |
|
|
| fs.close(); |
|
|
| return true; |
| } |
|
|
| static inline void dequant(float** pptrOutput, const AX_ENGINE_IOMETA_T& ptrIoInfo, const AX_ENGINE_IO_BUFFER_T& ioBuf, float zp, float scale) |
| { |
| if (ptrIoInfo.eDataType == AX_ENGINE_DT_FLOAT32) |
| { |
| *pptrOutput = (float*)ioBuf.pVirAddr; |
| return; |
| } |
|
|
| *pptrOutput = (float*)malloc(ptrIoInfo.nSize * sizeof(float)); |
| uint8_t *pBuf = (uint8_t*)ioBuf.pVirAddr; |
| float* pOutput = *pptrOutput; |
| |
| for (int i = 0; i < ptrIoInfo.nSize; i++) |
| { |
| pOutput[i] = ((float)pBuf[i] - zp) * scale; |
| } |
| } |
| } |
|
|
|
|