Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ python scripts/install.py [XMAKE_CONFIG_FLAGS]
| `--ascend-npu=[y\|n]` | 是否编译昇腾 NPU 接口实现 | n
| `--cambricon-mlu=[y\|n]` | 是否编译寒武纪 MLU 接口实现 | n
| `--metax-gpu=[y\|n]` | 是否编译沐曦 GPU 接口实现 | n
| `--use_mc=[y\|n]` | 沐曦 GPU 接口实现是否使用 MACA SDK（mc 系列库） | n
| `--moore-gpu=[y\|n]` | 是否编译摩尔线程 GPU 接口实现 | n
| `--iluvatar-gpu=[y\|n]` | 是否编译天数智芯 GPU 接口实现 | n
| `--hygon-dcu=[y\|n]` | 是否编译海光 DCU 接口实现 | n
Expand Down
7 changes: 7 additions & 0 deletions src/infiniop/devices/metax/metax_common.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
#include "../../../utils.h"
#include "../pool.h"
#include "metax_handle.h"
#include "metax_ht2mc.h"

#ifdef ENABLE_METAX_MC_API
#include <mcblas/mcblas.h>
#include <mcdnn/mcdnn.h>
#else
#include <hcblas/hcblas.h>
#include <hcdnn/hcdnn.h>
#endif
#include <memory>

#define CHECK_MCBLAS(API) CHECK_INTERNAL(API, HCBLAS_STATUS_SUCCESS)
Expand Down
67 changes: 67 additions & 0 deletions src/infiniop/devices/metax/metax_ht2mc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#ifndef __METAX_HT2MC_H__
#define __METAX_HT2MC_H__

// Compatibility shim for the MetaX backend.
//
// The MetaX sources are written against the "hc"/"hpcc" spelling of the SDK
// symbols. When ENABLE_METAX_MC_API is defined, every such identifier used by
// the backend is aliased to its "mc"/"maca" counterpart so the same sources
// build against either SDK flavour. When the macro is not defined this header
// expands to nothing.
//
// The header is included from several translation-unit headers, hence the
// include guard above.
#ifdef ENABLE_METAX_MC_API

// --- Handle, stream and basic types ---------------------------------------
#define hcblasHandle_t mcblasHandle_t
#define hcdnnHandle_t mcdnnHandle_t
#define hcStream_t mcStream_t
#define hcdnnDataType_t mcdnnDataType_t

// --- bfloat16 types (scalar and packed-pair variants) -----------------------
#define hpcc_bfloat16 maca_bfloat16
#define hpcc_bfloat162 maca_bfloat162
#define __hpcc_bfloat16 __maca_bfloat16
// Previously this line repeated the scalar mapping, leaving the packed type
// without an alias.
#define __hpcc_bfloat162 __maca_bfloat162

// --- Runtime: errors, devices, streams --------------------------------------
#define hcError_t mcError_t
#define hcSuccess mcSuccess
#define hcDeviceProp_t mcDeviceProp_t
#define hcGetDeviceProperties mcGetDeviceProperties
#define HCBLAS_STATUS_SUCCESS MCBLAS_STATUS_SUCCESS
#define HCDNN_STATUS_SUCCESS MCDNN_STATUS_SUCCESS
#define hcblasCreate mcblasCreate
#define hcblasSetStream mcblasSetStream
#define hcGetDeviceCount mcGetDeviceCount
#define hcSetDevice mcSetDevice
#define hcDeviceSynchronize mcDeviceSynchronize
#define hcStreamCreate mcStreamCreate
#define hcStreamDestroy mcStreamDestroy
#define hcStreamSynchronize mcStreamSynchronize
#define hcdnnCreate mcdnnCreate
#define hcdnnSetStream mcdnnSetStream
#define hcEvent_t mcEvent_t
#define hcStreamWaitEvent mcStreamWaitEvent

// --- DNN data-type enumerators ----------------------------------------------
#define HCDNN_DATA_HALF MCDNN_DATA_HALF
#define HCDNN_DATA_FLOAT MCDNN_DATA_FLOAT
#define HCDNN_DATA_DOUBLE MCDNN_DATA_DOUBLE
#define HCDNN_DATA_BFLOAT16 MCDNN_DATA_BFLOAT16
#define HCDNN_DATA_INT8 MCDNN_DATA_INT8
#define HCDNN_DATA_INT32 MCDNN_DATA_INT32
#define HCDNN_DATA_INT64 MCDNN_DATA_INT64
#define HCDNN_DATA_UINT8 MCDNN_DATA_UINT8

// --- Events -------------------------------------------------------------------
#define hcEventCreate mcEventCreate
#define hcEventRecord mcEventRecord
#define hcEventQuery mcEventQuery
#define hcEventSynchronize mcEventSynchronize
#define hcEventDestroy mcEventDestroy

// --- Memory management and copies -------------------------------------------
#define hcMalloc mcMalloc
#define hpccDataType macaDataType
#define hcblasComputeType_t mcblasComputeType_t
#define hcMallocHost mcMallocHost
#define hcFree mcFree
#define hcFreeHost mcFreeHost
#define hcMemcpyKind mcMemcpyKind
#define hcMemcpyHostToDevice mcMemcpyHostToDevice
#define hcMemcpyDeviceToHost mcMemcpyDeviceToHost
#define hcMemcpyDeviceToDevice mcMemcpyDeviceToDevice
#define hcMemcpyHostToHost mcMemcpyHostToHost
#define hcMemcpyDefault mcMemcpyDefault
#define hcMemcpy mcMemcpy
#define hcMemcpyAsync mcMemcpyAsync
#define hcMallocAsync mcMallocAsync
#define hcFreeAsync mcFreeAsync

// --- BLAS data types, compute types and GEMM entry points --------------------
#define HPCC_R_16F MACA_R_16F
#define HPCC_R_16BF MACA_R_16BF
#define HPCC_R_32F MACA_R_32F
#define HCBLAS_COMPUTE_32F MCBLAS_COMPUTE_32F
#define HCBLAS_COMPUTE_32F_FAST_TF32 MCBLAS_COMPUTE_32F_FAST_TF32
#define HCBLAS_OP_N MCBLAS_OP_N
#define HCBLAS_OP_T MCBLAS_OP_T
#define HCBLAS_GEMM_DEFAULT_TENSOR_OP MCBLAS_GEMM_DEFAULT_TENSOR_OP
#define hcblasGemmStridedBatchedEx mcblasGemmStridedBatchedEx

// --- Kernel launch -------------------------------------------------------------
#define hcLaunchKernel mcLaunchKernel

#endif // ENABLE_METAX_MC_API

#endif // __METAX_HT2MC_H__
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
#include "../../../devices/metax/metax_common.h"
#include "causal_softmax_metax.h"

#include <hccub/block/block_reduce.cuh>
#ifdef ENABLE_METAX_MC_API
#include <cub/block/block_reduce.cuh>
#else
#include <hccub/block/block_reduce.cuh>
#endif
#include "../../../devices/metax/metax_kernel_common.h"

#include "../../../reduce/cuda/reduce.cuh"
Expand Down
13 changes: 10 additions & 3 deletions src/infiniop/ops/random_sample/metax/random_sample_kernel.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
#include "../../../devices/metax/metax_kernel_common.h"
#include "infinicore.h"
#include <hccub/device/device_radix_sort.cuh>
#include <hccub/device/device_reduce.cuh>
#include <hccub/device/device_scan.cuh>

// Pick the CUB headers that match the MetaX SDK flavour being built against:
// the MC build uses the <cub/...> headers, while the default (hc) build keeps
// the <hccub/...> headers this file included before the MC option existed.
#ifdef ENABLE_METAX_MC_API
#include <cub/device/device_radix_sort.cuh>
#include <cub/device/device_reduce.cuh>
#include <cub/device/device_scan.cuh>
#else
#include <hccub/device/device_radix_sort.cuh>
#include <hccub/device/device_reduce.cuh>
#include <hccub/device/device_scan.cuh>
#endif

namespace op::random_sample::metax {

Expand Down
1 change: 0 additions & 1 deletion src/infiniop/ops/random_sample/metax/random_sample_metax.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#ifndef __RANDOM_SAMPLE_METAX_H__
#define __RANDOM_SAMPLE_METAX_H__

#include "../random_sample.h"

DESCRIPTOR(metax)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include "../../../devices/metax/metax_ht2mc.h"
#include "../../../devices/metax/metax_common.h"
#include "../../../devices/metax/metax_handle.h"
#include "../info.h"
Expand Down
3 changes: 0 additions & 3 deletions src/infiniop/ops/softplus/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
#include "nvidia/softplus_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
#include "metax/softplus_metax.h"
#endif

__C infiniStatus_t infiniopCreateSoftplusDescriptor(
infiniopHandle_t handle,
Expand Down
5 changes: 5 additions & 0 deletions src/infinirt/metax/infinirt_metax.cc
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
#include "infinirt_metax.h"
#include "../../utils.h"
#ifdef ENABLE_METAX_MC_API
#include <mcr/mc_runtime.h>
#include <mcr/mc_runtime_api.h>
#else
#include <hcr/hc_runtime.h>
#include <hcr/hc_runtime_api.h>
#endif

#define CHECK_MACART(RT_API) CHECK_INTERNAL(RT_API, hcSuccess)

Expand Down
1 change: 1 addition & 0 deletions src/infinirt/metax/infinirt_metax.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#ifndef __INFINIRT_MACA_H__
#define __INFINIRT_MACA_H__
#include "../../infiniop/devices/metax/metax_ht2mc.h"
#include "../infinirt_impl.h"

namespace infinirt::metax {
Expand Down
9 changes: 9 additions & 0 deletions xmake.lua
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,17 @@ option("metax-gpu")
set_description("Whether to compile implementations for MetaX GPU")
option_end()

-- Build option "use_mc": off by default and shown in the configuration menu.
-- It is consulted only inside the metax-gpu branch of this file, where enabling
-- it adds the ENABLE_METAX_MC_API define.
option("use_mc")
set_default(false)
set_showmenu(true)
set_description("Use MC version")
option_end()

-- MetaX GPU backend: define ENABLE_METAX_API and load the MetaX build script.
if has_config("metax-gpu") then
add_defines("ENABLE_METAX_API")
-- use_mc is only honoured when metax-gpu is enabled as well; it adds the
-- ENABLE_METAX_MC_API define on top of ENABLE_METAX_API.
if has_config("use_mc") then
add_defines("ENABLE_METAX_MC_API")
end
includes("xmake/metax.lua")
end

Expand Down
25 changes: 19 additions & 6 deletions xmake/metax.lua
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
-- SDK root: the first of MACA_PATH, MACA_HOME, MACA_ROOT that is set in the
-- environment. Its include/ and lib/ subdirectories are added to the search paths.
-- NOTE(review): if none of the three variables is set, MACA_ROOT is nil and the
-- string concatenations below fail with a Lua error — confirm whether a clearer
-- diagnostic is wanted here.
local MACA_ROOT = os.getenv("MACA_PATH") or os.getenv("MACA_HOME") or os.getenv("MACA_ROOT")
add_includedirs(MACA_ROOT .. "/include")
add_linkdirs(MACA_ROOT .. "/lib")
add_links("hcdnn", "hcblas", "hcruntime")
-- Choose the library set to link: the mc* libraries when the use_mc option is
-- enabled, otherwise the hc* libraries.
if has_config("use_mc") then
add_links("mcdnn", "mcblas", "mcruntime")
else
add_links("hcdnn", "hcblas", "hcruntime")
end

rule("maca")
set_extensions(".maca")
Expand All @@ -14,11 +18,16 @@ rule("maca")
on_build_file(function (target, sourcefile)
local objectfile = target:objectfile(sourcefile)
os.mkdir(path.directory(objectfile))
local htcc = path.join(MACA_ROOT, "htgpu_llvm/bin/htcc")
local args
local htcc
if has_config("use_mc") then
htcc = path.join(MACA_ROOT, "mxgpu_llvm/bin/mxcc")
args = { "-x", "maca", "-c", sourcefile, "-o", objectfile, "-I" .. MACA_ROOT .. "/include", "-O3", "-fPIC", "-Werror", "-std=c++17"}
else
htcc = path.join(MACA_ROOT, "htgpu_llvm/bin/htcc")
args = { "-x", "hpcc", "-c", sourcefile, "-o", objectfile, "-I" .. MACA_ROOT .. "/include", "-O3", "-fPIC", "-Werror", "-std=c++17"}
end
local includedirs = table.concat(target:get("includedirs"), " ")

local args = { "-x", "hpcc", "-c", sourcefile, "-o", objectfile, "-I" .. MACA_ROOT .. "/include", "-O3", "-fPIC", "-Werror", "-std=c++17"}

for _, includedir in ipairs(target:get("includedirs")) do
table.insert(args, "-I" .. includedir)
end
Expand Down Expand Up @@ -66,7 +75,11 @@ target("infiniccl-metax")
add_cxflags("-fPIC")
end
if has_config("ccl") then
add_links("libhccl.so")
if has_config("use_mc") then
add_links("libmccl.so")
else
add_links("libhccl.so")
end
add_files("../src/infiniccl/metax/*.cc")
end
set_languages("cxx17")
Expand Down