diff --git a/src/codegen/llvm/codegen_amdgpu.cc b/src/codegen/llvm/codegen_amdgpu.cc index 4769efdb0405f870202516d7dcb803a13fc8dadd..146880b7dd89f3470188aed500cf81e8e3f2d124 100644 --- a/src/codegen/llvm/codegen_amdgpu.cc +++ b/src/codegen/llvm/codegen_amdgpu.cc @@ -136,7 +136,7 @@ runtime::Module BuildAMDGPU(Array<LoweredFunc> funcs, std::string target) { ) >= 4 && target.substr(0, 4) == "rocm"); llvm::TargetMachine* tm = \ - GetLLVMTargetMachine("-mtriple=amdgcn-amd-amdhsa-hcc -mcpu=gfx900" + \ + GetLLVMTargetMachine("-mtriple=amdgcn-amd-amdhsa-hcc -mcpu=gfx803" + \ target.substr(4, target.length() - 4)); std::unique_ptr<CodeGenAMDGPU> cg(new CodeGenAMDGPU()); diff --git a/src/runtime/rocm/rocm_module.cc b/src/runtime/rocm/rocm_module.cc index 2839e10945f8a2ac03d6bea0b69a358931c8a137..98ec6f63d74e93f72bacdea422e83af30aff6b36 100644 --- a/src/runtime/rocm/rocm_module.cc +++ b/src/runtime/rocm/rocm_module.cc @@ -152,7 +152,7 @@ class ROCMWrappedFunc { ThreadWorkLoad wl = thread_axis_cfg_.Extract(args); void* config[] = { - HIP_LAUNCH_PARAM_BUFFER_POINTER, &packed_args, + HIP_LAUNCH_PARAM_BUFFER_POINTER, packed_args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &packed_nbytes, HIP_LAUNCH_PARAM_END };