diff --git a/include/tvm/build_module.h b/include/tvm/build_module.h
index dc724cc49ceb72fae11948b4f4a32f7a1101ae1c..f8a20ddaefac76ca448a36630d26a5ef042cb5a8 100644
--- a/include/tvm/build_module.h
+++ b/include/tvm/build_module.h
@@ -154,6 +154,10 @@ EXPORT Target rasp(const std::vector<std::string>& options =
 EXPORT Target mali(const std::vector<std::string>& options =
                    std::vector<std::string>());
 
+/*! \return A target for Intel GPU */
+EXPORT Target intel_gpu(const std::vector<std::string>& options =
+                        std::vector<std::string>());
+
 /*! \return A target for stackvm */
 EXPORT Target stackvm(const std::vector<std::string>& options =
                       std::vector<std::string>());
diff --git a/python/tvm/target.py b/python/tvm/target.py
index 869b3f10bce33c8ea054d7cdc7575d72bf4cd7ae..3ca72bafdc85d1c170bc3ae85010b2e1f69887d4 100644
--- a/python/tvm/target.py
+++ b/python/tvm/target.py
@@ -76,6 +76,7 @@ class Target(NodeBase):
     - :any:`tvm.target.cuda` create CUDA target
     - :any:`tvm.target.rocm` create ROCM target
     - :any:`tvm.target.mali` create Mali target
+    - :any:`tvm.target.intel_gpu` create Intel GPU target
     """
     def __init__(self, handle):
         super(Target, self).__init__(handle)
@@ -397,6 +398,19 @@ def mali(options=None):
     return _api_internal._TargetCreate("opencl", *opts)
 
 
+def intel_gpu(options=None):
+    """Returns an Intel GPU target.
+
+    Parameters
+    ----------
+    options : str or list of str
+        Additional options
+    """
+    opts = ["-device=intel_gpu"]
+    opts = _merge_opts(opts, options)
+    return _api_internal._TargetCreate("opencl", *opts)
+
+
 def opengl(options=None):
     """Returns a OpenGL target.
 
diff --git a/src/codegen/build_module.cc b/src/codegen/build_module.cc
index ceeb3c0b1110e339393cef4c2ff990e9121953d1..e11ee773f1e0063a3c02ac0e3de46acde4af1b7f 100644
--- a/src/codegen/build_module.cc
+++ b/src/codegen/build_module.cc
@@ -74,6 +74,9 @@ Target CreateTarget(const std::string& target_name,
     t->keys_array.push_back(ir::StringImm::make("rocm"));
     t->keys_array.push_back(ir::StringImm::make("gpu"));
     t->max_num_threads = 256;
+    if (device_name == "intel_gpu") {
+      t->thread_warp_size = 16;
+    }
   } else if (target_name == "metal" || target_name == "vulkan") {
     t->device_type = static_cast<int>(target_name == "metal" ? kDLMetal : kDLVulkan);
     t->keys_array.push_back(ir::StringImm::make(target_name));
@@ -182,8 +185,8 @@ Target Target::create(const std::string& target_str) {
 
   if (device_name == "rasp") {
     return target::rasp(options);
-  } else if (device_name == "mail") {
+  } else if (device_name == "mali") {
     return target::mali(options);
   } else {
     return CreateTarget(target_name, options);
   }
@@ -264,6 +267,12 @@ Target mali(const std::vector<std::string>& options) {
   }));
 }
 
+Target intel_gpu(const std::vector<std::string>& options) {
+  return CreateTarget("opencl", MergeOptions(options, {
+    "-device=intel_gpu"
+  }));
+}
+
 Target stackvm(const std::vector<std::string>& options) {
   return CreateTarget("stackvm", options);
 }
diff --git a/src/codegen/codegen_opencl.cc b/src/codegen/codegen_opencl.cc
index 03f4acdd057c5a50876eb5c1e0654ab85e2e6bd3..dd8bf7081a25ebe8610a0b0232ee78c1ab451b55 100644
--- a/src/codegen/codegen_opencl.cc
+++ b/src/codegen/codegen_opencl.cc
@@ -157,7 +157,8 @@ void CodeGenOpenCL::PrintVecStore(const Variable* buffer,
 void CodeGenOpenCL::PrintStorageSync(const Call* op) {
   const std::string& sync = op->args[0].as<StringImm>()->value;
   if (sync == "warp") {
-    LOG(FATAL) << "warp sync not supported in opencl";
+    this->PrintIndent();
+    this->stream << "sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n";
   } else if (sync == "shared") {
     this->PrintIndent();
     this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n";