diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h index 6faf4d21de942bdded54e0aab100cd22a7ade0e6..33c46b40d670c46336e763ddf619a1e7a9e33452 100644 --- a/include/tvm/runtime/c_runtime_api.h +++ b/include/tvm/runtime/c_runtime_api.h @@ -373,34 +373,6 @@ TVM_DLL int TVMFuncListGlobalNames(int *out_size, const char*** out_array); // Array related apis for quick proptying -/*! - * \brief Initialize certain type of devices, this may - * not be necessary for all device types. But is needed for OpenCL. - * - * \param dev_mask The device mask of device type to be initialized - * \param option_keys Additional option keys to pass. - * \param option_vals Additional option values to pass - * \param num_options Number of options to be passed into it. - * \param out_code 1: success, 0: already initialized - * \return 0 when success, -1 when failure happens - */ -TVM_DLL int TVMDeviceInit(int dev_mask, - const char** option_keys, - const char** option_vals, - int num_options, - int *out_code); - - -/*! - * \brief Whether the specified context is enabled. - * - * \param ctx The context to be checked. - * \param out_enabled whether the ctx is enabled. - * \return Whether the function is successful. - */ -TVM_DLL int TVMContextEnabled(TVMContext ctx, - int* out_enabled); - /*! * \brief Allocate a nd-array's memory, * including space of shape, of given spec. 
diff --git a/include/tvm/runtime/packed_func.h b/include/tvm/runtime/packed_func.h index 558dd9b9541ea4c0fbc1f20c3380b6ce63aa30e2..592b418e591855e77964fde7d9c0206d69ec00ba 100644 --- a/include/tvm/runtime/packed_func.h +++ b/include/tvm/runtime/packed_func.h @@ -535,8 +535,9 @@ inline const char* TypeCode2Str(int type_code) { } inline std::ostream& operator<<(std::ostream& os, TVMType t) { // NOLINT(*) - os << TypeCode2Str(t.code) - << static_cast<int>(t.bits); + os << TypeCode2Str(t.code); + if (t.code == kHandle) return os; + os << static_cast<int>(t.bits); if (t.lanes != 1) { os << 'x' << static_cast<int>(t.lanes); } @@ -559,7 +560,7 @@ inline TVMType String2TVMType(std::string s) { t.code = kUInt; scan = s.c_str() + 4; } else if (s.substr(0, 5) == "float") { t.code = kFloat; scan = s.c_str() + 5; - } else if (s == "handle") { + } else if (s.substr(0, 6) == "handle") { t.code = kHandle; t.bits = 64; // handle uses 64 bit by default. scan = s.c_str() + 6; diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py index bd4ef614f5859c28ad78da0474151a85d634344b..4e294bdafdf296f379558b250c41a3005f861006 100644 --- a/python/tvm/__init__.py +++ b/python/tvm/__init__.py @@ -15,7 +15,7 @@ from . import schedule from . import module from . 
import ndarray as nd -from .ndarray import cpu, gpu, opencl, init_opencl, cl +from .ndarray import cpu, gpu, opencl, cl from ._base import TVMError from .api import * diff --git a/python/tvm/_ctypes/_ndarray.py b/python/tvm/_ctypes/_ndarray.py index b6fc4d4dfcacedbead5f87805955661e8b349966..91f800cdd5ba8d7966ce7c8f80b22f11ea87c3f1 100644 --- a/python/tvm/_ctypes/_ndarray.py +++ b/python/tvm/_ctypes/_ndarray.py @@ -7,10 +7,9 @@ import ctypes import numpy as np from .._base import _LIB, check_call -from .._base import c_array, c_str +from .._base import c_array from ._types import TVMType, tvm_index_t - class TVMContext(ctypes.Structure): """TVM context strucure.""" _fields_ = [("dev_mask", ctypes.c_int), @@ -29,12 +28,6 @@ class TVMContext(ctypes.Structure): return "%s(%d)" % ( TVMContext.MASK2STR[self.dev_mask], self.dev_id) - @property - def enabled(self): - ret = ctypes.c_int() - check_call(_LIB.TVMContextEnabled(self, ctypes.byref(ret))) - return ret.value != 0 - class TVMArray(ctypes.Structure): """TVMValue in C API""" @@ -141,30 +134,6 @@ def sync(ctx): check_call(_LIB.TVMSynchronize(ctx, None)) -def init_opencl(**kwargs): - """Initialize the opencl with the options. 
- - Parameters - ---------- - kwargs : dict - The options - """ - keys = [] - vals = [] - for k, v in kwargs.items(): - keys.append(c_str(k)) - vals.append(c_str(v)) - dev_mask = ctypes.c_int(4) - out_code = ctypes.c_int() - check_call(_LIB.TVMDeviceInit( - dev_mask, - c_array(ctypes.c_char_p, keys), - c_array(ctypes.c_char_p, vals), - ctypes.c_int(len(keys)), - ctypes.byref(out_code))) - return out_code.value != 0 - - class NDArrayBase(object): """A simple Device/CPU Array object in runtime.""" __slots__ = ["handle"] diff --git a/python/tvm/addon/testing.py b/python/tvm/addon/testing.py new file mode 100644 index 0000000000000000000000000000000000000000..642dadd37a6ade1a79a99cc7b629e48ee1c3d475 --- /dev/null +++ b/python/tvm/addon/testing.py @@ -0,0 +1,34 @@ +"""Utilities to make tempdir""" +from __future__ import absolute_import as _abs +import os +import tempfile +import shutil + +class TempDirectory(object): + """Helper object to manage temp directory during testing""" + def __init__(self): + self.temp_dir = tempfile.mkdtemp() + + def __del__(self): + shutil.rmtree(self.temp_dir) + + def relpath(self, name): + """Relative path in temp dir + + Parameters + ---------- + name : str + The name of the file. + """ + return os.path.join(self.temp_dir, name) + + +def tempdir(): + """Return a new temp dir which deletes the contents when exit + + Returns + ------- + temp : TempDirectory + The temp directory object + """ + return TempDirectory() diff --git a/python/tvm/libinfo.py b/python/tvm/libinfo.py index 43679dd73194e72995808e7f0fedd0b24af3e7ee..967f3e9c77ffda8c53c57dc7cc3328b63211bd9e 100644 --- a/python/tvm/libinfo.py +++ b/python/tvm/libinfo.py @@ -1,9 +1,11 @@ # coding: utf-8 """Information about nnvm.""" from __future__ import absolute_import +import sys import os import platform + def find_lib_path(): """Find dynamic library files. 
@@ -12,6 +14,7 @@ def find_lib_path(): lib_path : list(string) List of all found path to the libraries """ + use_runtime = os.environ.get("TVM_USE_RUNTIME_LIB", False) curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) api_path = os.path.join(curr_path, '../../lib/') cmake_build_path = os.path.join(curr_path, '../../build/Release/') @@ -26,15 +29,24 @@ def find_lib_path(): dll_path.append(os.path.join(curr_path, '../../windows', vs_configuration)) elif os.name == "posix" and os.environ.get('LD_LIBRARY_PATH', None): dll_path.extend([p.strip() for p in os.environ['LD_LIBRARY_PATH'].split(":")]) + if os.name == 'nt': - dll_path = [os.path.join(p, 'libtvm.dll') for p in dll_path] + lib_dll_path = [os.path.join(p, 'libtvm.dll') for p in dll_path] + runtime_dll_path = [os.path.join(p, 'libtvm_runtime.dll') for p in dll_path] else: - dll_path = [os.path.join(p, 'libtvm.so') for p in dll_path] - lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] - if len(lib_path) == 0: + lib_dll_path = [os.path.join(p, 'libtvm.so') for p in dll_path] + runtime_dll_path = [os.path.join(p, 'libtvm_runtime.so') for p in dll_path] + + dll_path = runtime_dll_path if use_runtime else lib_dll_path + lib_found = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] + + if len(lib_found) == 0: raise RuntimeError('Cannot find the files.\n' + 'List of candidates:\n' + str('\n'.join(dll_path))) - return lib_path + if use_runtime: + sys.stderr.write("Loading runtime library... 
this is execution only\n") + sys.stderr.flush() + return lib_found # current version diff --git a/python/tvm/ndarray.py b/python/tvm/ndarray.py index 2f2492eeafbcd5ee48d96deda577adb5c85b3c3c..d324a9ffddca90f9bd03b85a07cfd4c5c4dd7f24 100644 --- a/python/tvm/ndarray.py +++ b/python/tvm/ndarray.py @@ -9,7 +9,6 @@ import numpy as _np from ._ctypes._ndarray import TVMContext, TVMType, NDArrayBase from ._ctypes._ndarray import cpu, gpu, opencl, empty, sync from ._ctypes._ndarray import _init_ndarray_module -from ._ctypes._ndarray import init_opencl from ._ctypes._function import Function cl = opencl diff --git a/src/api/api_codegen.cc b/src/api/api_codegen.cc index 4f267038a7382c25e20fd9b70e5203312623e77b..9616dccb306a80406aa4ab5ed27daad136ea318e 100644 --- a/src/api/api_codegen.cc +++ b/src/api/api_codegen.cc @@ -21,7 +21,7 @@ TVM_REGISTER_API(_codegen_build) } }); -TVM_REGISTER_API(_codegen_target_enabled) +TVM_REGISTER_API(_codegen_enabled) .set_body([](TVMArgs args, TVMRetValue *ret) { *ret = TargetEnabled(args[0]); }); diff --git a/src/codegen/build_cuda.cc b/src/codegen/build_cuda.cc index 17dd4d0b183406399733714ebc63099602a116fb..a195cea8fa8dae9855671140c001c1176ae673b2 100644 --- a/src/codegen/build_cuda.cc +++ b/src/codegen/build_cuda.cc @@ -61,10 +61,13 @@ runtime::Module BuildCUDA(Array<LoweredFunc> funcs) { if (const auto* f = Registry::Get("tvm_callback_cuda_postproc")) { code = (*f)(code).operator std::string(); } - + std::string fmt = "ptx"; std::string ptx; if (const auto* f = Registry::Get("tvm_callback_cuda_compile")) { ptx = (*f)(code).operator std::string(); + // Dirty matching to check PTX vs cubin. 
+ // TODO(tqchen) more reliable checks + if (ptx[0] != '/') fmt = "cubin"; } else { ptx = NVRTCCompile(code); } @@ -80,7 +83,7 @@ runtime::Module BuildCUDA(Array<LoweredFunc> funcs) { } fmap[f->name] = info; } - return CUDAModuleCreate(ptx, "ptx", fmap, code); + return CUDAModuleCreate(ptx, fmt, fmap, code); } TVM_REGISTER_API(_codegen_build_cuda) diff --git a/src/runtime/c_runtime_api.cc b/src/runtime/c_runtime_api.cc index 925d1243da7af0cb0a19760e3cb127f71d7c1401..3ffe02ed518eb387d4e3508a03dc68df1a78f074 100644 --- a/src/runtime/c_runtime_api.cc +++ b/src/runtime/c_runtime_api.cc @@ -200,38 +200,6 @@ int TVMFuncCreateFromCFunc(TVMPackedCFunc func, API_END(); } -int TVMDeviceInit(int dev_mask, - const char** option_keys, - const char** option_vals, - int num_options, - int* out_code) { - API_BEGIN(); - *out_code = 1; - switch (dev_mask) { - case kOpenCL: { - *out_code = DeviceInit<kOpenCL>(option_keys, option_vals, num_options); - break; - } - default: break; - } - API_END(); -} - -int TVMContextEnabled(TVMContext ctx, - int* out_enabled) { - API_BEGIN(); - if (ctx.dev_mask == kGPU && TVM_CUDA_RUNTIME == 0) { - *out_enabled = 0; - } else if (ctx.dev_mask == kOpenCL && TVM_OPENCL_RUNTIME == 0) { - *out_enabled = 0; - } else { - TVM_DEVICE_SWITCH(ctx, { - *out_enabled = CheckEnabled<xpu>(ctx); - }); - } - API_END(); -} - int TVMArrayAlloc(const tvm_index_t* shape, tvm_index_t ndim, TVMType dtype, diff --git a/src/runtime/cuda/cuda_module.cc b/src/runtime/cuda/cuda_module.cc index 8b3a79c5a75d7f6fbb10edfcd145504fbfcbce0f..12bc1ca7fd40a5e5a7edbab79122d2112e00ab50 100644 --- a/src/runtime/cuda/cuda_module.cc +++ b/src/runtime/cuda/cuda_module.cc @@ -5,6 +5,7 @@ #include "./cuda_module.h" #if TVM_CUDA_RUNTIME + #include <tvm/runtime/registry.h> #include <cuda.h> #include <cuda_runtime.h> @@ -60,7 +61,12 @@ class CUDAModuleNode : public runtime::ModuleNode { void SaveToFile(const std::string& file_name, const std::string& format) final { - LOG(FATAL) << "Not 
implemented"; + std::string fmt = GetFileFormat(file_name, format); + CHECK_EQ(fmt, fmt_) + << "Can only save to format=" << fmt_; + std::string meta_file = GetMetaFilePath(file_name); + SaveMetaDataToFile(meta_file, fmap_); + SaveBinaryToFile(file_name, data_); } std::string GetSource(const std::string& format) final { @@ -212,9 +218,13 @@ Module CUDAModuleCreate( // Load module from module. Module CUDAModuleLoad(const std::string& file_name, const std::string& format) { + std::string data; + std::unordered_map<std::string, FunctionInfo> fmap; std::string fmt = GetFileFormat(file_name, format); - std::string data = LoadBinaryFile(file_name); - return CUDAModuleCreate(data, fmt, {{}}, std::string()); + std::string meta_file = GetMetaFilePath(file_name); + LoadBinaryFromFile(file_name, &data); + LoadMetaDataFromFile(meta_file, &fmap); + return CUDAModuleCreate(data, fmt, fmap, std::string()); } TVM_REGISTER_GLOBAL(_module_loadfile_cubin) diff --git a/src/runtime/device_api.h b/src/runtime/device_api.h index f551ca9ee8c840041de92a9ef55c37d6442758a0..82e998fcff3756390466797226916e9441d1c847 100644 --- a/src/runtime/device_api.h +++ b/src/runtime/device_api.h @@ -11,31 +11,6 @@ namespace tvm { namespace runtime { -/*! - * \brief Initialize the device. - * \param option_keys Additional option keys to pass. - * \param option_vals Additional option values to pass - * \param num_options Number of options to be passed into it. - * \return 0 if success, 1: if already initialized - * \tparam xpu The device mask. - */ -template<TVMDeviceMask xpu> -inline bool DeviceInit(const char** option_keys, - const char** option_vals, - int num_options) { - return true; -} - -/*! - * \brief Whether ctx is enabled. - * \param ctx The device context to perform operation. - * \tparam xpu The device mask. - */ -template<TVMDeviceMask xpu> -inline bool CheckEnabled(TVMContext ctx) { - return true; -} - /*! * \brief Allocate a data space on device. 
* \param ctx The device context to perform operation. diff --git a/src/runtime/file_util.cc b/src/runtime/file_util.cc new file mode 100644 index 0000000000000000000000000000000000000000..d69b39be1b1022ae453bf74234253663467d51e4 --- /dev/null +++ b/src/runtime/file_util.cc @@ -0,0 +1,112 @@ +/*! + * Copyright (c) 2017 by Contributors + * \file file_util.cc + */ +#include <dmlc/json.h> +#include <dmlc/logging.h> +#include <tvm/runtime/packed_func.h> +#include <fstream> +#include "./file_util.h" + +namespace tvm { +namespace runtime { + +void FunctionInfo::Save(dmlc::JSONWriter* writer) const { + std::vector<std::string> sarg_types(arg_types.size()); + for (size_t i = 0; i < arg_types.size(); ++i) { + sarg_types[i] = TVMType2String(arg_types[i]); + } + writer->BeginObject(); + writer->WriteObjectKeyValue("name", name); + writer->WriteObjectKeyValue("arg_types", sarg_types); + writer->WriteObjectKeyValue("thread_axis_tags", thread_axis_tags); + writer->EndObject(); +} + +void FunctionInfo::Load(dmlc::JSONReader* reader) { + dmlc::JSONObjectReadHelper helper; + std::vector<std::string> sarg_types; + helper.DeclareField("name", &name); + helper.DeclareField("arg_types", &sarg_types); + helper.DeclareField("thread_axis_tags", &thread_axis_tags); + helper.ReadAllFields(reader); + arg_types.resize(sarg_types.size()); + for (size_t i = 0; i < arg_types.size(); ++i) { + arg_types[i] = String2TVMType(sarg_types[i]); + } +} + +std::string GetFileFormat(const std::string& file_name, + const std::string& format) { + std::string fmt = format; + if (fmt.length() == 0) { + size_t pos = file_name.find_last_of("."); + if (pos != std::string::npos) { + return file_name.substr(pos + 1, file_name.length() - pos - 1); + } else { + return ""; + } + } else { + return format; + } +} + +std::string GetMetaFilePath(const std::string& file_name) { + size_t pos = file_name.find_last_of("."); + if (pos != std::string::npos) { + return file_name.substr(0, pos) + ".tvm_meta.json"; + } else { + 
return file_name + ".tvm_meta.json"; + } +} + +void LoadBinaryFromFile(const std::string& file_name, + std::string* data) { + std::ifstream fs(file_name, std::ios::in | std::ios::binary); + CHECK(!fs.fail()) << "Cannot open " << file_name; + // get its size: + fs.seekg(0, std::ios::end); + size_t size = fs.tellg(); + fs.seekg(0, std::ios::beg); + data->resize(size); + fs.read(&(*data)[0], size); +} + +void SaveBinaryToFile( + const std::string& file_name, + const std::string& data) { + std::ofstream fs(file_name, std::ios::out | std::ios::binary); + CHECK(!fs.fail()) << "Cannot open " << file_name; + fs.write(&data[0], data.length()); +} + +void SaveMetaDataToFile( + const std::string& file_name, + const std::unordered_map<std::string, FunctionInfo>& fmap) { + std::string version = "0.1.0"; + std::ofstream fs(file_name.c_str()); + CHECK(!fs.fail()) << "Cannot open file " << file_name; + dmlc::JSONWriter writer(&fs); + writer.BeginObject(); + writer.WriteObjectKeyValue("tvm_version", version); + writer.WriteObjectKeyValue("func_info", fmap); + writer.EndObject(); + fs.close(); +} + +void LoadMetaDataFromFile( + const std::string& file_name, + std::unordered_map<std::string, FunctionInfo>* fmap) { + std::ifstream fs(file_name.c_str()); + CHECK(!fs.fail()) << "Cannot open file " << file_name; + std::string version; + dmlc::JSONReader reader(&fs); + dmlc::JSONObjectReadHelper helper; + helper.DeclareField("tvm_version", &version); + helper.DeclareField("func_info", fmap); + helper.ReadAllFields(&reader); + fs.close(); +} + +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/file_util.h b/src/runtime/file_util.h index 15759130682d72a989171ed717854baeb6dc1b1b..6f7d638c349732912ecd40f2fc6dd758e42034b4 100644 --- a/src/runtime/file_util.h +++ b/src/runtime/file_util.h @@ -6,9 +6,8 @@ #ifndef TVM_RUNTIME_FILE_UTIL_H_ #define TVM_RUNTIME_FILE_UTIL_H_ -#include <dmlc/logging.h> -#include <fstream> #include <string> +#include "./meta_data.h" namespace tvm { 
namespace runtime { @@ -17,39 +16,48 @@ namespace runtime { * \param file_name The name of the file. * \param format The format of the file. */ -inline std::string GetFileFormat(const std::string& file_name, - const std::string& format) { - std::string fmt = format; - if (fmt.length() == 0) { - size_t pos = file_name.find_last_of("."); - if (pos != std::string::npos) { - return file_name.substr(pos + 1, file_name.length() - pos - 1); - } else { - return ""; - } - } else { - return format; - } -} +std::string GetFileFormat(const std::string& file_name, + const std::string& format); + +/*! + * \brief Get meta file path given file name and format. + * \param file_name The name of the file. + */ +std::string GetMetaFilePath(const std::string& file_name); + +/*! + * \brief Load binary file into a in-memory buffer. + * \param file_name The name of the file. + * \param data The data to be loaded. + */ +void LoadBinaryFromFile(const std::string& file_name, + std::string* data); /*! * \brief Load binary file into a in-memory buffer. * \param file_name The name of the file. + * \param The binary + */ +void SaveBinaryToFile(const std::string& file_name, + const std::string& data); + +/*! + * \brief Save meta data to file. + * \param file_name The name of the file. + * \param fmap The function info map. */ -inline std::string LoadBinaryFile(const std::string& file_name) { - std::ifstream fs(file_name, std::ios::in | std::ios::binary); - CHECK(!fs.fail()) - << "Cannot open " << file_name; - // get its size: - fs.seekg(0, std::ios::end); - size_t size = fs.tellg(); - fs.seekg(0, std::ios::beg); - std::string data; - data.resize(size); - fs.read(&data[0], size); - return data; -} +void SaveMetaDataToFile( + const std::string& file_name, + const std::unordered_map<std::string, FunctionInfo>& fmap); +/*! + * \brief Load meta data to file. + * \param file_name The name of the file. + * \param fmap The function info map. 
+ */ +void LoadMetaDataFromFile( + const std::string& file_name, + std::unordered_map<std::string, FunctionInfo>* fmap); } // namespace runtime } // namespace tvm #endif // TVM_RUNTIME_FILE_UTIL_H_ diff --git a/src/runtime/meta_data.h b/src/runtime/meta_data.h index 3e8a269351d0a06fc2d61c6aa65a3f8a88ebb506..69e6f6a50529cc07c780d8d92e76d23cfc6201ec 100644 --- a/src/runtime/meta_data.h +++ b/src/runtime/meta_data.h @@ -27,30 +27,8 @@ struct FunctionInfo { std::vector<TVMType> arg_types; std::vector<std::string> thread_axis_tags; - void Save(dmlc::JSONWriter *writer) const { - std::vector<std::string> sarg_types(arg_types.size()); - for (size_t i = 0; i < arg_types.size(); ++i) { - sarg_types[i] = TVMType2String(arg_types[i]); - } - writer->BeginObject(); - writer->WriteObjectKeyValue("name", name); - writer->WriteObjectKeyValue("arg_types", sarg_types); - writer->WriteObjectKeyValue("thread_axis_tags", thread_axis_tags); - writer->EndObject(); - } - - void Load(dmlc::JSONReader *reader) { - dmlc::JSONObjectReadHelper helper; - std::vector<std::string> sarg_types; - helper.DeclareField("name", &name); - helper.DeclareField("arg_types", &sarg_types); - helper.DeclareField("thread_axis_tags", &thread_axis_tags); - helper.ReadAllFields(reader); - arg_types.resize(sarg_types.size()); - for (size_t i = 0; i < arg_types.size(); ++i) { - arg_types[i] = String2TVMType(sarg_types[i]); - } - } + void Save(dmlc::JSONWriter *writer) const; + void Load(dmlc::JSONReader *reader); }; } // namespace runtime diff --git a/src/runtime/module.cc b/src/runtime/module.cc index 4b4ffea94d11115292ecbdeeef3cc9a6f8b2f2e8..0b630c1fa870f562d5e35ef6a814e7c4ea7a90cc 100644 --- a/src/runtime/module.cc +++ b/src/runtime/module.cc @@ -83,6 +83,25 @@ const PackedFunc* ModuleNode::GetFuncFromEnv(const std::string& name) { } } +bool RuntimeEnabled(const std::string& target) { + std::string load_f_name; + if (target == "cpu") { + return true; + } else if (target == "cuda" || target == "gpu") { + 
load_f_name = "_module_loadfile_ptx"; + } else if (target == "cl" || target == "opencl") { + load_f_name = "_module_loadfile_cl"; + } else { + LOG(FATAL) << "Unknown optional runtime " << target; + } + return runtime::Registry::Get(load_f_name) != nullptr; +} + +TVM_REGISTER_GLOBAL(_module_enabled) +.set_body([](TVMArgs args, TVMRetValue *ret) { + *ret = RuntimeEnabled(args[0]); + }); + TVM_REGISTER_GLOBAL(_module__GetSource) .set_body([](TVMArgs args, TVMRetValue *ret) { *ret = args[0].operator Module()->GetSource(args[1]); diff --git a/src/runtime/opencl/device_api_opencl.h b/src/runtime/opencl/device_api_opencl.h index 3d2c2d1b458d654fd4288bcc210418cac3515f60..792682bbe7fe6c570a533c462255f33dc491a065 100644 --- a/src/runtime/opencl/device_api_opencl.h +++ b/src/runtime/opencl/device_api_opencl.h @@ -15,121 +15,6 @@ namespace tvm { namespace runtime { -namespace cl { - -inline std::string GetPlatformInfo( - cl_platform_id pid, cl_platform_info param_name) { - size_t ret_size; - OPENCL_CALL(clGetPlatformInfo(pid, param_name, 0, nullptr, &ret_size)); - std::string ret; - ret.resize(ret_size); - OPENCL_CALL(clGetPlatformInfo(pid, param_name, ret_size, &ret[0], nullptr)); - return ret; -} - -inline std::string GetDeviceInfo( - cl_device_id pid, cl_device_info param_name) { - size_t ret_size; - OPENCL_CALL(clGetDeviceInfo(pid, param_name, 0, nullptr, &ret_size)); - std::string ret; - ret.resize(ret_size); - OPENCL_CALL(clGetDeviceInfo(pid, param_name, ret_size, &ret[0], nullptr)); - return ret; -} - -inline std::vector<cl_platform_id> GetPlatformIDs() { - cl_uint ret_size; - OPENCL_CALL(clGetPlatformIDs(0, nullptr, &ret_size)); - std::vector<cl_platform_id> ret; - ret.resize(ret_size); - OPENCL_CALL(clGetPlatformIDs(ret_size, &ret[0], nullptr)); - return ret; -} - -inline std::vector<cl_device_id> GetDeviceIDs( - cl_platform_id pid, std::string device_type) { - cl_device_type dtype = CL_DEVICE_TYPE_ALL; - if (device_type == "cpu") dtype = CL_DEVICE_TYPE_CPU; - if 
(device_type == "gpu") dtype = CL_DEVICE_TYPE_CPU; - if (device_type == "accelerator") dtype = CL_DEVICE_TYPE_ACCELERATOR; - cl_uint ret_size; - OPENCL_CALL(clGetDeviceIDs(pid, dtype, 0, nullptr, &ret_size)); - std::vector<cl_device_id> ret; - ret.resize(ret_size); - OPENCL_CALL(clGetDeviceIDs(pid, dtype, ret_size, &ret[0], nullptr)); - return ret; -} - -inline bool MatchPlatformInfo( - cl_platform_id pid, - cl_platform_info param_name, - std::string value) { - if (value.length() == 0) return true; - std::string param_value = GetPlatformInfo(pid, param_name); - return param_value.find(value) != std::string::npos; -} - -} // namespace cl - -template<> -inline bool DeviceInit<kOpenCL>(const char** option_keys, - const char** option_vals, - int num_options) { - cl::OpenCLWorkspace* w = cl::OpenCLWorkspace::Global(); - std::lock_guard<std::mutex>(w->mu); - if (w->initialized()) return false; - // matching conditions - std::string platform_name, device_type; - for (int i = 0; i < num_options; ++i) { - std::string key = option_keys[i]; - std::string val = option_vals[i]; - if (key == "platform_name") { - platform_name = val; - } else if (key == "device_type") { - device_type = val; - } else { - LOG(FATAL) << "unknown DeviceInit option " << key; - } - } - // matched platforms - std::vector<cl_platform_id> platform_matched; - for (cl_platform_id pid : cl::GetPlatformIDs()) { - bool matched = true; - if (!cl::MatchPlatformInfo(pid, CL_PLATFORM_NAME, platform_name)) matched = false; - if (matched) platform_matched.push_back(pid); - } - if (platform_matched.size() == 0) { - LOG(FATAL) << "No OpenCL platform matched given existing options ..."; - } - if (platform_matched.size() > 1) { - LOG(WARNING) << "Multiple OpenCL platforms matched, use the first one ... 
"; - } - w->platform_id = platform_matched[0]; - - LOG(INFO) << "Initialize OpenCL platform \'" - << cl::GetPlatformInfo(w->platform_id, CL_PLATFORM_NAME) << '\''; - std::vector<cl_device_id> devices_matched = - cl::GetDeviceIDs(w->platform_id, device_type); - CHECK_GT(devices_matched.size(), 0U) - << "No OpenCL device any device matched given the options"; - w->devices = devices_matched; - cl_int err_code; - w->context = clCreateContext( - nullptr, w->devices.size(), &(w->devices[0]), - nullptr, nullptr, &err_code); - OPENCL_CHECK_ERROR(err_code); - CHECK_EQ(w->queues.size(), 0U); - for (size_t i = 0; i < w->devices.size(); ++i) { - cl_device_id did = w->devices[i]; - w->queues.push_back( - clCreateCommandQueue(w->context, did, 0, &err_code)); - OPENCL_CHECK_ERROR(err_code); - LOG(INFO) << "opencl(" << i - << ")=\'" << cl::GetDeviceInfo(did, CL_DEVICE_NAME) - << "\' cl_device_id=" << did; - } - return true; -} template<> inline void* AllocDataSpace<kOpenCL>(TVMContext ctx, size_t size, size_t alignment) { diff --git a/src/runtime/opencl/opencl_module.cc b/src/runtime/opencl/opencl_module.cc index 6c0b4619bd1de4e3abb0b5c0ee76d77020823c88..ad3f2620eb0c5d577e74c081093a6022f0711373 100644 --- a/src/runtime/opencl/opencl_module.cc +++ b/src/runtime/opencl/opencl_module.cc @@ -7,11 +7,14 @@ #if TVM_OPENCL_RUNTIME +#include <tvm/runtime/registry.h> #include <vector> #include <string> #include <unordered_map> #include "../void_addr_args.h" #include "../thread_storage_scope.h" +#include "../meta_data.h" +#include "../file_util.h" namespace tvm { namespace runtime { @@ -67,7 +70,12 @@ class OpenCLModuleNode : public ModuleNode { void SaveToFile(const std::string& file_name, const std::string& format) final { - LOG(FATAL) << "Not implemented"; + std::string fmt = GetFileFormat(file_name, format); + CHECK_EQ(fmt, fmt_) + << "Can only save to format=" << fmt_; + std::string meta_file = GetMetaFilePath(file_name); + SaveMetaDataToFile(meta_file, fmap_); + 
SaveBinaryToFile(file_name, data_); } std::string GetSource(const std::string& format) final { @@ -294,6 +302,27 @@ Module OpenCLModuleCreate( return Module(n); } +// Load module from module. +Module OpenCLModuleLoad(const std::string& file_name, + const std::string& format) { + std::string data; + std::unordered_map<std::string, FunctionInfo> fmap; + std::string fmt = GetFileFormat(file_name, format); + std::string meta_file = GetMetaFilePath(file_name); + LoadBinaryFromFile(file_name, &data); + LoadMetaDataFromFile(meta_file, &fmap); + return OpenCLModuleCreate(data, fmt, fmap); +} + +TVM_REGISTER_GLOBAL(_module_loadfile_cl) +.set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = OpenCLModuleLoad(args[0], args[1]); + }); + +TVM_REGISTER_GLOBAL(_module_loadfile_clbin) +.set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = OpenCLModuleLoad(args[0], args[1]); + }); } // namespace runtime } // namespace tvm diff --git a/src/runtime/opencl/opencl_workspace.cc b/src/runtime/opencl/opencl_workspace.cc index 1f79f4280bb6b7884d59ea7804eaf3e039dd76ed..1a89efbbccd6447eaf2e8b0ed9ec57f58ce37ca6 100644 --- a/src/runtime/opencl/opencl_workspace.cc +++ b/src/runtime/opencl/opencl_workspace.cc @@ -6,6 +6,7 @@ #if TVM_OPENCL_RUNTIME +#include <tvm/runtime/registry.h> #include <dmlc/thread_local.h> namespace tvm { @@ -23,6 +24,123 @@ OpenCLThreadEntry* OpenCLThreadEntry::ThreadLocal() { return OpenCLThreadStore::Get(); } +std::string GetPlatformInfo( + cl_platform_id pid, cl_platform_info param_name) { + size_t ret_size; + OPENCL_CALL(clGetPlatformInfo(pid, param_name, 0, nullptr, &ret_size)); + std::string ret; + ret.resize(ret_size); + OPENCL_CALL(clGetPlatformInfo(pid, param_name, ret_size, &ret[0], nullptr)); + return ret; +} + +std::string GetDeviceInfo( + cl_device_id pid, cl_device_info param_name) { + size_t ret_size; + OPENCL_CALL(clGetDeviceInfo(pid, param_name, 0, nullptr, &ret_size)); + std::string ret; + ret.resize(ret_size); + OPENCL_CALL(clGetDeviceInfo(pid, 
param_name, ret_size, &ret[0], nullptr)); + return ret; +} + +std::vector<cl_platform_id> GetPlatformIDs() { + cl_uint ret_size; + OPENCL_CALL(clGetPlatformIDs(0, nullptr, &ret_size)); + std::vector<cl_platform_id> ret; + ret.resize(ret_size); + OPENCL_CALL(clGetPlatformIDs(ret_size, &ret[0], nullptr)); + return ret; +} + +std::vector<cl_device_id> GetDeviceIDs( + cl_platform_id pid, std::string device_type) { + cl_device_type dtype = CL_DEVICE_TYPE_ALL; + if (device_type == "cpu") dtype = CL_DEVICE_TYPE_CPU; + if (device_type == "gpu") dtype = CL_DEVICE_TYPE_GPU; + if (device_type == "accelerator") dtype = CL_DEVICE_TYPE_ACCELERATOR; + cl_uint ret_size; + OPENCL_CALL(clGetDeviceIDs(pid, dtype, 0, nullptr, &ret_size)); + std::vector<cl_device_id> ret; + ret.resize(ret_size); + OPENCL_CALL(clGetDeviceIDs(pid, dtype, ret_size, &ret[0], nullptr)); + return ret; +} + +bool MatchPlatformInfo( + cl_platform_id pid, + cl_platform_info param_name, + std::string value) { + if (value.length() == 0) return true; + std::string param_value = GetPlatformInfo(pid, param_name); + return param_value.find(value) != std::string::npos; +} + +bool InitOpenCL(TVMArgs args, TVMRetValue* rv) { + cl::OpenCLWorkspace* w = cl::OpenCLWorkspace::Global(); + std::lock_guard<std::mutex> lock(w->mu); + if (w->initialized()) return false; + // matching conditions + std::string platform_name, device_type; + + for (int i = 0; i < args.num_args; ++i) { + std::string arg = args[i]; + size_t pos = arg.find_first_of('='); + CHECK_NE(pos, std::string::npos) + << "Arguments need to be key=value"; + std::string key = arg.substr(0, pos); + std::string val = arg.substr(pos + 1, arg.length() - pos - 1); + if (key == "platform_name") { + platform_name = val; + } else if (key == "device_type") { + device_type = val; + } else { + LOG(FATAL) << "unknown DeviceInit option " << key; + } + } + // matched platforms + std::vector<cl_platform_id> platform_matched; + for (cl_platform_id pid : cl::GetPlatformIDs()) { + 
bool matched = true; + if (!cl::MatchPlatformInfo(pid, CL_PLATFORM_NAME, platform_name)) matched = false; + if (matched) platform_matched.push_back(pid); + } + if (platform_matched.size() == 0) { + LOG(FATAL) << "No OpenCL platform matched given existing options ..."; + } + if (platform_matched.size() > 1) { + LOG(WARNING) << "Multiple OpenCL platforms matched, use the first one ... "; + } + w->platform_id = platform_matched[0]; + + LOG(INFO) << "Initialize OpenCL platform \'" + << cl::GetPlatformInfo(w->platform_id, CL_PLATFORM_NAME) << '\''; + std::vector<cl_device_id> devices_matched = + cl::GetDeviceIDs(w->platform_id, device_type); + CHECK_GT(devices_matched.size(), 0U) + << "No OpenCL device any device matched given the options"; + w->devices = devices_matched; + cl_int err_code; + w->context = clCreateContext( + nullptr, w->devices.size(), &(w->devices[0]), + nullptr, nullptr, &err_code); + OPENCL_CHECK_ERROR(err_code); + CHECK_EQ(w->queues.size(), 0U); + for (size_t i = 0; i < w->devices.size(); ++i) { + cl_device_id did = w->devices[i]; + w->queues.push_back( + clCreateCommandQueue(w->context, did, 0, &err_code)); + OPENCL_CHECK_ERROR(err_code); + LOG(INFO) << "opencl(" << i + << ")=\'" << cl::GetDeviceInfo(did, CL_DEVICE_NAME) + << "\' cl_device_id=" << did; + } + return true; +} + +TVM_REGISTER_GLOBAL(_module_init_opencl) +.set_body(InitOpenCL); + } // namespace cl } // namespace runtime } // namespace tvm diff --git a/tests/python/integration/test_ewise.py b/tests/python/integration/test_ewise.py index b87000f3f5d9889e5ed7f90a724865dfd376b5be..75798f0c4c2a3675091d63705f42cc100bf6612e 100644 --- a/tests/python/integration/test_ewise.py +++ b/tests/python/integration/test_ewise.py @@ -20,9 +20,9 @@ def test_add(): # one line to build the function. 
def check_device(device, host="stackvm"): - if not tvm.codegen.target_enabled(host): + if not tvm.codegen.enabled(host): return - if not tvm.codegen.target_enabled(device): + if not tvm.codegen.enabled(device): return fadd = tvm.build(s, [A, B, C], device, host, @@ -37,7 +37,8 @@ def test_add(): np.testing.assert_allclose( c.asnumpy(), a.asnumpy() + b.asnumpy()) - tvm.init_opencl() + if tvm.module.enabled("opencl"): + tvm.module.init_opencl() check_device("cuda", "llvm") check_device("opencl") diff --git a/tests/python/integration/test_gemm.py b/tests/python/integration/test_gemm.py index 9beaa5b516c91958cb6efe1d6337089c86704f75..e557ac0ff807b00212f0fcd61e01a8e5858fe3da 100644 --- a/tests/python/integration/test_gemm.py +++ b/tests/python/integration/test_gemm.py @@ -54,9 +54,9 @@ def test_gemm(): # one line to build the function. def check_device(device, host="stackvm"): - if not tvm.codegen.target_enabled(host): + if not tvm.codegen.enabled(host): return - if not tvm.codegen.target_enabled(device): + if not tvm.codegen.enabled(device): return f = tvm.build(s, [A, B, C], device, host, @@ -76,8 +76,9 @@ def test_gemm(): np.testing.assert_allclose( c.asnumpy(), np.dot(a_np, b_np.T), rtol=1e-5) + if tvm.module.enabled("opencl"): + tvm.module.init_opencl() check_device("cuda") - tvm.init_opencl() check_device("opencl") if __name__ == "__main__": diff --git a/tests/python/integration/test_reduce.py b/tests/python/integration/test_reduce.py index 499a4606b9d2ab4c992ddec77e0be01fc62f1c02..7f2950b4469ea056e7841a809209b618ad6a4b62 100644 --- a/tests/python/integration/test_reduce.py +++ b/tests/python/integration/test_reduce.py @@ -19,9 +19,9 @@ def test_sum(): # one line to build the function. 
def check_device(device, host="stackvm"): - if not tvm.codegen.target_enabled(host): + if not tvm.codegen.enabled(host): return - if not tvm.codegen.target_enabled(device): + if not tvm.codegen.enabled(device): return ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) fsum = tvm.build(s, @@ -37,7 +37,9 @@ def test_sum(): np.testing.assert_allclose( b.asnumpy(), np.sum(a.asnumpy(), axis=1), rtol=1e-4) - tvm.init_opencl() + if tvm.module.enabled("opencl"): + tvm.module.init_opencl() + check_device("cuda") check_device("opencl") diff --git a/tests/python/integration/test_scan.py b/tests/python/integration/test_scan.py index d194c9a62d986f0a720604ca4875db4f3ec98c48..ca0e169d8e33f63989cdceda1f6fca24b4f89f5b 100644 --- a/tests/python/integration/test_scan.py +++ b/tests/python/integration/test_scan.py @@ -23,9 +23,9 @@ def test_scan(): # one line to build the function. def check_device(device, host="stackvm"): - if not tvm.codegen.target_enabled(host): + if not tvm.codegen.enabled(host): return - if not tvm.codegen.target_enabled(device): + if not tvm.codegen.enabled(device): return fscan = tvm.build(s, [X, res], device, host, @@ -41,7 +41,9 @@ def test_scan(): np.testing.assert_allclose( b.asnumpy(), np.cumsum(a_np, axis=0)) - tvm.init_opencl() + if tvm.module.enabled("opencl"): + tvm.module.init_opencl() + check_device("cuda") check_device("opencl") diff --git a/tests/python/unittest/test_codegen_device.py b/tests/python/unittest/test_codegen_device.py index 171cc082677dffbd74ac1c115468b501342d149d..42f60cdef979b32dee0128281e4540104f88918e 100644 --- a/tests/python/unittest/test_codegen_device.py +++ b/tests/python/unittest/test_codegen_device.py @@ -1,4 +1,5 @@ import tvm +from tvm.addon import testing import numpy as np def test_add_pipeline(): @@ -27,9 +28,9 @@ def test_add_pipeline(): fsplits = tvm.ir_pass.SplitHostDevice(fapi) def check_target(device, host="stackvm"): - if not tvm.codegen.target_enabled(host): + if not tvm.codegen.enabled(host): return - if not 
tvm.codegen.target_enabled(device): + if not tvm.codegen.enabled(device): return ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) mhost = tvm.codegen.build(fsplits[0], host) @@ -47,8 +48,33 @@ def test_add_pipeline(): np.testing.assert_allclose( c.asnumpy(), a.asnumpy() + b.asnumpy()) + def check_module_save(device, host="stackvm"): + if not tvm.codegen.enabled(host): + return + if not tvm.codegen.enabled(device): + return + ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) + fmt = "ptx" if device == "cuda" else "cl" + mhost = tvm.codegen.build(fsplits[0], host) + mdev = tvm.codegen.build(fsplits[1:], device) + temp = testing.tempdir() + mpath = temp.relpath("test.%s" % fmt) + mdev.save(mpath) + mdev2 = tvm.module.load(mpath) + mhost.import_module(mdev2) + f = mhost.entry_func + # launch the kernel. + n = 1027 + a = tvm.nd.array(np.random.uniform(size=n).astype(Ab.dtype), ctx) + b = tvm.nd.array(np.random.uniform(size=n).astype(Bb.dtype), ctx) + c = tvm.nd.array(np.zeros(n, dtype=Cb.dtype), ctx) + f(a, b, c) + np.testing.assert_allclose( + c.asnumpy(), a.asnumpy() + b.asnumpy()) + check_target("cuda", host="stackvm") check_target("cuda", host="llvm") + check_module_save("cuda", host="stackvm") if __name__ == "__main__": diff --git a/tests/python/unittest/test_codegen_stack_llvm.py b/tests/python/unittest/test_codegen_stack_llvm.py index 019f5fbf4e8ed72a7bbc7416eaa8c950662d510d..caaa056baa01219fb8399e563d80321f0467f75b 100644 --- a/tests/python/unittest/test_codegen_stack_llvm.py +++ b/tests/python/unittest/test_codegen_stack_llvm.py @@ -8,7 +8,7 @@ def tvm_call_packed(*args): def run_jit(fapi, check): for target in ["llvm", "stackvm"]: - if not tvm.codegen.target_enabled(target): + if not tvm.codegen.enabled(target): continue f = tvm.codegen.build(fapi, target) s = f.get_source() @@ -95,7 +95,7 @@ def test_llvm_add_pipeline(): fapi = tvm.ir_pass.MakeAPI(stmt, "myadd", [Ab, Bb, Cb], 0) def check_llvm(): - if not tvm.codegen.target_enabled("llvm"): + if not 
tvm.codegen.enabled("llvm"): return # build and invoke the kernel. f = tvm.codegen.build(fapi, "llvm") diff --git a/tests/python/unittest/test_module_load.py b/tests/python/unittest/test_module_load.py index 89e015c28da68eb5c9987498e0e831729a1c7d12..59b5391b1ee45710992653d007fcedf7337b83e5 100644 --- a/tests/python/unittest/test_module_load.py +++ b/tests/python/unittest/test_module_load.py @@ -1,14 +1,29 @@ import tvm -from tvm.addon import cc_compiler as cc +from tvm.addon import cc_compiler as cc, testing import os -import tempfile import numpy as np +import subprocess + +runtime_py = """ +import os +import sys +os.environ["TVM_USE_RUNTIME_LIB"] = "1" +import tvm +import numpy as np +path_dso = sys.argv[1] +dtype = sys.argv[2] +ff = tvm.module.load(path_dso) +a = tvm.nd.array(np.zeros(10, dtype=dtype)) +ff(a) +np.testing.assert_equal(a.asnumpy(), np.arange(a.shape[0])) +print("Finish runtime checking...") +""" def test_dso_module_load(): - if not tvm.codegen.target_enabled("llvm"): + if not tvm.codegen.enabled("llvm"): return dtype = 'int64' - temp_dir = tempfile.mkdtemp() + temp = testing.tempdir() def save_object(names): n = tvm.Var('n') @@ -25,10 +40,10 @@ def test_dso_module_load(): for name in names: m.save(name) - path_obj = "%s/test.o" % temp_dir - path_ll = "%s/test.ll" % temp_dir - path_bc = "%s/test.bc" % temp_dir - path_dso = "%s/test.so" % temp_dir + path_obj = temp.relpath("test.o") + path_ll = temp.relpath("test.ll") + path_bc = temp.relpath("test.bc") + path_dso = temp.relpath("test.so") save_object([path_obj, path_ll, path_bc]) cc.create_shared(path_dso, [path_obj]) @@ -41,14 +56,14 @@ def test_dso_module_load(): a = tvm.nd.array(np.zeros(10, dtype=dtype)) f2(a) np.testing.assert_equal(a.asnumpy(), np.arange(a.shape[0])) - files = [path_obj, path_ll, path_bc, path_dso] - for f in files: - os.remove(f) - os.rmdir(temp_dir) + path_runtime_py = temp.relpath("runtime.py") + with open(path_runtime_py, "w") as fo: + fo.write(runtime_py) -def 
test_cuda_module_load(): - pass + subprocess.check_call( + "python %s %s %s" % (path_runtime_py, path_dso, dtype), + shell=True) if __name__ == "__main__": test_dso_module_load() diff --git a/tests/python/unittest/test_runtime_ndarray.py b/tests/python/unittest/test_runtime_ndarray.py index 6731c8f2394a2c794341c670fbe40626462a5815..1c3e5f70d1788ec3b3056aaab4423f9bb1cb71cf 100644 --- a/tests/python/unittest/test_runtime_ndarray.py +++ b/tests/python/unittest/test_runtime_ndarray.py @@ -2,9 +2,11 @@ import tvm import numpy as np def enabled_ctx_list(): - tvm.init_opencl() - ctx_list = [tvm.cpu(0), tvm.gpu(0), tvm.opencl(0)] - ctx_list = [ctx for ctx in ctx_list if ctx.enabled] + if tvm.module.enabled("opencl"): + tvm.module.init_opencl() + + ctx_list = [('cpu', tvm.cpu(0)), ('gpu', tvm.gpu(0)), ('cl', tvm.opencl(0))] + ctx_list = [x[1] for x in ctx_list if tvm.module.enabled(x[0])] return ctx_list ENABLED_CTX_LIST = enabled_ctx_list()