add query for shared memory size (#1083)

2e17e850 · Lianmin Zheng · Tianqi Chen · 3d67ea17 · 2e17e850 · 2e17e850
Commit 2e17e850, authored 7 years ago by Lianmin Zheng; committed by Tianqi Chen 7 years ago.
--- a/include/tvm/runtime/device_api.h
+++ b/include/tvm/runtime/device_api.h
@@ -19,7 +19,8 @@ enum DeviceAttrKind : int {
  kExist = 0,
  kMaxThreadsPerBlock = 1,
  kWarpSize = 2,
-  kComputeVersion = 3,
+  kMaxSharedMemoryPerBlock = 3,
+  kComputeVersion = 4,
 };

 /*! \brief Number of bytes each allocation must align to */

--- a/python/tvm/_ffi/runtime_ctypes.py
+++ b/python/tvm/_ffi/runtime_ctypes.py
@@ -140,6 +140,12 @@ class TVMContext(ctypes.Structure):
        return _api_internal._GetDeviceAttr(
            self.device_type, self.device_id, 2)

+    @property
+    def max_shared_memory_per_block(self):
+        """Total amount of shared memory per block in bytes"""
+        return _api_internal._GetDeviceAttr(
+            self.device_type, self.device_id, 3)
+
    @property
    def compute_version(self):
        """Get compute verison number in string.
@@ -152,7 +158,7 @@ class TVMContext(ctypes.Structure):
            The version string in `major.minor` format.
        """
        return _api_internal._GetDeviceAttr(
-            self.device_type, self.device_id, 3)
+            self.device_type, self.device_id, 4)

    def sync(self):
        """Synchronize until jobs finished at the context."""

--- a/src/runtime/cuda/cuda_device_api.cc
+++ b/src/runtime/cuda/cuda_device_api.cc
@@ -40,6 +40,11 @@ class CUDADeviceAPI final : public DeviceAPI {
            &value, cudaDevAttrWarpSize, ctx.device_id));
        break;
      }
+      case kMaxSharedMemoryPerBlock: {
+        CUDA_CALL(cudaDeviceGetAttribute(
+            &value, cudaDevAttrMaxSharedMemoryPerBlock, ctx.device_id));
+        break;
+      }
      case kComputeVersion: {
        std::ostringstream os;
        CUDA_CALL(cudaDeviceGetAttribute(