From 80f9ca00b2643000c619288f3d7a855083571195 Mon Sep 17 00:00:00 2001 From: Zhixun Tan <phisiart@gmail.com> Date: Thu, 25 Jan 2018 13:35:24 -0500 Subject: [PATCH] [OpenGL] Let OpenGL texture always be 1024 x nrows. (#817) * OpenGL texture is always 1024 x nrows. * Address review comments. --- src/codegen/codegen_opengl.cc | 11 +-- src/runtime/opengl/opengl_device_api.cc | 91 +++++++++++++++++++++---- src/runtime/opengl/opengl_module.h | 13 ++++ tests/webgl/test_local_gemm.py | 2 +- 4 files changed, 98 insertions(+), 19 deletions(-) diff --git a/src/codegen/codegen_opengl.cc b/src/codegen/codegen_opengl.cc index e645e7f6c..496b15b34 100644 --- a/src/codegen/codegen_opengl.cc +++ b/src/codegen/codegen_opengl.cc @@ -154,7 +154,8 @@ void CodeGenOpenGL::BindThreadIndex(const IterVar& iv) { // Declare threadIdx local variable. this->PrintIndent(); - this->stream << "ivec2 threadIdx = ivec2(gl_FragCoord.xy);\n"; + this->stream << "ivec2 threadIdx = ivec2(" << runtime::kTextureRowSize + << " * int(gl_FragCoord.y) + int(gl_FragCoord.x), 0);\n"; // Return directly if threadIdx.x >= thread_extent. this->PrintIndent(); @@ -192,12 +193,14 @@ void CodeGenOpenGL::VisitStmt_(const Store* op) { } } -// texelFetch(tex, ivec2(idx, 0), 0).r +// texelFetch(tex, ivec2(idx & kTextureRowMask, idx >> kTextureRowBits), 0).r std::string CodeGenOpenGL::TexelFetch(const Variable* buffer, Expr index) { std::ostringstream os; - os << "texelFetch(" << GetVarID(buffer) << ", ivec2("; + os << "texelFetch(" << GetVarID(buffer) << ", ivec2(int("; PrintExpr(index, os); - os << ", 0), 0).r"; + os << ") & " << runtime::kTextureRowMask << ", int("; + PrintExpr(index, os); + os << ") >> " << runtime::kTextureRowBits << "), 0).r"; return os.str(); } diff --git a/src/runtime/opengl/opengl_device_api.cc b/src/runtime/opengl/opengl_device_api.cc index 798003af9..d90d12034 100644 --- a/src/runtime/opengl/opengl_device_api.cc +++ b/src/runtime/opengl/opengl_device_api.cc @@ -3,6 +3,7 @@ * \file opengl_device_api.cc */ #include "./opengl_common.h" +#include "./opengl_module.h" #if TVM_OPENGL_RUNTIME @@ -347,8 +348,9 @@ Texture OpenGLWorkspace::CreateTexture(TVMType type, size_t nbytes) { // Use glTexImage2D with nullptr data to specify GPU data storage. auto texture_format = GetTextureFormat(type); - auto width = static_cast<GLsizei>(nbytes / (type.bits / 8)); - auto height = GLsizei(1); + auto nelems = static_cast<GLsizei>(nbytes / (type.bits / 8)); + auto height = (nelems + kTextureRowSize - 1) / kTextureRowSize; + auto width = (height == 1) ? nelems : kTextureRowSize; OPENGL_CALL(gl->TexImage2D(GL_TEXTURE_2D, /*level=*/0, texture_format.internal_format, width, height, /*border=*/0, @@ -402,6 +404,51 @@ Program OpenGLWorkspace::CreateProgram(GLuint fragment_shader) { return Program(this, program); } +/*! + * \brief Visit a 1D range of an OpenGL texture-backed TVM array. + * When getting/setting a sub image of a texture, we can only specify a 2D + * block (xbeg, ybeg, width, height). + * Since we are storing all TVM arrays using (kTextureRowSize x nrows) 2D + * textures (row-major), a range in an array does not necessarily map to a 2D + * block. + * This function split a 1D range into 3 2D blocks. + * \param beg The index of the first element in the 1D range. + * \param end The index of the last + 1 element in the 1D range. + * \param on_2d_block Callback for each 2D block. Must have interface + * void(GLint xbeg, GLint ybeg, GLsizei width, GLsizei height). + */ +template <typename F> +static void Visit1DRange(GLint beg, GLint end, F&& on_2d_block) { + CHECK_LE(beg, end) << "Invalid range."; + + // xbeg kTextureRowSize + // ybeg ....************ + // **************** + // **************** + // ylast *********....... + // xlast + GLint xbeg = beg % kTextureRowSize; + GLint ybeg = beg / kTextureRowSize; + GLint xlast = (end - 1) % kTextureRowSize; + GLint ylast = (end - 1) / kTextureRowSize; + + if (ybeg == ylast) { // Only one row. + on_2d_block(xbeg, ybeg, end - beg, 1); + return; + } + + // First row. + on_2d_block(xbeg, ybeg, kTextureRowSize - xbeg, 1); + + // Middle block. + if (ylast - ybeg > 1) { + on_2d_block(0, ybeg + 1, kTextureRowSize, ylast - ybeg - 1); + } + + // Last row. + on_2d_block(0, ylast, xlast + 1, 1); +} + void OpenGLWorkspace::PutTextureData(Texture *texture, GLint begin, GLsizei nelems, @@ -409,12 +456,17 @@ void OpenGLWorkspace::PutTextureData(Texture *texture, // Bind to temporary unit. BindTextureUnit(NumTextureUnits() - 1, texture->texture()); - // Similar to cudaMemcpy. - OPENGL_CALL(gl->TexSubImage2D(GL_TEXTURE_2D, /*level=*/0, - /*xoffset=*/begin, /*yoffset=*/0, - /*width=*/nelems, /*height=*/1, - texture->format_.format, texture->format_.type, - data)); + Visit1DRange(begin, begin + nelems, [&](GLint xbeg, GLint ybeg, + GLsizei width, GLsizei height) { + auto offset = (ybeg * kTextureRowSize + xbeg - begin) * texture->elemsz(); + const GLvoid* ptr = static_cast<const char*>(data) + offset; + + // Similar to cudaMemcpy. + OPENGL_CALL(gl->TexSubImage2D(GL_TEXTURE_2D, /*level=*/0, + xbeg, ybeg, width, height, + texture->format_.format, + texture->format_.type, ptr)); + }); } void OpenGLWorkspace::GetTextureData(const Texture *texture, @@ -453,18 +505,29 @@ void OpenGLWorkspace::GetTextureData(const Texture *texture, auto nchannels = 4; auto padded_data_size = nchannels * nelems * elemsz; auto padded_data = std::unique_ptr<char[]>(new char[padded_data_size]); - OPENGL_CALL(gl->ReadPixels(/*x=*/begin, /*y=*/0, /*width=*/nelems, - /*height=*/1, GL_RGBA, GL_FLOAT, - padded_data.get())); + Visit1DRange(begin, begin + nelems, [&](GLint xbeg, GLint ybeg, + GLsizei width, GLsizei height) { + auto data_offset = (ybeg * kTextureRowSize + xbeg - begin) * elemsz; + auto padded_data_offset = data_offset * nchannels; + OPENGL_CALL(gl->ReadPixels(xbeg, ybeg, width, height, + GL_RGBA, GL_FLOAT, + padded_data.get() + padded_data_offset)); + }); for (GLsizei i = 0; i != nelems; ++i) { auto dst = reinterpret_cast<char *>(data) + i * elemsz; auto src = padded_data.get() + nchannels * i * elemsz; std::memcpy(dst, src, elemsz); } #else - OPENGL_CALL(gl->ReadPixels(/*x=*/begin, /*y=*/0, /*width=*/nelems, - /*height=*/1, texture->format_.format, - texture->format_.type, data)); + Visit1DRange(begin, begin + nelems, [&](GLint xbeg, GLint ybeg, + GLsizei width, GLsizei height) { + auto offset = (ybeg * kTextureRowSize + xbeg - begin) * texture->elemsz(); + GLvoid* ptr = static_cast<char*>(data) + offset; + + OPENGL_CALL(gl->ReadPixels(xbeg, ybeg, width, height, + texture->format_.format, texture->format_.type, + ptr)); + }); #endif OPENGL_CALL(gl->DeleteFramebuffers(1, &frame_buffer)); diff --git a/src/runtime/opengl/opengl_module.h b/src/runtime/opengl/opengl_module.h index 1913878c3..a4cfa20bd 100644 --- a/src/runtime/opengl/opengl_module.h +++ b/src/runtime/opengl/opengl_module.h @@ -17,6 +17,19 @@ namespace tvm { namespace runtime { +/*! + * \brief The fixed row size of all OpenGL textures in TVM. + * + * OpenGL has texture size limit on each dimension. Suppose we have a limit of + * 1024, then we can have a 2D texture of size (2^10 x 2^10) but not (2^20 x 1). + * This means we don't want to just use (n x 1) 2D textures for all arrays, + * because that would limit our array size to be 1024. Here we use (1024 x m) + * 2D textures. Then we can have arrays of size up to 2^20. + */ +static constexpr int kTextureRowBits = 10; +static constexpr int kTextureRowSize = 1 << kTextureRowBits; +static constexpr int kTextureRowMask = kTextureRowSize - 1; + /*! * \brief Determines how we supply arguments. */ diff --git a/tests/webgl/test_local_gemm.py b/tests/webgl/test_local_gemm.py index 18d2d1d8b..0dd1c0fc7 100644 --- a/tests/webgl/test_local_gemm.py +++ b/tests/webgl/test_local_gemm.py @@ -7,7 +7,7 @@ def test_local_gemm(): if not tvm.module.enabled("llvm"): return - nn = 2 + nn = 1024 n = tvm.var('n') n = tvm.convert(nn) m = n -- GitLab