From cdd7f37fefb417caba51ee5ad0c4c849b777f744 Mon Sep 17 00:00:00 2001
From: Leyuan Wang <laurawly@gmail.com>
Date: Fri, 14 Dec 2018 13:32:15 -0800
Subject: [PATCH] [TOPI] NCHWc added input shape 4 condition, intel graphics
 conv2d schedule debugged for inception_v3 workloads (#2265)

---
 nnvm/python/nnvm/top/nn.py                |  9 ++-
 topi/python/topi/intel_graphics/conv2d.py | 83 ++++++++---------------
 2 files changed, 34 insertions(+), 58 deletions(-)

diff --git a/nnvm/python/nnvm/top/nn.py b/nnvm/python/nnvm/top/nn.py
index 74196c078..a37a5d7e0 100644
--- a/nnvm/python/nnvm/top/nn.py
+++ b/nnvm/python/nnvm/top/nn.py
@@ -1,4 +1,4 @@
-# pylint: disable=invalid-name, unused-argument
+# pylint: disable=invalid-name, unused-argument, missing-docstring, no-else-return
 """Definition of nn ops"""
 from __future__ import absolute_import
 
@@ -170,8 +170,11 @@ def compute_contrib_conv2d_NCHWc(attrs, inputs, _):
     out_layout = attrs.get_string("out_layout")
     out_dtype = attrs.get_string("out_dtype")
     out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype
-    _, in_channel_chunk, _, _, in_channel_block = get_const_tuple(inputs[0].shape)
-    in_channel = in_channel_chunk * in_channel_block
+    if layout == "NCHW":
+        _, in_channel, _, _ = get_const_tuple(inputs[0].shape)
+    else:
+        _, in_channel_chunk, _, _, in_channel_block = get_const_tuple(inputs[0].shape)
+        in_channel = in_channel_chunk * in_channel_block
     assert dilation == (1, 1), "not support dilate now"
     if groups == 1:
         # pylint: disable=assignment-from-no-return
diff --git a/topi/python/topi/intel_graphics/conv2d.py b/topi/python/topi/intel_graphics/conv2d.py
index f6767b68a..d712e7141 100644
--- a/topi/python/topi/intel_graphics/conv2d.py
+++ b/topi/python/topi/intel_graphics/conv2d.py
@@ -1,4 +1,4 @@
-# pylint: disable=invalid-name,unused-variable,unused-argument,no-else-return, too-many-arguments, too-many-locals, too-many-statements, no-member, too-many-branches
+# pylint: disable=invalid-name,unused-variable,unused-argument,no-else-return, too-many-arguments, too-many-locals, too-many-statements, no-member, too-many-branches, too-many-boolean-expressions
 """conv2d schedule on Intel Graphics"""
 
 from __future__ import absolute_import as _abs
@@ -61,7 +61,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos):
     return sym.contrib.conv2d_NCHWc(*copy_inputs, **new_attrs)
 
 @conv2d_NCHWc.register(["intel_graphics"])
-def _decl_conv2d(data, kernel, stride, padding, layout, out_layout, out_dtype='float32'):
+def _decl_conv2d(data, kernel, stride, padding, dilation, layout, out_layout, out_dtype='float32'):
     """Conv2D operator for Intel Graphics backend.
 
     Parameters
@@ -126,8 +126,7 @@ def schedule_conv2d_NCHWc(outs):
         for tensor in op.input_tensors:
             if tensor.op.input_tensors and tensor.op not in scheduled_ops:
                 traverse(tensor.op)
-        if "4_5" in op.tag or "4_4" in op.tag or "2_7" in op.tag or "2_14" in op.tag \
-                or "1_16" in op.tag:
+        if 'conv2d' in op.tag:
             _schedule_cl_spatialpack_NCHWc(s, op)
 
         scheduled_ops.append(op)
@@ -156,31 +155,30 @@ def _decl_cl_spatialpack_NCHWc(data, kernel, stride, padding, out_dtype='float16
     ry = tvm.reduce_axis((0, kernel_h), name='ry')
     rx = tvm.reduce_axis((0, kernel_w), name='rx')
 
-    block_w = 0
-    block_h = 0
+    block_w = 1
+    block_h = 1
     if stride_h == 2:
         if num_filter + kernel_h == 515:
-            conv_tag = "4_4"
             block_h = 4
             block_w = 4
         else:
-            conv_tag = "4_5"
             block_h = 4
             block_w = 5
     elif kernel_h == 3:
         if num_filter == 512:
-            conv_tag = "2_7"
             block_h = 2
             block_w = 7
         else:
-            conv_tag = "2_14"
             block_h = 2
             block_w = 14
+    elif kernel_h == 7 and padding == 3 and stride == 1:
+        block_h = 3
+        block_w = 4
     else:
-        conv_tag = "1_16"
         block_h = 1
         block_w = 16
 
+    attrs = {'block_h': block_h, 'block_w' : block_w}
 
     c_h = out_height
     c_w = out_width
@@ -202,13 +200,13 @@ def _decl_cl_spatialpack_NCHWc(data, kernel, stride, padding, out_dtype='float16
         tvm.sum(
             temp[nn, rc, yy * stride_h + ry, xx * stride_w + rx].astype(out_dtype) *
             kernel[ff, rc, ry, rx, vc].astype(out_dtype),
-            axis=[rc, ry, rx]), tag=conv_tag, name='conv')
+            axis=[rc, ry, rx]), name='conv', attrs=attrs)
 
     output = tvm.compute(
         oshape,
         lambda nn, ff, yy, xx:
         conv[nn][ff//nv][yy][xx][ff%nv],
-        name='output_unpack', tag=conv_tag)
+        name='output_unpack', tag='conv2d')
 
     return output
 
@@ -224,21 +222,10 @@ def _schedule_cl_spatialpack_NCHWc(s, op):
     kernel_L = s.cache_read(kernel, "local", [conv_L])
 
     _, in_channel, temp_h, temp_w = [util.get_const_int(x) for x in temp.shape]
-    if "1_16" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 1
-        OUTPUT_BLOCK_WIDTH = 16
-    elif "2_14" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 2
-        OUTPUT_BLOCK_WIDTH = 14
-    elif "2_7" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 2
-        OUTPUT_BLOCK_WIDTH = 7
-    elif "4_5" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 4
-        OUTPUT_BLOCK_WIDTH = 5
-    elif "4_4" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 4
-        OUTPUT_BLOCK_WIDTH = 4
+
+    attrs = s[conv].op.attrs
+    OUTPUT_BLOCK_HEIGHT = attrs['block_h']
+    OUTPUT_BLOCK_WIDTH = attrs['block_w']
 
     # schedule conv
     z_factor = 1
@@ -308,7 +295,7 @@
 
 
 @conv2d.register(["intel_graphics"])
-def decl_conv2d(data, kernel, stride, padding, layout='NCHW', out_dtype='float32'):
+def decl_conv2d(data, kernel, stride, padding, dilation, layout='NCHW', out_dtype='float32'):
     """Conv2D operator for Intel Graphics backend.
 
     Parameters
@@ -368,8 +355,7 @@ def schedule_conv2d_nchw(outs):
         for tensor in op.input_tensors:
             if tensor.op.input_tensors and tensor.op not in scheduled_ops:
                 traverse(tensor.op)
-        if "4_5" in op.tag or "4_4" in op.tag or "2_7" in op.tag or "2_14" in op.tag \
-                or "1_16" in op.tag:
+        if 'conv2d' in op.tag:
             _schedule_cl_spatialpack(s, op)
 
         scheduled_ops.append(op)
@@ -396,31 +382,30 @@ def _decl_cl_spatialpack(data, kernel, stride, padding, layout, out_dtype='float
     ry = tvm.reduce_axis((0, kernel_h), name='ry')
     rx = tvm.reduce_axis((0, kernel_w), name='rx')
 
-    block_w = 0
-    block_h = 0
+    block_w = 1
+    block_h = 1
     if stride_h == 2:
         if num_filter + kernel_h == 515:
-            conv_tag = "4_4"
             block_h = 4
             block_w = 4
         else:
-            conv_tag = "4_5"
             block_h = 4
             block_w = 5
     elif kernel_h == 3:
         if num_filter == 512:
-            conv_tag = "2_7"
             block_h = 2
             block_w = 7
         else:
-            conv_tag = "2_14"
             block_h = 2
             block_w = 14
+    elif kernel_h == 7 and padding == 3 and stride == 1:
+        block_h = 3
+        block_w = 4
     else:
-        conv_tag = "1_16"
         block_h = 1
         block_w = 16
 
+    attrs = {'block_h': block_h, 'block_w' : block_w}
 
     c_h = out_height
     c_w = out_width
@@ -453,13 +438,13 @@ def _decl_cl_spatialpack(data, kernel, stride, padding, layout, out_dtype='float
         tvm.sum(
             temp[nn, rc, yy * stride_h + ry, xx * stride_w + rx].astype(out_dtype) *
             kernel_vec[ff, rc, ry, rx, vc].astype(out_dtype),
-            axis=[rc, ry, rx]), tag=conv_tag, name='conv')
+            axis=[rc, ry, rx]), name='conv', attrs=attrs)
 
     output = tvm.compute(
         oshape,
         lambda nn, ff, yy, xx:
         conv[nn][ff//nv][yy][xx][ff%nv],
-        name='output_unpack', tag=conv_tag)
+        name='output_unpack', tag='conv2d')
 
     return output
 
@@ -477,21 +462,9 @@ def _schedule_cl_spatialpack(s, op):
     kernel_L = s.cache_read(kernel_vec, "local", [conv_L])
 
     _, in_channel, temp_h, temp_w = [util.get_const_int(x) for x in temp.shape]
-    if "1_16" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 1
-        OUTPUT_BLOCK_WIDTH = 16
-    elif "2_14" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 2
-        OUTPUT_BLOCK_WIDTH = 14
-    elif "2_7" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 2
-        OUTPUT_BLOCK_WIDTH = 7
-    elif "4_5" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 4
-        OUTPUT_BLOCK_WIDTH = 5
-    elif "4_4" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 4
-        OUTPUT_BLOCK_WIDTH = 4
+    attrs = s[conv].op.attrs
+    OUTPUT_BLOCK_HEIGHT = attrs['block_h']
+    OUTPUT_BLOCK_WIDTH = attrs['block_w']
 
     # schedule conv
     z_factor = 1
-- 
GitLab
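
Notes on the changes above:

1. compute_contrib_conv2d_NCHWc in nn.py previously unpacked the input shape
unconditionally as 5-D NCHWc, so a plain 4-D NCHW input broke the tuple
unpacking. The patch branches on the layout string instead. A minimal sketch
of that logic, using a hypothetical helper name (infer_in_channel) and plain
Python tuples standing in for the constant shapes returned by get_const_tuple:

    def infer_in_channel(shape, layout):
        """Recover the logical input-channel count from a conv2d input shape."""
        if layout == "NCHW":
            # 4-D layout: (batch, channel, height, width)
            _, in_channel, _, _ = shape
        else:
            # 5-D NCHWc layout: (batch, channel_chunk, height, width,
            # channel_block); the logical channel count is chunk * block.
            _, chunk, _, _, block = shape
            in_channel = chunk * block
        return in_channel

    assert infer_in_channel((1, 32, 56, 56), "NCHW") == 32
    assert infer_in_channel((1, 4, 56, 56, 8), "NCHW8c") == 32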
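
2. Both conv2d declarations used to encode the chosen block shape in the op
tag (e.g. "2_14") and the schedules parsed it back through a five-way if/elif
chain. The patch instead attaches block_h and block_w to the compute op as
attrs and tags every conv with the generic 'conv2d'. A minimal sketch of the
round trip, assuming the 2018-era tvm API this patch is written against
(tvm.placeholder, tvm.compute with the attrs argument, tvm.create_schedule):

    import tvm

    data = tvm.placeholder((1, 16, 56, 56), name='data')
    conv = tvm.compute(
        (1, 16, 56, 56),
        lambda n, c, h, w: data[n, c, h, w] + 1,  # stand-in for the real conv body
        name='conv',
        tag='conv2d',                         # generic tag: only routes scheduling
        attrs={'block_h': 2, 'block_w': 14})  # per-workload tuning data on the op

    s = tvm.create_schedule(conv.op)
    attrs = s[conv].op.attrs                  # same lookup the schedules now use
    OUTPUT_BLOCK_HEIGHT = attrs['block_h']
    OUTPUT_BLOCK_WIDTH = attrs['block_w']

With this design a new workload only needs a new branch in the declaration;
the schedule picks the block shape up from the op without learning a new tag.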
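
3. The block-shape heuristic keeps its existing cases and gains one: 7x7
convolutions with stride 1 and padding 3 now get a 3x4 block (per the commit
subject, this is tied to the inception_v3 workloads), and the initial block
shape changes from 0x0 to 1x1. Restated compactly with a hypothetical helper
name (select_block), preserving the patch's own comparisons:

    def select_block(stride_h, kernel_h, num_filter, padding, stride):
        # Returns (block_h, block_w) for the spatial-pack conv2d schedule.
        if stride_h == 2:
            return (4, 4) if num_filter + kernel_h == 515 else (4, 5)
        if kernel_h == 3:
            return (2, 7) if num_filter == 512 else (2, 14)
        if kernel_h == 7 and padding == 3 and stride == 1:
            # New case; mirrors the patch's raw `stride == 1` check, which
            # assumes an integer stride argument here.
            return (3, 4)
        return (1, 16)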