diff --git a/topi/python/topi/x86/conv2d_avx_1x1.py b/topi/python/topi/x86/conv2d_avx_1x1.py index cc264d04ac24ced1de984f8fa4c21b30ad5afef4..afd0be2e2ded3950588fbcd9ae91834f3ac1c7ba 100644 --- a/topi/python/topi/x86/conv2d_avx_1x1.py +++ b/topi/python/topi/x86/conv2d_avx_1x1.py @@ -77,9 +77,6 @@ def _schedule_conv(s, data, data_pad, data_vec, kernel, kernel_vec, conv_out, ou batch, ic_chunk, ih, ic_block, iw = s[A1].op.axis parallel_axis = s[A1].fuse(ic_chunk, ih) s[A1].parallel(parallel_axis) - s[A1].pragma(batch, "parallel_launch_point") - s[A1].pragma(parallel_axis, "parallel_stride_pattern") - s[A1].pragma(batch, "parallel_barrier_when_finish") # schedule kernel pack oc_chunk, ic_chunk, oh, ow, ic_block, oc_block = s[W].op.axis @@ -88,9 +85,6 @@ def _schedule_conv(s, data, data_pad, data_vec, kernel, kernel_vec, conv_out, ou s[W].vectorize(oc_block) parallel_axis = s[W].fuse(oc_chunk, oh) s[W].parallel(parallel_axis) - s[W].pragma(parallel_axis, "parallel_launch_point") - s[W].pragma(parallel_axis, "parallel_stride_pattern") - s[W].pragma(parallel_axis, "parallel_barrier_when_finish") C, O0, O = conv_out, output, last CC = s.cache_write(C, 'global') @@ -128,8 +122,5 @@ def _schedule_conv(s, data, data_pad, data_vec, kernel, kernel_vec, conv_out, ou s[O].vectorize(oc_block) s[O].parallel(parallel_axis) - s[O].pragma(batch, "parallel_launch_point") - s[O].pragma(parallel_axis, "parallel_stride_pattern") - s[O].pragma(batch, "parallel_barrier_when_finish") return s diff --git a/topi/python/topi/x86/conv2d_avx_common.py b/topi/python/topi/x86/conv2d_avx_common.py index 4f5be019f45a06d6399c06696b1ef150b2f718a8..f4c0e453e643087f7e4f6c5fc2c2249b244643a6 100644 --- a/topi/python/topi/x86/conv2d_avx_common.py +++ b/topi/python/topi/x86/conv2d_avx_common.py @@ -90,9 +90,6 @@ def _schedule_conv(s, data, data_pad, data_vec, kernel, kernel_vec, conv_out, ou batch, ic_chunk, ih, ic_block, iw = s[A1].op.axis parallel_axis = s[A1].fuse(ic_chunk, ih) s[A1].parallel(parallel_axis) - s[A1].pragma(batch, "parallel_launch_point") - s[A1].pragma(parallel_axis, "parallel_stride_pattern") - s[A1].pragma(batch, "parallel_barrier_when_finish") # schedule kernel pack oc_chunk, ic_chunk, oh, ow, ic_block, oc_block = s[W].op.axis @@ -101,9 +98,6 @@ def _schedule_conv(s, data, data_pad, data_vec, kernel, kernel_vec, conv_out, ou s[W].vectorize(oc_block) parallel_axis = s[W].fuse(oc_chunk, oh) s[W].parallel(parallel_axis) - s[W].pragma(parallel_axis, "parallel_launch_point") - s[W].pragma(parallel_axis, "parallel_stride_pattern") - s[W].pragma(parallel_axis, "parallel_barrier_when_finish") # schedule conv C, O0, O = conv_out, output, last @@ -144,8 +138,5 @@ def _schedule_conv(s, data, data_pad, data_vec, kernel, kernel_vec, conv_out, ou s[O].vectorize(oc_block) s[O].parallel(parallel_axis) - s[O].pragma(batch, "parallel_launch_point") - s[O].pragma(parallel_axis, "parallel_stride_pattern") - s[O].pragma(batch, "parallel_barrier_when_finish") return s