From e94e24509c8b7868c22b9007712ba0ed88ca1c93 Mon Sep 17 00:00:00 2001 From: Yida Wang <yidawa@gmail.com> Date: Thu, 1 Mar 2018 12:14:44 -0800 Subject: [PATCH] remove the pragma primitives for better performance when the threads are bound (#949) --- topi/python/topi/x86/conv2d_avx_1x1.py | 9 --------- topi/python/topi/x86/conv2d_avx_common.py | 9 --------- 2 files changed, 18 deletions(-) diff --git a/topi/python/topi/x86/conv2d_avx_1x1.py b/topi/python/topi/x86/conv2d_avx_1x1.py index cc264d04a..afd0be2e2 100644 --- a/topi/python/topi/x86/conv2d_avx_1x1.py +++ b/topi/python/topi/x86/conv2d_avx_1x1.py @@ -77,9 +77,6 @@ def _schedule_conv(s, data, data_pad, data_vec, kernel, kernel_vec, conv_out, ou batch, ic_chunk, ih, ic_block, iw = s[A1].op.axis parallel_axis = s[A1].fuse(ic_chunk, ih) s[A1].parallel(parallel_axis) - s[A1].pragma(batch, "parallel_launch_point") - s[A1].pragma(parallel_axis, "parallel_stride_pattern") - s[A1].pragma(batch, "parallel_barrier_when_finish") # schedule kernel pack oc_chunk, ic_chunk, oh, ow, ic_block, oc_block = s[W].op.axis @@ -88,9 +85,6 @@ def _schedule_conv(s, data, data_pad, data_vec, kernel, kernel_vec, conv_out, ou s[W].vectorize(oc_block) parallel_axis = s[W].fuse(oc_chunk, oh) s[W].parallel(parallel_axis) - s[W].pragma(parallel_axis, "parallel_launch_point") - s[W].pragma(parallel_axis, "parallel_stride_pattern") - s[W].pragma(parallel_axis, "parallel_barrier_when_finish") C, O0, O = conv_out, output, last CC = s.cache_write(C, 'global') @@ -128,8 +122,5 @@ def _schedule_conv(s, data, data_pad, data_vec, kernel, kernel_vec, conv_out, ou s[O].vectorize(oc_block) s[O].parallel(parallel_axis) - s[O].pragma(batch, "parallel_launch_point") - s[O].pragma(parallel_axis, "parallel_stride_pattern") - s[O].pragma(batch, "parallel_barrier_when_finish") return s diff --git a/topi/python/topi/x86/conv2d_avx_common.py b/topi/python/topi/x86/conv2d_avx_common.py index 4f5be019f..f4c0e453e 100644 --- 
a/topi/python/topi/x86/conv2d_avx_common.py +++ b/topi/python/topi/x86/conv2d_avx_common.py @@ -90,9 +90,6 @@ def _schedule_conv(s, data, data_pad, data_vec, kernel, kernel_vec, conv_out, ou batch, ic_chunk, ih, ic_block, iw = s[A1].op.axis parallel_axis = s[A1].fuse(ic_chunk, ih) s[A1].parallel(parallel_axis) - s[A1].pragma(batch, "parallel_launch_point") - s[A1].pragma(parallel_axis, "parallel_stride_pattern") - s[A1].pragma(batch, "parallel_barrier_when_finish") # schedule kernel pack oc_chunk, ic_chunk, oh, ow, ic_block, oc_block = s[W].op.axis @@ -101,9 +98,6 @@ def _schedule_conv(s, data, data_pad, data_vec, kernel, kernel_vec, conv_out, ou s[W].vectorize(oc_block) parallel_axis = s[W].fuse(oc_chunk, oh) s[W].parallel(parallel_axis) - s[W].pragma(parallel_axis, "parallel_launch_point") - s[W].pragma(parallel_axis, "parallel_stride_pattern") - s[W].pragma(parallel_axis, "parallel_barrier_when_finish") # schedule conv C, O0, O = conv_out, output, last @@ -144,8 +138,5 @@ def _schedule_conv(s, data, data_pad, data_vec, kernel, kernel_vec, conv_out, ou s[O].vectorize(oc_block) s[O].parallel(parallel_axis) - s[O].pragma(batch, "parallel_launch_point") - s[O].pragma(parallel_axis, "parallel_stride_pattern") - s[O].pragma(batch, "parallel_barrier_when_finish") return s -- GitLab