diff --git a/src/codegen/codegen_cuda.h b/src/codegen/codegen_cuda.h index 85da32320b71bce2ac1e7ad5513c04eda52f91c3..dc95a08c904a108206e41e7706f16418a668f075 100644 --- a/src/codegen/codegen_cuda.h +++ b/src/codegen/codegen_cuda.h @@ -38,7 +38,7 @@ class CodeGenCUDA final : public CodeGenC { private: // magic number to add pragma unroll to it. // used to generate code that is compact but still unrolls. - int max_auto_unroll_{256}; + int max_auto_unroll_{32}; // Whether global barrier is needed. bool need_global_barrier_{false}; // Global barrier state