diff --git a/python/tvm/build.py b/python/tvm/build.py
index 6f1580e948d9d397956b6eb69023e7bbf17652dd..089547d164bb3ed4eb794f715d17cf8827ff89d9 100644
--- a/python/tvm/build.py
+++ b/python/tvm/build.py
@@ -12,10 +12,12 @@ from . import ir_pass
 from . import collections
 from . import codegen
 
+
 def lower(sch,
           args,
           name="default_function",
           binds=None,
+          with_api_wrapper=True,
           max_auto_unroll_step=8):
     """Lowering step before build into target.
 
@@ -34,13 +36,17 @@ def lower(sch,
         Dictionary that maps the binding of symbolic buffer to Tensor.
         By default, a new buffer is created for each tensor in the argument.
 
+    with_api_wrapper : bool, optional
+        Whether to add the API wrapper during lowering.
+
     max_auto_unroll_step: int, optional
         Maximum step to perform automatic unrolling
 
     Returns
     -------
-    f : LoweredFunc
-        The result function.
+    f : LoweredFunc or Stmt
+        The result function. If with_api_wrapper=False,
+        the Stmt before MakeAPI is returned.
     """
     binds = {} if binds is None else binds.copy()
     arg_list = []
@@ -67,8 +73,9 @@ def lower(sch,
     stmt = ir_pass.LiftAllocate(stmt)
     stmt = ir_pass.UnrollLoop(stmt, max_auto_unroll_step)
     stmt = ir_pass.Simplify(stmt)
-    fapi = ir_pass.MakeAPI(stmt, name, arg_list, 0)
-    return fapi
+    if not with_api_wrapper:
+        return stmt
+    return ir_pass.MakeAPI(stmt, name, arg_list, 0)
 
 
 def build(sch,
diff --git a/src/schedule/schedule_dataflow_rewrite.cc b/src/schedule/schedule_dataflow_rewrite.cc
index c0debcc29e19a2202fafd1dabc40d91bae4f738e..49a64714a6a70ebe437b8f9e690d5da8391a3a51 100644
--- a/src/schedule/schedule_dataflow_rewrite.cc
+++ b/src/schedule/schedule_dataflow_rewrite.cc
@@ -170,7 +170,7 @@ void RebaseNonZeroMinLoop(const Schedule& sch) {
     if (idx < leaf_vars->data.size()) {
       // insert rebase
       IterVar rebased = IterVarNode::make(
-          Range(), iv->var.copy_with_suffix(".rb"), iv->iter_type);
+          Range(), iv->var.copy_with_suffix(""), iv->iter_type);
       s->relations.push_back(RebaseNode::make(iv, rebased));
       leaf_vars->data[idx] = rebased.node_;
       rebase_map[iv] = rebased;
diff --git a/src/schedule/schedule_ops.cc b/src/schedule/schedule_ops.cc
index fe29e08bf83b8ae22f1e40b0de93a3c7dc06cfed..dab1318c199221fc1e1fcad29916ac1feb9dce28 100644
--- a/src/schedule/schedule_ops.cc
+++ b/src/schedule/schedule_ops.cc
@@ -162,7 +162,7 @@ class SchedulePostProc : public IRMutator {
       // delete duplicated thread extent attr
       auto it = thread_extent_scope_.find(op->node.get());
       if (it != thread_extent_scope_.end()) {
-        CHECK(is_zero(ir::Simplify(it->second- op->value)));
+        CHECK(is_zero(ir::Simplify(it->second - op->value)));
         return this->Mutate(op->body);
       } else {
         thread_extent_scope_[op->node.get()] = op->value;
diff --git a/tutorials/python/get_started.py b/tutorials/python/get_started.py
index 4dffd8fbc425451322eb6f0bcd05181fbef15dc4..539f88ceac78f208925ae29f303c058b03ab1af8 100644
--- a/tutorials/python/get_started.py
+++ b/tutorials/python/get_started.py
@@ -17,8 +17,7 @@ import numpy as np
 # Vector Add Example
 # ------------------
 # In this tutorial, we will use a vector addition example to demonstrate
-# the workflow in TVM. We will demonstrate how we can describe and compile
-# vector addition code that runs on GPU.
+# the workflow.
 #
 ######################################################################
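
Usage note: a minimal sketch of how the new with_api_wrapper flag would be called, assuming
lower() is exposed as tvm.lower in this tree and using the vector-add construction from the
get_started tutorial; the names n, A, B, C, s, and "vector_add" are only illustrative.

    import tvm

    n = tvm.var("n")
    A = tvm.placeholder((n,), name="A")
    B = tvm.placeholder((n,), name="B")
    C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
    s = tvm.create_schedule(C.op)

    # Default path: MakeAPI runs and a LoweredFunc is returned, as before.
    fapi = tvm.lower(s, [A, B, C], name="vector_add")

    # New path: skip MakeAPI and get back the Stmt, e.g. to inspect or further
    # transform the lowered IR before wrapping it into a callable function.
    stmt = tvm.lower(s, [A, B, C], name="vector_add", with_api_wrapper=False)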