diff --git a/nnvm/Makefile b/nnvm/Makefile
index 9b7e78953d1c3806c954a7599c2af7b09363f0cf..515856c5f078b90787676166d3f47cf330e76625 100644
--- a/nnvm/Makefile
+++ b/nnvm/Makefile
@@ -53,10 +53,10 @@ else
 	NO_WHOLE_ARCH= --no-whole-archive
 endif
 
-all: lib/libnnvm.a lib/libnnvm_top.$(SHARED_LIBRARY_SUFFIX) lib/libnnvm_top_runtime.$(SHARED_LIBRARY_SUFFIX)
+all: lib/libnnvm.a lib/libnnvm_top.$(SHARED_LIBRARY_SUFFIX)
 
 SRC = $(wildcard src/*.cc src/c_api/*.cc src/core/*.cc src/pass/*.cc)
-SRC_TOP = $(wildcard src/top/*/*.cc src/runtime/*.cc src/compiler/*.cc src/compiler/*/*.cc)
+SRC_TOP = $(wildcard src/top/*/*.cc src/compiler/*.cc src/compiler/*/*.cc)
 ALL_OBJ = $(patsubst %.cc, build/%.o, $(SRC))
 TOP_OBJ = $(patsubst %.cc, build/%.o, $(SRC_TOP))
 ALL_DEP = $(ALL_OBJ)
@@ -78,10 +78,6 @@ lib/libnnvm_top.$(SHARED_LIBRARY_SUFFIX): lib/libnnvm.a ${TOP_OBJ}
 	@mkdir -p $(@D)
 	$(CXX) $(CFLAGS) -shared -o $@ $(filter %.o, $^) $(LDFLAGS) -Wl,${WHOLE_ARCH} lib/libnnvm.a -Wl,${NO_WHOLE_ARCH}
 
-lib/libnnvm_top_runtime.$(SHARED_LIBRARY_SUFFIX): deploy/nnvm_runtime.cc
-	@mkdir -p $(@D)
-	$(CXX) $(CFLAGS) -shared -o $@ $(filter %.cc, $^) $(LDFLAGS)
-
 cython:
 	cd python; python setup.py build_ext --inplace
 
diff --git a/nnvm/deploy/REAMD.md b/nnvm/deploy/REAMD.md
deleted file mode 100644
index 96ab18d7514b6dc406e1d01996a9b385034d27fb..0000000000000000000000000000000000000000
--- a/nnvm/deploy/REAMD.md
+++ /dev/null
@@ -1,4 +0,0 @@
-All in One Deployment File
-==========================
-This folder contains an all in one deployment file that contains minimum dependencies
-needed to run nnvm top runtime.
\ No newline at end of file
diff --git a/nnvm/deploy/nnvm_runtime.cc b/nnvm/deploy/nnvm_runtime.cc
deleted file mode 100644
index df72af46cb28d57c2f6332c1d3d57207f1ab369e..0000000000000000000000000000000000000000
--- a/nnvm/deploy/nnvm_runtime.cc
+++ /dev/null
@@ -1,13 +0,0 @@
-/*!
- *  Copyright (c) 2017 by Contributors
- *  All in one runtime
- * \file nnvm_runtime.cc
- */
-/*
-#include "../src/core/graph.cc"
-#include "../src/core/node.cc"
-#include "../src/core/pass.cc"
-#include "../src/core/op.cc"
-#include "../src/pass/saveload_json.cc"
-#include "../src/runtime/graph_executor.cc"*/
-#include "../src/runtime/graph_runtime.cc"
diff --git a/nnvm/python/nnvm/graph.py b/nnvm/python/nnvm/graph.py
index 932d2d131550f301cf723daa5e9bf2f5716ca4bd..bfe5251e2bd89f061e015e57c9c08a8235c78c83 100644
--- a/nnvm/python/nnvm/graph.py
+++ b/nnvm/python/nnvm/graph.py
@@ -174,6 +174,10 @@ class Graph(object):
         check_call(_LIB.NNGraphGetSymbol(self.handle, ctypes.byref(shandle)))
         return Symbol(shandle)
 
+    def _tvm_graph_json(self):
+        """Get TVM graph json"""
+        return self.apply("SaveJSON").json_attr("json")
+
     @property
     def index(self):
         if not self._index:
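The _tvm_graph_json hook added above factors out the serialization step that the
deleted nnvm.runtime.create performed inline (graph.apply("SaveJSON").json_attr("json")).
A minimal sketch of exercising it, assuming a graph produced by nnvm.compiler.build:

    import nnvm.symbol as sym
    import nnvm.compiler

    x = sym.Variable("x")
    graph, lib, _ = nnvm.compiler.build(sym.exp(x), "llvm", {"x": (1, 10)})
    # Internal hook (leading underscore): equivalent to
    # graph.apply("SaveJSON").json_attr("json").
    json_str = graph._tvm_graph_json()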
diff --git a/nnvm/python/nnvm/runtime.py b/nnvm/python/nnvm/runtime.py
deleted file mode 100644
index dd9866cb4b3add79771f72d0705991cdfd40953a..0000000000000000000000000000000000000000
--- a/nnvm/python/nnvm/runtime.py
+++ /dev/null
@@ -1,111 +0,0 @@
-"""Runtime environment for nnvm relies on TVM."""
-import tvm
-from tvm.contrib import rpc
-
-class Module(object):
-    """Wrapper runtime module.
-
-    This is a thin wrapper of the underlying TVM module.
-    you can also directly call set_input, run, and get_output
-    of underlying module functions
-
-    Parameters
-    ----------
-    tvm_module : tvm.Module
-        The interal tvm module
-    """
-    def __init__(self, tvm_module):
-        self.tvm_module = tvm_module
-        self._set_input = tvm_module["set_input"]
-        self._run = tvm_module["run"]
-        self._get_output = tvm_module["get_output"]
-
-    def set_input(self, key=None, value=None, **params):
-        """Set inputs to the module via kwargs
-
-        Parameters
-        ----------
-        key : int or str
-           The input key
-
-        value : the input value.
-           The input key
-
-        params : dict of str to NDArray
-           Additonal arguments
-        """
-        if key:
-            self._set_input(key, tvm.nd.array(value))
-        for k, v in params.items():
-            self._set_input(k, tvm.nd.array(v))
-        return self
-
-    def run(self, **input_dict):
-        """Run forward execution of the graph
-
-        Parameters
-        ----------
-        input_dict: dict of str to NDArray
-            List of input values to be feed to
-        """
-        if input_dict:
-            self.set_input(**input_dict)
-        self._run()
-
-    def get_output(self, index, out):
-        """Get index-th output to out
-
-        Parameters
-        ----------
-        index : int
-            The input index
-
-        out : tvm.NDArray
-            The output array container
-        """
-        self._get_output(index, out)
-        return out
-
-    def __getitem__(self, key):
-        """Get internal module function
-
-        Parameters
-        ----------
-        key : str
-            The key to the module.
-        """
-        return self.tvm_module[key]
-
-
-
-def create(graph, libmod, ctx):
-    """Create a runtime executor module given the graph and module.
-
-    Parameters
-    ----------
-    graph : The graph to be deployed
-        The graph to be loaded.
-
-    libmod : tvm.Module
-        The module of the corresponding function
-
-    ctx : TVMContext
-        The context to deploy the module, can be local or remote.
-
-    Returns
-    -------
-    graph_module : tvm.Module
-        Runtime graph module to execute the graph.
-    """
-    json_str = graph if isinstance(graph, str) else graph.apply("SaveJSON").json_attr("json")
-    device_type = ctx.device_type
-    device_id = ctx.device_id
-    if device_type >= rpc.RPC_SESS_MASK:
-        assert libmod.type_key == "rpc"
-        assert rpc._SessTableIndex(libmod) == ctx._rpc_sess._tbl_index
-        hmod = rpc._ModuleHandle(libmod)
-        fcreate = ctx._rpc_sess.get_function("nnvm.runtime.remote_create")
-        device_type = device_type % rpc.RPC_SESS_MASK
-        return Module(fcreate(json_str, hmod, device_type, device_id))
-    fcreate = tvm.get_global_func("nnvm.runtime.create")
-    return Module(fcreate(json_str, libmod, device_type, device_id))
diff --git a/nnvm/src/compiler/graph_fuse.cc b/nnvm/src/compiler/graph_fuse.cc
index a6395760f19f641bb65999a5437f74d5a42a9d70..bbbe2d834d5361c9a83c23ffb594dc91c18f324d 100644
--- a/nnvm/src/compiler/graph_fuse.cc
+++ b/nnvm/src/compiler/graph_fuse.cc
@@ -14,11 +14,31 @@
 #include <tvm/runtime/packed_func.h>
 #include <tvm/lowered_func.h>
 #include "./compile_engine.h"
-#include "../runtime/graph_executor.h"
+#include "../../tvm/src/runtime/graph/graph_runtime.h"
 
 namespace nnvm {
 namespace compiler {
 
+using tvm::runtime::TVMOpParam;
+
+// Attribute parser: converts the tvm_op string attributes into TVMOpParam.
+inline void TVMOpParamParser(nnvm::NodeAttrs* attrs) {
+  TVMOpParam param;
+  param.Init(attrs->dict);
+  attrs->parsed = std::move(param);
+}
+
+NNVM_REGISTER_OP(tvm_op)
+.set_attr_parser(TVMOpParamParser)
+.set_num_inputs([](const NodeAttrs& attrs) {
+    const TVMOpParam& param = nnvm::get<TVMOpParam>(attrs.parsed);
+    return param.num_inputs;
+  })
+.set_num_outputs([](const NodeAttrs& attrs) {
+    const TVMOpParam& param = nnvm::get<TVMOpParam>(attrs.parsed);
+    return param.num_outputs;
+  });
+
 using namespace tvm;
 
 // The single fuse rule.
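The tvm_op registration above moved here from the deleted graph_executor.cc, and
TVMOpParam is now taken from TVM's own graph/graph_runtime.h. For illustration, a
sketch of the per-node attribute dictionary the parser consumes; the field names
follow the TVMOpParam declaration, while the func_name value is a hypothetical
fused-kernel name:

    # Hypothetical attrs dict carried by one fused tvm_op node; TVMOpParamParser
    # above reads these string fields into a TVMOpParam.
    tvm_op_attrs = {
        "func_name": "fuse_exp",   # placeholder: name of the compiled PackedFunc
        "num_inputs": "1",
        "num_outputs": "1",
        "flatten_data": "1",       # nonzero: collapse tensors to 1-D before the call
    }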
diff --git a/nnvm/src/runtime/graph_executor.cc b/nnvm/src/runtime/graph_executor.cc
deleted file mode 100644
index bf72dc56d53e6a0bbc6f3ebe7b221e1362934e88..0000000000000000000000000000000000000000
--- a/nnvm/src/runtime/graph_executor.cc
+++ /dev/null
@@ -1,344 +0,0 @@
-/*!
- *  Copyright (c) 2017 by Contributors
- * \file graph_executor.cc
- */
-#include <dmlc/memory_io.h>
-#include <tvm/runtime/registry.h>
-#include <numeric>
-#include "./graph_executor.h"
-
-namespace nnvm {
-namespace runtime {
-
-/*! \brief macro to do C API call */
-#define TVM_CCALL(func)                                            \
-  {                                                                \
-    int ret = (func);                                              \
-    CHECK_EQ(ret, 0)                                               \
-        << TVMGetLastError();                                      \
-  }
-
-using ::tvm::runtime::PackedFunc;
-using ::tvm::runtime::TVMArgs;
-using ::tvm::runtime::TVMRetValue;
-
-PackedFunc GraphExecutor::GetFunction(
-    const std::string& name,
-    const std::shared_ptr<ModuleNode>& sptr_to_self) {
-  // return member functions during query.
-  if (name == "set_input") {
-    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
-        if (args[0].type_code() == kStr) {
-          this->SetInput(this->GetInputIndex(args[0]), args[1]);
-        } else {
-          this->SetInput(args[0], args[1]);
-        }
-      });
-  } else if (name == "get_output") {
-    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
-        this->GetOutput(args[0], args[1]);
-      });
-  } else if (name == "run") {
-    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
-        this->Run();
-      });
-  } else if (name == "load_params") {
-    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
-        this->LoadParams(args[0].operator std::string());
-      });
-  } else {
-    return PackedFunc();
-  }
-}
-
-GraphExecutor::~GraphExecutor() {
-  for (DLTensor* t : storage_pool_) {
-    TVM_CCALL(TVMArrayFree(t));
-  }
-}
-
-void GraphExecutor::Run() {
-  // setup the array and requirements.
-  for (size_t i = 0; i < op_execs_.size(); ++i) {
-    if (op_execs_[i]) op_execs_[i]();
-  }
-}
-
-void GraphExecutor::Init(Graph graph,
-                         tvm::runtime::Module module,
-                         TVMContext ctx) {
-  graph_ = std::move(graph);
-  module_ = std::move(module);
-  ctx_ = ctx;
-  this->SetupStorage();
-  this->SetupOpExecs();
-}
-
-int GraphExecutor::GetInputIndex(const std::string& name) {
-  const auto& idx = graph_.indexed_graph();
-  for (size_t i = 0; i< idx.input_nodes().size(); ++i) {
-    if (idx[idx.input_nodes()[i]].source->attrs.name == name) {
-      return static_cast<int>(i);
-    }
-  }
-  LOG(FATAL) << "cannot find " << name << " among input";
-  return -1;
-}
-
-void GraphExecutor::SetInput(int index, DLTensor* data_in) {
-  const auto& idx = graph_.indexed_graph();
-  CHECK_LT(static_cast<size_t>(index), idx.input_nodes().size());
-  uint32_t eid = idx.entry_id(idx.input_nodes()[index], 0);
-  TVM_CCALL(TVMArrayCopyFromTo(data_in, &data_entry_[eid], nullptr));
-}
-
-void GraphExecutor::GetOutput(int index, DLTensor* data_out) {
-  const auto& idx = graph_.indexed_graph();
-  CHECK_LT(static_cast<size_t>(index), idx.outputs().size());
-  uint32_t eid = idx.entry_id(idx.outputs()[index]);
-  TVM_CCALL(TVMArrayCopyFromTo(&data_entry_[eid], data_out, nullptr));
-}
-
-bool LoadDLTensor(dmlc::Stream* strm, DLTensor* tensor) {
-  uint64_t header, reserved;
-  CHECK(strm->Read(&header, sizeof(header)))
-      << "Invalid DLTensor file format";
-  CHECK(strm->Read(&reserved, sizeof(reserved)))
-      << "Invalid DLTensor file format";
-  CHECK(header == kTVMNDArrayMagic)
-      << "Invalid DLTensor file format";
-
-  CHECK(strm->Read(&tensor->ctx, sizeof(tensor->ctx)))
-      << "Invalid DLTensor file format";
-  CHECK(strm->Read(&tensor->ndim, sizeof(tensor->ndim)))
-      << "Invalid DLTensor file format";
-  CHECK(strm->Read(&tensor->dtype, sizeof(tensor->dtype)))
-      << "Invalid DLTensor file format";
-
-  int ndim = tensor->ndim;
-  CHECK(strm->Read(tensor->shape, sizeof(int64_t) * ndim))
-      << "Invalid DLTensor file format";
-
-  int64_t size = 1;
-  int type_size = tensor->dtype.bits / 8;
-  for (int i = 0; i < ndim; ++i) {
-    size *= tensor->shape[i];
-  }
-  int64_t data_byte_size;
-  CHECK(strm->Read(&data_byte_size, sizeof(data_byte_size)))
-      << "Invalid DLTensor file format";
-  CHECK(data_byte_size == type_size * size)
-      << "Invalid DLTensor file format";
-  CHECK(strm->Read(tensor->data, type_size * size))
-      << "Invalid DLTensor file format";
-  return true;
-}
-
-void GraphExecutor::LoadParams(dmlc::Stream* strm) {
-  uint64_t header, reserved;
-  CHECK(strm->Read(&header))
-      << "Invalid parameters file format";
-  CHECK(header == kTVMNDArrayListMagic)
-      << "Invalid parameters file format";
-  CHECK(strm->Read(&reserved))
-      << "Invalid parameters file format";
-
-  std::vector<std::string> names;
-  CHECK(strm->Read(&names))
-      << "Invalid parameters file format";
-
-  std::unordered_map<std::string, size_t> name_eid;
-  const auto& idx = graph_.indexed_graph();
-  for (int nid : idx.input_nodes()) {
-    name_eid.emplace(idx[nid].source->attrs.name, idx.entry_id(nid, 0));
-  }
-
-  uint64_t sz;
-  strm->Read(&sz, sizeof(sz));
-  size_t size = static_cast<size_t>(sz);
-  CHECK(size == names.size())
-      << "Invalid parameters file format";
-  for (size_t i = 0; i < size; ++i) {
-    auto iter = name_eid.find(names[i]);
-    CHECK(iter != name_eid.end());
-    CHECK(LoadDLTensor(strm, &data_entry_[iter->second]))
-        << "Invalid parameters file format";
-  }
-}
-
-void GraphExecutor::LoadParams(const std::string& param_blob) {
-  dmlc::MemoryStringStream strm(const_cast<std::string*>(&param_blob));
-  this->LoadParams(&strm);
-}
-
-void GraphExecutor::SetupStorage() {
-  const auto& idx = graph_.indexed_graph();
-  // Grab saved optimization plan from graph.
-  auto vstorage = graph_.MoveCopyAttr<StorageVector>("storage_id");
-  std::vector<TVMType> vtype;
-  for (const std::string& s_type :
-           graph_.GetAttr<std::vector<std::string> >("dltype")) {
-    vtype.push_back(tvm::runtime::String2TVMType(s_type));
-  }
-  data_shape_ = graph_.GetAttr<ShapeVector>("shape");
-  data_entry_.resize(idx.num_node_entries());
-  // Find the maximum space size.
-  int max_id = 0;
-  for (size_t i = 0; i < data_shape_.size(); ++i) {
-    max_id = std::max(vstorage[i] + 1, max_id);
-  }
-  for (const auto& e : idx.input_nodes()) {
-    vstorage[idx.entry_id(e, 0)] = max_id++;
-  }
-  // size of each storage pool entry
-  std::vector<size_t> pool_entry_bytes;
-  // Find the maximum space size.
-  for (size_t i = 0; i < data_shape_.size(); ++i) {
-    int storage_id = vstorage[i];
-    size_t size = data_shape_[i].Size();
-    CHECK_GE(storage_id, 0) << "Do not support runtime shape op";
-    DLDataType t = vtype[i];
-    size_t bits = t.bits * t.lanes;
-    CHECK_EQ(bits % 8U, 0U);
-    size_t bytes = (bits / 8U) * size;
-
-    size_t sid = static_cast<size_t>(storage_id);
-    if (sid >= pool_entry_bytes.size()) {
-      pool_entry_bytes.resize(sid + 1, 0);
-    }
-    pool_entry_bytes[sid] = std::max(pool_entry_bytes[sid], bytes);
-  }
-  // Allocate the space.
-  for (size_t i = 0; i < pool_entry_bytes.size(); ++i) {
-    TShape shape{static_cast<int64_t>(pool_entry_bytes[i] + 3) / 4};
-    DLTensor* tensor;
-    TVM_CCALL(TVMArrayAlloc(
-        shape.data(), 1, kFloat, 32, 1, ctx_.device_type, ctx_.device_id, &tensor));
-    storage_pool_.push_back(tensor);
-  }
-  // Assign the pooled entries.
-  for (size_t i = 0; i < data_entry_.size(); ++i) {
-    int storage_id = vstorage[i];
-    data_entry_[i] = *storage_pool_[storage_id];
-    data_entry_[i].shape = const_cast<int64_t*>(data_shape_[i].data());
-    data_entry_[i].ndim = data_shape_[i].ndim();
-    data_entry_[i].dtype = vtype[i];
-  }
-}
-
-void GraphExecutor::SetupOpExecs() {
-  static const nnvm::Op* tvm_op = nnvm::Op::Get("tvm_op");
-  const auto& idx = graph_.indexed_graph();
-  op_execs_.resize(idx.num_nodes());
-  // setup the array and requirements.
-  for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
-    const auto& inode = idx[nid];
-    if (inode.source->is_variable()) continue;
-    std::vector<DLTensor> args;
-    for (const auto& e : inode.inputs) {
-      args.push_back(data_entry_[idx.entry_id(e)]);
-    }
-    for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
-      uint32_t eid = idx.entry_id(nid, index);
-      args.push_back(data_entry_[eid]);
-    }
-    CHECK_EQ(inode.source->op(), tvm_op)
-        << "transform the graph to tvm op";
-    op_execs_[nid] = CreateTVMOp(
-        inode.source->attrs, args, inode.inputs.size());
-  }
-}
-
-std::function<void()> GraphExecutor::CreateTVMOp(
-    const nnvm::NodeAttrs& attrs,
-    const std::vector<DLTensor>& args,
-    size_t num_inputs) {
-  struct OpArgs {
-    std::vector<DLTensor> args;
-    std::vector<TVMValue> arg_values;
-    std::vector<int> arg_tcodes;
-    std::vector<int64_t> shape_data;
-  };
-  const TVMOpParam& param = nnvm::get<TVMOpParam>(attrs.parsed);
-  std::shared_ptr<OpArgs> arg_ptr = std::make_shared<OpArgs>();
-  // setup address.
-  arg_ptr->args = std::move(args);
-  if (param.flatten_data) {
-    arg_ptr->shape_data.resize(arg_ptr->args.size());
-  }
-  for (size_t i = 0; i < arg_ptr->args.size(); ++i) {
-    TVMValue v;
-    DLTensor* t = &(arg_ptr->args[i]);
-    v.v_handle = t;
-    arg_ptr->arg_values.push_back(v);
-    arg_ptr->arg_tcodes.push_back(kArrayHandle);
-    if (param.flatten_data) {
-      arg_ptr->shape_data[i] = std::accumulate(
-          t->shape, t->shape + t->ndim, 1, std::multiplies<int64_t>());
-      t->ndim = 1;
-      t->shape = &(arg_ptr->shape_data[i]);
-    }
-  }
-  // get compiled function from module.
-  tvm::runtime::PackedFunc pf = module_.GetFunction(param.func_name, false);
-  CHECK(pf != nullptr) << "no such function in module: " << param.func_name;
-  auto fexec = [arg_ptr, pf] () {
-    TVMRetValue rv;
-    TVMArgs targs(arg_ptr->arg_values.data(),
-                  arg_ptr->arg_tcodes.data(),
-                  static_cast<int>(arg_ptr->arg_values.size()));
-    pf.CallPacked(targs, &rv);
-  };
-  return fexec;
-}
-
-// parser
-inline void TVMOpParamParser(nnvm::NodeAttrs* attrs) {
-  TVMOpParam param;
-  param.Init(attrs->dict);
-  attrs->parsed = std::move(param);
-}
-
-
-NNVM_REGISTER_OP(tvm_op)
-.set_attr_parser(TVMOpParamParser)
-.set_num_inputs([](const NodeAttrs& attrs) {
-    const TVMOpParam& param = nnvm::get<TVMOpParam>(attrs.parsed);
-    return param.num_inputs;
-  })
-.set_num_outputs([](const NodeAttrs& attrs) {
-    const TVMOpParam& param = nnvm::get<TVMOpParam>(attrs.parsed);
-    return param.num_outputs;
-  });
-
-tvm::runtime::Module RuntimeCreate(std::string sym_json,
-                                   tvm::runtime::Module m,
-                                   int device_type,
-                                   int device_id) {
-  TVMContext ctx;
-  ctx.device_type = static_cast<DLDeviceType>(device_type);
-  ctx.device_id   = device_id;
-  // load graph from json string
-  nnvm::Graph g;
-  g.attrs["json"] = std::make_shared<nnvm::any>(sym_json);
-  g = nnvm::ApplyPass(std::move(g), "LoadJSON");
-  std::shared_ptr<GraphExecutor> exec = std::make_shared<GraphExecutor>();
-  exec->Init(g, m, ctx);
-  return tvm::runtime::Module(exec);
-}
-
-TVM_REGISTER_GLOBAL("nnvm.runtime.createx")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
-    *rv = RuntimeCreate(args[0], args[1], args[2], args[3]);
-  });
-
-TVM_REGISTER_GLOBAL("nnvm.runtime.remote_createx")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
-    void* mhandle = args[1];
-    *rv = RuntimeCreate(args[0],
-                        *static_cast<tvm::runtime::Module*>(mhandle),
-                        args[2], args[3]);
-  });
-
-}  // namespace runtime
-}  // namespace nnvm
diff --git a/nnvm/src/runtime/graph_executor.h b/nnvm/src/runtime/graph_executor.h
deleted file mode 100644
index 21524061065775b327385a3f62a1444a9d7aaf1a..0000000000000000000000000000000000000000
--- a/nnvm/src/runtime/graph_executor.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*!
- *  Copyright (c) 2017 by Contributors
- *
- *  Runtime module for graph deployment.
- *
- * \file graph_executor.h
- */
-#ifndef NNVM_RUNTIME_GRAPH_EXECUTOR_H_
-#define NNVM_RUNTIME_GRAPH_EXECUTOR_H_
-
-#include <dmlc/io.h>
-#include <tvm/runtime/packed_func.h>
-#include <tvm/runtime/module.h>
-#include <nnvm/graph.h>
-#include <nnvm/graph_attr_types.h>
-#include <nnvm/tuple.h>
-#include <nnvm/pass.h>
-#include <vector>
-#include <string>
-#include "./graph_runtime.h"
-
-namespace nnvm {
-namespace runtime {
-
-/*!
- * \brief TVM Graph Executor.
- *  This is a minimum graph executor, embedded in TVM runtime
- *  without any framework dependency.
- *
- *  This runtime can be acccesibly in various language via
- *  TVM runtime PackedFunc API.
- */
-class GraphExecutor : public ::tvm::runtime::ModuleNode {
- public:
-  /*!
-   * \return The type key of the executor.
-   */
-  const char* type_key() const final {
-    return "GraphExecutor";
-  }
-  /*!
-   * \brief Get member function to front-end
-   * \param name The name of the function.
-   * \param sptr_to_self The pointer to the module node.
-   * \return The corresponding member function.
-   */
-  tvm::runtime::PackedFunc GetFunction(
-      const std::string& name,
-      const std::shared_ptr<ModuleNode>& sptr_to_self) final;
-  /*! \brief destructor */
-  ~GraphExecutor();
-  /*!
-   * \brief Initialize the graph executor with graph and context.
-   * \param graph The execution graph.
-   * \param module The module containing the compiled functions.
-   * \param ctx The context where the graph should sit on
-   */
-  void Init(Graph graph,
-            tvm::runtime::Module module,
-            TVMContext ctx);
-  /*!
-   * \brief Get the input index given the name of input.
-   * \param name The name of the input.
-   * \return The index of input.
-   */
-  int GetInputIndex(const std::string& name);
-  /*!
-   * \brief set index-th input to the graph.
-   * \param index The input index.
-   * \param data The input data.
-   */
-  void SetInput(int index, DLTensor* data);
-  /*!
-   * \brief Copy index-th output to data_out.
-   * \param index The output index.
-   * \param data_out the output data.
-   */
-  void GetOutput(int index, DLTensor* data_out);
-  /*!
-   * \brief Load parameters from binary stream
-   * \param strm The input stream.
-   */
-  void LoadParams(dmlc::Stream* strm);
-  /*!
-   * \brief Load parameters from parameter blob.
-   * \param param_blob A binary blob of parameter.
-   */
-  void LoadParams(const std::string& param_blob);
-  /*!
-   * \brief Execute the graph, update output.
-   */
-  void Run();
-
- private:
-  /*! \brief Setup the temporal storage */
-  void SetupStorage();
-  /*! \brief Setup the executors */
-  void SetupOpExecs();
-  /*!
-   * \brief Create a executtion function given input.
-   * \param attrs The node attributes
-   * \param args The arguments to the functor, including inputs and outputs.
-   * \param num_inputs Number of inputs
-   * \return The created executor.
-   */
-  std::function<void()> CreateTVMOp(const NodeAttrs& attrs,
-                                    const std::vector<DLTensor>& args,
-                                    size_t num_inputs);
-  /*! \brief The graph */
-  Graph graph_;
-  /*! \brief The code module */
-  tvm::runtime::Module module_;
-  /*! \brief execution context */
-  TVMContext ctx_;
-  /*! \brief common storage pool */
-  std::vector<DLTensor*> storage_pool_;
-  /*! \brief data shape of each node entry */
-  std::vector<TShape> data_shape_;
-  /*! \brief data entry of each node */
-  std::vector<DLTensor> data_entry_;
-  /*! \brief operator on each node */
-  std::vector<std::function<void()> > op_execs_;
-};
-
-}  // namespace runtime
-}  // namespace nnvm
-
-#endif  // NNVM_RUNTIME_GRAPH_EXECUTOR_H_
diff --git a/nnvm/src/runtime/graph_runtime.cc b/nnvm/src/runtime/graph_runtime.cc
deleted file mode 100644
index d9faeb45f78316d50e5bf1c5257cf5316dd4bab3..0000000000000000000000000000000000000000
--- a/nnvm/src/runtime/graph_runtime.cc
+++ /dev/null
@@ -1,563 +0,0 @@
-/*!
- *  Copyright (c) 2017 by Contributors
- * \file graph_executor.cc
- */
-#include <dmlc/memory_io.h>
-#include <dmlc/json.h>
-#include <tvm/runtime/packed_func.h>
-#include <tvm/runtime/registry.h>
-#include <numeric>
-#include "./graph_runtime.h"
-
-namespace nnvm {
-namespace runtime {
-
-/*! \brief macro to do C API call */
-#define TVM_CCALL(func)                                            \
-  {                                                                \
-    int ret = (func);                                              \
-    CHECK_EQ(ret, 0)                                               \
-        << TVMGetLastError();                                      \
-  }
-
-using ::tvm::runtime::PackedFunc;
-using ::tvm::runtime::TVMArgs;
-using ::tvm::runtime::TVMRetValue;
-
-/*!
- * \brief Minimum graph structure for deployment
- *  This is a minimum graph executor, embedded in TVM runtime
- *  without any framework dependency.
- *
- *  This runtime can be acccesibly in various language via
- *  TVM runtime PackedFunc API.
- */
-class GraphRuntime : public ::tvm::runtime::ModuleNode {
- public:
-  ~GraphRuntime() {
-    for (DLTensor* t : storage_pool_) {
-      TVM_CCALL(TVMArrayFree(t));
-    }
-  }
-  /*!
-   * \brief Get member function to front-end
-   * \param name The name of the function.
-   * \param sptr_to_self The pointer to the module node.
-   * \return The corresponding member function.
-   */
-  tvm::runtime::PackedFunc GetFunction(
-      const std::string& name,
-      const std::shared_ptr<ModuleNode>& sptr_to_self) final;
-
-  /*!
-   * \return The type key of the executor.
-   */
-  const char* type_key() const final {
-    return "GraphRuntime";
-  }
-  void Run() {
-    // setup the array and requirements.
-    for (size_t i = 0; i < op_execs_.size(); ++i) {
-      if (op_execs_[i]) op_execs_[i]();
-    }
-  }
-  /*!
-   * \brief Initialize the graph executor with graph and context.
-   * \param graph The execution graph.
-   * \param module The module containing the compiled functions.
-   * \param ctx The context where the graph should sit on
-   */
-  void Init(const std::string& graph_json,
-            tvm::runtime::Module module,
-            TVMContext ctx) {
-    std::istringstream is(graph_json);
-    dmlc::JSONReader reader(&is);
-    this->Load(&reader);
-    module_ = module;
-    ctx_ = ctx;
-    this->SetupStorage();
-    this->SetupOpExecs();
-  }
-  /*!
-   * \brief Get the input index given the name of input.
-   * \param name The name of the input.
-   * \return The index of input.
-   */
-  int GetInputIndex(const std::string& name) {
-    for (size_t i = 0; i< input_nodes_.size(); ++i) {
-      uint32_t nid = input_nodes_[i];
-      if (nodes_[nid].name == name) {
-        return static_cast<int>(i);
-      }
-    }
-    LOG(FATAL) << "cannot find " << name << " among input";
-    return -1;
-  }
-  /*!
-   * \brief set index-th input to the graph.
-   * \param index The input index.
-   * \param data The input data.
-   */
-  void SetInput(int index, DLTensor* data_in) {
-    CHECK_LT(static_cast<size_t>(index), input_nodes_.size());
-    uint32_t eid = this->entry_id(input_nodes_[index], 0);
-    TVM_CCALL(TVMArrayCopyFromTo(data_in, &data_entry_[eid], nullptr));
-  }
-  /*!
-   * \brief Copy index-th output to data_out.
-   * \param index The output index.
-   * \param data_out the output data.
-   */
-  void GetOutput(int index, DLTensor* data_out) {
-    CHECK_LT(static_cast<size_t>(index), outputs_.size());
-    uint32_t eid = this->entry_id(outputs_[index]);
-    TVM_CCALL(TVMArrayCopyFromTo(&data_entry_[eid], data_out, nullptr));
-  }
-
-  /*!
-   * \brief Load parameters from binary stream
-   * \param strm The input stream.
-   */
-  void LoadParams(dmlc::Stream* strm);
-  /*!
-   * \brief Load parameters from parameter blob.
-   * \param param_blob A binary blob of parameter.
-   */
-  void LoadParams(const std::string& param_blob) {
-    dmlc::MemoryStringStream strm(const_cast<std::string*>(&param_blob));
-    this->LoadParams(&strm);
-  }
-
- private:
-  // Node entry
-  struct NodeEntry {
-    uint32_t node_id;
-    uint32_t index;
-    uint32_t version;
-    // JSON Loader
-    void Load(dmlc::JSONReader *reader) {
-      reader->BeginArray();
-      CHECK(reader->NextArrayItem()) << "invalid json format";
-      reader->Read(&node_id);
-      CHECK(reader->NextArrayItem()) << "invalid json format";
-      reader->Read(&index);
-      if (reader->NextArrayItem()) {
-        reader->Read(&version);
-        CHECK(!reader->NextArrayItem()) << "invalid json format";
-      } else {
-        version = 0;
-      }
-    }
-  };
-  // Node
-  struct Node {
-    // operator type in string
-    std::string op_type;
-    // name of the op
-    std::string name;
-    // parameters
-    TVMOpParam param;
-    // inputs
-    std::vector<NodeEntry> inputs;
-    // control deps
-    std::vector<uint32_t> control_deps;
-    // JSON Loader
-    void Load(dmlc::JSONReader *reader) {
-      reader->BeginObject();
-      std::unordered_map<std::string, std::string> dict;
-      int bitmask = 0;
-      std::string key;
-      while (reader->NextObjectItem(&key)) {
-        if (key == "op") {
-          reader->Read(&op_type);
-          bitmask |= 1;
-        } else if (key == "name") {
-          reader->Read(&name);
-          bitmask |= 2;
-        } else if (key == "inputs") {
-          reader->Read(&inputs);
-          bitmask |= 4;
-        } else if (key == "attr") {
-          reader->Read(&dict);
-          param.Init(dict);
-        } else if (key == "control_deps") {
-          reader->Read(&control_deps);
-        } else {
-          LOG(FATAL) << "do not support key" << key;
-        }
-      }
-      CHECK_EQ(bitmask, 1|2|4) << "invalid format";
-    }
-  };
-  struct GraphAttr {
-    size_t storage_num_not_alloctaed{0};
-    std::vector<int> storage_id;
-    std::vector<std::string> dltype;
-    std::vector<std::vector<int64_t> > shape;
-    // The graph attribute fields.
-    void Load(dmlc::JSONReader *reader) {
-      reader->BeginObject();
-      int bitmask = 0;
-      std::string key, type;
-      while (reader->NextObjectItem(&key)) {
-        if (key == "dltype") {
-          reader->BeginArray();
-          CHECK(reader->NextArrayItem());
-          reader->Read(&type);
-          CHECK_EQ(type, "list_str");
-          CHECK(reader->NextArrayItem());
-          reader->Read(&dltype);
-          CHECK(!reader->NextArrayItem());
-          bitmask |= 1;
-        } else if (key == "storage_id") {
-          reader->BeginArray();
-          CHECK(reader->NextArrayItem());
-          reader->Read(&type);
-          CHECK_EQ(type, "list_int");
-          CHECK(reader->NextArrayItem());
-          reader->Read(&storage_id);
-          CHECK(!reader->NextArrayItem());
-          bitmask |= 2;
-        } else if (key == "shape") {
-          reader->BeginArray();
-          CHECK(reader->NextArrayItem());
-          reader->Read(&type);
-          CHECK_EQ(type, "list_shape");
-          CHECK(reader->NextArrayItem());
-          reader->Read(&shape);
-          CHECK(!reader->NextArrayItem());
-          bitmask |= 4;
-        } else {
-          reader->BeginArray();
-          CHECK(reader->NextArrayItem());
-          reader->Read(&type);
-          if (type == "list_int") {
-            CHECK(reader->NextArrayItem());
-            std::vector<int> temp;
-            reader->Read(&temp);
-          } else if (type == "size_t") {
-            CHECK(reader->NextArrayItem());
-            size_t temp;
-            reader->Read(&temp);
-          } else {
-            LOG(FATAL) << "cannot skip graph attr " << key;
-          }
-          CHECK(!reader->NextArrayItem());
-        }
-      }
-      CHECK_EQ(bitmask, 1|2|4) << "invalid format";
-    }
-  };
-  // The graph attribute fields.
-  void Load(dmlc::JSONReader *reader) {
-      reader->BeginObject();
-      int bitmask = 0;
-      std::string key;
-      while (reader->NextObjectItem(&key)) {
-        if (key == "nodes") {
-          reader->Read(&nodes_);
-          bitmask |= 1;
-        } else if (key == "arg_nodes") {
-          reader->Read(&input_nodes_);
-          bitmask |= 2;
-        } else if (key == "node_row_ptr") {
-          reader->Read(&node_row_ptr_);
-          bitmask |= 4;
-        } else if (key == "heads") {
-          reader->Read(&outputs_);
-          bitmask |= 8;
-        } else if (key == "attrs") {
-          reader->Read(&attrs_);
-          bitmask |= 16;
-        }
-      }
-      CHECK_EQ(bitmask, 1|2|4|8|16) << "invalid format";
-  }
-  bool LoadDLTensor(dmlc::Stream* strm, DLTensor* tensor);
-  /*! \brief Setup the temporal storage */
-  void SetupStorage();
-  /*! \brief Setup the executors */
-  void SetupOpExecs();
-  /*!
-   * \brief Create a executtion function given input.
-   * \param attrs The node attributes
-   * \param args The arguments to the functor, including inputs and outputs.
-   * \param num_inputs Number of inputs
-   * \return The created executor.
-   */
-  std::function<void()> CreateTVMOp(const TVMOpParam& attrs,
-                                    const std::vector<DLTensor>& args,
-                                    size_t num_inputs);
-  // Get node entry index.
-  uint32_t entry_id(uint32_t nid, uint32_t index) const {
-    return node_row_ptr_[nid] + index;
-  }
-  // Get node entry index.
-  uint32_t entry_id(const NodeEntry& e) const {
-    return entry_id(e.node_id, e.index);
-  }
-  // Number of node entries
-  uint32_t num_node_entries() const {
-    return node_row_ptr_.back();
-  }
-  // Number of nodes.
-  uint32_t num_nodes() const {
-    return static_cast<uint32_t>(nodes_.size());
-  }
-  // The graph nodes.
-  std::vector<Node> nodes_;
-  // The argument nodes.
-  std::vector<uint32_t> input_nodes_;
-  // used or quick entry indexing
-  std::vector<uint32_t> node_row_ptr_;
-  // output entries
-  std::vector<NodeEntry> outputs_;
-  // Additional graph attributes
-  GraphAttr attrs_;
-  /*! \brief The code module */
-  tvm::runtime::Module module_;
-  /*! \brief execution context */
-  TVMContext ctx_;
-  /*! \brief common storage pool */
-  std::vector<DLTensor*> storage_pool_;
-  /*! \brief data entry of each node */
-  std::vector<DLTensor> data_entry_;
-  /*! \brief operator on each node */
-  std::vector<std::function<void()> > op_execs_;
-};
-
-DMLC_REGISTER_PARAMETER(TVMOpParam);
-
-bool GraphRuntime::LoadDLTensor(dmlc::Stream* strm, DLTensor* tensor) {
-  uint64_t header, reserved;
-  CHECK(strm->Read(&header, sizeof(header)))
-      << "Invalid DLTensor file format";
-  CHECK(strm->Read(&reserved, sizeof(reserved)))
-      << "Invalid DLTensor file format";
-  CHECK(header == kTVMNDArrayMagic)
-      << "Invalid DLTensor file format";
-
-  CHECK(strm->Read(&tensor->ctx, sizeof(tensor->ctx)))
-      << "Invalid DLTensor file format";
-  CHECK(strm->Read(&tensor->ndim, sizeof(tensor->ndim)))
-      << "Invalid DLTensor file format";
-  CHECK(strm->Read(&tensor->dtype, sizeof(tensor->dtype)))
-      << "Invalid DLTensor file format";
-
-  int ndim = tensor->ndim;
-  CHECK(strm->Read(tensor->shape, sizeof(int64_t) * ndim))
-      << "Invalid DLTensor file format";
-
-  int64_t size = 1;
-  int type_size = tensor->dtype.bits / 8;
-  for (int i = 0; i < ndim; ++i) {
-    size *= tensor->shape[i];
-  }
-  int64_t data_byte_size;
-  CHECK(strm->Read(&data_byte_size, sizeof(data_byte_size)))
-      << "Invalid DLTensor file format";
-  CHECK(data_byte_size == type_size * size)
-      << "Invalid DLTensor file format";
-  CHECK(strm->Read(tensor->data, type_size * size))
-      << "Invalid DLTensor file format";
-  return true;
-}
-
-void GraphRuntime::LoadParams(dmlc::Stream* strm) {
-  uint64_t header, reserved;
-  CHECK(strm->Read(&header))
-      << "Invalid parameters file format";
-  CHECK(header == kTVMNDArrayListMagic)
-      << "Invalid parameters file format";
-  CHECK(strm->Read(&reserved))
-      << "Invalid parameters file format";
-
-  std::vector<std::string> names;
-  CHECK(strm->Read(&names))
-      << "Invalid parameters file format";
-  uint64_t sz;
-  strm->Read(&sz, sizeof(sz));
-  size_t size = static_cast<size_t>(sz);
-
-  CHECK(size == names.size())
-      << "Invalid parameters file format";
-
-  for (size_t i = 0; i < size; ++i) {
-    uint32_t in_idx = GetInputIndex(names[i]);
-    CHECK(LoadDLTensor(strm, &data_entry_[this->entry_id(input_nodes_[in_idx], 0)]))
-        << "Invalid parameters file format";
-  }
-}
-
-void GraphRuntime::SetupStorage() {
-  // Grab saved optimization plan from graph.
-  std::vector<TVMType> vtype;
-  for (const std::string& s_type : attrs_.dltype) {
-    vtype.push_back(tvm::runtime::String2TVMType(s_type));
-  }
-  data_entry_.resize(num_node_entries());
-  // Find the maximum space size.
-  int max_id = 0;
-  for (size_t i = 0; i < attrs_.shape.size(); ++i) {
-    max_id = std::max(attrs_.storage_id[i] + 1, max_id);
-  }
-  for (uint32_t nid : input_nodes_) {
-    attrs_.storage_id[this->entry_id(nid, 0)] = max_id++;
-  }
-  // size of each storage pool entry
-  std::vector<size_t> pool_entry_bytes;
-  // Find the maximum space size.
-  for (size_t i = 0; i < attrs_.shape.size(); ++i) {
-    int storage_id = attrs_.storage_id[i];
-    size_t size = 1;
-    for (int64_t sz : attrs_.shape[i]) {
-      size *= static_cast<size_t>(sz);
-    }
-    CHECK_GE(storage_id, 0) << "Do not support runtime shape op";
-    DLDataType t = vtype[i];
-    size_t bits = t.bits * t.lanes;
-    CHECK_EQ(bits % 8U, 0U);
-    size_t bytes = (bits / 8U) * size;
-
-    size_t sid = static_cast<size_t>(storage_id);
-    if (sid >= pool_entry_bytes.size()) {
-      pool_entry_bytes.resize(sid + 1, 0);
-    }
-    pool_entry_bytes[sid] = std::max(pool_entry_bytes[sid], bytes);
-  }
-  // Allocate the space.
-  for (size_t i = 0; i < pool_entry_bytes.size(); ++i) {
-    int64_t shape[] = {static_cast<int64_t>(pool_entry_bytes[i] + 3) / 4};
-    DLTensor* tensor;
-    TVM_CCALL(TVMArrayAlloc(
-        shape, 1, kFloat, 32, 1, ctx_.device_type, ctx_.device_id, &tensor));
-    storage_pool_.push_back(tensor);
-  }
-  // Assign the pooled entries.
-  for (size_t i = 0; i < data_entry_.size(); ++i) {
-    int storage_id = attrs_.storage_id[i];
-    data_entry_[i] = *storage_pool_[storage_id];
-    data_entry_[i].shape = const_cast<int64_t*>(attrs_.shape[i].data());
-    data_entry_[i].ndim = static_cast<int>(attrs_.shape[i].size());
-    data_entry_[i].dtype = vtype[i];
-  }
-}
-
-/*! \brief Setup the executors */
-void GraphRuntime::SetupOpExecs() {
-  op_execs_.resize(this->num_nodes());
-  // setup the array and requirements.
-  for (uint32_t nid = 0; nid < this->num_nodes(); ++nid) {
-    const auto& inode = nodes_[nid];
-    if (inode.op_type == "null") continue;
-    std::vector<DLTensor> args;
-    for (const auto& e : inode.inputs) {
-      args.push_back(data_entry_[this->entry_id(e)]);
-    }
-    for (uint32_t index = 0; index < inode.param.num_outputs; ++index) {
-      uint32_t eid = this->entry_id(nid, index);
-      args.push_back(data_entry_[eid]);
-    }
-    CHECK_EQ(inode.op_type, "tvm_op")
-        << "transform the graph to tvm op5A";
-    op_execs_[nid] = CreateTVMOp(inode.param, args, inode.inputs.size());
-  }
-}
-
-std::function<void()> GraphRuntime::CreateTVMOp(
-    const TVMOpParam& param,
-    const std::vector<DLTensor>& args,
-    size_t num_inputs) {
-  struct OpArgs {
-    std::vector<DLTensor> args;
-    std::vector<TVMValue> arg_values;
-    std::vector<int> arg_tcodes;
-    std::vector<int64_t> shape_data;
-  };
-  std::shared_ptr<OpArgs> arg_ptr = std::make_shared<OpArgs>();
-  // setup address.
-  arg_ptr->args = std::move(args);
-  if (param.flatten_data) {
-    arg_ptr->shape_data.resize(arg_ptr->args.size());
-  }
-  for (size_t i = 0; i < arg_ptr->args.size(); ++i) {
-    TVMValue v;
-    DLTensor* t = &(arg_ptr->args[i]);
-    v.v_handle = t;
-    arg_ptr->arg_values.push_back(v);
-    arg_ptr->arg_tcodes.push_back(kArrayHandle);
-    if (param.flatten_data) {
-      arg_ptr->shape_data[i] = std::accumulate(
-          t->shape, t->shape + t->ndim, 1, std::multiplies<int64_t>());
-      t->ndim = 1;
-      t->shape = &(arg_ptr->shape_data[i]);
-    }
-  }
-  // get compiled function from module.
-  tvm::runtime::PackedFunc pf = module_.GetFunction(param.func_name, false);
-  CHECK(pf != nullptr) << "no such function in module: " << param.func_name;
-  auto fexec = [arg_ptr, pf] () {
-    TVMRetValue rv;
-    TVMArgs targs(arg_ptr->arg_values.data(),
-                  arg_ptr->arg_tcodes.data(),
-                  static_cast<int>(arg_ptr->arg_values.size()));
-    pf.CallPacked(targs, &rv);
-  };
-  return fexec;
-}
-
-PackedFunc GraphRuntime::GetFunction(
-    const std::string& name,
-    const std::shared_ptr<ModuleNode>& sptr_to_self) {
-  // return member functions during query.
-  if (name == "set_input") {
-    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
-        if (args[0].type_code() == kStr) {
-          this->SetInput(this->GetInputIndex(args[0]), args[1]);
-        } else {
-          this->SetInput(args[0], args[1]);
-        }
-      });
-  } else if (name == "get_output") {
-    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
-        this->GetOutput(args[0], args[1]);
-      });
-  } else if (name == "run") {
-    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
-        this->Run();
-      });
-  } else if (name == "load_params") {
-    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
-        this->LoadParams(args[0].operator std::string());
-      });
-  } else {
-    return PackedFunc();
-  }
-}
-
-tvm::runtime::Module GraphRuntimeCreate(std::string sym_json,
-                                        tvm::runtime::Module m,
-                                        int device_type,
-                                        int device_id) {
-  TVMContext ctx;
-  ctx.device_type = static_cast<DLDeviceType>(device_type);
-  ctx.device_id   = device_id;
-  std::shared_ptr<GraphRuntime> exec = std::make_shared<GraphRuntime>();
-  exec->Init(sym_json, m, ctx);
-  return tvm::runtime::Module(exec);
-}
-
-TVM_REGISTER_GLOBAL("nnvm.runtime.create")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
-    *rv = GraphRuntimeCreate(args[0], args[1], args[2], args[3]);
-  });
-
-TVM_REGISTER_GLOBAL("nnvm.runtime.remote_create")
-.set_body([](TVMArgs args, TVMRetValue *rv) {
-    void* mhandle = args[1];
-    *rv = GraphRuntimeCreate(args[0],
-                             *static_cast<tvm::runtime::Module*>(mhandle),
-                             args[2], args[3]);
-  });
-}  // namespace runtime
-}  // namespace nnvm
diff --git a/nnvm/src/runtime/graph_runtime.h b/nnvm/src/runtime/graph_runtime.h
deleted file mode 100644
index 6ed130f1dd5ced7f167803d52e3770f6d5bd9f70..0000000000000000000000000000000000000000
--- a/nnvm/src/runtime/graph_runtime.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*!
- *  Copyright (c) 2017 by Contributors
- *
- *  Runtime module for graph deployment.
- *
- * \file graph_executor.h
- */
-#ifndef NNVM_RUNTIME_GRAPH_RUNTIME_H_
-#define NNVM_RUNTIME_GRAPH_RUNTIME_H_
-
-#include <dmlc/parameter.h>
-#include <string>
-
-namespace nnvm {
-namespace runtime {
-
-/*! \brief Magic number for NDArray file */
-constexpr uint64_t kTVMNDArrayMagic = 0xDD5E40F096B4A13F;
-/*! \brief Magic number for NDArray list file  */
-constexpr uint64_t kTVMNDArrayListMagic = 0xF7E58D4F05049CB7;
-
-/*! \brief operator attributes about tvm op */
-struct TVMOpParam : public dmlc::Parameter<TVMOpParam> {
-  std::string func_name;
-  uint32_t num_inputs;
-  uint32_t num_outputs;
-  uint32_t flatten_data;
-
-  DMLC_DECLARE_PARAMETER(TVMOpParam) {
-    DMLC_DECLARE_FIELD(func_name);
-    DMLC_DECLARE_FIELD(num_inputs).set_default(1);
-    DMLC_DECLARE_FIELD(num_outputs).set_default(1);
-    DMLC_DECLARE_FIELD(flatten_data).set_default(0);
-  }
-};
-
-}  // namespace runtime
-}  // namespace nnvm
-
-#endif  // NNVM_RUNTIME_GRAPH_RUNTIME_H_
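With the nnvm runtime sources removed, the Python-facing path goes through
tvm.contrib.graph_runtime, which is assumed to expose the same set_input/run/get_output
surface as the deleted nnvm/python/nnvm/runtime.py wrapper. A minimal end-to-end
sketch under that assumption:

    import numpy as np
    import tvm
    from tvm.contrib import graph_runtime
    import nnvm.symbol as sym
    import nnvm.compiler

    x = sym.Variable("x")
    shape = (1, 10)
    graph, lib, _ = nnvm.compiler.build(sym.exp(x), "llvm", {"x": shape})
    m = graph_runtime.create(graph, lib, tvm.cpu(0))
    # run() forwards keyword arguments to set_input, as in the old wrapper.
    m.run(x=np.random.uniform(size=shape).astype("float32"))
    out = m.get_output(0, tvm.nd.empty(shape, "float32"))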
diff --git a/nnvm/tests/python/compiler/test_build.py b/nnvm/tests/python/compiler/test_build.py
index 59220a7ca63e85aee15c3153d319303a47a5025a..936631185032b2ce6c4160432aaec7e498a9af31 100644
--- a/nnvm/tests/python/compiler/test_build.py
+++ b/nnvm/tests/python/compiler/test_build.py
@@ -1,9 +1,9 @@
 import numpy as np
 
 import tvm
+from tvm.contrib import graph_runtime
 import nnvm.symbol as sym
 import nnvm.compiler
-import nnvm.runtime
 from nnvm.compiler.build_module import _run_graph, precompute_prune
 
 def test_compile():
@@ -14,7 +14,7 @@ def test_compile():
     dtype = tvm.float32
     shape_dict = {"x": shape, "y": shape}
     def verify(graph, lib):
-        m = nnvm.runtime.create(graph, lib, tvm.cpu(0))
+        m = graph_runtime.create(graph, lib, tvm.cpu(0))
         # get member functions
         set_input, run, get_output = m["set_input"], m["run"], m["get_output"]
         na = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
@@ -67,7 +67,7 @@ def test_precompute_prune():
     graph, lib, params = nnvm.compiler.build(
         z, "llvm", shape={"y": ny.shape}, params=params)
     assert graph.index.num_nodes == 4
-    m = nnvm.runtime.create(graph, lib, tvm.cpu(0))
+    m = graph_runtime.create(graph, lib, tvm.cpu(0))
     params["y"] = ny
     res = tvm.nd.empty(shape)
     m.run(**params)
diff --git a/nnvm/tests/python/compiler/test_compiler_cache.py b/nnvm/tests/python/compiler/test_compiler_cache.py
index f7666b39f00558aaf93375ab6a1f9b3dc11910fe..970b193a6875ca04129fcde59dc8f034a0c616a2 100644
--- a/nnvm/tests/python/compiler/test_compiler_cache.py
+++ b/nnvm/tests/python/compiler/test_compiler_cache.py
@@ -1,8 +1,8 @@
 import numpy as np
 import tvm
+from tvm.contrib import graph_runtime
 import nnvm.symbol as sym
 import nnvm.compiler
-import nnvm.runtime
 
 def test_compile_cache():
     x = sym.Variable("x")
@@ -12,7 +12,7 @@ def test_compile_cache():
     dtype = tvm.float32
     shape_dict = {"x": shape, "y": shape}
     def verify(graph, lib):
-        m = nnvm.runtime.create(graph, lib, tvm.cpu(0))
+        m = graph_runtime.create(graph, lib, tvm.cpu(0))
         # get member functions
         na = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
         nb = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
diff --git a/nnvm/tests/python/compiler/test_op_fusion.py b/nnvm/tests/python/compiler/test_op_fusion.py
index a7c6ca4b288d8f3db79ca882974c2a9d7bceba53..4c079b41bb81965c79d663537769690c2c7ee1e5 100644
--- a/nnvm/tests/python/compiler/test_op_fusion.py
+++ b/nnvm/tests/python/compiler/test_op_fusion.py
@@ -2,6 +2,7 @@ import nnvm
 import numpy as np
 import tvm
 import topi
+from tvm.contrib import graph_runtime
 from nnvm import symbol as sym
 from nnvm.compiler import graph_util, graph_attr
 from nnvm.testing import ctx_list
@@ -17,7 +18,7 @@ def test_ewise_injective():
     for target, ctx in ctx_list():
         graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
         assert graph.index.num_nodes == 2
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         x_np = np.random.uniform(size=dshape).astype(dtype)
         m.run(x=x_np)
         out = m.get_output(0, tvm.nd.empty((10, 6)))
@@ -39,7 +40,7 @@ def test_conv_ewise_injective():
 
     for target, ctx in ctx_list():
         graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         # print(graph.ir(join_entry_attrs=["shape"]))
         assert graph.index.num_nodes == 5
         # set input
@@ -66,7 +67,7 @@ def test_injective_reduce_injective():
 
     for target, ctx in ctx_list():
         graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         assert graph.index.num_nodes == 2
         data = np.random.uniform(size=dshape).astype(dtype)
         m.run(x=data)
diff --git a/nnvm/tests/python/compiler/test_rpc_exec.py b/nnvm/tests/python/compiler/test_rpc_exec.py
index 4a94a1d7686e4ed3d34dc69664c2e7411aa8be19..4cfd87ad243fcc1dfe6728a3c7d5d48ca8072e32 100644
--- a/nnvm/tests/python/compiler/test_rpc_exec.py
+++ b/nnvm/tests/python/compiler/test_rpc_exec.py
@@ -1,8 +1,7 @@
-from tvm.contrib import util, rpc
 import tvm
+from tvm.contrib import util, rpc, graph_runtime
 import nnvm.symbol as sym
 import nnvm.compiler
-import nnvm.runtime
 import numpy as np
 
 def test_rpc_executor():
@@ -29,7 +28,7 @@ def test_rpc_executor():
     rlib = remote.load_module("net.o")
 
     # Create remotemodule
-    m = nnvm.runtime.create(graph, rlib, remote.cpu(0))
+    m = graph_runtime.create(graph, rlib, remote.cpu(0))
     # get member functions
     set_input, run, get_output = m["set_input"], m["run"], m["get_output"]
     na = tvm.nd.array(np.ones(shape).astype(dtype), ctx)
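The RPC dispatch that the deleted nnvm.runtime.create implemented by hand (checking
ctx.device_type >= rpc.RPC_SESS_MASK and dispatching to a remote creator) is assumed
to be handled inside tvm.contrib.graph_runtime.create when it receives a remote module
and context, which is why the test above can call it uniformly. A sketch of that
remote path; host, port, and the library path are placeholders:

    from tvm.contrib import rpc, graph_runtime

    remote = rpc.connect("localhost", 9090)   # placeholder host/port
    remote.upload("/tmp/net.o")               # placeholder path to the exported lib
    rlib = remote.load_module("net.o")
    m = graph_runtime.create(graph, rlib, remote.cpu(0))  # graph from nnvm.compiler.build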
diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py
index 0686f4412733b0af429bda6f80f55e1fac03f89c..83f1c25d25dfb0913a7bd6301c106c017f28cc6b 100644
--- a/nnvm/tests/python/compiler/test_top_level1.py
+++ b/nnvm/tests/python/compiler/test_top_level1.py
@@ -1,9 +1,9 @@
 import numpy as np
 import tvm
+from tvm.contrib import graph_runtime
 import topi
 import nnvm.symbol as sym
 import nnvm.compiler
-import nnvm.runtime
 from nnvm.testing.config import ctx_list
 
 def test_relu():
@@ -15,7 +15,7 @@ def test_relu():
     oshape = dshape
     for target, ctx in ctx_list():
         graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         data = np.random.uniform(size=dshape).astype(dtype)
         m.run(x=data)
         data = (data < 0) * data * 0.3 + (data>0) * data - 0.2
@@ -32,7 +32,7 @@ def test_exp():
     oshape = dshape
     for target, ctx in ctx_list():
         graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         data = np.random.uniform(size=dshape).astype(dtype)
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
@@ -49,7 +49,7 @@ def test_log():
     for target, ctx in ctx_list():
         with nnvm.compiler.build_config(opt_level=1):
             graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         data = np.random.uniform(size=dshape).astype(dtype)
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
@@ -66,7 +66,7 @@ def test_tanh():
     for target, ctx in ctx_list():
         with nnvm.compiler.build_config(opt_level=1):
             graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         data = np.random.uniform(size=dshape).astype(dtype)
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
@@ -82,7 +82,7 @@ def test_sigmoid():
     oshape = dshape
     for target, ctx in ctx_list():
         graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         data = np.random.uniform(size=dshape).astype(dtype)
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
@@ -99,7 +99,7 @@ def test_softmax():
     for target, ctx in ctx_list():
         with nnvm.compiler.build_config(opt_level=1):
             graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         data = np.random.uniform(size=dshape).astype(dtype)
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
@@ -116,7 +116,7 @@ def test_log_softmax():
     for target, ctx in ctx_list():
         with nnvm.compiler.build_config(opt_level=1):
             graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         data = np.random.uniform(size=dshape).astype(dtype)
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
@@ -136,7 +136,7 @@ def test_dense():
     }
     for target, ctx in ctx_list():
         graph, lib, _ = nnvm.compiler.build(y, target, shape)
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         x_np = np.random.uniform(size=shape["x"]).astype(dtype)
         w_np = np.random.uniform(size=shape["dense_weight"]).astype(dtype)
         b_np = np.random.uniform(size=shape["dense_bias"]).astype(dtype)
@@ -162,7 +162,7 @@ def test_batchnorm():
 
     for target, ctx in ctx_list():
         graph, lib, _ = nnvm.compiler.build(y, "llvm", {"x": shape})
-        m = nnvm.runtime.create(graph, lib, tvm.cpu(0))
+        m = graph_runtime.create(graph, lib, tvm.cpu(0))
         x_np = np.random.uniform(size=shape).astype(dtype)
         mean_np = np.random.uniform(size=shape[1]).astype(dtype)
         var_np = np.random.uniform(size=shape[1]).astype(dtype)
diff --git a/nnvm/tests/python/compiler/test_top_level2.py b/nnvm/tests/python/compiler/test_top_level2.py
index c6f38e06fb789b650fd895fa2a676a0a14931a3d..312094cd31d4cf99aefef03eb737f67114c223e4 100644
--- a/nnvm/tests/python/compiler/test_top_level2.py
+++ b/nnvm/tests/python/compiler/test_top_level2.py
@@ -1,10 +1,10 @@
 import numpy as np
 
 import tvm
+from tvm.contrib import graph_runtime
 import topi
 import nnvm.symbol as sym
 import nnvm.compiler
-import nnvm.runtime
 from nnvm.testing.config import ctx_list
 
 
@@ -19,7 +19,7 @@ def test_conv2d():
     shape_dict = {"x": dshape}
     for target, ctx in ctx_list():
         graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
         kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
         bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype))
@@ -42,7 +42,7 @@ def test_grouped_conv2d():
     shape_dict = {"x": dshape}
     for target, ctx in ctx_list():
         graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
         kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
         bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype))
@@ -63,7 +63,7 @@ def test_max_pool2d():
     shape_dict = {"x": dshape}
     for target, ctx in ctx_list():
         graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
@@ -80,7 +80,7 @@ def test_avg_pool2d():
     shape_dict = {"x": dshape}
     for target, ctx in ctx_list():
         graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
@@ -97,7 +97,7 @@ def test_global_max_pool2d():
     shape_dict = {"x": dshape}
     for target, ctx in ctx_list():
         graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
@@ -114,7 +114,7 @@ def test_global_avg_pool2d():
     shape_dict = {"x": dshape}
     for target, ctx in ctx_list():
         graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
diff --git a/nnvm/tests/python/compiler/test_top_level4.py b/nnvm/tests/python/compiler/test_top_level4.py
index eac3178e3e45820153a5c38ffc3c9a9504f184a0..bc2c5465d4614940ecbc7cdf5277410934e248b4 100644
--- a/nnvm/tests/python/compiler/test_top_level4.py
+++ b/nnvm/tests/python/compiler/test_top_level4.py
@@ -1,9 +1,9 @@
 import numpy as np
 import tvm
+from tvm.contrib import graph_runtime
 import topi
 import nnvm.symbol as sym
 import nnvm.compiler
-import nnvm.runtime
 from nnvm.testing.config import ctx_list
 
 def verify_transpose(dshape, axes):
@@ -16,7 +16,7 @@ def verify_transpose(dshape, axes):
     dtype = "float32"
     for target, ctx in ctx_list():
         graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         # set input
         data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
         m.run(x=data)
@@ -31,7 +31,7 @@ def verify_reduce(dshape, fnp, fsym, **kwargs):
     dtype = "float32"
     for target, ctx in ctx_list():
         graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         # set input
         data = np.random.uniform(size=dshape).astype(dtype)
         out_np = fnp(data + 1, **kwargs)
diff --git a/nnvm/tests/python/frontend/mxnet/test_forward.py b/nnvm/tests/python/frontend/mxnet/test_forward.py
index 380d0bca1d3a3575fe902a234c73f72d9da8c240..07acb74cfaa707aca6250893234c22dc4fc28905 100644
--- a/nnvm/tests/python/frontend/mxnet/test_forward.py
+++ b/nnvm/tests/python/frontend/mxnet/test_forward.py
@@ -2,9 +2,9 @@ import numpy as np
 
 import topi
 import tvm
+from tvm.contrib import graph_runtime
 import nnvm.symbol as sym
 import nnvm.compiler
-import nnvm.runtime
 from nnvm.testing.config import ctx_list
 from nnvm import frontend
 import mxnet as mx
@@ -28,7 +28,7 @@ def test_mxnet_frontend_impl(mx_symbol, data_shape=(1, 3, 224, 224), out_shape=(
         dshape = x.shape
         shape_dict = {'data': dshape}
         graph, lib, params = nnvm.compiler.build(new_sym, target, shape_dict, params=params)
-        m = nnvm.runtime.create(graph, lib, ctx)
+        m = graph_runtime.create(graph, lib, ctx)
         # set inputs
         m.set_input("data", tvm.nd.array(x.astype(dtype)))
         m.set_input(**params)
diff --git a/nnvm/tutorials/mobilenet_inference_gpu.py b/nnvm/tutorials/mobilenet_inference_gpu.py
index 9343316b38962d68a3ee4331fc8e438fa8819f9d..3f68c6ff255f6664fd88af3cec3db320b21e322c 100644
--- a/nnvm/tutorials/mobilenet_inference_gpu.py
+++ b/nnvm/tutorials/mobilenet_inference_gpu.py
@@ -8,10 +8,9 @@ This is an example of using NNVM to compile MobileNet model and deploy its infer
 To begin with, we import nnvm(for compilation) and TVM(for deployment).
 """
 import tvm
+from tvm.contrib import nvcc, graph_runtime
 import nnvm.compiler
-import nnvm.runtime
 import nnvm.testing
-from tvm.contrib import nvcc
 
 ######################################################################
 # Register the NVCC Compiler Option
@@ -56,7 +55,7 @@ net, params = nnvm.testing.mobilenet.get_workload(
 #
 # To compile the graph, we call the build function with the graph
 # configuration and parameters.
-# When parameters are provided, NNVM will pre-compute certain part of the graph if possible, 
+# When parameters are provided, NNVM will pre-compute certain parts of the graph if possible, with
 # the new parameter set returned as the third return value.
 
 graph, lib, params = nnvm.compiler.build(
@@ -66,12 +65,14 @@ graph, lib, params = nnvm.compiler.build(
 # Run the Compiled Module
 # -----------------------
 #
-# To deploy the module, we call :any:`nnvm.runtime.create` passing in the graph the lib and context.
+# To deploy the module, we call :any:`tvm.contrib.graph_runtime.create`, passing in the graph, the lib, and the context.
 # Thanks to TVM, we can deploy the compiled module to many platforms and languages.
 # The deployment module is designed to contain minimum dependencies.
 # This example runs on the same machine.
+#
+# Note that the code below no longer depends on NNVM; it relies only on TVM's runtime to run (deploy) the model.
 
-module = nnvm.runtime.create(graph, lib, ctx)
+module = graph_runtime.create(graph, lib, ctx)
 # set input
 module.set_input(**params)
 # run
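The hunk ends at the run step; for completeness, a sketch of the read-back that
follows, assuming the tutorial's output shape variable (e.g. out_shape =
(batch_size, 1000) for the MobileNet classifier):

    module.run()
    # get output
    out = module.get_output(0, tvm.nd.empty(out_shape, "float32"))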