diff --git a/src/codegen/llvm/codegen_amdgpu.cc b/src/codegen/llvm/codegen_amdgpu.cc
index 9cccdf4466fd6a432b1829095a1113e1425a06a5..d1a0716bc1d9760093bf0fbaaadcaa109a4e3080 100644
--- a/src/codegen/llvm/codegen_amdgpu.cc
+++ b/src/codegen/llvm/codegen_amdgpu.cc
@@ -47,8 +47,10 @@ class CodeGenAMDGPU : public CodeGenLLVM {
       if (info.scope.rank == runtime::StorageRank::kLocal) {
         // const int local_address_space = 5;
         // TODO(tqchen): for higher version of LLVM, local address space can be set.
-        llvm::AllocaInst* alloca = builder_->CreateAlloca(
-            LLVMType(op->type), ConstInt32(constant_size));
+        llvm::AllocaInst* alloca = WithFunctionEntry([&]() {
+            return builder_->CreateAlloca(
+                LLVMType(op->type), ConstInt32(constant_size));
+          });
         if (alloca->getAlignment() < static_cast<uint32_t>(info.alignment)) {
           alloca->setAlignment(info.alignment);
         }
diff --git a/src/codegen/llvm/codegen_cpu.cc b/src/codegen/llvm/codegen_cpu.cc
index 436c727f86f0b606156ca13828de2c568e5d2030..4e005346624b327d57cf1b44765e530ef43581f6 100644
--- a/src/codegen/llvm/codegen_cpu.cc
+++ b/src/codegen/llvm/codegen_cpu.cc
@@ -503,7 +503,9 @@ llvm::Value* CodeGenCPU::GetPackedFuncHandle(const std::string& fname) {
       handle_not_null, end_block, init_block, md_very_likely_branch_);
   // Initialize the handle if needed.
   builder_->SetInsertPoint(init_block);
-  llvm::Value* out = builder_->CreateAlloca(t_tvm_func_handle_);
+  llvm::Value* out = WithFunctionEntry([&]() {
+      return builder_->CreateAlloca(t_tvm_func_handle_);
+    });
   llvm::LoadInst* ctx = builder_->CreateAlignedLoad(
       gv_mod_ctx_, gv_mod_ctx_->getAlignment());
   ctx->setMetadata(
@@ -513,6 +515,8 @@ llvm::Value* CodeGenCPU::GetPackedFuncHandle(const std::string& fname) {
       RuntimeTVMGetFuncFromEnv(), {ctx, GetConstString(fname), out});
   init_block = CheckCallSuccess(retcode);
   llvm::Value* loaded_handle = builder_->CreateAlignedLoad(out, align);
+  // Store the handle
+  builder_->CreateStore(loaded_handle, hptr);
   builder_->CreateBr(end_block);
   // end block
   builder_->SetInsertPoint(end_block);
@@ -637,19 +641,23 @@ llvm::Value* CodeGenCPU::CreateIntrinsic(const Call* op) {
   } else if (op->is_intrinsic(intrinsic::tvm_stack_alloca)) {
     CHECK_EQ(op->args.size(), 2U);
     const std::string& type = op->args[0].as<StringImm>()->value;
-    llvm::Value* num = MakeValue(op->args[1]);
-    if (type == "shape") {
-      return builder_->CreateAlloca(t_tvm_shape_index_, num);
-    } else if (type == "arg_value") {
-      return builder_->CreateAlloca(t_tvm_value_, num);
-    } else if (type == "arg_tcode") {
-      return builder_->CreateAlloca(t_int_, num);
-    } else if (type == "array") {
-      return builder_->CreateAlloca(t_tvm_array_, num);
-    } else {
-      LOG(FATAL) << "Unknown stack alloca type " << type;
-      return nullptr;
-    }
+    return WithFunctionEntry([&]() -> llvm::AllocaInst* {
+        const int64_t* pval = as_const_int(op->args[1]);
+        CHECK(pval) << "require stack alloca to contain constant value";
+        llvm::Value* num = ConstInt32(pval[0]);
+        if (type == "shape") {
+          return builder_->CreateAlloca(t_tvm_shape_index_, num);
+        } else if (type == "arg_value") {
+          return builder_->CreateAlloca(t_tvm_value_, num);
+        } else if (type == "arg_tcode") {
+          return builder_->CreateAlloca(t_int_, num);
+        } else if (type == "array") {
+          return builder_->CreateAlloca(t_tvm_array_, num);
+        } else {
+          LOG(FATAL) << "Unknown stack alloca type " << type;
+          return nullptr;
+        }
+      });
   } else {
     return CodeGenLLVM::CreateIntrinsic(op);
   }
diff --git a/src/codegen/llvm/codegen_llvm.cc b/src/codegen/llvm/codegen_llvm.cc
index c1b1fe24f0a8e79343b5e36de4dc82effd1e12d1..22319aa926fb767b7b0e76b33e6a2094a955ec30 100644
--- a/src/codegen/llvm/codegen_llvm.cc
+++ b/src/codegen/llvm/codegen_llvm.cc
@@ -1049,8 +1049,10 @@ void CodeGenLLVM::VisitStmt_(const Allocate* op) {
     if (info.alignment > 16) {
       info.alignment = 16;
     }
-    llvm::AllocaInst* alloca = builder_->CreateAlloca(
-        LLVMType(op->type), ConstInt32(constant_size));
+    llvm::AllocaInst* alloca = WithFunctionEntry([&]() {
+        return builder_->CreateAlloca(
+            LLVMType(op->type), ConstInt32(constant_size));
+      });
     if (alloca->getAlignment() < static_cast<uint32_t>(info.alignment)) {
       alloca->setAlignment(info.alignment);
     }
diff --git a/src/codegen/llvm/codegen_llvm.h b/src/codegen/llvm/codegen_llvm.h
index d0cee581a0b63f6e142025d201ced5615928851f..080306310370961f46f25264e198c5c1e7e364b5 100644
--- a/src/codegen/llvm/codegen_llvm.h
+++ b/src/codegen/llvm/codegen_llvm.h
@@ -132,6 +132,26 @@ class CodeGenLLVM :
     /*! \brief The alignment of allocation */
     int alignment{0};
   };
+  /*!
+   * \brief Run falloca with the builder positioned at the start of the
+   *  current function's entry block and return the alloca it creates.
+   *
+   *  The previous insert point is restored by an RAII guard, so it is put
+   *  back at its exact position even if falloca exits by exception.
+   *
+   * \param falloca Callable that emits the alloca through builder_.
+   * \tparam F The callable type; its result must convert to llvm::AllocaInst*.
+   * \return The alloca instruction produced by falloca.
+   */
+  template<typename F>
+  inline llvm::AllocaInst* WithFunctionEntry(F falloca) {
+    // Restores insert block, position and debug location on scope exit.
+    llvm::IRBuilderBase::InsertPointGuard guard(*builder_);
+    llvm::BasicBlock* entry = &(function_->getEntryBlock());
+    // Emit at the very top of the entry block.
+    builder_->SetInsertPoint(entry, entry->begin());
+    return falloca();
+  }
   // create intrinstic given call
   virtual llvm::Value* CreateIntrinsic(const Call* op);
   // create extern function call
diff --git a/src/codegen/llvm/codegen_nvptx.cc b/src/codegen/llvm/codegen_nvptx.cc
index 6bc6ccaff5829b0f5da69c8eb2235a627756a0c7..2d416d34ea0c7b78feb5f17c996d61d188505571 100644
--- a/src/codegen/llvm/codegen_nvptx.cc
+++ b/src/codegen/llvm/codegen_nvptx.cc
@@ -49,8 +49,10 @@ class CodeGenNVPTX : public CodeGenLLVM {
       if (info.scope.rank == runtime::StorageRank::kLocal) {
         // const int local_address_space = 5;
         // TODO(tqchen): for higher version of LLVM, local address space can be set.
-        llvm::AllocaInst* alloca = builder_->CreateAlloca(
-            LLVMType(op->type), ConstInt32(constant_size));
+        llvm::AllocaInst* alloca = WithFunctionEntry([&]() {
+            return builder_->CreateAlloca(
+                LLVMType(op->type), ConstInt32(constant_size));
+          });
         if (alloca->getAlignment() < static_cast<uint32_t>(info.alignment)) {
           alloca->setAlignment(info.alignment);
         }