From fdf035e82521fa9bc74aaceb3e8bdf645c283b41 Mon Sep 17 00:00:00 2001 From: Tianqi Chen <tqchen@users.noreply.github.com> Date: Tue, 6 Nov 2018 08:47:49 -0800 Subject: [PATCH] [CODEGEN][LLVM] Cache packed func ptr, lift alloca (#2070) --- src/codegen/llvm/codegen_amdgpu.cc | 6 +++-- src/codegen/llvm/codegen_cpu.cc | 36 ++++++++++++++++++------------ src/codegen/llvm/codegen_llvm.cc | 6 +++-- src/codegen/llvm/codegen_llvm.h | 20 +++++++++++++++++ src/codegen/llvm/codegen_nvptx.cc | 6 +++-- 5 files changed, 54 insertions(+), 20 deletions(-) diff --git a/src/codegen/llvm/codegen_amdgpu.cc b/src/codegen/llvm/codegen_amdgpu.cc index 9cccdf446..d1a0716bc 100644 --- a/src/codegen/llvm/codegen_amdgpu.cc +++ b/src/codegen/llvm/codegen_amdgpu.cc @@ -47,8 +47,10 @@ class CodeGenAMDGPU : public CodeGenLLVM { if (info.scope.rank == runtime::StorageRank::kLocal) { // const int local_address_space = 5; // TODO(tqchen): for higher version of LLVM, local address space can be set. - llvm::AllocaInst* alloca = builder_->CreateAlloca( - LLVMType(op->type), ConstInt32(constant_size)); + llvm::AllocaInst* alloca = WithFunctionEntry([&]() { + return builder_->CreateAlloca( + LLVMType(op->type), ConstInt32(constant_size)); + }); if (alloca->getAlignment() < static_cast<uint32_t>(info.alignment)) { alloca->setAlignment(info.alignment); } diff --git a/src/codegen/llvm/codegen_cpu.cc b/src/codegen/llvm/codegen_cpu.cc index 436c727f8..4e0053466 100644 --- a/src/codegen/llvm/codegen_cpu.cc +++ b/src/codegen/llvm/codegen_cpu.cc @@ -503,7 +503,9 @@ llvm::Value* CodeGenCPU::GetPackedFuncHandle(const std::string& fname) { handle_not_null, end_block, init_block, md_very_likely_branch_); // Initialize the handle if needed. 
builder_->SetInsertPoint(init_block); - llvm::Value* out = builder_->CreateAlloca(t_tvm_func_handle_); + llvm::Value* out = WithFunctionEntry([&]() { + return builder_->CreateAlloca(t_tvm_func_handle_); + }); llvm::LoadInst* ctx = builder_->CreateAlignedLoad( gv_mod_ctx_, gv_mod_ctx_->getAlignment()); ctx->setMetadata( @@ -513,6 +515,8 @@ llvm::Value* CodeGenCPU::GetPackedFuncHandle(const std::string& fname) { RuntimeTVMGetFuncFromEnv(), {ctx, GetConstString(fname), out}); init_block = CheckCallSuccess(retcode); llvm::Value* loaded_handle = builder_->CreateAlignedLoad(out, align); + // Store the handle + builder_->CreateStore(loaded_handle, hptr); builder_->CreateBr(end_block); // end block builder_->SetInsertPoint(end_block); @@ -637,19 +641,23 @@ llvm::Value* CodeGenCPU::CreateIntrinsic(const Call* op) { } else if (op->is_intrinsic(intrinsic::tvm_stack_alloca)) { CHECK_EQ(op->args.size(), 2U); const std::string& type = op->args[0].as<StringImm>()->value; - llvm::Value* num = MakeValue(op->args[1]); - if (type == "shape") { - return builder_->CreateAlloca(t_tvm_shape_index_, num); - } else if (type == "arg_value") { - return builder_->CreateAlloca(t_tvm_value_, num); - } else if (type == "arg_tcode") { - return builder_->CreateAlloca(t_int_, num); - } else if (type == "array") { - return builder_->CreateAlloca(t_tvm_array_, num); - } else { - LOG(FATAL) << "Unknown stack alloca type " << type; - return nullptr; - } + return WithFunctionEntry([&]() -> llvm::AllocaInst* { + const int64_t* pval = as_const_int(op->args[1]); + CHECK(pval) << "require stack alloca to contain constant value"; + llvm::Value* num = ConstInt32(pval[0]); + if (type == "shape") { + return builder_->CreateAlloca(t_tvm_shape_index_, num); + } else if (type == "arg_value") { + return builder_->CreateAlloca(t_tvm_value_, num); + } else if (type == "arg_tcode") { + return builder_->CreateAlloca(t_int_, num); + } else if (type == "array") { + return builder_->CreateAlloca(t_tvm_array_, num); + } 
else { + LOG(FATAL) << "Unknown stack alloca type " << type; + return nullptr; + } + }); } else { return CodeGenLLVM::CreateIntrinsic(op); } diff --git a/src/codegen/llvm/codegen_llvm.cc b/src/codegen/llvm/codegen_llvm.cc index c1b1fe24f..22319aa92 100644 --- a/src/codegen/llvm/codegen_llvm.cc +++ b/src/codegen/llvm/codegen_llvm.cc @@ -1049,8 +1049,10 @@ void CodeGenLLVM::VisitStmt_(const Allocate* op) { if (info.alignment > 16) { info.alignment = 16; } - llvm::AllocaInst* alloca = builder_->CreateAlloca( - LLVMType(op->type), ConstInt32(constant_size)); + llvm::AllocaInst* alloca = WithFunctionEntry([&]() { + return builder_->CreateAlloca( + LLVMType(op->type), ConstInt32(constant_size)); + }); if (alloca->getAlignment() < static_cast<uint32_t>(info.alignment)) { alloca->setAlignment(info.alignment); } diff --git a/src/codegen/llvm/codegen_llvm.h b/src/codegen/llvm/codegen_llvm.h index d0cee581a..080306310 100644 --- a/src/codegen/llvm/codegen_llvm.h +++ b/src/codegen/llvm/codegen_llvm.h @@ -132,6 +132,26 @@ class CodeGenLLVM : /*! \brief The alignment of allocation */ int alignment{0}; }; + /*! + * \brief Execute falloca at the beginning of the + * current function and obtain its return value. + * + * This is a helper function to make sure that + * alloca always happens in the beginning of the function. + * + * \param falloca The allocation function to be executed. + * \tparam F The function to be executed. + * \return The result. 
+ */ + template<typename F> + inline llvm::AllocaInst* WithFunctionEntry(F falloca) { + llvm::BasicBlock* current = builder_->GetInsertBlock(); + llvm::BasicBlock* entry = &(function_->getEntryBlock()); + builder_->SetInsertPoint(entry, entry->begin()); + llvm::AllocaInst* res = falloca(); + builder_->SetInsertPoint(current); + return res; + } // create intrinstic given call virtual llvm::Value* CreateIntrinsic(const Call* op); // create extern function call diff --git a/src/codegen/llvm/codegen_nvptx.cc b/src/codegen/llvm/codegen_nvptx.cc index 6bc6ccaff..2d416d34e 100644 --- a/src/codegen/llvm/codegen_nvptx.cc +++ b/src/codegen/llvm/codegen_nvptx.cc @@ -49,8 +49,10 @@ class CodeGenNVPTX : public CodeGenLLVM { if (info.scope.rank == runtime::StorageRank::kLocal) { // const int local_address_space = 5; // TODO(tqchen): for higher version of LLVM, local address space can be set. - llvm::AllocaInst* alloca = builder_->CreateAlloca( - LLVMType(op->type), ConstInt32(constant_size)); + llvm::AllocaInst* alloca = WithFunctionEntry([&]() { + return builder_->CreateAlloca( + LLVMType(op->type), ConstInt32(constant_size)); + }); if (alloca->getAlignment() < static_cast<uint32_t>(info.alignment)) { alloca->setAlignment(info.alignment); } -- GitLab