diff --git a/src/codegen/llvm/llvm_common.cc b/src/codegen/llvm/llvm_common.cc
index 5ecdd35641099d93cf89ac3736e991e9d71dc0e7..44fd056241977279445d89b1634f13cd084d6d75 100644
--- a/src/codegen/llvm/llvm_common.cc
+++ b/src/codegen/llvm/llvm_common.cc
@@ -46,6 +46,7 @@ GetLLVMTargetMachine(const std::string& target_str) {
   std::string target_triple = "";
   std::string cpu = "generic";
   std::string attr = "";
+  bool soft_float_abi = false;
   std::string key, value;
   if (target_str.length() > 5) {
     std::istringstream is(target_str.substr(5, target_str.length() - 5));
@@ -67,6 +68,14 @@ GetLLVMTargetMachine(const std::string& target_str) {
         cpu = value;
       } else if (key == "-mattr") {
         attr = value;
+      } else if (key == "-mfloat-abi") {
+        if (value == "hard") {
+          soft_float_abi = false;
+        } else if (value == "soft") {
+          soft_float_abi = true;
+        } else {
+          LOG(FATAL) << "invalid -mfloat-abi option " << value;
+        }
       } else {
         LOG(FATAL) << "unknown option " << key;
       }
@@ -80,7 +89,18 @@ GetLLVMTargetMachine(const std::string& target_str) {
   const llvm::Target* target =
       llvm::TargetRegistry::lookupTarget(target_triple, err);
   CHECK(target) << err << " target_triple=" << target_triple;
+  // Set target options: relaxed FP math and the requested float ABI.
   llvm::TargetOptions opt;
+  opt.LessPreciseFPMADOption = true;
+  opt.AllowFPOpFusion = llvm::FPOpFusion::Fast;
+  opt.UnsafeFPMath = true;
+  opt.NoInfsFPMath = true;
+  opt.NoNaNsFPMath = true;
+  if (soft_float_abi) {
+    opt.FloatABIType = llvm::FloatABI::Soft;
+  } else {
+    opt.FloatABIType = llvm::FloatABI::Hard;
+  }
   auto rmodel = llvm::Reloc::PIC_;
   llvm::TargetMachine* tm =
       target->createTargetMachine(target_triple, cpu, attr, opt, rmodel);
diff --git a/src/codegen/llvm/llvm_module.cc b/src/codegen/llvm/llvm_module.cc
index cfa4510d111e1dac7ed00d2482a8670745366348..3b7d4753572d5b98e5bd4fb94d9e28f74bd81e63 100644
--- a/src/codegen/llvm/llvm_module.cc
+++ b/src/codegen/llvm/llvm_module.cc
@@ -66,6 +66,13 @@ class LLVMModuleNode final : public runtime::ModuleNode {
           pass, dest, llvm::TargetMachine::CGFT_ObjectFile) == 0)
           << "Cannot emit target CGFT_ObjectFile";
       pass.run(*mptr_);
+    } else if (fmt == "s" || fmt == "asm") {
+      llvm::legacy::PassManager pass;
+      CHECK(tm_);
+      CHECK(tm_->addPassesToEmitFile(
+          pass, dest, llvm::TargetMachine::CGFT_AssemblyFile) == 0)
+          << "Cannot emit target CGFT_AssemblyFile";
+      pass.run(*mptr_);
     } else if (fmt == "ll") {
       mptr_->print(dest, nullptr);
     } else if (fmt == "bc") {
diff --git a/tests/python/unittest/test_codegen_cross_llvm.py b/tests/python/unittest/test_codegen_cross_llvm.py
index 38f82d6ed567d08071fe4884e3d48de213be1ad5..ff504672ea78e08f5e62c8ad61d7486968204974 100644
--- a/tests/python/unittest/test_codegen_cross_llvm.py
+++ b/tests/python/unittest/test_codegen_cross_llvm.py
@@ -45,6 +45,8 @@ def test_llvm_add_pipeline():
         path = temp.relpath("myadd.o")
         f.save(path)
         verify_elf(path, 0x28)
+        asm_path = temp.relpath("myadd.asm")
+        f.save(asm_path)
         # Do a RPC verification, launch kernel on Arm Board if available.
         host = os.environ.get('TVM_RPC_ARM_HOST', None)
         remote = None