From 7f25bf1dab0edd1a205a8ef91609f26b28dc4a13 Mon Sep 17 00:00:00 2001
From: Thierry Moreau <moreau@cs.washington.edu>
Date: Wed, 23 May 2018 15:39:12 -0700
Subject: [PATCH] [BITSTREAM SERVER] Bitstream server integration (#38)

---
 vta/NEWS.md                                   |  7 ++-
 vta/apps/pynq_rpc/README.md                   | 14 +----
 vta/examples/resnet18/pynq/README.md          | 11 +++-
 .../resnet18/pynq/imagenet_predict.py         | 15 +++--
 vta/hardware/xilinx/Makefile                  | 23 +++-----
 vta/hardware/xilinx/README.md                 |  2 -
 vta/make/config.json                          |  5 +-
 vta/make/sim_sample.json                      |  5 +-
 vta/make/vta_config.py                        | 20 ++++++-
 vta/python/vta/__init__.py                    |  3 +-
 vta/python/vta/bitstream.py                   | 55 +++++++++++++++++++
 vta/python/vta/environment.py                 | 25 +++++++--
 vta/python/vta/pkg_config.py                  |  3 +
 vta/python/vta/rpc_client.py                  | 14 ++++-
 vta/python/vta/testing/util.py                |  7 +--
 vta/tests/python/pynq/test_program_rpc.py     | 50 +++++++++--------
 16 files changed, 178 insertions(+), 81 deletions(-)
 create mode 100644 vta/python/vta/bitstream.py

diff --git a/vta/NEWS.md b/vta/NEWS.md
index 2c8fe5554..93b26f80b 100644
--- a/vta/NEWS.md
+++ b/vta/NEWS.md
@@ -1,4 +1,4 @@
-TVM Change Log
+VTA Change Log
 ==============
 
 This file records the changes in VTA stack in reverse chronological order.
@@ -6,7 +6,8 @@ This file records the changes in VTA stack in reverse chronological order.
 
 ## Initial version
 
-- Vivado based hardware
-- Driver for PYNQ
+- Vivado based hardware.
+- Driver for PYNQ board.
 - Runtime library.
 - TVM compiler stack.
+- Resnet-18 example.
diff --git a/vta/apps/pynq_rpc/README.md b/vta/apps/pynq_rpc/README.md
index 4cc2b46fb..2025cac23 100644
--- a/vta/apps/pynq_rpc/README.md
+++ b/vta/apps/pynq_rpc/README.md
@@ -31,21 +31,10 @@ From there, clone the VTA repository:
 git clone git@github.com:uwsaml/vta.git --recursive
 ```
 
-Next, clone the TVM repository:
-```bash
-git clone git@github.com:dmlc/tvm.git --recursive
-```
-
-TVM is rapidly changing, and to ensure stability, we keep track of working TVM checkpoints.
-As of now, the TVM checkpoint `e4c2af9abdcb3c7aabafba8084414d7739c17c4c` is known to work with VTA.
-```bash
-git checkout e4c2af9abdcb3c7aabafba8084414d7739c17c4c
-```
-
 Now, ssh into your **Pynq board** to build the TVM runtime with the following commands:
 ```bash
 ssh xilinx@192.168.2.99 # ssh if you haven't done so
-cd ~/tvm
+cd ~/vta/nnvm/tvm
 cp make/config.mk .
 echo USE_RPC=1 >> config.mk
 make runtime -j2
@@ -57,7 +46,6 @@ We're now ready to build the Pynq RPC server on the Pynq board.
 ```bash
 ssh xilinx@192.168.2.99 # ssh if you haven't done so
 cd ~/vta
-export TVM_PATH = /home/xilinx/tvm
 make
 ```
 
diff --git a/vta/examples/resnet18/pynq/README.md b/vta/examples/resnet18/pynq/README.md
index 51a2bc4be..aa521392a 100644
--- a/vta/examples/resnet18/pynq/README.md
+++ b/vta/examples/resnet18/pynq/README.md
@@ -4,6 +4,7 @@ In order to run this example you'll need to have:
 * VTA installed
 * TVM installed
 * NNVM installed
+* MxNet installed
 * A Pynq-based RPC server running
 
 ## VTA installation
@@ -26,9 +27,9 @@ git clone git@github.com:dmlc/tvm.git --recursive
 ```
 
 TVM is rapidly changing, and to ensure stability, we keep track of working TVM checkpoints.
-As of now, the TVM checkpoint `e4c2af9abdcb3c7aabafba8084414d7739c17c4c` is known to work with VTA.
+As of now, the TVM checkpoint `168f099155106d1188dbc54ac00acc02900a3c6f` is known to work with VTA.
 ```bash
-git checkout e4c2af9abdcb3c7aabafba8084414d7739c17c4c
+git checkout 168f099155106d1188dbc54ac00acc02900a3c6f
 ```
 
 Before building TVM, copy the `make/config.mk` file into the root TVM directory:
@@ -59,7 +60,7 @@ Clone the NNVM repository from `tqchen` in the directory of your choosing:
 git clone git@github.com:tqchen/nnvm.git --recursive
 ```
 
-To run this example, we rely on a special branch of NNVM: `qt`:
+To run this example, we rely on a special branch of NNVM (`qt`) until these changes get merged back into the main repo:
 ```bash
 cd <nnvm root>
 git checkout qt
@@ -76,6 +77,10 @@ Finally update your `~/.bashrc` file to include the NNVM python libraries in you
 export PYTHONPATH=<nnvm root>/python:${PYTHONPATH}
 ```
 
+## MxNet Installation
+
+Follow the [MxNet Installation Instructions](https://mxnet.incubator.apache.org)
+
 ## Pynq RPC Server Setup
                                                        
 Follow the [Pynq RPC Server Guide](https://github.com/uwsaml/vta/tree/master/apps/pynq_rpc/README.md)
diff --git a/vta/examples/resnet18/pynq/imagenet_predict.py b/vta/examples/resnet18/pynq/imagenet_predict.py
index 554cceabd..8ec0d5a27 100644
--- a/vta/examples/resnet18/pynq/imagenet_predict.py
+++ b/vta/examples/resnet18/pynq/imagenet_predict.py
@@ -15,8 +15,6 @@ from tvm.contrib import graph_runtime, rpc, util
 
 bfactor = 1
 cfactor = 16
-host = "pynq"
-port = 9091
 verbose = False
 # only run fpga component, mark non-conv ops as nop
 debug_fpga_only = False
@@ -27,8 +25,7 @@ TEST_FILE = 'cat.jpg'
 CATEG_FILE = 'synset.txt'
 RESNET_GRAPH_FILE = 'quantize_graph.json'
 RESNET_PARAMS_FILE = 'quantize_params.pkl'
-BITSTREAM_FILE = 'vta.bit'
-for file in [TEST_FILE, CATEG_FILE, RESNET_GRAPH_FILE, RESNET_PARAMS_FILE, BITSTREAM_FILE]:
+for file in [TEST_FILE, CATEG_FILE, RESNET_GRAPH_FILE, RESNET_PARAMS_FILE]:
     if not os.path.isfile(file):
         print ("Downloading {}".format(file))
         wget.download(url+file)
@@ -43,7 +40,6 @@ target_host = "llvm -mtriple=armv7-none-linux-gnueabihf -mcpu=cortex-a9 -mattr=+
 if vta.get_env().TARGET == "sim":
     target_host = "llvm"
 
-
 synset = eval(open(os.path.join(CATEG_FILE)).read())
 image = Image.open(os.path.join(TEST_FILE)).resize((224, 224))
 
@@ -138,8 +134,17 @@ if vta.get_env().TARGET == "sim":
     remote = rpc.LocalSession()
     print("local session")
 else:
+    host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
+    assert host
+    port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091")
+    port = int(port)
     remote = rpc.connect(host, port)
 
+# Program FPGA, and build runtime if necessary
+# Override the bitstream argument with a path to your own if you built it yourself
+vta.reconfig_runtime(remote)
+vta.program_fpga(remote, bitstream=None)
+
 remote.upload(temp.relpath("graphlib.o"))
 lib = remote.load_module("graphlib.o")
 ctx = remote.ext_dev(0) if target.device_name == "vta" else remote.cpu(0)
diff --git a/vta/hardware/xilinx/Makefile b/vta/hardware/xilinx/Makefile
index 9ed7654ed..106d5d25b 100644
--- a/vta/hardware/xilinx/Makefile
+++ b/vta/hardware/xilinx/Makefile
@@ -45,6 +45,12 @@ VTA_WGT_BUFF_SIZE := $(shell ${VTA_CONFIG} --get-wgtbuffsize)
 VTA_ACC_BUFF_SIZE := $(shell ${VTA_CONFIG} --get-accbuffsize)
 VTA_OUT_BUFF_SIZE := $(shell ${VTA_CONFIG} --get-outbuffsize)
 
+#---------------------
+# FPGA Parameters
+#--------------------
+VTA_CLOCK_FREQ = $(shell ${VTA_CONFIG} --get-fpgafreq)
+VTA_TARGET_PER = $(shell ${VTA_CONFIG} --get-fpgaper)
+
 #---------------------
 # Compilation parameters
 #--------------------
@@ -52,19 +58,8 @@ VTA_OUT_BUFF_SIZE := $(shell ${VTA_CONFIG} --get-outbuffsize)
 #  Number of threads during compilation
 VTA_HW_COMP_THREADS = 8
 
-#  Target Frequency
-VTA_HW_COMP_CLOCK_FREQ = 100
-
-#  Timing closure compensation (0 for none, 3 for highest)
-VTA_HW_COMP_TIMING_COMP = 0
-
-# Derive clock target period
-TARGET_PER = \
-$(shell echo "$$(( (1000 + $(VTA_HW_COMP_CLOCK_FREQ) - 1) / $(VTA_HW_COMP_CLOCK_FREQ) - $(VTA_HW_COMP_TIMING_COMP)))" )
-
 # Derive config name
-CONF_ROOT = $(shell ${VTA_CONFIG} --cfg-str)
-CONF = $(CONF_ROOT)_$(VTA_HW_COMP_CLOCK_FREQ)MHz_$(TARGET_PER)ns
+CONF = $(shell ${VTA_CONFIG} --cfg-str)
 IP_BUILD_PATH = $(BUILD_DIR)/hls/$(CONF)
 HW_BUILD_PATH = $(BUILD_DIR)/vivado/$(CONF)
 
@@ -90,7 +85,7 @@ $(IP_PATH): $(SRC_DIR)/*
 	cd $(IP_BUILD_PATH) && \
 		$(VIVADO_HLS) -f $(SCRIPT_DIR)/hls.tcl \
 		-tclargs $(SRC_DIR) $(SIM_DIR) $(TEST_DIR) $(INCLUDE_DIR) \
-		$(MODE) $(DEBUG) $(NO_DSP) $(NO_ALU) $(TARGET_PER) \
+		$(MODE) $(DEBUG) $(NO_DSP) $(NO_ALU) $(VTA_TARGET_PER) \
 		$(VTA_INP_WIDTH) $(VTA_WGT_WIDTH) $(VTA_ACC_WIDTH) $(VTA_OUT_WIDTH) \
 		$(VTA_BATCH) $(VTA_IN_BLOCK) $(VTA_OUT_BLOCK) \
 		$(VTA_UOP_BUFF_SIZE) $(VTA_INP_BUFF_SIZE) $(VTA_WGT_BUFF_SIZE) \
@@ -104,7 +99,7 @@ $(BIT_PATH): $(IP_PATH)
 	mkdir -p $(HW_BUILD_PATH)
 	cd $(HW_BUILD_PATH) && \
 		$(VIVADO) -mode tcl -source $(SCRIPT_DIR)/vivado.tcl \
-		-tclargs $(BUILD_DIR)/hls/$(CONF) $(VTA_HW_COMP_THREADS) $(VTA_HW_COMP_CLOCK_FREQ) \
+		-tclargs $(BUILD_DIR)/hls/$(CONF) $(VTA_HW_COMP_THREADS) $(VTA_CLOCK_FREQ) \
 		$(VTA_INP_WIDTH) $(VTA_WGT_WIDTH) $(VTA_OUT_WIDTH) \
 		$(VTA_BATCH) $(VTA_IN_BLOCK) $(VTA_OUT_BLOCK) \
 		$(VTA_INP_BUFF_SIZE) $(VTA_WGT_BUFF_SIZE) $(VTA_OUT_BUFF_SIZE)
diff --git a/vta/hardware/xilinx/README.md b/vta/hardware/xilinx/README.md
index 0c68724ea..69f66d654 100644
--- a/vta/hardware/xilinx/README.md
+++ b/vta/hardware/xilinx/README.md
@@ -67,7 +67,5 @@ make
 
 The local `Makefile` containts several variables that can be tweaked by the user:
 * `VTA_HW_COMP_THREADS`: determines the number of threads used for the Vivado compilation job (default 8 threads).
-* `VTA_HW_COMP_CLOCK_FREQ`: determines the target frequency of the VTA design (default 100MHz). It can only be set to 100, 142, 167 or 200MHz.
-* `VTA_HW_COMP_TIMING_COMP`: determines how much additional slack must be provided to close timing (default 0ns). Generally when utilization is high for an FPGA design, setting this paramter to 1, 2 or 3 can help close timing.
 
 Once the compilation completes, the generated bitstream can be found under `<vta root>/build/hardware/xilinx/vivado/<design name>/export/vta.bit`. 
\ No newline at end of file
diff --git a/vta/make/config.json b/vta/make/config.json
index c64473e74..5c37108e6 100644
--- a/vta/make/config.json
+++ b/vta/make/config.json
@@ -1,5 +1,8 @@
 {
   "TARGET" : "pynq",
+  "HW_FREQ" : 100,
+  "HW_CLK_TARGET" : 8,
+  "HW_VER" : "0.0.0",
   "LOG_INP_WIDTH" : 3,
   "LOG_WGT_WIDTH" : 3,
   "LOG_ACC_WIDTH" : 5,
@@ -7,7 +10,7 @@
   "LOG_BATCH" : 0,
   "LOG_BLOCK_IN" : 4,
   "LOG_BLOCK_OUT" : 4,
-  "LOG_UOP_BUFF_SIZE" : 14,
+  "LOG_UOP_BUFF_SIZE" : 15,
   "LOG_INP_BUFF_SIZE" : 15,
   "LOG_WGT_BUFF_SIZE" : 18,
   "LOG_ACC_BUFF_SIZE" : 17
diff --git a/vta/make/sim_sample.json b/vta/make/sim_sample.json
index 747e22fce..73a00e33d 100644
--- a/vta/make/sim_sample.json
+++ b/vta/make/sim_sample.json
@@ -1,5 +1,8 @@
 {
   "TARGET" : "sim",
+  "HW_FREQ" : 100,
+  "HW_CLK_TARGET" : 8,
+  "HW_VER" : "0.0.0",
   "LOG_INP_WIDTH" : 3,
   "LOG_WGT_WIDTH" : 3,
   "LOG_ACC_WIDTH" : 5,
@@ -7,7 +10,7 @@
   "LOG_BATCH" : 0,
   "LOG_BLOCK_IN" : 4,
   "LOG_BLOCK_OUT" : 4,
-  "LOG_UOP_BUFF_SIZE" : 14,
+  "LOG_UOP_BUFF_SIZE" : 15,
   "LOG_INP_BUFF_SIZE" : 15,
   "LOG_WGT_BUFF_SIZE" : 18,
   "LOG_ACC_BUFF_SIZE" : 17
diff --git a/vta/make/vta_config.py b/vta/make/vta_config.py
index c64a11f6a..9788e433e 100644
--- a/vta/make/vta_config.py
+++ b/vta/make/vta_config.py
@@ -54,6 +54,10 @@ def main():
                         help="returns log of accum buffer size in B")
     parser.add_argument("--get-outbuffsize", action="store_true",
                         help="returns log of output buffer size in B")
+    parser.add_argument("--get-fpgafreq", action="store_true",
+                        help="returns FPGA frequency")
+    parser.add_argument("--get-fpgaper", action="store_true",
+                        help="returns HLS target clock period")
     args = parser.parse_args()
 
     if len(sys.argv) == 1:
@@ -91,7 +95,8 @@ def main():
         print(pkg.cfg_json)
 
     if args.cfg_str:
-        cfg_str = "{}x{}x{}_{}bx{}b_{}_{}_{}_{}".format(
+        # Needs to match the BITSTREAM string in python/vta/environment.py
+        cfg_str = "{}x{}x{}_{}bx{}b_{}_{}_{}_{}_{}MHz_{}ns_v{}".format(
             (1 << cfg["LOG_BATCH"]),
             (1 << cfg["LOG_BLOCK_IN"]),
             (1 << cfg["LOG_BLOCK_OUT"]),
@@ -100,8 +105,11 @@ def main():
             cfg["LOG_UOP_BUFF_SIZE"],
             cfg["LOG_INP_BUFF_SIZE"],
             cfg["LOG_WGT_BUFF_SIZE"],
-            cfg["LOG_ACC_BUFF_SIZE"])
-        print cfg_str
+            cfg["LOG_ACC_BUFF_SIZE"],
+            cfg["HW_FREQ"],
+            cfg["HW_CLK_TARGET"],
+            cfg["HW_VER"].replace('.', '_'))
+        print(cfg_str)
 
     if args.get_inpwidth:
         print(cfg["LOG_INP_WIDTH"])
@@ -139,5 +147,11 @@ def main():
     if args.get_accbuffsize:
         print(cfg["LOG_ACC_BUFF_SIZE"])
 
+    if args.get_fpgafreq:
+        print(cfg["HW_FREQ"])
+
+    if args.get_fpgaper:
+        print(cfg["HW_CLK_TARGET"])
+
 if __name__ == "__main__":
     main()
diff --git a/vta/python/vta/__init__.py b/vta/python/vta/__init__.py
index 37c2e7f34..c2dc7c30d 100644
--- a/vta/python/vta/__init__.py
+++ b/vta/python/vta/__init__.py
@@ -8,11 +8,10 @@ from __future__ import absolute_import as _abs
 
 __version__ = "0.1.0"
 
-
+from .bitstream import get_bitstream_path, download_bitstream
 from .environment import get_env, Environment
 from .rpc_client import reconfig_runtime, program_fpga
 
-
 try:
     from . import top
     from .build_module import build_config, lower, build
diff --git a/vta/python/vta/bitstream.py b/vta/python/vta/bitstream.py
new file mode 100644
index 000000000..dd0a9afdb
--- /dev/null
+++ b/vta/python/vta/bitstream.py
@@ -0,0 +1,55 @@
+"""VTA specific bitstream management library."""
+from __future__ import absolute_import as _abs
+
+import os
+import urllib
+from .environment import get_env
+
+# bitstream repo
+BITSTREAM_URL = "https://github.com/uwsaml/vta-distro/raw/master/bitstreams/"
+
+def get_bitstream_path():
+    """Returns the path to the cached bitstream corresponding to the current config
+
+    Returns
+    -------
+    bit_path: str
+        Corresponding to the filepath of the bitstream
+    """
+
+    env = get_env()
+
+    # Derive destination path
+    cache_dir = os.getenv("VTA_CACHE_PATH", os.path.join(os.getenv("HOME"), ".vta_cache/"))
+    cache_dir = os.path.join(cache_dir, env.TARGET)
+    # Create the directory if it didn't exist
+    if not os.path.exists(cache_dir):
+        os.makedirs(cache_dir)
+    bit_path = os.path.join(cache_dir, env.BITSTREAM)
+
+    return bit_path
+
+
+def download_bitstream():
+    """Downloads a cached bitstream corresponding to the current config
+    """
+
+    env = get_env()
+
+    success = False
+    bit = get_bitstream_path()
+    url = os.path.join(BITSTREAM_URL, env.TARGET)
+    url = os.path.join(url, env.HW_VER)
+    url = os.path.join(url, env.BITSTREAM)
+    # Check that the bitstream is accessible from the server
+    if urllib.urlopen(url).getcode() == 404:
+        # Raise error - the solution when this happens is to build your own bitstream and add it
+        # to your VTA_CACHE_PATH
+        raise RuntimeError(
+            "Error: {} is not available. It appears that this configuration has not been built."
+            .format(url))
+    else:
+        urllib.urlretrieve(url, bit)
+        success = True
+
+    return success
diff --git a/vta/python/vta/environment.py b/vta/python/vta/environment.py
index 41ec38aef..7391d56e2 100644
--- a/vta/python/vta/environment.py
+++ b/vta/python/vta/environment.py
@@ -24,7 +24,7 @@ class DevContext(object):
     Note
     ----
     This class is introduced so we have a clear separation
-    of developer related stuffs and user facing attributes.
+    of developer-related and user-facing attributes.
     """
     # Memory id for DMA
     MEM_ID_UOP = 0
@@ -62,7 +62,7 @@ class DevContext(object):
 
 
 class Environment(object):
-    """Hareware configuration object.
+    """Hardware configuration object.
 
     This object contains all the information
     needed for compiling to a specific VTA backend.
@@ -98,23 +98,24 @@ class Environment(object):
 
     # initialization function
     def __init__(self, cfg):
-        # Log of input/activation width in bits
         self.__dict__.update(cfg)
         for key in PkgConfig.cfg_keys:
             if key not in cfg:
                 raise ValueError("Expect key %s in cfg" % key)
+        # derive output buffer size
         self.LOG_OUT_BUFF_SIZE = (
             self.LOG_ACC_BUFF_SIZE +
             self.LOG_OUT_WIDTH -
             self.LOG_ACC_WIDTH)
-        # width
+        # data type width
         self.INP_WIDTH = 1 << self.LOG_INP_WIDTH
         self.WGT_WIDTH = 1 << self.LOG_WGT_WIDTH
         self.ACC_WIDTH = 1 << self.LOG_ACC_WIDTH
+        self.OUT_WIDTH = self.INP_WIDTH
+        # tensor intrinsic shape
         self.BATCH = 1 << self.LOG_BATCH
         self.BLOCK_IN = 1 << self.LOG_BLOCK_IN
         self.BLOCK_OUT = 1 << self.LOG_BLOCK_OUT
-        self.OUT_WIDTH = self.INP_WIDTH
         # buffer size
         self.UOP_BUFF_SIZE = 1 << self.LOG_UOP_BUFF_SIZE
         self.INP_BUFF_SIZE = 1 << self.LOG_INP_BUFF_SIZE
@@ -138,6 +139,20 @@ class Environment(object):
         self.WGT_ELEM_BYTES = self.WGT_ELEM_BITS // 8
         self.ACC_ELEM_BYTES = self.ACC_ELEM_BITS // 8
         self.OUT_ELEM_BYTES = self.OUT_ELEM_BITS // 8
+        # Configuration bitstream name
+        self.BITSTREAM = "{}x{}x{}_{}bx{}b_{}_{}_{}_{}_{}MHz_{}ns_v{}.bit".format(
+            (1 << cfg["LOG_BATCH"]),
+            (1 << cfg["LOG_BLOCK_IN"]),
+            (1 << cfg["LOG_BLOCK_OUT"]),
+            (1 << cfg["LOG_INP_WIDTH"]),
+            (1 << cfg["LOG_WGT_WIDTH"]),
+            cfg["LOG_UOP_BUFF_SIZE"],
+            cfg["LOG_INP_BUFF_SIZE"],
+            cfg["LOG_WGT_BUFF_SIZE"],
+            cfg["LOG_ACC_BUFF_SIZE"],
+            cfg["HW_FREQ"],
+            cfg["HW_CLK_TARGET"],
+            cfg["HW_VER"].replace('.', '_'))
         # dtypes
         self.acc_dtype = "int%d" % self.ACC_WIDTH
         self.inp_dtype = "int%d" % self.INP_WIDTH
diff --git a/vta/python/vta/pkg_config.py b/vta/python/vta/pkg_config.py
index 36655982d..f657b2b32 100644
--- a/vta/python/vta/pkg_config.py
+++ b/vta/python/vta/pkg_config.py
@@ -22,6 +22,9 @@ class PkgConfig(object):
     """
     cfg_keys = [
         "TARGET",
+        "HW_FREQ",
+        "HW_CLK_TARGET",
+        "HW_VER",
         "LOG_INP_WIDTH",
         "LOG_WGT_WIDTH",
         "LOG_ACC_WIDTH",
diff --git a/vta/python/vta/rpc_client.py b/vta/python/vta/rpc_client.py
index 341130ed7..999250cd7 100644
--- a/vta/python/vta/rpc_client.py
+++ b/vta/python/vta/rpc_client.py
@@ -2,6 +2,7 @@
 import os
 
 from .environment import get_env
+from .bitstream import download_bitstream, get_bitstream_path
 
 def reconfig_runtime(remote):
     """Reconfigure remote runtime based on current hardware spec.
@@ -16,7 +17,7 @@ def reconfig_runtime(remote):
     freconfig(env.pkg_config().cfg_json)
 
 
-def program_fpga(remote, bitstream):
+def program_fpga(remote, bitstream=None):
     """Upload and program bistream
 
     Parameters
@@ -24,9 +25,16 @@ def program_fpga(remote, bitstream):
     remote : RPCSession
         The TVM RPC session
 
-    bitstream : str
-        Path to a local bistream file.
+    bitstream : str, optional
+        Path to a local bitstream file. If unset, tries to download from cache server.
     """
+    if bitstream:
+        assert os.path.isfile(bitstream)
+    else:
+        bitstream = get_bitstream_path()
+        if not os.path.isfile(bitstream):
+            download_bitstream()
+
     fprogram = remote.get_function("tvm.contrib.vta.init")
     remote.upload(bitstream)
     fprogram(os.path.basename(bitstream))
diff --git a/vta/python/vta/testing/util.py b/vta/python/vta/testing/util.py
index 402546c0e..18701320f 100644
--- a/vta/python/vta/testing/util.py
+++ b/vta/python/vta/testing/util.py
@@ -6,7 +6,6 @@ from tvm.contrib import rpc
 from ..environment import get_env
 from . import simulator
 
-
 def run(run_func):
     """Run test function on all available env.
 
@@ -29,10 +28,10 @@ def run(run_func):
             run_func(env, rpc.LocalSession())
 
     # Run on PYNQ if env variable exists
-    pynq_host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
-    if pynq_host:
+    host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
+    if host:
         env.TARGET = "pynq"
         port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091")
         port = int(port)
-        remote = rpc.connect(pynq_host, port)
+        remote = rpc.connect(host, port)
         run_func(env, remote)
diff --git a/vta/tests/python/pynq/test_program_rpc.py b/vta/tests/python/pynq/test_program_rpc.py
index 4c252acba..22a1e69de 100644
--- a/vta/tests/python/pynq/test_program_rpc.py
+++ b/vta/tests/python/pynq/test_program_rpc.py
@@ -1,30 +1,36 @@
-import tvm
-import vta
 import os
-from tvm.contrib import rpc, util
-
-env = vta.get_env()
-host = "pynq"
-port = 9091
-target = "llvm -target=armv7-none-linux-gnueabihf"
-bit = "{}x{}x{}_{}bx{}b_{}_{}_{}_{}_100MHz_10ns.bit".format(
-		env.BATCH, env.BLOCK_IN, env.BLOCK_OUT,
-		env.INP_WIDTH, env.WGT_WIDTH,
-		env.LOG_UOP_BUFF_SIZE, env.LOG_INP_BUFF_SIZE,
-		env.LOG_WGT_BUFF_SIZE, env.LOG_ACC_BUFF_SIZE)
+import tvm
+from tvm.contrib import rpc
+from vta import get_bitstream_path, download_bitstream, program_fpga, reconfig_runtime
 
-curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
-bitstream = os.path.join(curr_path, "../../../../vta_bitstreams/bitstreams/", bit)
+def program_rpc_bitstream(path=None):
+    """Program the FPGA on the RPC server
 
-def test_program_rpc():
+    Parameters
+    ----------
+    path : path to bitstream (optional)
+    """
     assert tvm.module.enabled("rpc")
+    host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
+    if not host:
+        raise RuntimeError(
+            "Error: VTA_PYNQ_RPC_HOST environment variable not set.")
+    # If a path to a bitstream is passed, make sure that it points to a valid bitstream
+    port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091")
+    port = int(port)
     remote = rpc.connect(host, port)
-    vta.program_fpga(remote, bit)
+    program_fpga(remote, path)
 
-def test_reconfig_runtime():
+def reconfig_rpc_runtime():
+    """Reconfig the RPC server runtime
+    """
     assert tvm.module.enabled("rpc")
-    remote = rpc.connect(host, port)
-    vta.reconfig_runtime(remote)
+    host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
+    if host:
+        port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091")
+        port = int(port)
+        remote = rpc.connect(host, port)
+        reconfig_runtime(remote)
 
-test_program_rpc()
-test_reconfig_runtime()
+program_rpc_bitstream()
+reconfig_rpc_runtime()
-- 
GitLab