From 34630e8147bbc78b0b281a54feb306bb0396d225 Mon Sep 17 00:00:00 2001
From: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Date: Thu, 7 Jun 2018 00:44:52 +0900
Subject: [PATCH] [NNVM][FRONTEND][Keras] Support for reusing layers (#1192)
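
Previously, the frontend assumed a one-to-one mapping between Keras
layer names and NNVM symbols, so a layer instance that is called more
than once could not be converted. This change keys each symbol by
"<layer name>:<inbound node index>", giving every application of a
layer its own symbol. InputLayer names are left untouched so that
user-visible input variable names stay stable; as a consequence, the
input name seen by NNVM is now Keras's own default (e.g. 'input_1')
rather than the hard-coded 'data'.

A minimal sketch of the reuse this enables (mirroring the new
test_forward_reuse_layers test below; the 'conv2d_1' layer name and the
resulting 'conv2d_1:0' / 'conv2d_1:1' internal symbol names assume
Keras's default naming and are illustrative, not part of the API):

    import keras
    import nnvm

    data = keras.layers.Input(shape=(32, 32, 3))
    conv2d = keras.layers.Conv2D(8, (3, 3), padding='same')
    x = conv2d(data)   # first application  -> internal symbol 'conv2d_1:0'
    y = conv2d(data)   # second application -> internal symbol 'conv2d_1:1'
    z = keras.layers.add([x, y])
    z = keras.layers.GlobalAveragePooling2D()(z)
    keras_model = keras.models.Model(data, z)

    # the input variable keeps its Keras name ('input_1' by default)
    sym, params = nnvm.frontend.from_keras(keras_model)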

---
 nnvm/python/nnvm/frontend/keras.py            | 46 ++++++++-------
 .../python/frontend/keras/test_forward.py     | 57 ++++++++++++++++---
 tutorials/nnvm/from_keras.py                  |  6 +-
 3 files changed, 78 insertions(+), 31 deletions(-)

diff --git a/nnvm/python/nnvm/frontend/keras.py b/nnvm/python/nnvm/frontend/keras.py
index a61fee7e3..0ca6ebf6d 100644
--- a/nnvm/python/nnvm/frontend/keras.py
+++ b/nnvm/python/nnvm/frontend/keras.py
@@ -152,8 +152,8 @@ def _convert_convolution(insym, keras_layer, symtab):
         pass
     # we insert a separate pad operator
     elif keras_layer.padding == 'same':
-        in_h = keras_layer.input.shape[1].value
-        in_w = keras_layer.input.shape[2].value
+        in_h = keras_layer.input_shape[1]
+        in_w = keras_layer.input_shape[2]
         pad_t, pad_b = _get_pad_pair(in_h, kernel_h, stride_h)
         pad_l, pad_r = _get_pad_pair(in_w, kernel_w, stride_w)
         insym = _sym.pad(data=insym, pad_width=((0, 0), (0, 0), (pad_t, pad_b), (pad_l, pad_r)))
@@ -192,8 +192,8 @@ def _convert_separable_convolution(insym, keras_layer, symtab):
         pass
     # we insert a separate pad operator
     elif keras_layer.padding == 'same':
-        in_h = keras_layer.input.shape[1].value
-        in_w = keras_layer.input.shape[2].value
+        in_h = keras_layer.input_shape[1]
+        in_w = keras_layer.input_shape[2]
         pad_t, pad_b = _get_pad_pair(in_h, kernel_h, stride_h)
         pad_l, pad_r = _get_pad_pair(in_w, kernel_w, stride_w)
         insym = _sym.pad(data=insym, pad_width=(
@@ -249,8 +249,8 @@ def _convert_pooling(insym, keras_layer, symtab):
             pass
         # we insert a separate pad operator
         elif keras_layer.padding == 'same':
-            in_h = keras_layer.input.shape[1].value
-            in_w = keras_layer.input.shape[2].value
+            in_h = keras_layer.input_shape[1]
+            in_w = keras_layer.input_shape[2]
             pad_t, pad_b = _get_pad_pair(in_h, pool_h, stride_h)
             pad_l, pad_r = _get_pad_pair(in_w, pool_w, stride_w)
             insym = _sym.pad(data=insym, pad_width=(
@@ -475,25 +475,33 @@ def from_keras(model):
     symtab = SymbolTable()
     for keras_layer in model.layers:
         if isinstance(keras_layer, keras.engine.topology.InputLayer):
-            keras_layer.name = 'data'
             symtab.get_var(keras_layer.name, must_contain=False)
         else:
-            predecessors = []
             inbound_nodes = keras_layer.inbound_nodes if hasattr(keras_layer, 'inbound_nodes') \
                        else keras_layer._inbound_nodes if hasattr(keras_layer, '_inbound_nodes') \
                        else None
             if inbound_nodes is None:
                 raise TypeError("Unknown layer type or unsupported Keras version : {}"
                                 .format(keras_layer))
-            for node in inbound_nodes:
-                for pred in node.inbound_layers:
-                    predecessors.append(pred.name)
-            if len(predecessors) == 1:
-                insym = symtab.get_var(predecessors[0], must_contain=True)
-            else:
-                insym = [symtab.get_var(pred, must_contain=True) for pred in predecessors]
-            keras_op_to_nnvm(insym, keras_layer, keras_layer.name, symtab)
-
-    returns = [symtab.get_var(i.name, must_contain=False) for i in model.output_layers]
+            for node_idx, node in enumerate(inbound_nodes):
+                insym = []
+
+                # Since Keras allows a layer instance to be called multiple times,
+                # we append the inbound node index to the symbol name to make it unique.
+                # The one exception is InputLayer: renaming input variables after
+                # conversion would confuse users, so we keep their names unchanged.
+                # Fortunately, Keras names them uniquely as input_1, input_2, ... by default.
+                for pred_idx, pred in zip(node.node_indices, node.inbound_layers):
+                    if isinstance(pred, keras.engine.topology.InputLayer):
+                        pred_sym = symtab.get_var(pred.name, must_contain=True)
+                    else:
+                        pred_sym = symtab.get_var(pred.name + ':' + str(pred_idx), must_contain=True)
+                    insym.append(pred_sym)
+
+                if len(insym) == 1:
+                    insym = insym[0]
+                keras_op_to_nnvm(insym, keras_layer, keras_layer.name + ':' + str(node_idx), symtab)
+
+    outsym = symtab.get_var(model.output_layers[0].name + ':0')
     tvmparams = {k:tvm.nd.array(np.array(v, dtype=np.float32)) for k, v in symtab.params.items()}
-    return returns[0], tvmparams
+    return outsym, tvmparams
diff --git a/nnvm/tests/python/frontend/keras/test_forward.py b/nnvm/tests/python/frontend/keras/test_forward.py
index 5a399577f..0cc65350c 100644
--- a/nnvm/tests/python/frontend/keras/test_forward.py
+++ b/nnvm/tests/python/frontend/keras/test_forward.py
@@ -14,28 +14,31 @@ set_session(tf.Session(config=config))
 
 
 def verify_keras_frontend(keras_model):
-    in_shape = [dim.value if dim.value is not None else 1 for dim in keras_model.input_layers[0].input.shape]
+    in_shapes = []
+    for layer in keras_model.input_layers:
+        in_shapes.append(tuple(dim.value if dim.value is not None else 1 for dim in layer.input.shape))
     out_shape = [dim.value if dim.value is not None else 1 for dim in keras_model.output_layers[0].output.shape]
 
-    def get_keras_output(x, dtype='float32'):
-        return keras_model.predict(x)
+    def get_keras_output(xs, dtype='float32'):
+        return keras_model.predict(xs)
 
-    def get_tvm_output(x, target, ctx, input_name='data', dtype='float32'):
+    def get_tvm_output(xs, target, ctx, dtype='float32'):
         sym, params = nnvm.frontend.from_keras(keras_model)
-        shape_dict = {input_name : x.shape}
+        shape_dict = {name: x.shape for (name, x) in zip(keras_model.input_names, xs)}
         with nnvm.compiler.build_config(opt_level=2):
             graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params)
         m = graph_runtime.create(graph, lib, ctx)
-        m.set_input(input_name, tvm.nd.array(x.astype(dtype)))
+        for name, x in zip(keras_model.input_names, xs):
+            m.set_input(name, tvm.nd.array(x.astype(dtype)))
         m.set_input(**params)
         m.run()
         out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
         return out.asnumpy()
 
-    x = np.random.uniform(size=in_shape)
-    keras_out = get_keras_output(x)
+    xs = [np.random.uniform(size=shape) for shape in in_shapes]
+    keras_out = get_keras_output(xs)
     for target, ctx in ctx_list():
-        tvm_out = get_tvm_output(x.transpose([0,3,1,2]), target, ctx)
+        tvm_out = get_tvm_output([x.transpose([0, 3, 1, 2]) for x in xs], target, ctx)
         np.testing.assert_allclose(keras_out, tvm_out, rtol=1e-5, atol=1e-5)
 
     
@@ -166,6 +169,39 @@ def test_forward_mobilenet():
     verify_keras_frontend(keras_model)
 
 
+def test_forward_multi_inputs():
+    data1 = keras.layers.Input(shape=(32, 32, 3))
+    data2 = keras.layers.Input(shape=(32, 32, 3))
+    x = keras.layers.Conv2D(8, (3, 3), padding="same")(data1)
+    y = keras.layers.Conv2D(8, (3, 3), padding="same")(data2)
+    z = keras.layers.add([x, y])
+    z = keras.layers.GlobalAveragePooling2D()(z)
+    keras_model = keras.models.Model([data1, data2], z)
+    verify_keras_frontend(keras_model)
+
+
+def test_forward_reuse_layers():
+    # reuse conv2d
+    data = keras.layers.Input(shape=(32, 32, 3))
+    conv2d = keras.layers.Conv2D(8, (3, 3), padding="same")
+    x = conv2d(data)
+    y = conv2d(data)
+    z = keras.layers.add([x, y])
+    z = keras.layers.GlobalAveragePooling2D()(z)
+    keras_model = keras.models.Model(data, z)
+    verify_keras_frontend(keras_model)
+
+    # reuse add
+    data = keras.layers.Input(shape=(32, 32, 3))
+    x = keras.layers.Conv2D(8, (3, 3), padding="same")(data)
+    add = keras.layers.Add()
+    x = add([x, x])
+    x = add([x, x])
+    z = keras.layers.GlobalAveragePooling2D()(x)
+    keras_model = keras.models.Model(data, z)
+    verify_keras_frontend(keras_model)
+
+
 if __name__ == '__main__':
     test_forward_elemwise_add()
     test_forward_softmax()
@@ -182,3 +218,6 @@ if __name__ == '__main__':
     test_forward_xception()
     test_forward_resnet50()
     test_forward_mobilenet()
+
+    test_forward_multi_inputs()
+    test_forward_reuse_layers()
diff --git a/tutorials/nnvm/from_keras.py b/tutorials/nnvm/from_keras.py
index 0466d672c..402010b98 100644
--- a/tutorials/nnvm/from_keras.py
+++ b/tutorials/nnvm/from_keras.py
@@ -63,7 +63,7 @@ plt.show()
 # input preprocess
 data = np.array(img)[np.newaxis, :].astype('float32')
 data = preprocess_input(data).transpose([0, 3, 1, 2])
-print('data', data.shape)
+print('input_1', data.shape)
 
 ######################################################################
 # Compile the model on NNVM
@@ -74,7 +74,7 @@ print('data', data.shape)
 sym, params = nnvm.frontend.from_keras(keras_resnet50)
 # compile the model
 target = 'cuda'
-shape_dict = {'data': data.shape}
+shape_dict = {'input_1': data.shape}
 with nnvm.compiler.build_config(opt_level=2):
 	graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params)
 
@@ -86,7 +86,7 @@ from tvm.contrib import graph_runtime
 ctx = tvm.gpu(0)
 m = graph_runtime.create(graph, lib, ctx)
 # set inputs
-m.set_input('data', tvm.nd.array(data.astype('float32')))
+m.set_input('input_1', tvm.nd.array(data.astype('float32')))
 m.set_input(**params)
 # execute
 m.run()
-- 
GitLab