MarvinTeichmann · tylercode362 · Mar 16, 2017 · Mar 16, 2017 · Mar 20, 2017 · Mar 20, 2017
diff --git a/README.md b/README.md
@@ -13,14 +13,15 @@ The repository contains code for training, evaluating and visualizing semantic s
 
 ## Requirements
 
-The code requires [Tensorflow 1.0](https://www.tensorflow.org/install/) as well as the following python libraries: 
+The code requires [TensorFlow 1.0](https://www.tensorflow.org/install/) and Python 2.7, as well as the following Python libraries:
 
 * matplotlib
 * numpy
 * Pillow
 * scipy
+* commentjson
 
-Those modules can be installed using: `pip install numpy scipy pillow matplotlib` or `pip install -r requirements.txt`.
+These modules can be installed using `pip install numpy scipy pillow matplotlib commentjson` or `pip install -r requirements.txt`.
 
 
 ## Setup
@@ -104,7 +105,7 @@ KittiSeg is build on top of the TensorVision [TensorVision](https://github.com/T
 To utilize the entire TensorVision functionality install it using 
 
 `$ cd KittiSeg/submodules/TensorVision` <br>
-`$ python setup install`
+`$ python setup.py install`
 
 Now you can use the TensorVision command line tools, which includes:
 

diff --git a/demo.py b/demo.py
@@ -211,6 +211,17 @@ def main(_):
     logging.info("Green plot of predictions have been saved to: {}".format(
         os.path.realpath(green_image_name)))
 
+    logging.info("")
+    logging.warning("Do NOT use this Code to evaluate multiple images.")
+    # Explain why, then link to the guidance for multi-image use.
+    logging.warning("Demo.py is **very slow** and designed "
+                    "to be a tutorial to show how the KittiSeg works.")
+    logging.warning("")
+    logging.warning("Please see this comment if you would like to apply "
+                    "demo.py to multiple images:")
+    logging.warning("https://github.com/MarvinTeichmann/KittiBox/"
+                    "issues/15#issuecomment-301800058")
+
 
 if __name__ == '__main__':
     tf.app.run()
diff --git a/encoder/fcn8_vgg.py b/encoder/fcn8_vgg.py
@@ -51,8 +51,9 @@ def inference(hypes, images, train=True):
     logits['feed2'] = vgg_fcn.pool4
     logits['feed4'] = vgg_fcn.pool3
 
-    logits['feed4'] = vgg_fcn.pool3
-
     logits['fcn_logits'] = vgg_fcn.upscore32
 
+    logits['deep_feat'] = vgg_fcn.pool5
+    logits['early_feat'] = vgg_fcn.conv4_3
+
     return logits
diff --git a/encoder/resnet.py b/encoder/resnet.py
@@ -1,4 +1,4 @@
-'''
+"""
 The MIT License (MIT)
 
 Original Work: Copyright (c) 2016 Ryan Dahl
@@ -7,7 +7,7 @@
 Modified Work: Copyright (c) 2017 Marvin Teichmann
 
 For details see 'licenses/RESNET_LICENSE.txt'
-'''
+"""
 import tensorflow as tf
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.training import moving_averages
@@ -33,12 +33,13 @@
 IMAGENET_MEAN_BGR = [103.062623801, 115.902882574, 123.151630838, ]
 
 
-network_file = os.path.join("tensorflow_resnet_convert_1.1",
-                            "ResNet-L101.ckpt")
-
 network_url = "Not yet uploaded."
 
 
+def checkpoint_fn(layers):
+    """Return the checkpoint file name 'ResNet-L<layers>.ckpt'."""
+    checkpoint_name = 'ResNet-L%d.ckpt' % layers
+    return checkpoint_name
+
+
 def inference(hypes, images, train=True,
               num_classes=1000,
               num_blocks=[3, 4, 6, 3],  # defaults to 50-layer network
@@ -55,6 +56,8 @@ def inference(hypes, images, train=True,
         num_blocks = [3, 4, 23, 3]
     elif layers == 152:
         num_blocks = [3, 8, 36, 3]
+    else:
+        # Raise explicitly: `assert()` only fails because an empty tuple is
+        # falsy, carries no message, and is removed entirely under `-O`.
+        raise AssertionError(
+            "Unsupported number of ResNet layers: {}".format(layers))
 
     if preprocess:
         x = _imagenet_preprocess(images)
@@ -67,25 +70,29 @@ def inference(hypes, images, train=True,
 
     with tf.variable_scope('scale1'):
         x = _conv(x, 64, ksize=7, stride=2)
-        x = _bn(x, is_training)
+        x = _bn(x, is_training, hypes)
         x = _relu(x)
         scale1 = x
 
     with tf.variable_scope('scale2'):
         x = _max_pool(x, ksize=3, stride=2)
-        x = stack(x, num_blocks[0], 64, bottleneck, is_training, stride=1)
+        x = stack(x, num_blocks[0], 64, bottleneck, is_training, stride=1,
+                  hypes=hypes)
         scale2 = x
 
     with tf.variable_scope('scale3'):
-        x = stack(x, num_blocks[1], 128, bottleneck, is_training, stride=2)
+        x = stack(x, num_blocks[1], 128, bottleneck, is_training, stride=2,
+                  hypes=hypes)
         scale3 = x
 
     with tf.variable_scope('scale4'):
-        x = stack(x, num_blocks[2], 256, bottleneck, is_training, stride=2)
+        x = stack(x, num_blocks[2], 256, bottleneck, is_training, stride=2,
+                  hypes=hypes)
         scale4 = x
 
     with tf.variable_scope('scale5'):
-        x = stack(x, num_blocks[3], 512, bottleneck, is_training, stride=2)
+        x = stack(x, num_blocks[3], 512, bottleneck, is_training, stride=2,
+                  hypes=hypes)
         scale5 = x
 
     logits['images'] = images
@@ -94,6 +101,9 @@ def inference(hypes, images, train=True,
     logits['feed2'] = scale4
     logits['feed4'] = scale3
 
+    logits['early_feat'] = scale3
+    logits['deep_feat'] = scale5
+
     if train:
         restore = tf.global_variables()
         hypes['init_function'] = _initalize_variables
@@ -113,13 +123,18 @@ def _initalize_variables(hypes):
 
         saver = tf.train.Saver(var_list=restore)
 
-        filename = network_file
+        layers = hypes['arch']['layers']
+
+        assert layers in [50, 101, 152]
+
+        filename = checkpoint_fn(layers)
 
         if 'TV_DIR_DATA' in os.environ:
             filename = os.path.join(os.environ['TV_DIR_DATA'], 'weights',
-                                    filename)
+                                    "tensorflow_resnet", filename)
         else:
-            filename = os.path.join('DATA', 'weights', filename)
+            filename = os.path.join('DATA', 'weights', "tensorflow_resnet",
+                                    filename)
 
         if not os.path.exists(filename):
             logging.error("File not found: {}".format(filename))
@@ -145,19 +160,21 @@ def _imagenet_preprocess(rgb):
     return bgr
 
 
-def stack(x, num_blocks, filters_internal, bottleneck, is_training, stride):
+def stack(x, num_blocks, filters_internal, bottleneck, is_training, stride,
+          hypes):
     for n in range(num_blocks):
         s = stride if n == 0 else 1
         with tf.variable_scope('block%d' % (n + 1)):
             x = block(x,
                       filters_internal,
                       bottleneck=bottleneck,
                       is_training=is_training,
-                      stride=s)
+                      stride=s,
+                      hypes=hypes)
     return x
 
 
-def block(x, filters_internal, is_training, stride, bottleneck):
+def block(x, filters_internal, is_training, stride, bottleneck, hypes):
     filters_in = x.get_shape()[-1]
 
     # Note: filters_out isn't how many filters are outputed.
@@ -175,31 +192,31 @@ def block(x, filters_internal, is_training, stride, bottleneck):
     if bottleneck:
         with tf.variable_scope('a'):
             x = _conv(x, filters_internal, ksize=1, stride=stride)
-            x = _bn(x, is_training)
+            x = _bn(x, is_training, hypes)
             x = _relu(x)
 
         with tf.variable_scope('b'):
             x = _conv(x, filters_internal, ksize=3, stride=1)
-            x = _bn(x, is_training)
+            x = _bn(x, is_training, hypes)
             x = _relu(x)
 
         with tf.variable_scope('c'):
             x = _conv(x, filters_out, ksize=1, stride=1)
-            x = _bn(x, is_training)
+            x = _bn(x, is_training, hypes)
     else:
         with tf.variable_scope('A'):
             x = _conv(x, filters_internal, ksize=3, stride=stride)
-            x = _bn(x, is_training)
+            x = _bn(x, is_training, hypes)
             x = _relu(x)
 
         with tf.variable_scope('B'):
             x = _conv(x, filters_out, ksize=3, stride=1)
-            x = _bn(x, is_training)
+            x = _bn(x, is_training, hypes)
 
     with tf.variable_scope('shortcut'):
         if filters_out != filters_in or stride != 1:
             shortcut = _conv(shortcut, filters_out, ksize=1, stride=stride)
-            shortcut = _bn(shortcut, is_training)
+            shortcut = _bn(shortcut, is_training, hypes)
 
     return _relu(x + shortcut)
 
@@ -208,7 +225,7 @@ def _relu(x):
     return tf.nn.relu(x)
 
 
-def _bn(x, is_training):
+def _bn(x, is_training, hypes):
     x_shape = x.get_shape()
     params_shape = x_shape[-1:]
     axis = list(range(len(x_shape) - 1))
@@ -231,16 +248,21 @@ def _bn(x, is_training):
 
     # These ops will only be preformed when training.
     mean, variance = tf.nn.moments(x, axis)
+
     update_moving_mean = moving_averages.assign_moving_average(moving_mean,
-                                                               mean, BN_DECAY)
+                                                               mean,
+                                                               BN_DECAY)
     update_moving_variance = moving_averages.assign_moving_average(
         moving_variance, variance, BN_DECAY)
-    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
-    tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)
+    if hypes['use_moving_average_bn']:
+        tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
+        tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)
 
-    mean, variance = control_flow_ops.cond(
-        is_training, lambda: (mean, variance),
-        lambda: (moving_mean, moving_variance))
+        mean, variance = control_flow_ops.cond(
+            is_training, lambda: (mean, variance),
+            lambda: (moving_mean, moving_variance))
+    else:
+        mean, variance = mean, variance
 
     x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
     # x.set_shape(inputs.get_shape()) ??

diff --git a/evals/kitti_eval.py b/evals/kitti_eval.py
@@ -136,7 +136,7 @@ def evaluate(hypes, sess, image_pl, inf_out):
 
         if phase == 'val':
             start_time = time.time()
-            for i in xrange(10):
+            for i in range(10):
                 sess.run([softmax], feed_dict=feed_dict)
             dt = (time.time() - start_time)/10
 

diff --git a/hypes/KittiRes.json → hypes/KittiSeg_ResNet.json b/hypes/KittiRes.json → hypes/KittiSeg_ResNet.json
@@ -44,7 +44,6 @@
     "reseize_image": true,
     "image_height" : 384,
     "image_width" : 1248,
-
     "augment_level": 1
   },
 
@@ -64,12 +63,13 @@
       "threads": 4,
       "learning_rate": 1e-5,
       "learning_rate_step": null,
-      "max_steps": 12000
+      "max_steps": 8000
   },
   "use_fc_wd": true,
   "loss": "xentropy",
   "clip_norm" : 1.0,
   "wd": 5e-4,
   "load_pretrained": true,
+  "use_moving_average_bn": true,
   "scale_down": 0.5
 }
diff --git a/hypes/KittiSeg_ResNet50.json b/hypes/KittiSeg_ResNet50.json
@@ -0,0 +1,75 @@
+{
+  "model": {
+    "input_file": "../inputs/kitti_seg_input.py",
+    "architecture_file" : "../encoder/resnet.py",
+    "objective_file" : "../decoder/fcn.py",
+    "optimizer_file" : "../optimizer/generic_optimizer.py",
+    "evaluator_file" : "../evals/kitti_eval.py"
+  },
+
+  "path": ["../incl"],
+
+  "data": {
+    "train_file" : "data_road/train3.txt",
+    "val_file" : "data_road/val3.txt",
+    "road_color" : [255,0,255],
+    "background_color" : [255,0,0],
+    "vgg_url": "https://dl.dropboxusercontent.com/u/50333326/vgg16.npy",
+    "kitti_url": ""
+  },
+
+  "arch": {
+    "layers": 50,
+    "num_classes" : 2,
+    "image_size" : 50,
+    "weight": [1, 2],
+    "num_channels" : 3,
+    "whitening": false
+  },
+
+
+  "jitter": {
+    "random_resize": false,
+    "lower_size": 0.4,
+    "upper_size": 1.7,
+    "sig": 0.15,
+    "res_chance": 0.4,
+    "random_crop": true,
+    "crop_patch": false,
+    "patch_height": 256,
+    "patch_width": 256,
+    "max_crop": 32,
+    "crop_chance": 0.8,
+    "fix_shape": false,
+    "reseize_image": true,
+    "image_height" : 384,
+    "image_width" : 1248,
+    "augment_level": 1
+  },
+
+  "logging": {
+    "display_iter": 50,
+    "eval_iter": 250,
+    "write_iter": 250,
+    "save_iter": 2000,
+    "image_iter": 20000
+  },
+
+  "solver": {
+      "opt": "Adam",
+      "batch_size": 1,
+      "epsilon": 0.000000001,
+      "adam_eps": 0.00001,
+      "threads": 4,
+      "learning_rate": 1e-5,
+      "learning_rate_step": null,
+      "max_steps": 8000
+  },
+  "use_fc_wd": true,
+  "loss": "xentropy",
+  "clip_norm" : 1.0,
+  "wd": 5e-4,
+  "load_pretrained": true,
+  "use_moving_average_bn": true,
+  "scale_down": 0.5
+}
diff --git a/hypes/KittiVGG.json → hypes/KittiSeg_VGG.json b/hypes/KittiVGG.json → hypes/KittiSeg_VGG.json
diff --git a/inputs/kitti_seg_input.py b/inputs/kitti_seg_input.py
@@ -129,9 +129,9 @@ def _make_data_gen(hypes, phase, data_dir):
     """Return a data generator that outputs image samples.
 
     @ Returns
-    image: integer array of shape [width, height, 3].
+    image: integer array of shape [height, width, 3].
     Representing RGB value of each pixel.
-    gt_image: boolean array of shape [width, height, num_classes].
+    gt_image: boolean array of shape [height, width, num_classes].
     Set `gt_image[i,j,k] == 1` if and only if pixel i,j
     is assigned class k. `gt_image[i,j,k] == 0` otherwise.
 
@@ -352,7 +352,7 @@ def enqueue_loop(sess, enqueue_op, phase, gen):
 
     enqueue_op = q.enqueue((image_pl, label_pl))
     gen = _make_data_gen(hypes, phase, data_dir)
-    gen.next()
+    next(gen)
     # sess.run(enqueue_op, feed_dict=make_feed(data))
     if phase == 'val':
         num_threads = 1