From b738d1d1af5723be6fcdad589e0cd1857854842a Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Fri, 14 Feb 2020 15:44:26 -0800 Subject: [PATCH 1/2] fix trt regression bug Signed-off-by: Yang Zhang --- PyTorch/SpeechRecognition/Jasper/trt/Dockerfile | 2 +- PyTorch/SpeechRecognition/Jasper/trt/perf.py | 2 +- PyTorch/SpeechRecognition/Jasper/trt/perfutils.py | 7 ++++--- .../Jasper/trt/scripts/trt_inference_benchmark.sh | 8 ++++---- PyTorch/SpeechRecognition/Jasper/trt/trtutils.py | 4 ++-- .../Jasper/trtis/scripts/export_model_helper.sh | 4 ++-- 6 files changed, 14 insertions(+), 13 deletions(-) diff --git a/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile b/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile index e598a67d4..f89dd7107 100644 --- a/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile +++ b/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile @@ -5,7 +5,7 @@ RUN apt-get update && apt-get install -y python3 WORKDIR /tmp/onnx-trt COPY trt/onnx-trt.patch . -RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && git submodule update --init --recursive && \ +RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && git checkout 8716c9b && git submodule update --init --recursive && \ patch -f < ../onnx-trt.patch && mkdir build && cd build && cmake .. 
-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig diff --git a/PyTorch/SpeechRecognition/Jasper/trt/perf.py b/PyTorch/SpeechRecognition/Jasper/trt/perf.py index 426ee6610..17e8ec0a5 100755 --- a/PyTorch/SpeechRecognition/Jasper/trt/perf.py +++ b/PyTorch/SpeechRecognition/Jasper/trt/perf.py @@ -122,7 +122,7 @@ def parse_args(): parser.add_argument("--wav", type=str, help='absolute path to .wav file (16KHz)') parser.add_argument("--max_workspace_size", default=0, type=int, help="Maximum GPU memory workspace size for constructed engine; needed when building") parser.add_argument("--transpose", action="store_true", default=False, help="If set, will transpose input") - parser.add_argument("--dynamic_shape", action="store_true", default=False, help="If set, use dynamic shape") + parser.add_argument("--static_shape", action="store_true", default=False, help="If set, use static shape otherwise dynamic shape. 
Dynamic shape is always preferred.") return parser.parse_args() diff --git a/PyTorch/SpeechRecognition/Jasper/trt/perfutils.py b/PyTorch/SpeechRecognition/Jasper/trt/perfutils.py index eb34987dd..9306accfd 100644 --- a/PyTorch/SpeechRecognition/Jasper/trt/perfutils.py +++ b/PyTorch/SpeechRecognition/Jasper/trt/perfutils.py @@ -110,9 +110,9 @@ def get_onnx(path, acoustic_model, args): if args.transpose: - signal_shape=(args.engine_batch_size, args.seq_len, 64) + signal_shape=(args.engine_batch_size, int(args.seq_len), 64) else: - signal_shape=(args.engine_batch_size, 64, args.seq_len) + signal_shape=(args.engine_batch_size, 64, int(args.seq_len)) with torch.no_grad(): phony_signal = torch.zeros(signal_shape, dtype=torch.float, device=torch.device("cuda")) @@ -191,6 +191,7 @@ def get_pytorch_components_and_onnx(args): wav, seq_len = audio_from_file(args.wav) if args.seq_len is None or args.seq_len == 0: args.seq_len = seq_len/(featurizer_config['sample_rate']/100) + args.seq_len = int(args.seq_len) if args.transpose: featurizer_config["transpose_out"] = True @@ -237,7 +238,7 @@ def adjust_shape(am_input, args): ''' input = am_input[0] - baked_length = args.seq_len + baked_length = int(args.seq_len) if args.transpose: in_seq_len = input.shape[1] diff --git a/PyTorch/SpeechRecognition/Jasper/trt/scripts/trt_inference_benchmark.sh b/PyTorch/SpeechRecognition/Jasper/trt/scripts/trt_inference_benchmark.sh index 523492c4f..543d55f15 100755 --- a/PyTorch/SpeechRecognition/Jasper/trt/scripts/trt_inference_benchmark.sh +++ b/PyTorch/SpeechRecognition/Jasper/trt/scripts/trt_inference_benchmark.sh @@ -81,11 +81,11 @@ mkdir -p $LOG_DIR -if [ "$USE_DYNAMIC_SHAPE" = "yes" ] ; then - DYNAMIC_PREFIX=" --dynamic_shape " - PREFIX=DYNAMIC -else +if [ "$USE_DYNAMIC_SHAPE" = "no" ] ; then PREFIX=BS${BATCH_SIZE}_NF${NUM_FRAMES} + DYNAMIC_PREFIX=" --static_shape " +else + PREFIX=DYNAMIC fi # Currently, TRT parser for ONNX can't parse mixed-precision weights, so ONNX diff --git 
a/PyTorch/SpeechRecognition/Jasper/trt/trtutils.py b/PyTorch/SpeechRecognition/Jasper/trt/trtutils.py index 92460b295..74580330f 100644 --- a/PyTorch/SpeechRecognition/Jasper/trt/trtutils.py +++ b/PyTorch/SpeechRecognition/Jasper/trt/trtutils.py @@ -60,7 +60,7 @@ def build_engine_from_parser(args): config = builder.create_builder_config() config.flags = config_flags - if args.dynamic_shape: + if not args.static_shape: profile = builder.create_optimization_profile() if args.transpose: profile.set_shape("FEATURES", min=(1,192,64), opt=(args.engine_batch_size,256,64), max=(builder.max_batch_size, max_len, 64)) @@ -73,7 +73,7 @@ def build_engine_from_parser(args): with trt.OnnxParser(network, TRT_LOGGER) as parser: with open(args.onnx_path, 'rb') as model: parsed = parser.parse(model.read()) - print ("Parsing returned ", parsed, "dynamic_shape= " , args.dynamic_shape, "\n") + print ("Parsing returned ", parsed, "dynamic_shape= " , not args.static_shape, "\n") return builder.build_engine(network, config=config) def deserialize_engine(engine_path, is_verbose): diff --git a/PyTorch/SpeechRecognition/Jasper/trtis/scripts/export_model_helper.sh b/PyTorch/SpeechRecognition/Jasper/trtis/scripts/export_model_helper.sh index d6eb0b81d..0133429cf 100755 --- a/PyTorch/SpeechRecognition/Jasper/trtis/scripts/export_model_helper.sh +++ b/PyTorch/SpeechRecognition/Jasper/trtis/scripts/export_model_helper.sh @@ -76,7 +76,7 @@ python ${JASPER_REPO}/trt/perf.py \ --ckpt_path ${CHECKPOINT_DIR}/${CHECKPOINT} \ --wav=${JASPER_REPO}/notebooks/example1.wav \ --model_toml=${JASPER_REPO}/configs/${MODEL_CONFIG} \ - --make_onnx --onnx_path jasper-tmp.onnx --engine_path ${MODEL_REPO}/jasper-trt/1/jasper_${ARCH}.plan --seq_len=256 --max_seq_len ${MAX_SEQUENCE_LENGTH_FOR_ENGINE} --verbose --dynamic_shape ${PREC_FLAGS} || exit 1 + --make_onnx --onnx_path jasper-tmp.onnx --engine_path ${MODEL_REPO}/jasper-trt/1/jasper_${ARCH}.plan --seq_len=256 --max_seq_len ${MAX_SEQUENCE_LENGTH_FOR_ENGINE} 
--verbose ${PREC_FLAGS} || exit 1 rm -fr jasper-tmp.onnx @@ -89,7 +89,7 @@ python ${JASPER_REPO}/trt/perf.py \ --ckpt_path ${CHECKPOINT_DIR}/${CHECKPOINT} \ --wav=${JASPER_REPO}/notebooks/example1.wav \ --model_toml=${JASPER_REPO}/configs/${MODEL_CONFIG} \ - --make_onnx --onnx_path ${MODEL_REPO}/jasper-onnx/1/jasper.onnx --seq_len=256 --max_seq_len ${MAX_SEQUENCE_LENGTH_FOR_ENGINE} --verbose ${PREC_FLAGS} --dynamic_shape ${ADDITIONAL_TRT_ARGS} || exit 1 + --make_onnx --onnx_path ${MODEL_REPO}/jasper-onnx/1/jasper.onnx --seq_len=256 --max_seq_len ${MAX_SEQUENCE_LENGTH_FOR_ENGINE} --verbose ${PREC_FLAGS} ${ADDITIONAL_TRT_ARGS} || exit 1 mkdir -p ${MODEL_REPO}/jasper-onnx-cpu/1 cp -f ${MODEL_REPO}/jasper-onnx/1/jasper.onnx ${MODEL_REPO}/jasper-onnx-cpu/1/jasper.onnx From c72df2be4aa32c13c168e16dc05646a8be7a2de5 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Fri, 14 Feb 2020 15:49:54 -0800 Subject: [PATCH 2/2] fix syntax error --- PyTorch/SpeechRecognition/Jasper/trt/README.md | 2 +- PyTorch/SpeechRecognition/Jasper/trt/perfutils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/PyTorch/SpeechRecognition/Jasper/trt/README.md b/PyTorch/SpeechRecognition/Jasper/trt/README.md index e72acd221..d6545dae2 100644 --- a/PyTorch/SpeechRecognition/Jasper/trt/README.md +++ b/PyTorch/SpeechRecognition/Jasper/trt/README.md @@ -229,7 +229,7 @@ NUM_STEPS: Number of inference steps. If -1 runs inference on entire dataset (de BATCH_SIZE: data batch size (default: 64) NUM_FRAMES: cuts/pads all pre-processed feature tensors to this length. 100 frames ~ 1 second of audio (default: 512) FORCE_ENGINE_REBUILD: boolean that indicates whether an already-built TensorRT engine of equivalent precision, batch-size, and number of frames should not be used. Engines are specific to the GPU, library versions, TensorRT versions, and CUDA versions they were built in and cannot be used in a different environment. 
(default: "true") -USE_DYNAMIC_SHAPE: if 'yes' uses dynamic shapes (default: ‘yes’) +USE_DYNAMIC_SHAPE: if 'yes' uses dynamic shapes (default: ‘yes’). Dynamic shape is always preferred since it allows reusing engines. ``` The complete list of parameters available for `trt/scripts/trt_inference.sh` is the same as `trt/scripts/trt_inference_benchmark.sh` only with different default input arguments. In the following, only the parameters with different default values are listed: diff --git a/PyTorch/SpeechRecognition/Jasper/trt/perfutils.py b/PyTorch/SpeechRecognition/Jasper/trt/perfutils.py index 9306accfd..3da69fc73 100644 --- a/PyTorch/SpeechRecognition/Jasper/trt/perfutils.py +++ b/PyTorch/SpeechRecognition/Jasper/trt/perfutils.py @@ -105,7 +105,7 @@ def get_onnx(path, acoustic_model, args): ''' dynamic_dim = 0 - if args.dynamic_shape: + if not args.static_shape: dynamic_dim = 1 if args.transpose else 2