Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion PyTorch/SpeechRecognition/Jasper/trt/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ RUN apt-get update && apt-get install -y python3

WORKDIR /tmp/onnx-trt
COPY trt/onnx-trt.patch .
RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && git submodule update --init --recursive && \
RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && git checkout 8716c9b && git submodule update --init --recursive && \
patch -f < ../onnx-trt.patch && mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig


Expand Down
2 changes: 1 addition & 1 deletion PyTorch/SpeechRecognition/Jasper/trt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ NUM_STEPS: Number of inference steps. If -1 runs inference on entire dataset (de
BATCH_SIZE: data batch size (default: 64)
NUM_FRAMES: cuts/pads all pre-processed feature tensors to this length. 100 frames ~ 1 second of audio (default: 512)
FORCE_ENGINE_REBUILD: boolean that indicates whether to rebuild the TensorRT engine even if an already-built engine of equivalent precision, batch size, and number of frames exists. Engines are specific to the GPU, library versions, TensorRT versions, and CUDA versions they were built in and cannot be used in a different environment. (default: "true")
USE_DYNAMIC_SHAPE: if 'yes' uses dynamic shapes (default: ‘yes’)
USE_DYNAMIC_SHAPE: if 'yes' uses dynamic shapes (default: 'yes'). Dynamic shape is always preferred since it allows engines to be reused.
```

The complete list of parameters available for `trt/scripts/trt_inference.sh` is the same as `trt/scripts/trt_inference_benchmark.sh` only with different default input arguments. In the following, only the parameters with different default values are listed:
Expand Down
2 changes: 1 addition & 1 deletion PyTorch/SpeechRecognition/Jasper/trt/perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def parse_args():
parser.add_argument("--wav", type=str, help='absolute path to .wav file (16KHz)')
parser.add_argument("--max_workspace_size", default=0, type=int, help="Maximum GPU memory workspace size for constructed engine; needed when building")
parser.add_argument("--transpose", action="store_true", default=False, help="If set, will transpose input")
parser.add_argument("--dynamic_shape", action="store_true", default=False, help="If set, use dynamic shape")
parser.add_argument("--static_shape", action="store_true", default=False, help="If set, use static shape otherwise dynamic shape. Dynamic shape is always preferred.")

return parser.parse_args()

Expand Down
9 changes: 5 additions & 4 deletions PyTorch/SpeechRecognition/Jasper/trt/perfutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,14 +105,14 @@ def get_onnx(path, acoustic_model, args):
'''

dynamic_dim = 0
if args.dynamic_shape:
if not args.static_shape:
dynamic_dim = 1 if args.transpose else 2


if args.transpose:
signal_shape=(args.engine_batch_size, args.seq_len, 64)
signal_shape=(args.engine_batch_size, int(args.seq_len), 64)
else:
signal_shape=(args.engine_batch_size, 64, args.seq_len)
signal_shape=(args.engine_batch_size, 64, int(args.seq_len))

with torch.no_grad():
phony_signal = torch.zeros(signal_shape, dtype=torch.float, device=torch.device("cuda"))
Expand Down Expand Up @@ -191,6 +191,7 @@ def get_pytorch_components_and_onnx(args):
wav, seq_len = audio_from_file(args.wav)
if args.seq_len is None or args.seq_len == 0:
args.seq_len = seq_len/(featurizer_config['sample_rate']/100)
args.seq_len = int(args.seq_len)

if args.transpose:
featurizer_config["transpose_out"] = True
Expand Down Expand Up @@ -237,7 +238,7 @@ def adjust_shape(am_input, args):

'''
input = am_input[0]
baked_length = args.seq_len
baked_length = int(args.seq_len)

if args.transpose:
in_seq_len = input.shape[1]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,11 @@ mkdir -p $LOG_DIR



if [ "$USE_DYNAMIC_SHAPE" = "yes" ] ; then
DYNAMIC_PREFIX=" --dynamic_shape "
PREFIX=DYNAMIC
else
if [ "$USE_DYNAMIC_SHAPE" = "no" ] ; then
PREFIX=BS${BATCH_SIZE}_NF${NUM_FRAMES}
DYNAMIC_PREFIX=" --static_shape "
else
PREFIX=DYNAMIC
fi

# Currently, TRT parser for ONNX can't parse mixed-precision weights, so ONNX
Expand Down
4 changes: 2 additions & 2 deletions PyTorch/SpeechRecognition/Jasper/trt/trtutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def build_engine_from_parser(args):
config = builder.create_builder_config()
config.flags = config_flags

if args.dynamic_shape:
if not args.static_shape:
profile = builder.create_optimization_profile()
if args.transpose:
profile.set_shape("FEATURES", min=(1,192,64), opt=(args.engine_batch_size,256,64), max=(builder.max_batch_size, max_len, 64))
Expand All @@ -73,7 +73,7 @@ def build_engine_from_parser(args):
with trt.OnnxParser(network, TRT_LOGGER) as parser:
with open(args.onnx_path, 'rb') as model:
parsed = parser.parse(model.read())
print ("Parsing returned ", parsed, "dynamic_shape= " , args.dynamic_shape, "\n")
print ("Parsing returned ", parsed, "dynamic_shape= " , not args.static_shape, "\n")
return builder.build_engine(network, config=config)

def deserialize_engine(engine_path, is_verbose):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ python ${JASPER_REPO}/trt/perf.py \
--ckpt_path ${CHECKPOINT_DIR}/${CHECKPOINT} \
--wav=${JASPER_REPO}/notebooks/example1.wav \
--model_toml=${JASPER_REPO}/configs/${MODEL_CONFIG} \
--make_onnx --onnx_path jasper-tmp.onnx --engine_path ${MODEL_REPO}/jasper-trt/1/jasper_${ARCH}.plan --seq_len=256 --max_seq_len ${MAX_SEQUENCE_LENGTH_FOR_ENGINE} --verbose --dynamic_shape ${PREC_FLAGS} || exit 1
--make_onnx --onnx_path jasper-tmp.onnx --engine_path ${MODEL_REPO}/jasper-trt/1/jasper_${ARCH}.plan --seq_len=256 --max_seq_len ${MAX_SEQUENCE_LENGTH_FOR_ENGINE} --verbose ${PREC_FLAGS} || exit 1
rm -fr jasper-tmp.onnx


Expand All @@ -89,7 +89,7 @@ python ${JASPER_REPO}/trt/perf.py \
--ckpt_path ${CHECKPOINT_DIR}/${CHECKPOINT} \
--wav=${JASPER_REPO}/notebooks/example1.wav \
--model_toml=${JASPER_REPO}/configs/${MODEL_CONFIG} \
--make_onnx --onnx_path ${MODEL_REPO}/jasper-onnx/1/jasper.onnx --seq_len=256 --max_seq_len ${MAX_SEQUENCE_LENGTH_FOR_ENGINE} --verbose ${PREC_FLAGS} --dynamic_shape ${ADDITIONAL_TRT_ARGS} || exit 1
--make_onnx --onnx_path ${MODEL_REPO}/jasper-onnx/1/jasper.onnx --seq_len=256 --max_seq_len ${MAX_SEQUENCE_LENGTH_FOR_ENGINE} --verbose ${PREC_FLAGS} ${ADDITIONAL_TRT_ARGS} || exit 1

mkdir -p ${MODEL_REPO}/jasper-onnx-cpu/1
cp -f ${MODEL_REPO}/jasper-onnx/1/jasper.onnx ${MODEL_REPO}/jasper-onnx-cpu/1/jasper.onnx
Expand Down