Demuxer reopening fixes

This is a quite deep and important change. The thing is, the old
code had a "demuxer reuse" attempt. Unfortunately it neither made
sense (next to no performance improvement, complex code, limited
to MPEGTS streams), nor was it working correctly.

During Low Latency work I tried to eliminate demuxer reusing
because I wanted single demuxer logic to make input I/O plug-in
easier. It turned out that so modified code failed certain tests
such as Nvidia_AudioOnly.

It took me a great deal of debugging, but I found out that there
is a bug in the demuxer reuse procedure: namely, only the first
demuxer open attempt _really_ checks what is in the provided
segment; the rest just assume that the same streams are there.

So basically, in the series of "audio/video"/"audio without video"
/"audio/video" segments all will be perceived as containing video!
This "patched over" *AudioOnly tests, and let them pass, but only
because the Transcoder "thought" that it had video all the time.

And this simply wasn't true. When I removed the "demuxer reusing"
code, the Nvidia-dedicated "preserve video decoder" tactic was
falling
over the fact that pixel format changed (because it changed from
AV_PIX_FMT_SOMETHING into AV_PIX_FMT_NONE) and tried to create
hardware video decoder for AV_PIX_FMT_NONE, and failed.

Fixing all that finally allowed me to get rid of recursive call to
open_input when HW decoder needs to be reset. Which is good.

During the work on this commit I also noticed that various pieces
of code here and there assume that video stream is always there.
Which is not true, and never was (for example, some tests were
using segments without video in the middle of transcode sequence,
and it worked only because Transcoder "failed to notice" there is
no video anymore). So the whole code was carefully tested and
examined against this dangerous assumption and checks were added.

These changes, in turn, made it possible to remove the limitation
that required the Transcoder to start only with segments containing
video. The Transcoder will now happily process audio-only segments.
This commit is contained in:
Michal Adamczak
2022-07-22 20:01:50 +02:00
committed by Michal Adamczak
parent 663ea721bf
commit c7121378ee
10 changed files with 143 additions and 228 deletions
+2 -32
View File
@@ -88,7 +88,7 @@ func TestAPI_SkippedSegment(t *testing.T) {
}
func TestTranscoderAPI_InvalidFile(t *testing.T) {
// Test the following file open results on input: fail, success, fail, success
// Test the following file open results on input: success, fail, success
tc := NewTranscoder()
defer tc.StopTranscoder()
@@ -100,18 +100,9 @@ func TestTranscoderAPI_InvalidFile(t *testing.T) {
Muxer: ComponentOptions{Name: "null"},
}}
// fail # 1
in.Fname = "none"
_, err := tc.Transcode(in, out)
if err == nil || err.Error() != "TranscoderInvalidVideo" {
// Early codec check didn't find video in missing input file so we get `TranscoderInvalidVideo`
// instead of `No such file or directory`
t.Error("Expected 'TranscoderInvalidVideo', got ", err)
}
// success # 1
in.Fname = "../transcoder/test.ts"
_, err = tc.Transcode(in, out)
_, err := tc.Transcode(in, out)
if err != nil {
t.Error(err)
}
@@ -1217,27 +1208,6 @@ func audioOnlySegment(t *testing.T, accel Acceleration) {
}
}
tc.StopTranscoder()
// Test encoding with audio-only segment in start of stream
tc = NewTranscoder()
defer tc.StopTranscoder()
for i := 2; i < 4; i++ {
in := &TranscodeOptionsIn{
Fname: fmt.Sprintf("%s/test%d.ts", dir, i),
Accel: accel,
}
out := []TranscodeOptions{{
Oname: fmt.Sprintf("%s/out2_%d.ts", dir, i),
Profile: prof,
Accel: accel,
}}
_, err := tc.Transcode(in, out)
if i == 2 && (err == nil || err.Error() != "TranscoderInvalidVideo") {
t.Errorf("Expected to fail for audio-only segment but did not, instead got err=%v", err)
} else if i != 2 && err != nil {
t.Error(err)
}
}
}
func TestTranscoder_AudioOnly(t *testing.T) {
+97 -102
View File
@@ -128,21 +128,22 @@ pixfmt_cleanup:
}
int open_audio_decoder(struct input_ctx *ctx, AVCodec *codec)
static int open_audio_decoder(struct input_ctx *ctx, AVCodec *codec)
{
int ret = 0;
AVFormatContext *ic = ctx->ic;
// open audio decoder
AVCodecContext * ac = avcodec_alloc_context3(codec);
if (!ac) LPMS_ERR(open_audio_err, "Unable to alloc audio codec");
if (ctx->ac) LPMS_WARN("An audio context was already open!");
ctx->ac = ac;
ret = avcodec_parameters_to_context(ac, ic->streams[ctx->ai]->codecpar);
if (ret < 0) LPMS_ERR(open_audio_err, "Unable to assign audio params");
ret = avcodec_open2(ac, codec, NULL);
if (ret < 0) LPMS_ERR(open_audio_err, "Unable to open audio decoder");
AVCodecContext * ac = avcodec_alloc_context3(codec);
if (!ac) LPMS_ERR(open_audio_err, "Unable to alloc audio codec");
if (ctx->ac) LPMS_WARN("An audio context was already open!");
ctx->ac = ac;
ret = avcodec_parameters_to_context(ac, ic->streams[ctx->ai]->codecpar);
if (ret < 0) LPMS_ERR(open_audio_err, "Unable to assign audio params");
ret = avcodec_open2(ac, codec, NULL);
if (ret < 0) LPMS_ERR(open_audio_err, "Unable to open audio decoder");
ctx->last_frame_a = av_frame_alloc();
if (!ctx->last_frame_a) LPMS_ERR(open_audio_err, "Unable to alloc last_frame_a");
return 0;
open_audio_err:
@@ -150,7 +151,13 @@ open_audio_err:
return ret;
}
char* get_hw_decoder(int ff_codec_id, int hw_type)
static void close_audio_decoder(struct input_ctx *ictx)
{
if (ictx->ac) avcodec_free_context(&ictx->ac);
if (ictx->last_frame_a) av_frame_free(&ictx->last_frame_a);
}
static char* get_hw_decoder(int ff_codec_id, int hw_type)
{
switch (hw_type) {
case AV_HWDEVICE_TYPE_CUDA:
@@ -184,47 +191,51 @@ char* get_hw_decoder(int ff_codec_id, int hw_type)
}
}
int open_video_decoder(struct input_ctx *ctx, AVCodec *codec)
static int open_video_decoder(struct input_ctx *ctx, AVCodec *codec)
{
int ret = 0;
AVDictionary **opts = NULL;
AVFormatContext *ic = ctx->ic;
// open video decoder
if (ctx->hw_type > AV_HWDEVICE_TYPE_NONE) {
char* decoder_name = get_hw_decoder(codec->id, ctx->hw_type);
if (!*decoder_name) {
ret = lpms_ERR_INPUT_CODEC;
LPMS_ERR(open_decoder_err, "Input codec does not support hardware acceleration");
}
AVCodec *c = avcodec_find_decoder_by_name(decoder_name);
if (c) codec = c;
else LPMS_WARN("Nvidia decoder not found; defaulting to software");
if (AV_PIX_FMT_YUV420P != ic->streams[ctx->vi]->codecpar->format &&
AV_PIX_FMT_YUVJ420P != ic->streams[ctx->vi]->codecpar->format) {
// TODO check whether the color range is truncated if yuvj420p is used
ret = lpms_ERR_INPUT_PIXFMT;
LPMS_ERR(open_decoder_err, "Non 4:2:0 pixel format detected in input");
}
if (ctx->hw_type > AV_HWDEVICE_TYPE_NONE) {
char* decoder_name = get_hw_decoder(codec->id, ctx->hw_type);
if (!*decoder_name) {
ret = lpms_ERR_INPUT_CODEC;
LPMS_ERR(open_decoder_err, "Input codec does not support hardware acceleration");
}
AVCodecContext *vc = avcodec_alloc_context3(codec);
if (!vc) LPMS_ERR(open_decoder_err, "Unable to alloc video codec");
ctx->vc = vc;
ret = avcodec_parameters_to_context(vc, ic->streams[ctx->vi]->codecpar);
if (ret < 0) LPMS_ERR(open_decoder_err, "Unable to assign video params");
vc->opaque = (void*)ctx;
// XXX Could this break if the original device falls out of scope in golang?
if (ctx->hw_type == AV_HWDEVICE_TYPE_CUDA) {
// First set the hw device then set the hw frame
ret = av_hwdevice_ctx_create(&ctx->hw_device_ctx, ctx->hw_type, ctx->device, NULL, 0);
if (ret < 0) LPMS_ERR(open_decoder_err, "Unable to open hardware context for decoding")
vc->hw_device_ctx = av_buffer_ref(ctx->hw_device_ctx);
vc->get_format = get_hw_pixfmt;
AVCodec *c = avcodec_find_decoder_by_name(decoder_name);
if (c) codec = c;
else LPMS_WARN("Nvidia decoder not found; defaulting to software");
// It is safe to use ctx->vi here, because open_video_decoder won't be
// called if vi < 0
if (AV_PIX_FMT_YUV420P != ic->streams[ctx->vi]->codecpar->format &&
AV_PIX_FMT_YUVJ420P != ic->streams[ctx->vi]->codecpar->format) {
// TODO check whether the color range is truncated if yuvj420p is used
ret = lpms_ERR_INPUT_PIXFMT;
LPMS_ERR(open_decoder_err, "Non 4:2:0 pixel format detected in input");
}
vc->pkt_timebase = ic->streams[ctx->vi]->time_base;
av_opt_set(vc->priv_data, "xcoder-params", ctx->xcoderParams, 0);
ret = avcodec_open2(vc, codec, opts);
if (ret < 0) LPMS_ERR(open_decoder_err, "Unable to open video decoder");
}
AVCodecContext *vc = avcodec_alloc_context3(codec);
if (!vc) LPMS_ERR(open_decoder_err, "Unable to alloc video codec");
ctx->vc = vc;
ret = avcodec_parameters_to_context(vc, ic->streams[ctx->vi]->codecpar);
if (ret < 0) LPMS_ERR(open_decoder_err, "Unable to assign video params");
vc->opaque = (void*)ctx;
// XXX Could this break if the original device falls out of scope in golang?
if (ctx->hw_type == AV_HWDEVICE_TYPE_CUDA) {
// First set the hw device then set the hw frame
ret = av_hwdevice_ctx_create(&ctx->hw_device_ctx, ctx->hw_type, ctx->device, NULL, 0);
if (ret < 0) LPMS_ERR(open_decoder_err, "Unable to open hardware context for decoding")
vc->hw_device_ctx = av_buffer_ref(ctx->hw_device_ctx);
vc->get_format = get_hw_pixfmt;
}
vc->pkt_timebase = ic->streams[ctx->vi]->time_base;
av_opt_set(vc->priv_data, "xcoder-params", ctx->xcoderParams, 0);
ret = avcodec_open2(vc, codec, opts);
if (ret < 0) LPMS_ERR(open_decoder_err, "Unable to open video decoder");
ctx->last_frame_v = av_frame_alloc();
if (!ctx->last_frame_v) LPMS_ERR(open_decoder_err, "Unable to alloc last_frame_v");
return 0;
open_decoder_err:
@@ -233,6 +244,16 @@ open_decoder_err:
return ret;
}
static void close_video_decoder(struct input_ctx *ictx)
{
if (ictx->vc) {
if (ictx->vc->hw_device_ctx) av_buffer_unref(&ictx->vc->hw_device_ctx);
avcodec_free_context(&ictx->vc);
}
if (ictx->hw_device_ctx) av_buffer_unref(&ictx->hw_device_ctx);
if (ictx->last_frame_v) av_frame_free(&ictx->last_frame_v);
}
int open_input(input_params *params, struct input_ctx *ctx)
{
char *inp = params->fname;
@@ -247,52 +268,51 @@ int open_input(input_params *params, struct input_ctx *ctx)
ctx->device = params->device;
// open demuxer
if (!ctx->ic) {
ret = avformat_open_input(&ctx->ic, inp, NULL, NULL);
if (ret < 0) LPMS_ERR(open_input_err, "demuxer: Unable to open input");
ret = avformat_find_stream_info(ctx->ic, NULL);
if (ret < 0) LPMS_ERR(open_input_err, "Unable to find input info");
} else if (!ctx->ic->pb) {
// reopen input segment file IO context if needed
ret = avio_open(&ctx->ic->pb, inp, AVIO_FLAG_READ);
if (ret < 0) LPMS_ERR(open_input_err, "Unable to reopen file");
} else reopen_decoders = 0;
ret = avformat_open_input(&ctx->ic, inp, NULL, NULL);
if (ret < 0) LPMS_ERR(open_input_err, "demuxer: Unable to open input");
ret = avformat_find_stream_info(ctx->ic, NULL);
if (ret < 0) LPMS_ERR(open_input_err, "Unable to find input info");
AVCodec *video_codec = NULL;
AVCodec *audio_codec = NULL;
ctx->vi = av_find_best_stream(ctx->ic, AVMEDIA_TYPE_VIDEO, -1, -1, &video_codec, 0);
ctx->ai = av_find_best_stream(ctx->ic, AVMEDIA_TYPE_AUDIO, -1, -1, &audio_codec, 0);
if (AV_HWDEVICE_TYPE_CUDA == ctx->hw_type && ctx->vi >= 0) {
if (ctx->last_format == AV_PIX_FMT_NONE) ctx->last_format = ctx->ic->streams[ctx->vi]->codecpar->format;
else if (ctx->ic->streams[ctx->vi]->codecpar->format != ctx->last_format) {
// Now be careful here. It appears that in certain situation (such as .ts
// stream without video stream) ctx->vi will be set to 0, but the format will
// be set to AV_PIX_FMT_NONE and both width and height will be zero, etc
// This is normally fine, but when re-using video decoder we have to be
// extra careful, and handle both situations: one with negative vi, and one
// with positive vi but AV_PIX_FMT_NONE in stream format
enum AVPixelFormat format =
(ctx->vi >= 0) ? ctx->ic->streams[ctx->vi]->codecpar->format : AV_PIX_FMT_NONE;
if ((AV_HWDEVICE_TYPE_CUDA == ctx->hw_type) && (ctx->vi >= 0)
&& (AV_PIX_FMT_NONE != format)) {
if (ctx->last_format == AV_PIX_FMT_NONE) ctx->last_format = format;
else if (format != ctx->last_format) {
LPMS_WARN("Input pixel format has been changed in the middle.");
ctx->last_format = ctx->ic->streams[ctx->vi]->codecpar->format;
ctx->last_format = format;
// if the decoder is not re-opened when the video pixel format is changed,
// the decoder tries HW decoding with the video context initialized to a pixel format different from the input one.
// to handle a change in the input pixel format,
// we close the demuxer and re-open the decoder by calling open_input().
free_input(ctx, FORCE_CLOSE_HW_DECODER);
ret = open_input(params, ctx);
if (ret < 0) LPMS_ERR(open_input_err, "Unable to reopen video demuxer for HW decoding");
reopen_decoders = 0;
// we close the decoder so it will get reopened later
close_video_decoder(ctx);
}
}
if (reopen_decoders) {
if (!ctx->dv && (ctx->vi >= 0) &&
(!ctx->vc || (ctx->hw_type == AV_HWDEVICE_TYPE_NONE))) {
ret = open_video_decoder(ctx, video_codec);
if (ret < 0) LPMS_ERR(open_input_err, "Unable to open video decoder")
ctx->last_frame_v = av_frame_alloc();
if (!ctx->last_frame_v) LPMS_ERR(open_input_err, "Unable to alloc last_frame_v");
if (!ctx->dv && (ctx->vi >= 0) && (AV_PIX_FMT_NONE != format)) {
// yes, we have video stream to decode, but check if we should reopen
// decoder
if (!ctx->vc || (ctx->hw_type == AV_HWDEVICE_TYPE_NONE)) {
ret = open_video_decoder(ctx, video_codec);
if (ret < 0) LPMS_ERR(open_input_err, "Unable to open video decoder")
}
} else LPMS_WARN("No video stream found in input");
if (!ctx->da && (ctx->ai >= 0)) {
ret = open_audio_decoder(ctx, audio_codec);
if (ret < 0) LPMS_ERR(open_input_err, "Unable to open audio decoder")
ctx->last_frame_a = av_frame_alloc();
if (!ctx->last_frame_a) LPMS_ERR(open_input_err, "Unable to alloc last_frame_a");
} else LPMS_WARN("No audio stream found in input");
}
@@ -306,44 +326,19 @@ open_input_err:
void free_input(struct input_ctx *ictx, enum FreeInputPolicy policy)
{
if (FORCE_CLOSE_HW_DECODER == policy) {
// This means we are closing everything, so we also want to
// remove demuxer
if (ictx->ic) avformat_close_input(&ictx->ic);
} else {
// Otherwise we may want to retain demuxer in certain cases. Note that
// this is a lot of effort for very little gain, because demuxer is very
// cheap to create and destroy (being software component)
if (ictx->ic) {
// Only mpegts reuse the demuxer for subsequent segments.
// Close the demuxer for everything else.
// TODO might be reusable with fmp4 ; check!
if (!is_mpegts(ictx->ic)) avformat_close_input(&ictx->ic);
else if (ictx->ic->pb) {
// Reset leftovers from demuxer internals to prepare for next segment
avio_flush(ictx->ic->pb);
avformat_flush(ictx->ic);
avio_closep(&ictx->ic->pb);
}
}
}
if (ictx->ic) avformat_close_input(&ictx->ic);
ictx->flushed = 0;
ictx->flushing = 0;
ictx->pkt_diff = 0;
ictx->sentinel_count = 0;
// this is allocated elsewhere on first video packet
if (ictx->first_pkt) av_packet_free(&ictx->first_pkt);
if (ictx->ac) avcodec_free_context(&ictx->ac);
// video decoder is always closed when it is a SW decoder
// otherwise only when forced
int close_vc = ictx->vc &&
((AV_HWDEVICE_TYPE_NONE == ictx->hw_type) || (FORCE_CLOSE_HW_DECODER == policy));
if (close_vc) {
if (ictx->vc->hw_device_ctx) av_buffer_unref(&ictx->vc->hw_device_ctx);
avcodec_free_context(&ictx->vc);
if (ictx->hw_device_ctx) av_buffer_unref(&ictx->hw_device_ctx);
if (ictx->last_frame_v) av_frame_free(&ictx->last_frame_v);
if ((AV_HWDEVICE_TYPE_NONE == ictx->hw_type) || (FORCE_CLOSE_HW_DECODER == policy)) {
close_video_decoder(ictx);
}
if (ictx->last_frame_a) av_frame_free(&ictx->last_frame_a);
// audio decoder is always closed
close_audio_decoder(ictx);
}
+3
View File
@@ -10,6 +10,9 @@ struct input_ctx {
AVFormatContext *ic; // demuxer required
AVCodecContext *vc; // video decoder optional
AVCodecContext *ac; // audo decoder optional
// TODO: perhaps get rid of indices and introduce pointers same way as on
// the encoder side, easier to check and easier to dereference without
// pointer to demuxer
int vi, ai; // video and audio stream indices
int dv, da; // flags whether to drop video or audio
+27 -24
View File
@@ -42,22 +42,19 @@ static int add_video_stream(struct output_ctx *octx, struct input_ctx *ictx)
{
// video stream to muxer
int ret = 0;
AVStream *st = NULL;
if (is_copy(octx->video->name)) {
// create stream as a copy of existing one
if (ictx->vi < 0) LPMS_ERR(add_video_err, "Input video stream does not exist");
st = add_stream_copy(octx, ictx->ic->streams[ictx->vi]);
if (!st) LPMS_ERR(add_video_err, "Error adding video copy stream");
octx->vi = st->index;
if (octx->fps.den) st->avg_frame_rate = octx->fps;
else st->avg_frame_rate = ictx->ic->streams[ictx->vi]->r_frame_rate;
octx->video_stream = add_stream_copy(octx, ictx->ic->streams[ictx->vi]);
if (!octx->video_stream) LPMS_ERR(add_video_err, "Error adding video copy stream");
if (octx->fps.den) octx->video_stream->avg_frame_rate = octx->fps;
else octx->video_stream->avg_frame_rate = ictx->ic->streams[ictx->vi]->r_frame_rate;
} else if (octx->vc) {
// create stream from encoder
st = add_stream_for_encoder(octx, octx->vc);
if (!st) LPMS_ERR(add_video_err, "Error adding video encoder stream");
octx->vi = st->index;
if (octx->fps.den) st->avg_frame_rate = octx->fps;
else st->avg_frame_rate = ictx->ic->streams[ictx->vi]->r_frame_rate;
octx->video_stream = add_stream_for_encoder(octx, octx->vc);
if (!octx->video_stream) LPMS_ERR(add_video_err, "Error adding video encoder stream");
if (octx->fps.den) octx->video_stream->avg_frame_rate = octx->fps;
else octx->video_stream->avg_frame_rate = ictx->ic->streams[ictx->vi]->r_frame_rate;
// Video has rescale here. Audio is slightly different
// Rescale the gop/clip time to the expected timebase after filtering.
// The FPS filter outputs pts incrementing by 1 at a rate of 1/framerate
@@ -65,7 +62,7 @@ static int add_video_stream(struct output_ctx *octx, struct input_ctx *ictx)
AVRational ms_tb = {1, 1000};
AVRational dest_tb;
if (octx->fps.den) dest_tb = av_inv_q(octx->fps);
else dest_tb = ictx->ic->streams[ictx->vi]->time_base;
else dest_tb = ictx->ic->streams[ictx->vi]->time_base; // should be safe to use vi
if (octx->gop_time) {
octx->gop_pts_len = av_rescale_q(octx->gop_time, ms_tb, dest_tb);
octx->next_kf_pts = 0; // force for first frame
@@ -77,8 +74,14 @@ static int add_video_stream(struct output_ctx *octx, struct input_ctx *ictx)
octx->clip_to_pts = av_rescale_q(octx->clip_to, ms_tb, dest_tb);
}
} else if (is_drop(octx->video->name)) {
octx->video_stream = NULL;
LPMS_ERR(add_video_err, "add_video_stream called for dropped video!");
} else LPMS_ERR(add_video_err, "No video encoder, not a copy; what is this?");
} else {
// this can actually happen if the transcoder configured for video
// gets segment without actual video stream
octx->video_stream = NULL;
LPMS_WARN("No video encoder, not a copy; missing video input perhaps?");
}
octx->last_video_dts = AV_NOPTS_VALUE;
return 0;
@@ -99,26 +102,26 @@ static int add_audio_stream(struct input_ctx *ictx, struct output_ctx *octx)
// audio stream to muxer
int ret = 0;
AVStream *st = NULL;
if (is_copy(octx->audio->name)) {
// create stream as a copy of existing one
if (ictx->ai < 0) LPMS_ERR(add_audio_err, "Input audio stream does not exist");
st = add_stream_copy(octx, ictx->ic->streams[ictx->ai]);
octx->ai = st->index;
octx->audio_stream = add_stream_copy(octx, ictx->ic->streams[ictx->ai]);
} else if (octx->ac) {
// create stream from encoder
st = add_stream_for_encoder(octx, octx->ac);
octx->ai = st->index;
octx->audio_stream = add_stream_for_encoder(octx, octx->ac);
// Video has rescale here
} else if (is_drop(octx->audio->name)) {
// Supposed to exit this function early if there's a drop
octx->audio_stream = NULL;
LPMS_ERR(add_audio_err, "add_audio_stream called for dropped audio!");
} else {
LPMS_ERR(add_audio_err, "No audio encoder; not a copy; what is this?");
// see comment in add_video_stream above
octx->audio_stream = NULL;
LPMS_WARN("No audio encoder; not a copy; missing audio input perhaps?");
return 0;
}
if (!st) LPMS_ERR(add_audio_err, "Error adding video copy stream");
octx->ai = st->index;
if (!octx->audio_stream) LPMS_ERR(add_audio_err, "Error adding audio stream");;
// Audio has rescale here. Video version is slightly different
AVRational ms_tb = {1, 1000};
@@ -131,7 +134,7 @@ static int add_audio_stream(struct input_ctx *ictx, struct output_ctx *octx)
}
// signal whether to drop preroll audio
if (st->codecpar->initial_padding) octx->drop_ts = AV_NOPTS_VALUE;
if (octx->audio_stream->codecpar->initial_padding) octx->drop_ts = AV_NOPTS_VALUE;
octx->last_audio_dts = AV_NOPTS_VALUE;
@@ -217,10 +220,10 @@ static int open_video_encoder(struct input_ctx *ictx, struct output_ctx *octx,
vc->height = av_buffersink_get_h(octx->vf.sink_ctx);
if (octx->fps.den) vc->framerate = av_buffersink_get_frame_rate(octx->vf.sink_ctx);
else if (ictx->vc->framerate.num && ictx->vc->framerate.den) vc->framerate = ictx->vc->framerate;
else vc->framerate = ictx->ic->streams[ictx->vi]->r_frame_rate;
else vc->framerate = ictx->ic->streams[ictx->vi]->r_frame_rate; // vi should be safe
if (octx->fps.den) vc->time_base = av_buffersink_get_time_base(octx->vf.sink_ctx);
else if (ictx->vc->time_base.num && ictx->vc->time_base.den) vc->time_base = ictx->vc->time_base;
else vc->time_base = ictx->ic->streams[ictx->vi]->time_base;
else vc->time_base = ictx->ic->streams[ictx->vi]->time_base; // vi should be safe
if (octx->bitrate) vc->rc_min_rate = vc->bit_rate = vc->rc_max_rate = vc->rc_buffer_size = octx->bitrate;
if (av_buffersink_get_hw_frames_ctx(octx->vf.sink_ctx)) {
vc->hw_frames_ctx =
-35
View File
@@ -88,7 +88,6 @@ type Transcoder struct {
handle *C.struct_transcode_thread
stopped bool
started bool
lastacodec string
mu *sync.Mutex
}
@@ -857,8 +856,6 @@ func (t *Transcoder) Transcode(input *TranscodeOptionsIn, ps []TranscodeOptions)
if input == nil {
return nil, ErrTranscoderInp
}
var reopendemux bool
reopendemux = false
// don't read metadata for pipe input, because it can't seek back and av_find_input_format in the decoder will fail
if !strings.HasPrefix(strings.ToLower(input.Fname), "pipe:") {
status, format, err := GetCodecInfo(input.Fname)
@@ -875,30 +872,8 @@ func (t *Transcoder) Transcode(input *TranscodeOptionsIn, ps []TranscodeOptions)
}
}
if !t.started {
// NeedsBypass is state where video is present in container & without any frames
videoMissing := status == CodecStatusNeedsBypass || format.Vcodec == ""
if videoMissing {
// Audio-only segment, fail fast right here as we cannot handle them nicely
return nil, ErrTranscoderVid
}
// keep last audio codec
t.lastacodec = format.Acodec
// Stream is either OK or completely broken, let the transcoder handle it
t.started = true
} else {
// check if we need to reopen demuxer because added audio in video
// TODO: fixes like that are needed because handling of cfg change in
// LPMS is a joke. We need to decide whether LPMS should support full
// dynamic config one day and either implement it there, or implement
// some generic workaround for the problem in Go code, such as marking
// config changes as significant/insignificant and re-creating the instance
// if the former type change happens
if format.Acodec != "" && !isAudioAllDrop(ps) {
if (t.lastacodec == "") || (t.lastacodec != "" && t.lastacodec != format.Acodec) {
reopendemux = true
t.lastacodec = format.Acodec
}
}
}
}
hw_type, err := accelDeviceType(input.Accel)
@@ -954,16 +929,6 @@ func (t *Transcoder) Transcode(input *TranscodeOptionsIn, ps []TranscodeOptions)
paramsPointer = (*C.output_params)(&params[0])
resultsPointer = (*C.output_results)(&results[0])
}
if reopendemux {
// forcefully close and open demuxer
ret := int(C.lpms_transcode_reopen_demux(inp))
if ret != 0 {
if LogTranscodeErrors {
glog.Error("Reopen demux returned : ", ErrorMap[ret])
}
return nil, ErrorMap[ret]
}
}
ret := int(C.lpms_transcode(inp, paramsPointer, resultsPointer, C.int(len(params)), decoded))
if ret != 0 {
if LogTranscodeErrors {
-16
View File
@@ -871,17 +871,6 @@ func TestTranscoder_StreamCopy(t *testing.T) {
if res.Decoded.Frames != 0 || res.Encoded[0].Frames != 0 {
t.Error("Unexpected count of decoded/encoded frames")
}
in = &TranscodeOptionsIn{Fname: dir + "/audioonly.ts"}
out = []TranscodeOptions{
{
Oname: dir + "/noaudio.ts",
Profile: P144p30fps16x9,
AudioEncoder: ComponentOptions{Name: "copy"},
},
}
// Audio only segments are not supported
_, err = Transcode3(in, out)
assert.EqualError(t, err, "TranscoderInvalidVideo")
}
func TestTranscoder_StreamCopy_Validate_B_Frames(t *testing.T) {
@@ -1010,11 +999,6 @@ func TestTranscoder_Drop(t *testing.T) {
if res.Decoded.Frames != 30 || res.Encoded[0].Frames != 30 {
t.Error("Unexpected encoded/decoded frame counts ", res.Decoded.Frames, res.Encoded[0].Frames)
}
in.Fname = dir + "/novideo.ts"
out = []TranscodeOptions{{Oname: dir + "/encoded-audio.mp4", Profile: P144p30fps16x9}}
_, err = Transcode3(in, out)
// Audio only segments are not supported
assert.EqualError(t, err, "TranscoderInvalidVideo")
}
func TestTranscoder_StreamCopyAndDrop(t *testing.T) {
+4 -1
View File
@@ -53,6 +53,8 @@ int init_video_filters(struct input_ctx *ictx, struct output_ctx *octx)
const AVFilter *buffersink = avfilter_get_by_name("buffersink");
AVFilterInOut *outputs = NULL;
AVFilterInOut *inputs = NULL;
// vi should be safe here because this function gets called only if video
// is available
AVRational time_base = ictx->ic->streams[ictx->vi]->time_base;
enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE }; // XXX ensure the encoder allows this
struct filter_ctx *vf = &octx->vf;
@@ -302,7 +304,7 @@ int filtergraph_write(AVFrame *inf, struct input_ctx *ictx, struct output_ctx *o
}
// Timestamp handling code
AVStream *vst = ictx->ic->streams[ictx->vi];
AVStream *vst = (ictx->vi >= 0) ? ictx->ic->streams[ictx->vi] : NULL;
if (inf) { // Non-Flush Frame
inf->opaque = (void *) inf->pts; // Store original PTS for calc later
if (is_video && octx->fps.den) {
@@ -363,6 +365,7 @@ int filtergraph_read(struct input_ctx *ictx, struct output_ctx *octx, struct fil
// re-calculate our output PTS before passing it on to the encoder
if (filter->pts_diff == INT64_MIN) {
int64_t pts = (int64_t)frame->opaque; // original input PTS
// safe to use ictx->vi because we know this is video frame
pts = av_rescale_q_rnd(pts, ictx->ic->streams[ictx->vi]->time_base, av_buffersink_get_time_base(filter->sink_ctx), AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
// difference between rescaled input PTS and the segment's first frame PTS of the filtergraph output
filter->pts_diff = pts - frame->pts;
+2 -1
View File
@@ -45,7 +45,8 @@ struct output_ctx {
AVFormatContext *oc; // muxer required
AVCodecContext *vc; // video decoder optional
AVCodecContext *ac; // audo decoder optional
int vi, ai; // video and audio stream indices
AVStream *audio_stream;
AVStream *video_stream;
int dv, da; // flags whether to drop video or audio
struct filter_ctx vf, af, sf;
+8 -16
View File
@@ -36,10 +36,9 @@ const int lpms_ERR_UNRECOVERABLE = FFERRTAG('U', 'N', 'R', 'V');
// short of re-initializing the component. This is addressed for each component
// as follows:
//
// Demuxer: For resumable / header-less formats such as mpegts, the demuxer
// is reused across segments. This gives a small speed boost. For
// all other formats, the demuxer is closed and reopened at the next
// segment.
// Demuxer: Used to be reused, but it was found very problematic, as reused
// muxer retained information from previous segments. It caused all
// kind of subtle problems and was removed
//
// MOVED TO decoder.[ch]
@@ -133,13 +132,13 @@ static int flush_output(struct input_ctx *ictx, struct output_ctx *octx)
int ret = 0;
if (octx->vc) { // flush video
while (!ret || ret == AVERROR(EAGAIN)) {
ret = process_out(ictx, octx, octx->vc, octx->oc->streams[0], &octx->vf, NULL);
ret = process_out(ictx, octx, octx->vc, octx->video_stream, &octx->vf, NULL);
}
}
ret = 0;
if (octx->ac) { // flush audio
while (!ret || ret == AVERROR(EAGAIN)) {
ret = process_out(ictx, octx, octx->ac, octx->oc->streams[octx->dv ? 0 : 1], &octx->af, NULL);
ret = process_out(ictx, octx, octx->ac, octx->audio_stream, &octx->af, NULL);
}
}
// send EOF signal to signature filter
@@ -214,7 +213,6 @@ static int handle_audio_frame(struct transcode_thread *h, AVStream *ist,
output_results *decoded_results, AVFrame *dframe)
{
struct input_ctx *ictx = &h->ictx;
++decoded_results->audio_frames;
// frame duration update
int64_t dur = 0;
@@ -237,7 +235,7 @@ static int handle_audio_frame(struct transcode_thread *h, AVStream *ist,
if (octx->ac) {
int ret = process_out(ictx, octx, octx->ac,
octx->oc->streams[octx->dv ? 0 : 1], &octx->af, dframe);
octx->audio_stream, &octx->af, dframe);
if (AVERROR(EAGAIN) == ret || AVERROR_EOF == ret) continue; // this is ok
if (ret < 0) LPMS_ERR_RETURN("Error encoding audio");
}
@@ -320,7 +318,7 @@ static int handle_audio_packet(struct transcode_thread *h, output_results *decod
if (octx->da) continue; // drop audio
// If there is no encoder, then we are copying. Also the index of
// audio stream is 0 when we are dropping video and 1 otherwise
if (!octx->ac) ost = octx->oc->streams[octx->dv ? 0 : 1];
if (!octx->ac) ost = octx->audio_stream;
}
if (ost) {
@@ -418,7 +416,7 @@ static int handle_video_packet(struct transcode_thread *h, output_results *decod
// This is video stream for this output, but do we need packet?
if (octx->dv) continue; // drop video
// If there is no encoder, then we are copying
if (!octx->vc) ost = octx->oc->streams[0];
if (!octx->vc) ost = octx->video_stream;
}
if (ost) {
@@ -714,12 +712,6 @@ transcode_cleanup:
return ret;
}
int lpms_transcode_reopen_demux(input_params *inp)
{
free_input(&inp->handle->ictx, FORCE_CLOSE_HW_DECODER);
return open_input(inp, &inp->handle->ictx);
}
// TODO: name - this is called _stop, but it is more like stop & destroy
void lpms_transcode_stop(struct transcode_thread *handle)
{
-1
View File
@@ -98,7 +98,6 @@ enum LPMSLogLevel {
void lpms_init(enum LPMSLogLevel max_level);
int lpms_transcode(input_params *inp, output_params *params, output_results *results, int nb_outputs, output_results *decoded_results);
int lpms_transcode_reopen_demux(input_params *inp);
struct transcode_thread* lpms_transcode_new(lvpdnn_opts *dnn_opts);
void lpms_transcode_stop(struct transcode_thread* handle);
void lpms_transcode_discontinuity(struct transcode_thread *handle);