ffmpeg time_base详解

来源：互联网发布：中国地图标注软件编辑：程序博客网时间：2024/06/08 12:22

ffmpeg time_base

ffmpeg存在多个时间基准(time_base)，对应不同的阶段(结构体)，每个time_base具体的值不一样，ffmpeg提供函数在各个time_base中进行切换。搞清楚各个time_base的来源，对于阅读ffmpeg的代码很重要。

一、time_base
1、AVStream(libavformat/avformat.h)

typedef struct AVStream {
    /**
     * This is the fundamental unit of time (in seconds) in terms
     * of which frame timestamps are represented.
     *
     * decoding: set by libavformat
     * encoding: May be set by the caller before avformat_write_header() to
     *           provide a hint to the muxer about the desired timebase. In
     *           avformat_write_header(), the muxer will overwrite this field
     *           with the timebase that will actually be used for the timestamps
     *           written into the file (which may or may not be related to the
     *           user-provided one, depending on the format).
     */
    AVRational time_base;

    /**
     * Decoding: pts of the first frame of the stream in presentation order, in stream time base.
     * Only set this if you are absolutely 100% sure that the value you set
     * it to really is the pts of the first frame.
     * This may be undefined (AV_NOPTS_VALUE).
     * @note The ASF header does NOT contain a correct start_time the ASF
     * demuxer must NOT set this.
     */
    int64_t start_time;

    /**
     * Decoding: duration of the stream, in stream time base.
     * If a source file does not specify a duration, but does specify
     * a bitrate, this value will be estimated from bitrate and file size.
     */
    int64_t duration;

从上面的信息可以看到，AVStream->time_base单位为秒。

那AVStream->time_base具体的值是多少呢？下面以mpegts_demuxer为例：

static int mpegts_set_stream_info(AVStream *st, PESContext *pes,
                                  uint32_t stream_type, uint32_t prog_reg_desc)
{
avpriv_set_pts_info(st, 33, 1, 90000);

/**
 * Set the time base and pts wrap width of a stream.
 *
 * The fraction pts_num/pts_den is passed through av_reduce() before being
 * stored, and the result is also forwarded to the stream's codec context
 * as its packet time base.
 *
 * @param s             stream to update
 * @param pts_wrap_bits value stored into s->pts_wrap_bits
 * @param pts_num       numerator of the requested time base
 * @param pts_den       denominator of the requested time base
 */
void avpriv_set_pts_info(AVStream *s, int pts_wrap_bits,
                         unsigned int pts_num, unsigned int pts_den)
{
    AVRational new_tb;
    /* NOTE(review): a non-zero return from av_reduce() appears to mean the
     * reduction fit within INT_MAX; confirm against the av_reduce() docs. */
    if (av_reduce(&new_tb.num, &new_tb.den, pts_num, pts_den, INT_MAX)) {
        /* The numerator changed, so a common factor was divided out. */
        if (new_tb.num != pts_num)
            av_log(NULL, AV_LOG_DEBUG,
                   "st:%d removing common factor %d from timebase\n",
                   s->index, pts_num / new_tb.num);
    } else
        av_log(NULL, AV_LOG_WARNING,
               "st:%d has too large timebase, reducing\n", s->index);

    /* Reject a non-positive numerator or denominator and leave the stream
     * untouched. */
    if (new_tb.num <= 0 || new_tb.den <= 0) {
        av_log(NULL, AV_LOG_ERROR,
               "Ignoring attempt to set invalid timebase %d/%d for st:%d\n",
               new_tb.num, new_tb.den,
               s->index);
        return;
    }
    s->time_base     = new_tb;
    /* Keep the codec context's packet time base equal to the stream's. */
    av_codec_set_pkt_timebase(s->codec, new_tb);
    s->pts_wrap_bits = pts_wrap_bits;
}

通过avpriv_set_pts_info(st, 33, 1, 90000)函数，设置AVStream->time_base为1/90000。为什么是90000？因为mpeg的pts、dts都是以90kHz来采样的，所以采样间隔为1/90000秒。

2、AVCodecContext

typedef struct AVCodecContext {
    /**
     * This is the fundamental unit of time (in seconds) in terms
     * of which frame timestamps are represented. For fixed-fps content,
     * timebase should be 1/framerate and timestamp increments should be
     * identically 1.
     * - encoding: MUST be set by user.
     * - decoding: Set by libavcodec.
     */
    AVRational time_base;

从上面的信息可以看到，AVCodecContext->time_base单位同样为秒，不过精度没有AVStream->time_base高，大小为1/framerate。

下面以ffmpeg转码工具为例：

static int transcode_init(void)
{
            if (enc_ctx->codec_type == AVMEDIA_TYPE_VIDEO) {
                if (ost->filter && !ost->frame_rate.num)
                    ost->frame_rate = av_buffersink_get_frame_rate(ost->filter->filter);
                if (ist && !ost->frame_rate.num)
                    ost->frame_rate = ist->framerate;
                if (ist && !ost->frame_rate.num)
                    ost->frame_rate = ist->st->r_frame_rate;
                if (ist && !ost->frame_rate.num) {
                    ost->frame_rate = (AVRational){25, 1};
                    av_log(NULL, AV_LOG_WARNING,
                           "No information "
                           "about the input framerate is available. Falling "
                           "back to a default value of 25fps for output stream #%d:%d. Use the -r option "
                           "if you want a different framerate.\n",
                           ost->file_index, ost->index);
                }
//                    ost->frame_rate = ist->st->avg_frame_rate.num ? ist->st->avg_frame_rate : (AVRational){25, 1};
                if (ost->enc && ost->enc->supported_framerates && !ost->force_fps) {
                    int idx = av_find_nearest_q_idx(ost->frame_rate, ost->enc->supported_framerates);
                    ost->frame_rate = ost->enc->supported_framerates[idx];
                }
                if (enc_ctx->codec_id == AV_CODEC_ID_MPEG4) {
                    av_reduce(&ost->frame_rate.num, &ost->frame_rate.den,
                              ost->frame_rate.num, ost->frame_rate.den, 65535);
                }
            }

            switch (enc_ctx->codec_type) {
            case AVMEDIA_TYPE_VIDEO:

                enc_ctx->time_base = av_inv_q(ost->frame_rate);
                if (ost->filter && !(enc_ctx->time_base.num && enc_ctx->time_base.den))
                    enc_ctx->time_base = ost->filter->filter->inputs[0]->time_base;
                if (   av_q2d(enc_ctx->time_base) < 0.001 && video_sync_method != VSYNC_PASSTHROUGH
                   && (video_sync_method == VSYNC_CFR || video_sync_method == VSYNC_VSCFR || (video_sync_method == VSYNC_AUTO && !(oc->oformat->flags & AVFMT_VARIABLE_FPS)))){
                    av_log(oc, AV_LOG_WARNING, "Frame rate very high for a muxer not efficiently supporting it.\n"
                                               "Please consider specifying a lower framerate, a different muxer or -vsync 2\n");
                }

首先获取ost->frame_rate，然后计算enc_ctx->time_base = 1/ost->frame_rate。

总结：
AVStream->time_base比AVCodecContext->time_base精度要高(数值要小)，比如AVStream->time_base为1/90000，而AVCodecContext->time_base为1/30(假设frame_rate为30)；同样的pts和dts，以AVStream->time_base为单位，数值要比以AVCodecContext->time_base为单位要大。

二、pts、dts
那各个结构下，pts和dts使用哪个time_base来表示呢？

1、AVPacket

typedef struct AVPacket {
    /**
     * Presentation timestamp in AVStream->time_base units; the time at which
     * the decompressed packet will be presented to the user.
     * Can be AV_NOPTS_VALUE if it is not stored in the file.
     * pts MUST be larger or equal to dts as presentation cannot happen before
     * decompression, unless one wants to view hex dumps. Some formats misuse
     * the terms dts and pts/cts to mean something different. Such timestamps
     * must be converted to true pts/dts before they are stored in AVPacket.
     */
    int64_t pts;
    /**
     * Decompression timestamp in AVStream->time_base units; the time at which
     * the packet is decompressed.
     * Can be AV_NOPTS_VALUE if it is not stored in the file.
     */
    int64_t dts;

从上面可以看到，AVPacket下的pts和dts以AVStream->time_base为单位(数值比较大)。这也很容易理解，根据mpeg的协议，压缩后或解压前的数据，pts和dts是90kHz时钟的采样值，时间间隔就是AVStream->time_base。

2、AVFrame

typedef struct AVFrame {
    /**
     * Presentation timestamp in time_base units (time when frame should be shown to user).
     */
    int64_t pts;

    /**
     * PTS copied from the AVPacket that was decoded to produce this frame.
     */
    int64_t pkt_pts;

    /**
     * DTS copied from the AVPacket that triggered returning this frame. (if frame threading isn't used)
     * This is also the Presentation time of this AVFrame calculated from
     * only AVPacket.dts values without pts values.
     */
    int64_t pkt_dts;

注意：
AVFrame里面的pkt_pts和pkt_dts是拷贝自AVPacket，同样以AVStream->time_base为单位；而pts是为输出(显示)准备的，以AVCodecContext->time_base为单位。//FIXME

3、InputStream

typedef struct InputStream {
    int file_index;
    AVStream *st;
    AVCodecContext *dec_ctx;
    int64_t       start;     /* time when read started */
    /* predicted dts of the next packet read for this stream or (when there are
     * several frames in a packet) of the next frame in current packet (in AV_TIME_BASE units) */
    int64_t       next_dts;
    int64_t       dts;       ///< dts of the last packet read for this stream (in AV_TIME_BASE units)

    int64_t       next_pts;  ///< synthetic pts for the next decode frame (in AV_TIME_BASE units)
    int64_t       pts;       ///< current pts of the decoded frame  (in AV_TIME_BASE units)

从上面可以看到，InputStream下的pts和dts以AV_TIME_BASE为单位(微秒)，至于为什么要转化为微秒，可能是为了避免使用浮点数。

4、OutputStream

typedef struct OutputStream {
    int file_index;          /* file index */
    int index;               /* stream index in the output file */
    int source_index;        /* InputStream index */
    AVStream *st;            /* stream in the output file */
    int encoding_needed;     /* true if encoding needed for this stream */
    int frame_number;
    /* input pts and corresponding output pts
       for A/V sync */
    struct InputStream *sync_ist; /* input stream to sync against */
    int64_t sync_opts;       /* output frame counter, could be changed to some true timestamp */ // FIXME look at frame_number
    /* pts of the first frame encoded for this stream, used for limiting
     * recording time */
    int64_t first_pts;
    /* dts of the last packet sent to the muxer */
    int64_t last_mux_dts;
    AVBitStreamFilterContext *bitstream_filters;
    AVCodecContext *enc_ctx;
    AVCodec *enc;
    int64_t max_frames;
    AVFrame *filtered_frame;

OutputStream涉及音视频同步，结构和InputStream不同，暂时只作记录，不分析。

三、各个time_base之间转换

ffmpeg提供av_rescale_q函数用于time_base之间转换，av_rescale_q(a,b,c)作用相当于执行a*b/c，通过设置b,c的值，可以很方便的实现time_base之间转换。
例如：
1、InputStream(AV_TIME_BASE)到AVPacket(AVStream->time_base)

static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output)
{
pkt->dts = av_rescale_q(ist->dts, AV_TIME_BASE_Q, ist->st->time_base);

2、AVPacket(AVStream->time_base)到InputStream(AV_TIME_BASE)

static int process_input_packet(InputStream *ist, const AVPacket *pkt)
{

if (pkt->dts != AV_NOPTS_VALUE) {
ist->next_dts = ist->dts = av_rescale_q(pkt->dts, ist->st->time_base, AV_TIME_BASE_Q);

四、后记：
AVFrame->pts和AVPacket->pts、AVPacket->dts的值，在解码/编码后，会经历短暂的time_base不匹配的情况：

1、解码后

static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output)
{
    decoded_frame = ist->decoded_frame;
    pkt->dts  = av_rescale_q(ist->dts, AV_TIME_BASE_Q, ist->st->time_base);

    update_benchmark(NULL);
    ret = avcodec_decode_video2(ist->dec_ctx,
                                decoded_frame, got_output, pkt);

    best_effort_timestamp= av_frame_get_best_effort_timestamp(decoded_frame);
    if(best_effort_timestamp != AV_NOPTS_VALUE)
        ist->next_pts = ist->pts = av_rescale_q(decoded_frame->pts = best_effort_timestamp, ist->st->time_base, AV_TIME_BASE_Q);

解码后，decoded_frame->pts的值使用AVStream->time_base为单位，后在AVFilter里面转换成以AVCodecContext->time_base为单位。 //FIXME

2、编码后

static void do_video_out(AVFormatContext *s,
                         OutputStream *ost,
                         AVFrame *in_picture)
{
        ret = avcodec_encode_video2(enc, &pkt, in_picture, &got_packet);
        if (got_packet) {
            if (debug_ts) {
                av_log(NULL, AV_LOG_INFO, "encoder -> type:video "
                       "pkt_pts:%s pkt_pts_time:%s pkt_dts:%s pkt_dts_time:%s\n",
                       av_ts2str(pkt.pts), av_ts2timestr(pkt.pts, &enc->time_base),
                       av_ts2str(pkt.dts), av_ts2timestr(pkt.dts, &enc->time_base));
            }

            if (pkt.pts == AV_NOPTS_VALUE && !(enc->codec->capabilities & CODEC_CAP_DELAY))
                pkt.pts = ost->sync_opts;

            av_packet_rescale_ts(&pkt, enc->time_base, ost->st->time_base);

            if (debug_ts) {
                av_log(NULL, AV_LOG_INFO, "encoder -> type:video "
                    "pkt_pts:%s pkt_pts_time:%s pkt_dts:%s pkt_dts_time:%s\n",
                    av_ts2str(pkt.pts), av_ts2timestr(pkt.pts, &ost->st->time_base),
                    av_ts2str(pkt.dts), av_ts2timestr(pkt.dts, &ost->st->time_base));
            }

            frame_size = pkt.size;
            write_frame(s, &pkt, ost);
            /* if two pass, output log */
            if (ost->logfile && enc->stats_out) {
                fprintf(ost->logfile, "%s", enc->stats_out);
            }
        }

编码后，pkt.pts、pkt.dts使用AVCodecContext->time_base为单位，后通过调用"av_packet_rescale_ts"转换为AVStream->time_base为单位。

转载地址：http://www.cnitblog.com/luofuchong/archive/2014/11/28/89869.html

问题是这样的用一个 VLC(流媒体客户端) 去请求流媒体服务器上的数据，但是获得的数据播放速度明显快于1倍速，大概是 timestamp 不对，不知道是服务器的错误，还是客户端解码时出错，总感觉服务器那边有问题，由于服务器端是客户端提供的，客户说是我们的问题，我还不知道如何证明是谁的错。

A:RFC3984 规定采用 90000 Hz 的时钟，因此如果编码帧频是 30，那么时间戳间隔就该是 90000 / 30 = 3000，根据抓包来看，似乎时间戳间隔的确是 3000。

时间戳的间隔不固定，比如有的时间戳间隔是 2990 有的是 3002，会导致解析出来的视频快播的效果么

Q：各位大侠好：
我现在正在开发视频实时流播放，简单的过程如下：
采集视频流 -> 视频流转换为Sorenson H.263编码格式 -> 把编码的实时流通过RTMP协议发送 -> flash客户端进行播放。
现在我的时间戳颗粒是这样生成的：
第一帧的时间戳为0；
第二帧的时间戳的算法为：第一个字符编码的当前时间 - 上一帧第一个字符编码的当前时间
根据这个时间颗粒的算法，我在flash客户端播放就会产生延时。
请问各位大侠有什么好的建议或是文档之类的，以前firstime管管建议我看RFC4629文档，但是效果不太明显？
谢谢！

A:时间戳顺序累加就行了，每次加1

Q:最近做了一个捕捉摄像头并保存FLV的小东西，发现转换完毕后的FLV文件，用播放器播放的时候，速度特别快，大概是正常速度的4倍。请问这是怎么回事？网上搜了一下，说是时间戳的问题，可是PTS我跟了，AVPacket的PTS是每帧增长40，time_base为: 25/s.。DTS是个无效值。PTS的计算是根据ffmpeg的例子写的。
pkt.pts= av_rescale_q(oAcc->coded_frame->pts, oAcc->time_base, audio_st->time_base);

1. dts到底需不需要自己计算？
2. 还有播放速度过快的可能原因？
3. 还有PTS和DTS的具体含义？
int64_t pts; ///< presentation time stamp in time_base units
int64_t dts; ///< decompression time stamp in time_base units

上面的意思是不是说，播放器根据PTS进行播放。然后DTS是在编码的时候自己设置？

刚用ffmpeg，好些东西不懂，还请大侠多多指教------刚才又试了一下，把time_base降为10帧每秒。播放速度和正常速度接近。但是不知道FLV文件的帧率该设置多少合适。有没有一个权威的说法。

A:我也做摄像头捕捉，跟你出现一样的问题，我自己分析的话，应该是捕捉摄像头的图像的速度只有10帧每秒，但是保存成视频25帧每秒的话播放看起来就非常快，但是我摄像头捕捉设定的是25帧每秒，难道是速度达不到？
反正我还没解决，LZ解决了的话告诉下，

谢谢。暂时认为是摄像头捕捉速率问题。换了一个高清无驱摄像头就好了

Q:在每个音视频数据包中都含有PTS和DTS，一个数据包中应该含有多个数据帧以及音频数据，那么这里的PTS和DTS它是如何来标识数据帧的？PTS和DTS的单位是什么？视频的最小单位是帧，可通过PTS来指定它何时播放，那音频的最小单位是什么？这里的PTS对音频而言它标识的是什么？是这个时间点采样点吗？

在网上找了很久关于音视频编解码的资料，都没有合适的

audio_timebase = av_q2d(fmtctx->streams[audio_index]->time_base);
video_timebase = av_q2d(fmtctx->streams[video_index]->time_base);

last_video_pts = pts * video_timebase;
last_audio_pts = pts * audio_timebase;

timebase就是单位

以audio为基准同步video。只要设置好了 ao 的参数，如sample rate, channels, sample size等， audio驱动就能以正确的速度播放，所以只要程序里write不出大问题的话，这种同步是非常有效的。

在video out里如下做：

/* Display one picture, then sleep for what is left of the frame interval,
 * adjusted by how far the audio and video clocks have drifted apart. */
/* Measure how long displaying the picture takes. */
pre_time = av_gettime();
gl_vo->vo_display(pic);
after_time = av_gettime();
/* Frame interval minus the display time.
 * NOTE(review): assumes av_gettime() and usleep() both work in microseconds,
 * so 1000*1000/fps is one frame period - confirm. */
rest_time = 1000*1000/fps - (after_time - pre_time);

/* Positive when the audio clock is ahead of the video clock. */
av_diff = last_audio_pts - last_video_pts;

/* Video is behind audio: shorten the wait by 1/4, or by 1/2 once the gap
 * reaches 0.5. */
if ( av_diff > 0.2 )
{
            if( av_diff < 0.5 ) rest_time -= rest_time / 4;
            else rest_time -= rest_time / 2;
}
/* Video is ahead of audio: lengthen the wait by the same proportions. */
else if ( av_diff < -0.2)
{
            if( av_diff > -0.5 ) rest_time += rest_time / 4;
            else rest_time += rest_time / 2;
}

/* Only sleep when there is time left in this frame interval. */
if ( rest_time > 0 )
    usleep(rest_time);

Q:谢谢kf701的回复，看后明白了不少
这种同步是音频抽样一次就与一帧图像去同步的吗？

A:上面的代码是每display一个picture,就与audio的PTS比较一下,
如果没有audio,只有video,那么video就会以fps显示, 靠的就是那个 usleep(rest_time)

Q:如何利用AVPacket包里的pts,dts实现音视频同步？声频播放是只管自己播放，视频有一个初始化播放帧率，如何根据AVPacket里的pts,dts还实现两者的同步？
现在我的视频播放一直按原始播放帧率播放，声音有点卡！哪位知道，尽快告知小弟！

A:DTS：decoding time stamp
PTS：presentation time stamp

Generally the PTS and DTS will only differ when the stream we are playing has B frames in it.

Q:关于b帧和时间戳的问题

我从mpeg2视频中用av_read_frame()读取视频帧并解码，顺序是IPBBPBB...
它们的pts顺序是1423756...现在我要把这个视频再用mpeg2编码，最大b帧数还是2.那么我在编码时是否要将视频数据调整为按显示时间先后的顺序，再交给avcodec_encode_video()编码？即把第2帧放在3、4帧之后，第7帧放在5、6帧之后？

A:你不能这么做，编码器会给你这么做的。如果你有B帧，那么所有的B帧都会被放在缓冲区里直到下一个I/P帧到来

例如：你的输入序列是IBBPBBPBBI

那么输出的序列是

输入I，编码I，输出I

输入B

输入P，编码P，输出P

编码B，输出B

输入P,编码P，输出P

。。。。。。

在解码端所有的P帧都会被放在缓冲里直到下一个I/P帧的到来

如：解码I，输出I

解码P,放入缓冲P

解码B，输出B

解码P，输出上一次P帧

Q:解码出来的图片的时间戳问题 MPEG一个包中包含有时间戳, 而可能几个包才能解码出一张图象, 也可能一个包能解码出几张图, 请问包中的时间戳与解码出来的图象如何对应上?

A:在ffmpeg中通过parser部件把从avformat部件取下来的原始包重新“合成”为仅包含一个完整帧的包。从MPEG2部分的代码中看出，如果“几个包才能解码出一张图象”的话，会取第一个包的PTS和DTS；如果“一个包能解码出几张图”，则会根据这个包的PTS和DTS通过帧频推算出其它帧的DTS。

Q: ffmpeg的avcodec_decode_video 函数解码时间戳问题？在 VLC 中调用 avcodec_decode_video() 函数进行解码时，AVFrame->pts 时间戳不对，导致我的图像不能够显示？请问有谁知道它的解码原理，这个 PTS 怎么得出的吗？还是外部传入的？

A:
        is->video_st->codec->reordered_opaque= pkt->pts;
        len1 = avcodec_decode_video(is->video_st->codec,
                                    frame, &got_picture,
                                    pkt->data, pkt->size);

        if(   (decoder_reorder_pts || pkt->dts == AV_NOPTS_VALUE)
           && frame->reordered_opaque != AV_NOPTS_VALUE)
            pts= frame->reordered_opaque;
        else if(pkt->dts != AV_NOPTS_VALUE)
            pts= pkt->dts;
        else
            pts= 0;
        pts *= av_q2d(is->video_st->time_base);

Q:我贴下   VLC 的代码，（vlc-0.9.8a/modules/codec/avcodec/video.c 文件中）

       /* Decode the buffered data; a NULL buffer is passed when the buffer
        * is empty (i_buffer <= 0) and b_flush is set. */
       i_used = avcodec_decode_video( p_sys->p_context, p_sys->p_ff_pic,
                                    &b_gotpicture,
                                    p_sys->i_buffer <= 0 && p_sys->b_flush ? NULL : (uint8_t*)p_sys->p_buffer,
                                    p_sys->i_buffer );

      中间省略

取得   PTS ，
       if( p_sys->p_ff_pic->pts )
       {
         printf(" p_sys->p_ff_pic->pts   = %Lx\n",   p_sys->p_ff_pic->pts);
         p_sys->i_pts = p_sys->p_ff_pic->pts;
       }
从   AVFrame 结构中取得   这个 PTS ，但是这个   AVFrame 结构中取得   这个 PTS 从哪里取得的呢？

A:时间戳一般是在编码的时候加入到媒体文件中的，所以在解码时可以从中分析出PTS。

ffmpeg中的时间单位

AV_TIME_BASE

ffmpeg中的内部计时单位（时间基）。ffmpeg中凡是没有关联到具体流的时间值都以它为单位，比如AVFormatContext中的duration即意味着整个文件的长度为duration个AV_TIME_BASE单位（即duration/AV_TIME_BASE秒）。注意：AVStream中的duration是以该流自己的time_base为单位的。AV_TIME_BASE定义为：

#define         AV_TIME_BASE   1000000
 

AV_TIME_BASE_Q

ffmpeg内部时间基的分数表示，实际上它是AV_TIME_BASE的倒数。从它的定义能很清楚的看到这点：

#define         AV_TIME_BASE_Q   (AVRational){1, AV_TIME_BASE}
 

AVRational的定义如下：

/* A rational number expressed as numerator / denominator. */
typedef struct AVRational {
    int num; // numerator
    int den; // denominator
} AVRational;

ffmpeg提供了一个把AVRational结构转换成double的函数：

/**
 * Convert rational to double.
 * @param a rational to convert
 * @return a.num divided by a.den, computed in double precision
 */
static inline double av_q2d(AVRational a)
{
    /* Cast the denominator so the division is not done in integers. */
    return a.num / (double) a.den;
}

现在可以根据pts来计算一帧在整个视频中的时间位置：

timestamp(秒) = pts * av_q2d(st->time_base)

计算视频长度的方法：

time(秒) = st->duration * av_q2d(st->time_base)

这里的st是一个AVStream对象指针。

时间基转换公式

timestamp(ffmpeg内部时间戳) = AV_TIME_BASE * time(秒)
time(秒) = AV_TIME_BASE_Q * timestamp(ffmpeg内部时间戳)

所以当需要把视频跳转到N秒的时候可以使用下面的方法：

int64_t timestamp = N * AV_TIME_BASE; 2av_seek_frame(fmtctx, index_of_video, timestamp, AVSEEK_FLAG_BACKWARD);

ffmpeg同样为我们提供了不同时间基之间的转换函数：

int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)

这个函数的作用是计算a * bq / cq，来把时间戳从一个时基调整到另外一个时基。在进行时基转换的时候，我们应该首选这个函数，因为它可以避免溢出的情况发生。

0 0