FFmpeg HEVC 多线程解码解析

来源：互联网　发布：筛子数据　编辑：程序博客网　时间：2024/06/03 21:32

说明：

解码并行分3个级别：frame并行、slice并行和wpp。FFmpeg中提供了frame和slice并行的框架，有点混淆视听了。所以针对HEVC，FFmpeg实现的帧内并行是wpp。

1）关于thread_type，也就是并行模式，其实分两种：slice并行和frame+slice并行（注意这句话：Frame thread: Restrictions with slice threading also apply）。所以openHEVC在frame thread init中也会slice thread init；优先判断frame thread；参数命名上，也是默认参数 for slice，特别注明的才是 for frame。

2）FFmpeg中并行解码部分稍显混乱，264与265共用了部分上层框架，但264的并行解码又有若干问题，间接影响了265。

3）openHEVC的并行解码代码就清晰不少，FFmpeg中并行解码部分大概同步到openHEVC 2013年10月的提交，后面可能因为框架原因，没有再同步。

1 validate_thread_parameters

设置active_thread_type 对应并行级别

/**
 * Set the threading algorithms used.
 *
 * Threading requires more than one thread.
 * Frame threading requires entire frames to be passed to the codec,
 * and introduces extra decoding delay, so is incompatible with low_delay.
 *
 * The decision is stored in avctx->active_thread_type. When neither frame
 * nor slice threading applies and the codec does not advertise
 * AV_CODEC_CAP_AUTO_THREADS, avctx->thread_count is forced to 1 as well;
 * if the codec does advertise it, both fields are left untouched.
 *
 * @param avctx The context.
 */
static void validate_thread_parameters(AVCodecContext *avctx)
{
    /* Frame threading is only usable when the codec supports it and none of
     * the truncated / low-delay / chunked-input flags is set. */
    const int frame_ok =
        (avctx->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS) &&
        !(avctx->flags  & (AV_CODEC_FLAG_TRUNCATED | AV_CODEC_FLAG_LOW_DELAY)) &&
        !(avctx->flags2 & AV_CODEC_FLAG2_CHUNKS);

    /* Slice threading only needs codec support. */
    const int slice_ok =
        (avctx->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS) != 0;

    if (avctx->thread_count == 1) {
        /* A single thread was requested: no threading at all. */
        avctx->active_thread_type = 0;
    } else if (frame_ok && (avctx->thread_type & FF_THREAD_FRAME)) {
        /* Frame threading is checked first, so it wins when both are allowed. */
        avctx->active_thread_type = FF_THREAD_FRAME;
    } else if (slice_ok && (avctx->thread_type & FF_THREAD_SLICE)) {
        avctx->active_thread_type = FF_THREAD_SLICE;
    } else if (!(avctx->codec->capabilities & AV_CODEC_CAP_AUTO_THREADS)) {
        /* No usable mode and the codec does not manage threads itself. */
        avctx->thread_count       = 1;
        avctx->active_thread_type = 0;
    }
    /* Otherwise (AV_CODEC_CAP_AUTO_THREADS): keep whatever is already set. */

    if (avctx->thread_count > MAX_AUTO_THREADS) {
        av_log(avctx, AV_LOG_WARNING,
               "Application has requested %d threads. Using a thread count greater than %d is not recommended.\n",
               avctx->thread_count, MAX_AUTO_THREADS);
    }
}

.capabilities := AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS

12322 = 1<< 1 | 1<< 5 | 1<< 13 | 1<< 12

5 ff_thread_init pthread.c

/**
 * Pick the active threading mode and run the matching initializer.
 *
 * validate_thread_parameters() (section 1) fills in
 * avctx->active_thread_type; slice threading (section 7) is tested before
 * frame threading (section 6).
 *
 * @param avctx The context.
 * @return the result of the selected initializer, or 0 when no threading
 *         mode is active.
 */
int ff_thread_init(AVCodecContext *avctx)
{
    int ret = 0;

    validate_thread_parameters(avctx);

    if (avctx->active_thread_type & FF_THREAD_SLICE)
        ret = ff_slice_thread_init(avctx);
    else if (avctx->active_thread_type & FF_THREAD_FRAME)
        ret = ff_frame_thread_init(avctx);

    return ret;
}

avcodec_open2

-ff_thread_init
--ff_slice_thread_init    （active_thread_type & FF_THREAD_SLICE）
--ff_frame_thread_init    （active_thread_type & FF_THREAD_FRAME）

从pthread.c中分离出frame, slice级别的代码，独立为pthread_frame.c和pthread_slice.c。

6 ff_frame_thread_init pthread_frame.c

/**
 * Set up frame-level threading: allocate the FrameThreadContext, create one
 * PerThreadContext (with its own copy of the codec context) per thread and
 * start a frame_worker_thread for each of them.
 *
 * @param avctx The user-facing codec context. avctx->thread_count == 0 means
 *              "choose automatically from the CPU count".
 * @return 0 on success (including the case where threading ends up disabled
 *         because at most one thread would be used), a negative AVERROR
 *         code on failure.
 */
int ff_frame_thread_init(AVCodecContext *avctx)
{
    int nb_threads = avctx->thread_count;
    const AVCodec *codec = avctx->codec;
    AVCodecContext *tmpl = avctx;   /* context the per-thread copies are cloned from */
    FrameThreadContext *fctx;
    int i, err = 0;

#if HAVE_W32THREADS
    w32thread_init();
#endif

    if (!nb_threads) {
        /* thread_count was left at 0: derive it from the number of CPUs. */
        int nb_cpus = av_cpu_count();

        /* These debug modes force a single thread. */
        if ((avctx->debug & (FF_DEBUG_VIS_QP | FF_DEBUG_VIS_MB_TYPE)) || avctx->debug_mv)
            nb_cpus = 1;

        // use number of cores + 1 as thread count if there is more than one
        nb_threads = nb_cpus > 1 ? FFMIN(nb_cpus + 1, MAX_AUTO_THREADS) : 1;
        avctx->thread_count = nb_threads;
    }

    if (nb_threads <= 1) {
        /* Nothing to parallelize. */
        avctx->active_thread_type = 0;
        return 0;
    }

    fctx = av_mallocz(sizeof(*fctx));
    avctx->internal->thread_ctx = fctx;
    if (!fctx)
        return AVERROR(ENOMEM);

    /* One zero-initialized PerThreadContext per thread. */
    fctx->threads = av_mallocz_array(nb_threads, sizeof(*fctx->threads));
    if (!fctx->threads) {
        av_freep(&avctx->internal->thread_ctx);
        return AVERROR(ENOMEM);
    }

    pthread_mutex_init(&fctx->buffer_mutex, NULL);
    fctx->delaying = 1;

    for (i = 0; i < nb_threads; i++) {
        AVCodecContext *ctx_copy = av_malloc(sizeof(*ctx_copy));
        PerThreadContext *pt     = &fctx->threads[i];

        pthread_mutex_init(&pt->mutex, NULL);
        pthread_mutex_init(&pt->progress_mutex, NULL);
        pthread_cond_init(&pt->input_cond, NULL);
        pthread_cond_init(&pt->progress_cond, NULL);
        pthread_cond_init(&pt->output_cond, NULL);

        pt->frame = av_frame_alloc();
        if (!pt->frame) {
            av_freep(&ctx_copy);
            err = AVERROR(ENOMEM);
            goto error;
        }

        pt->parent = fctx;
        pt->avctx  = ctx_copy;

        /* The allocation is only checked here, after it has been stored. */
        if (!ctx_copy) {
            err = AVERROR(ENOMEM);
            goto error;
        }

        *ctx_copy = *tmpl;

        ctx_copy->internal = av_malloc(sizeof(*ctx_copy->internal));
        if (!ctx_copy->internal) {
            /* priv_data was copied from the template; clear it on this copy. */
            ctx_copy->priv_data = NULL;
            err = AVERROR(ENOMEM);
            goto error;
        }
        *ctx_copy->internal = *tmpl->internal;
        ctx_copy->internal->thread_ctx = pt;
        ctx_copy->internal->pkt        = &pt->avpkt;

        if (i == 0) {
            /* The first copy becomes the template for all later copies. */
            tmpl = ctx_copy;

            if (codec->init)
                err = codec->init(ctx_copy);

            /* NOTE(review): presumably propagates what init() set back into
             * the user-facing context; update_context_from_thread is
             * defined elsewhere. */
            update_context_from_thread(avctx, ctx_copy, 1);
        } else {
            /* Later copies get their own priv_data cloned from the first. */
            ctx_copy->priv_data = av_malloc(codec->priv_data_size);
            if (!ctx_copy->priv_data) {
                err = AVERROR(ENOMEM);
                goto error;
            }
            memcpy(ctx_copy->priv_data, tmpl->priv_data, codec->priv_data_size);
            ctx_copy->internal->is_copy = 1;

            if (codec->init_thread_copy)
                err = codec->init_thread_copy(ctx_copy);
        }

        if (err)
            goto error;

        /* Start the frame-level decoding thread for this context. */
        err = AVERROR(pthread_create(&pt->thread, NULL, frame_worker_thread, pt));
        pt->thread_init = !err;
        if (!pt->thread_init)
            goto error;
    }

    return 0;

error:
    /* i + 1 per-thread contexts have been touched so far. */
    ff_frame_thread_free(avctx, i + 1);

    return err;
}

7 ff_slice_thread_init pthread_slice.c

/**
 * Set up slice-level threading: allocate the SliceThreadContext, spawn the
 * worker threads and install thread_execute / thread_execute2 as the
 * context's execute callbacks.
 *
 * @param avctx The codec context. avctx->thread_count == 0 means "choose
 *              automatically from the CPU count and the picture height".
 * @return 0 on success (including the case where threading ends up disabled
 *         because at most one thread would be used), -1 on allocation or
 *         thread-creation failure.
 */
int ff_slice_thread_init(AVCodecContext *avctx)
{
    SliceThreadContext *c;
    int nb_threads = avctx->thread_count;
    int i;

#if HAVE_W32THREADS
    w32thread_init();
#endif

    // We cannot do this in the encoder init as the threads are created before
    if (av_codec_is_encoder(avctx->codec) &&
        avctx->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
        avctx->height > 2800) {
        nb_threads = 1;
        avctx->thread_count = 1;
    }

    if (!nb_threads) {
        /* thread_count is 0: derive it from the number of CPUs ... */
        int nb_cpus = av_cpu_count();

        /* ... but never use more CPUs than there are 16-line rows. */
        if (avctx->height)
            nb_cpus = FFMIN(nb_cpus, (avctx->height + 15) / 16);

        // use number of cores + 1 as thread count if there is more than one
        nb_threads = nb_cpus > 1 ? FFMIN(nb_cpus + 1, MAX_AUTO_THREADS) : 1;
        avctx->thread_count = nb_threads;
    }

    if (nb_threads <= 1) {
        /* Nothing to parallelize. */
        avctx->active_thread_type = 0;
        return 0;
    }

    c = av_mallocz(sizeof(*c));
    if (!c)
        return -1;

    c->workers = av_mallocz_array(nb_threads, sizeof(pthread_t));
    if (!c->workers) {
        av_free(c);
        return -1;
    }

    avctx->internal->thread_ctx = c;

    c->current_job = 0;
    c->job_count   = 0;
    c->job_size    = 0;
    c->done        = 0;

    pthread_cond_init(&c->current_job_cond, NULL);
    pthread_cond_init(&c->last_job_cond, NULL);
    pthread_mutex_init(&c->current_job_lock, NULL);

    /* The job lock is held while the workers are being created. */
    pthread_mutex_lock(&c->current_job_lock);
    for (i = 0; i < nb_threads; i++) {
        if (pthread_create(&c->workers[i], NULL, worker, avctx)) {
            /* Record how many workers were actually created, then tear down. */
            avctx->thread_count = i;
            pthread_mutex_unlock(&c->current_job_lock);
            ff_thread_free(avctx);
            return -1;
        }
    }

    /* NOTE(review): thread_park_workers is defined elsewhere; it presumably
     * waits for all workers to go idle and releases current_job_lock,
     * which is still held here. Confirm. */
    thread_park_workers(c, nb_threads);

    avctx->execute  = thread_execute;
    avctx->execute2 = thread_execute2;
    return 0;
}

openHEVC中，frame，slice thread 参数分开，更清晰！

8 submit_packet

主线程将packet submit给解码线程 frame worker thread。线程的异步操作，用到条件变量和互斥量。

//! 异步通信frame thread p解码packet avpkt     PerThreadContext* 存储threads' contextstatic int submit_packet(PerThreadContext *p, AVPacket *avpkt){    FrameThreadContext *fctx = p->parent;    PerThreadContext *prev_thread = fctx->prev_thread;    const AVCodec *codec = p->avctx->codec;    if (!avpkt->size && !(codec->capabilities & AV_CODEC_CAP_DELAY))        return 0;    pthread_mutex_lock(&p->mutex);    release_delayed_buffers(p);    if (prev_thread) {        int err;        if (prev_thread->state == STATE_SETTING_UP) {            pthread_mutex_lock(&prev_thread->progress_mutex);            while (prev_thread->state == STATE_SETTING_UP)                pthread_cond_wait(&prev_thread->progress_cond, &prev_thread->progress_mutex);            pthread_mutex_unlock(&prev_thread->progress_mutex);        }        err = update_context_from_thread(p->avctx, prev_thread->avctx, 0);  ///< prev_thread状态变更（setup->finished），则更新context        if (err) {            pthread_mutex_unlock(&p->mutex);            return err;        }    }    av_packet_unref(&p->avpkt);    av_packet_ref(&p->avpkt, avpkt);    p->state = STATE_SETTING_UP;    pthread_cond_signal(&p->input_cond);    ///< 发送input packet完成准备的信号 -> frame解码线程    pthread_mutex_unlock(&p->mutex);    /*     * If the client doesn't have a thread-safe get_buffer(),     * then decoding threads call back to the main thread,     * and it calls back to the client here.     
*/    if (!p->avctx->thread_safe_callbacks && (         p->avctx->get_format != avcodec_default_get_format ||         p->avctx->get_buffer2 != avcodec_default_get_buffer2)) {        while (p->state != STATE_SETUP_FINISHED && p->state != STATE_INPUT_READY) {            int call_done = 1;            pthread_mutex_lock(&p->progress_mutex);            while (p->state == STATE_SETTING_UP)                pthread_cond_wait(&p->progress_cond, &p->progress_mutex);   ///< 等待解码线程的progress_cond                                                                            ///< 使用通用接口get_buffer()获取            switch (p->state) {            case STATE_GET_BUFFER:                p->result = ff_get_buffer(p->avctx, p->requested_frame, p->requested_flags);                break;            case STATE_GET_FORMAT:                p->result_format = ff_get_format(p->avctx, p->available_formats);                break;            default:                call_done = 0;                break;            }            if (call_done) {                p->state  = STATE_SETTING_UP;                pthread_cond_signal(&p->progress_cond);            }            pthread_mutex_unlock(&p->progress_mutex);        }    }    fctx->prev_thread = p;    fctx->next_decoding++;    return 0;}

阅读全文

0 0