【FFmpeg学习笔记006】 PCM编码为AAC

来源：互联网发布：80端口被攻击编辑：程序博客网时间：2024/06/06 02:24

PCM为音频采样数据，AAC为压缩编码数据。FFmpeg音频编码器可以实现将PCM转换为AAC。先了解一下PCM和AAC。
PCM：PCM（Pulse Code Modulation，脉码调制录音）。所谓PCM录音就是将声音等模拟信号变成符号化的脉冲列，再予以记录。PCM信号是由[1]、[0]等符号构成的数字信号，而未经过任何编码和压缩处理。与模拟信号比，它不易受传送系统的杂波及失真的影响。动态范围宽，可得到音质相当好的音响效果。
AAC：AAC（Advanced Audio Coding，高级音频编码），是一种专为声音数据设计的文件压缩格式。与MP3不同，它采用了全新的算法进行编码，更加高效，具有更高的“性价比”。利用AAC格式，可在人感觉声音质量没有明显降低的前提下，使文件更加小巧。
简单来说：PCM是没有压缩的编码方式，AAC是使用算法编码压缩格式。
AAC有两种封装格式，分别是ADIF ADTS，多数流媒体一般使用ADTS格式。参考AAC格式简介。
FFmpeg数据结构

AVCodecContext
AVCodec
AVCodecID
AVFrame
AVPacket

对PCM文件的读写直接使用FILE文件指针。
AVCodec是一个编码器，可以单纯的理解为一个编解码算法的结构。
AVCodecContext是AVCodec的一个上下文，打个比方，在视频编码h264时，有I、P、B三种帧，如果有一个视频流是 I B B P这种顺序到达，由于B帧需要依靠前后的帧来计算出本帧显示的内容，所以需要一些buffer保存一些帧，以根据这些来计算出B帧的内容，当然还有很多其他的内容。
AVCodecID是编码器的ID，如编码AAC时，就使用AV_CODEC_ID_AAC。
AVFrame 是编码前、解码后保存的数据。
AVPacket是编码后、解码前保存的数据。
关于官方定义的AVFrame:

/* Excerpt of the AVFrame definition as quoted from the FFmpeg headers; only
 * the data, linesize and extended_data members are shown here. */
typedef struct AVFrame {
#define AV_NUM_DATA_POINTERS 8
    /**
     * pointer to the picture/channel planes.
     * This might be different from the first allocated byte
     *
     * Some decoders access areas outside 0,0 - width,height, please
     * see avcodec_align_dimensions2(). Some filters and swscale can read
     * up to 16 bytes beyond the planes, if these filters are to be used,
     * then 16 extra bytes must be allocated.
     *
     * NOTE: Except for hwaccel formats, pointers not needed by the format
     * MUST be set to NULL.
     */
    uint8_t *data[AV_NUM_DATA_POINTERS];

    /**
     * For video, size in bytes of each picture line.
     * For audio, size in bytes of each plane.
     *
     * For audio, only linesize[0] may be set. For planar audio, each channel
     * plane must be the same size.
     *
     * For video the linesizes should be multiples of the CPUs alignment
     * preference, this is 16 or 32 for modern desktop CPUs.
     * Some code requires such alignment other code can be slower without
     * correct alignment, for yet other it makes no difference.
     *
     * @note The linesize may be larger than the size of usable data -- there
     * may be extra padding present for performance reasons.
     */
    int linesize[AV_NUM_DATA_POINTERS];

    /**
     * pointers to the data planes/channels.
     *
     * For video, this should simply point to data[].
     *
     * For planar audio, each channel has a separate data pointer, and
     * linesize[0] contains the size of each channel buffer.
     * For packed audio, there is just one data pointer, and linesize[0]
     * contains the total size of the buffer for all channels.
     *
     * Note: Both data and extended_data should always be set in a valid frame,
     * but for planar audio with more channels that can fit in data,
     * extended_data must be used in order to access all channels.
     */
    uint8_t **extended_data;

    ......其他成员
} AVFrame;

对于视频，目前比较流行的是H264压缩标准，好像没见过其他编码方式，而H264只能由YUV图像编码，也就是说H264解码后就是三个YUV分量，他们的数据会分别存在，data[0],data[1],data[2] ,而linesize[0],linesize[1],linesize[2]分别代表各个数据的长度。
对于音频，由于有多声道的音频，那么对于planar（平面）格式，音频解码出来的数据不同声道也储存在不同的指针，如data[0]是左声道，data[1]是右声道，由于各个声道的数据长度是一样的，所以linesize[0]就代表了每个声道数据的长度；对于packed（交错）格式，则只有data[0]一个数据指针，linesize[0]是所有声道数据的总长度。
成员extended_data通常指向data，是一个拓展。上面可以看到data是包含8个指针的数组，也就是说对于planar音频，data最多只能直接存放8个声道的指针；当声道数超过8个时，必须通过extended_data才能访问到所有声道。
进入正题，FFmpeg实现PCM编码为AAC。

#include <stdio.h>  #define __STDC_CONSTANT_MACROS  #ifdef _WIN32  //Windows  extern "C"  {  #include "libavcodec/avcodec.h"  #include "libavformat/avformat.h"  };  #else  //Linux...  #ifdef __cplusplus  extern "C"  {  #endif  #include <libavcodec/avcodec.h>  #include <libavformat/avformat.h>  #ifdef __cplusplus  };  #endif  #endif  int flush_encoder(AVFormatContext *fmt_ctx,unsigned int stream_index){      int ret;      int got_frame;      AVPacket enc_pkt;      if (!(fmt_ctx->streams[stream_index]->codec->codec->capabilities &          CODEC_CAP_DELAY))          return 0;      while (1) {          enc_pkt.data = NULL;          enc_pkt.size = 0;          av_init_packet(&enc_pkt);          ret = avcodec_encode_audio2 (fmt_ctx->streams[stream_index]->codec, &enc_pkt,              NULL, &got_frame);          av_frame_free(NULL);          if (ret < 0)              break;          if (!got_frame){              ret=0;              break;          }          printf("Flush Encoder: Succeed to encode 1 frame!\tsize:%5d\n",enc_pkt.size);          /* mux encoded frame */          ret = av_write_frame(fmt_ctx, &enc_pkt);          if (ret < 0)              break;      }      return ret;  }  int main(int argc, char* argv[])  {      AVFormatContext* pFormatCtx;      AVOutputFormat* fmt;      AVStream* audio_st;      AVCodecContext* pCodecCtx;      AVCodec* pCodec;      uint8_t* frame_buf;      AVFrame* pFrame;      AVPacket pkt;      int got_frame=0;      int ret=0;      int size=0;      FILE *in_file=NULL;                         //Raw PCM data      int framenum=1000;                          //Audio frame number      const char* out_file = "tdjm.aac";          //Output URL      int i;      in_file= fopen("tdjm.pcm", "rb");      av_register_all();      //Method 1.      pFormatCtx = avformat_alloc_context();      fmt = av_guess_format(NULL, out_file, NULL);      pFormatCtx->oformat = fmt;      //Method 2.      
//avformat_alloc_output_context2(&pFormatCtx, NULL, NULL, out_file);      //fmt = pFormatCtx->oformat;      //Open output URL      if (avio_open(&pFormatCtx->pb,out_file, AVIO_FLAG_READ_WRITE) < 0){          printf("Failed to open output file!\n");          return -1;      }      audio_st = avformat_new_stream(pFormatCtx, 0);      if (audio_st==NULL){          return -1;      }      pCodecCtx = audio_st->codec;      pCodecCtx->codec_id = fmt->audio_codec;      pCodecCtx->codec_type = AVMEDIA_TYPE_AUDIO;      pCodecCtx->sample_fmt = AV_SAMPLE_FMT_S16;      pCodecCtx->sample_rate= 44100;      pCodecCtx->channel_layout=AV_CH_LAYOUT_STEREO;      pCodecCtx->channels = av_get_channel_layout_nb_channels(pCodecCtx->channel_layout);      pCodecCtx->bit_rate = 64000;        //Show some information      av_dump_format(pFormatCtx, 0, out_file, 1);      pCodec = avcodec_find_encoder(pCodecCtx->codec_id);      if (!pCodec){          printf("Can not find encoder!\n");          return -1;      }      if (avcodec_open2(pCodecCtx, pCodec,NULL) < 0){          printf("Failed to open encoder!\n");          return -1;      }      pFrame = av_frame_alloc();      pFrame->nb_samples= pCodecCtx->frame_size;      pFrame->format= pCodecCtx->sample_fmt;      size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,pCodecCtx->frame_size,pCodecCtx->sample_fmt, 1);      frame_buf = (uint8_t *)av_malloc(size);      avcodec_fill_audio_frame(pFrame, pCodecCtx->channels, pCodecCtx->sample_fmt,(const uint8_t*)frame_buf, size, 1);      //Write Header      avformat_write_header(pFormatCtx,NULL);      av_new_packet(&pkt,size);      for (i=0; i<framenum; i++){          //Read PCM          if (fread(frame_buf, 1, size, in_file) <= 0){              printf("Failed to read raw data! 
\n");              return -1;          }else if(feof(in_file)){              break;          }          pFrame->data[0] = frame_buf;  //PCM Data          pFrame->pts=i*100;          got_frame=0;          //Encode          ret = avcodec_encode_audio2(pCodecCtx, &pkt,pFrame, &got_frame);          if(ret < 0){              printf("Failed to encode!\n");              return -1;          }          if (got_frame==1){              printf("Succeed to encode 1 frame! \tsize:%5d\n",pkt.size);              pkt.stream_index = audio_st->index;              ret = av_write_frame(pFormatCtx, &pkt);              av_free_packet(&pkt);          }      }      //Flush Encoder      ret = flush_encoder(pFormatCtx,0);      if (ret < 0) {          printf("Flushing encoder failed\n");          return -1;      }      //Write Trailer      av_write_trailer(pFormatCtx);      //Clean      if (audio_st){          avcodec_close(audio_st->codec);          av_free(pFrame);          av_free(frame_buf);      }      avio_close(pFormatCtx->pb);      avformat_free_context(pFormatCtx);      fclose(in_file);      return 0;  }

主要函数详解：
av_register_all()：注册FFmpeg所有编解码器。
avformat_alloc_output_context2()：初始化输出码流的AVFormatContext。
avio_open()：打开输出文件。
avformat_new_stream()：创建输出码流的AVStream。
avcodec_find_encoder()：查找编码器。
avcodec_open2()：打开编码器。
avformat_write_header()：写文件头（对于某些没有文件头的封装格式，不需要此函数。比如说MPEG2TS）。
avcodec_encode_audio2()：编码音频。即将AVFrame（存储PCM采样数据）编码为AVPacket（存储AAC，MP3等格式的码流数据）。
av_write_frame()：将编码后的音频码流写入文件。
av_write_trailer()：写文件尾（对于某些没有文件尾的封装格式，不需要此函数。比如说MPEG2TS）。
Reference：
http://blog.csdn.net/leixiaohua1020/article/details/25430449
http://blog.csdn.net/jammg/article/details/52684894
http://blog.csdn.net/leixiaohua1020/article/details/11822537
http://www.jianshu.com/p/1d1f893e53e9

阅读全文

0 0