关于《最简单的基于FFMPEG+SDL的音频播放器》记录

来源：互联网发布：大众排放门知乎编辑：程序博客网时间：2024/05/23 01:15

一、概述

之前的《最简单的基于FFMPEG+SDL的视频播放器》记录中，我们实现了播放视频的功能，但是还不能播放声音，这次我们就将实现声音的播放。为了减小难度，先只做一个音频播放器。在后续的文章中会在视频播放器中加入音频播放的功能。
同理，音频播放器的实现可以参考原作者的文章最简单的基于FFMPEG+SDL的音频播放器，这里仅仅记录自己的心得。

二、音频播放器流程

1、解码流程

音频的解码流程与视频的解码流程是类似的，只是解码音频的函数是：avcodec_decode_audio4，其余的就不再赘述，流程如下图所示：

2、SDL播放流程

SDL处理的大致流程如下图所示：

接下来对各个流程做一下简单的解释：

1）SDL_Init

初始化SDL库

2）设置输出音频参数

这一步我们将设置输出音频参数，例如：采样率、声道数、音频数据格式等等。所有的这些参数都在SDL_AudioSpec这个结构体中，SDL_AudioSpec的结构如下：

数据类型

属性

说明

int

freq

采样率，常见取值有11025、22050、44100和48000。采样率越高，音质越好。

SDL_AudioFormat

format

音频数据格式，即每个采样的大小和类型，例如：AUDIO_S16SYS表示有符号16位

Uint8

channels

声道数，SDL2.0支持1(mono)，2(stereo)，4(quad)，6(5.1声道)四种值，声道数越多，立体效果越好。

Uint8

silence

静音值，例如用0代表静音。

Uint16

samples

音频缓冲大小，以采样帧为单位（应为2的幂），ffplay为1024

Uint32

size

音频缓冲大小，以字节为单位

SDL_AudioCallback

callback

播放音频时的回调函数，参数为：userdata(编解码上下文),stream(音频数据),len(数据大小)

void*

userdata

回调函数需要的参数，一般为编解码上下文

其中声道数可以通过如下代码获取：

      //声道布局      //uint64_t out_channel_layout = AV_CH_LAYOUT_STEREO;//立体声 2uint64_t out_channel_layout = AV_CH_LAYOUT_MONO;//单声道 1int out_channels = av_get_channel_layout_nb_channels(out_channel_layout);//根据通道布局类型获取通道数

一个设置输出音频参数的例子：

        SDL_AudioSpec wanted_spec;wanted_spec.freq = 44100;//CD音质wanted_spec.format = AUDIO_S16SYS;wanted_spec.channels = out_channels;wanted_spec.silence = 0; wanted_spec.samples = 1024;wanted_spec.callback = fill_audio;//需要自己实现它wanted_spec.userdata = pCodecCtx_audio;//用户数据，它将提供给回调函数使用，这里即编解码上下文

其中fill_audio的实现如下：

/* The audio function callback takes the following parameters:* stream: A pointer to the audio buffer to be filled* len: The length (in bytes) of the audio buffer*/void  fill_audio(void *udata, Uint8 *stream, int len){//SDL 2.0  SDL_memset(stream, 0, len);if (audio_len == 0)        /*  Only  play  if  we  have  data  left  */return;len = (len>audio_len ? audio_len : len);   /*  Mix  as  much  data  as  possible  */SDL_MixAudio(stream, audio_pos, len, SDL_MIX_MAXVOLUME);audio_pos += len;audio_len -= len;}

3）SDL_OpenAudio

打开音频设备。该函数原型如下：

int SDL_OpenAudio(SDL_AudioSpec* desired, SDL_AudioSpec* obtained)

参数：

desired

期望输出的格式

obtained

实际得到的格式，可以手动设置为NULL

这个函数以期望的格式打开音频设备，并将实际的硬件参数填充到obtained中。如果obtained是NULL，SDL会保证传递给回调函数的音频数据就是desired所期望的格式，必要时由SDL自动转换成硬件实际支持的格式；此时desired中的部分字段（如size、silence）会被SDL改写为计算后的值。因此我们只需要把解码后的音频数据转换成desired指定的格式再交给SDL，这就是为什么我们后面必须对音频数据格式进行转换。

此外SDL_OpenAudio默认打开的是1号音频设备，如果想指定设备，请使用SDL_OpenAudioDevice。

4） swr_alloc_set_opts

设置转换格式的参数。代码如下：

//获取声道布局pCodecCtx_audio->channel_layout = av_get_default_channel_layout(pCodecCtx_audio->channels);//Swr  struct SwrContext *au_convert_ctx;au_convert_ctx = swr_alloc();au_convert_ctx = swr_alloc_set_opts(au_convert_ctx, out_channel_layout, out_sample_fmt, out_sample_rate,pCodecCtx_audio->channel_layout, pCodecCtx_audio->sample_fmt, pCodecCtx_audio->sample_rate, 0, NULL);

5）swr_init

初始化SWR上下文。

6）swr_convert

对解码后的数据帧进行数据格式转换。

7）SDL_PauseAudio

开始播放音频。SDL_PauseAudio(0)表示开始（恢复）播放，参数为非0时表示暂停。

三、源代码

开发环境：VS 2013

#include <stdio.h>#include<stdlib.h>#include<string.h>//包含库extern "C"{#include "libavcodec/avcodec.h"#include "libavformat/avformat.h"#include "libswscale/swscale.h"#include "SDL2/SDL.h" #include "libswresample/swresample.h"  };#define MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48khz 32bit audio  //音频相关//Buffer:  //|-----------|-------------|  //chunk-------pos---len-----|  static  Uint8  *audio_chunk;static  Uint32  audio_len;static  Uint8  *audio_pos;/* The audio function callback takes the following parameters:* stream: A pointer to the audio buffer to be filled* len: The length (in bytes) of the audio buffer*/void  fill_audio(void *udata, Uint8 *stream, int len){//SDL 2.0  SDL_memset(stream, 0, len);if (audio_len == 0)        /*  Only  play  if  we  have  data  left  */return;len = (len>audio_len ? audio_len : len);   /*  Mix  as  much  data  as  possible  */SDL_MixAudio(stream, audio_pos, len, SDL_MIX_MAXVOLUME);audio_pos += len;audio_len -= len;}int main(int argc, char* argv[]){//FFmpeg相关变量  AVFormatContext *pFormatCtx;//AVFormatContext主要存储视音频封装格式中包含的信息  unsigned             i;int             audioindex;//音频流所在序号AVCodecContext  *pCodecCtx_audio;//AVCodecContext，存储该音频流使用解码方式的相关数据AVCodec         *pCodec_audio;//音频解码器 AVFrame *pFrame_audio;AVPacket packet;//解码前数据char* filepath = "1.mp3";//输入文件av_register_all();//初始化libformat库和注册编解码器avformat_network_init();//初始化网络组件  pFormatCtx = avformat_alloc_context();//打开视频文件然后读取头部信息到pFormatCtx  if (avformat_open_input(&pFormatCtx, filepath, NULL, NULL) != 0){printf("Couldn't open input stream.\n");return -1;}//获取流信息  if (avformat_find_stream_info(pFormatCtx, NULL) < 0){printf("Couldn't find stream information.\n");return -1;}audioindex = -1;//找到音频流的序号  for (i = 0; i < pFormatCtx->nb_streams; i++)if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO){audioindex = i;break;}if (audioindex == -1){printf("Didn't find a audio stream.\n");return -1;}//音频pCodecCtx_audio = pFormatCtx->streams[audioindex]->codec;//获取解码环境 
 pCodec_audio = avcodec_find_decoder(pCodecCtx_audio->codec_id);//获取解码器  if (pCodec_audio == NULL){printf("Codec not found.\n");return -1;}//打开解码器 if (avcodec_open2(pCodecCtx_audio, pCodec_audio, NULL) < 0){printf("Could not open codec.\n");return -1;}//设置输出的音频参数  int out_nb_samples = 1024;//单个通道样本个数AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16;//采样格式int out_sample_rate = 44100;//输出时采样率,CD一般为44100HZ//声道布局//uint64_t out_channel_layout = AV_CH_LAYOUT_STEREO;//立体声//uint64_t out_channel_layout = AV_CH_LAYOUT_MONO;//单声道uint64_t out_channel_layout = AV_CH_LAYOUT_5POINT1;int out_channels = av_get_channel_layout_nb_channels(out_channel_layout);//根据通道布局类型获取通道数//根据通道数、样本个数、采样格式分配内存int out_buffer_size = av_samples_get_buffer_size(NULL, out_channels, out_nb_samples, out_sample_fmt, 1);uint8_t *out_buffer_audio = (uint8_t *)av_malloc(MAX_AUDIO_FRAME_SIZE * 2);//*2是保证输出缓存大于输入数据大小pFrame_audio = av_frame_alloc();//SDL Begin----------------------------  //初始化SDL库if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {printf("Could not initialize SDL - %s\n", SDL_GetError());return -1;}//音频部分//设置输出参数//SDL_AudioSpec  //包含了我们将要输出的音频的所有信息。SDL_AudioSpec wanted_spec;wanted_spec.freq = out_sample_rate;//采样率,即播放速度wanted_spec.format = AUDIO_S16SYS;//音频数据格式。在“S16SYS”中的S 表示有符号的signed，                                  //16 表示每个样本是16 位长的，SYS 表示大小头的顺序是与使用的系统相同的。                              //这些格式是由 avcodec_decode_audio 为我们给出来的输入音频的格式wanted_spec.channels = out_channels;//声道数wanted_spec.silence = 0; //静音值，因为数据是有符号数，所以0表示静音wanted_spec.samples = out_nb_samples;//这是当我们想要更多声音的时候，我们想让SDL 给出来的声音缓冲                                     //区的尺寸。一个比较合适的值在512 到8192 之间；ffplay 使用1024。wanted_spec.callback = fill_audio;//回调函数，向声音缓冲out_buffer_audio填入一个特定的数量的字节。wanted_spec.userdata = pCodecCtx_audio;//用户数据，它将提供给回调函数使用，这里即编解码上下文//打开音频if (SDL_OpenAudio(&wanted_spec, NULL)<0){printf("can't open audio.\n");return -1;}//SDL End------------------------  //音频uint32_t len = 0;int got_picture_audio;int index = 
0; //获取声道布局pCodecCtx_audio->channel_layout = av_get_default_channel_layout(pCodecCtx_audio->channels);//Swr  struct SwrContext *au_convert_ctx;au_convert_ctx = swr_alloc();au_convert_ctx = swr_alloc_set_opts(au_convert_ctx, out_channel_layout, out_sample_fmt, out_sample_rate,pCodecCtx_audio->channel_layout, pCodecCtx_audio->sample_fmt, pCodecCtx_audio->sample_rate, 0, NULL);swr_init(au_convert_ctx);while (av_read_frame(pFormatCtx, &packet) >= 0){//读取下一帧数据  if (packet.stream_index == audioindex)//如果是音频流帧{if (avcodec_decode_audio4(pCodecCtx_audio, pFrame_audio, &got_picture_audio, &packet) < 0) {printf("Error in decoding audio frame.\n");return -1;}if (got_picture_audio > 0){swr_convert(au_convert_ctx, &out_buffer_audio, MAX_AUDIO_FRAME_SIZE, (const uint8_t **)pFrame_audio->data, pFrame_audio->nb_samples);//FIX:FLAC,MP3,AAC Different number of samples  if (wanted_spec.samples != pFrame_audio->nb_samples){SDL_CloseAudio();out_nb_samples = pFrame_audio->nb_samples;out_buffer_size = av_samples_get_buffer_size(NULL, out_channels, out_nb_samples, out_sample_fmt, 1);wanted_spec.samples = out_nb_samples;SDL_OpenAudio(&wanted_spec, NULL);}//Set audio buffer (PCM data)  audio_chunk = (Uint8 *)out_buffer_audio;//Audio buffer length  audio_len = out_buffer_size;audio_pos = audio_chunk;//Play  SDL_PauseAudio(0);while (audio_len > 0)//Wait until finish  SDL_Delay(1);//ms}}av_free_packet(&packet);}swr_free(&au_convert_ctx);SDL_CloseAudio();//Close SDL SDL_Quit();//退出SDLav_free(out_buffer_audio);av_frame_free(&pFrame_audio);avcodec_close(pCodecCtx_audio);//释放解码器环境  avformat_close_input(&pFormatCtx);//释放输入环境  return 0;}

0 0