iOS语音识别（科大讯飞SDK）

来源：互联网　发布：cydia出现数据库黄字　编辑：程序博客网　时间：2024/05/20 09:22

最近在研究语音识别，前段时间使用了百度语音识别，最近公司又突然让转用科大讯飞的SDK，感觉有点坑。不过也算是学习了使用新的SDK了吧。在这里将集成SDK以及一些使用方法写一下。

第一步：获取APPid

先在科大讯飞语音识别开发者网站注册，获取APPid。

第二步：集成SDK

导入科大讯飞的 iflyMSC.framework，然后再导入相关的系统依赖库（原文此处的依赖库列表截图已缺失，具体列表请以科大讯飞官方集成文档为准）。

第三步：初始化启动服务

在 AppDelegate 中写入如下代码，对 SDK 进行初始化：

// Log the version of the iFlytek SDK that is linked into the app.
NSLog(@"%@", [IFlySetting getVersion]);

// Set the SDK log level; the log is saved under the working path configured below.
[IFlySetting setLogFile:LVL_ALL];

// Switch for echoing SDK logs to the console. NO is passed here, so console output stays off.
[IFlySetting showLogcat:NO];

// Use the app's Caches directory as the SDK working path.
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
// firstObject yields nil instead of throwing if the search returns no paths.
NSString *cachePath = paths.firstObject;
[IFlySetting setLogFilePath:cachePath];

// Build the speech configuration string. The appid is mandatory; this only needs to run once.
NSString *initString = [[NSString alloc] initWithFormat:@"appid=%@", APPID_VALUE];

// createUtility must have been executed before any speech service is started.
[IFlySpeechUtility createUtility:initString];

在需要使用语音识别的页面（视图控制器）中，写入如下代码：

/// Sets up the speech recognizer every time the view is about to appear.
/// @param animated Forwarded unchanged to the superclass implementation.
- (void)viewWillAppear:(BOOL)animated {
    NSLog(@"%s", __func__);
    [super viewWillAppear:animated];

    // Create the recognizer on first use and (re)apply its parameters.
    [self initRecognizer];
}

/// Tears down the recognition session when the view is about to disappear.
/// @param animated Forwarded unchanged to the superclass implementation.
- (void)viewWillDisappear:(BOOL)animated {
    NSLog(@"%s", __func__);
    [super viewWillDisappear:animated];

    // Cancel any session in progress and detach this controller as delegate.
    [_iFlySpeechRecognizer cancel];
    [_iFlySpeechRecognizer setDelegate:nil];

    // An empty string is written to the PARAMS key; presumably this clears the
    // parameters set on the shared recognizer — confirm against the SDK documentation.
    [_iFlySpeechRecognizer setParameter:@"" forKey:[IFlySpeechConstant PARAMS]];
}

/// Initializes the recognition parameters.
///
/// Obtains the shared UI-less recognizer on first use, makes this object its
/// delegate, and applies the settings held by the shared IATConfig instance.
- (void)initRecognizer {
    NSLog(@"%s", __func__);

    // Singleton recognizer instance without UI.
    if (self.iFlySpeechRecognizer == nil) {
        _iFlySpeechRecognizer = [IFlySpeechRecognizer sharedInstance];
        [_iFlySpeechRecognizer setParameter:@"" forKey:[IFlySpeechConstant PARAMS]];

        // Use dictation ("iat") mode.
        [_iFlySpeechRecognizer setParameter:@"iat" forKey:[IFlySpeechConstant IFLY_DOMAIN]];
    }
    _iFlySpeechRecognizer.delegate = self;

    if (_iFlySpeechRecognizer != nil) {
        IATConfig *instance = [IATConfig sharedInstance];

        // Maximum recording duration.
        [_iFlySpeechRecognizer setParameter:instance.speechTimeout
                                     forKey:[IFlySpeechConstant SPEECH_TIMEOUT]];

        // End-of-speech endpoint.
        [_iFlySpeechRecognizer setParameter:instance.vadEos
                                     forKey:[IFlySpeechConstant VAD_EOS]];

        // Beginning-of-speech endpoint.
        [_iFlySpeechRecognizer setParameter:instance.vadBos
                                     forKey:[IFlySpeechConstant VAD_BOS]];

        // Network wait time.
        [_iFlySpeechRecognizer setParameter:@"20000"
                                     forKey:[IFlySpeechConstant NET_TIMEOUT]];

        // Sample rate; 16K is recommended.
        // NOTE(review): the value passed is IATConfig's lowSampleRate — confirm this is the intended rate.
        [_iFlySpeechRecognizer setParameter:[IATConfig lowSampleRate]
                                     forKey:[IFlySpeechConstant SAMPLE_RATE]];

        if ([instance.language isEqualToString:[IATConfig chinese]]) {
            // Language.
            [_iFlySpeechRecognizer setParameter:instance.language
                                         forKey:[IFlySpeechConstant LANGUAGE]];

            // Dialect (accent); only applied for Chinese.
            [_iFlySpeechRecognizer setParameter:instance.accent
                                         forKey:[IFlySpeechConstant ACCENT]];
        } else if ([instance.language isEqualToString:[IATConfig english]]) {
            [_iFlySpeechRecognizer setParameter:instance.language
                                         forKey:[IFlySpeechConstant LANGUAGE]];
        }

        // Whether punctuation is returned in the result.
        [_iFlySpeechRecognizer setParameter:instance.dot
                                     forKey:[IFlySpeechConstant ASR_PTT]];
    }
}

/// Starts a new dictation session using the microphone as the audio source.
///
/// Clears the text view, resets the cancel flag, cancels any session already
/// running, applies the per-session parameters and starts listening.
- (void)startButtonClick {
    NSLog(@"start");

    [_textView setText:@""];
    [_textView resignFirstResponder];
    self.isCanceled = NO;

    if (_iFlySpeechRecognizer == nil) {
        [self initRecognizer];
    }

    // Cancel whatever session may still be running before starting a new one.
    [_iFlySpeechRecognizer cancel];

    // Use the microphone as the audio source.
    [_iFlySpeechRecognizer setParameter:IFLY_AUDIO_SOURCE_MIC forKey:@"audio_source"];

    // Ask for the dictation result in JSON format.
    [_iFlySpeechRecognizer setParameter:@"json" forKey:[IFlySpeechConstant RESULT_TYPE]];

    // Save the recording; it is stored under the SDK working path, or under
    // library/cache by default when no working path has been set
    // (kept for testing audio-stream recognition).
    [_iFlySpeechRecognizer setParameter:@"asr.pcm" forKey:[IFlySpeechConstant ASR_AUDIO_PATH]];

    [_iFlySpeechRecognizer setDelegate:self];

    BOOL ret = [_iFlySpeechRecognizer startListening];
    if (ret) {
        NSLog(@"启动成功");
    } else {
        [_popUpView showText:@"启动失败"];
    }
}

/// Stops recording.
///
/// Sends stopListening to the recognizer and dismisses the keyboard.
- (void)stopButtonClick {
    NSLog(@"stop");
    [_iFlySpeechRecognizer stopListening];
    [_textView resignFirstResponder];
}

/// Cancels recording.
///
/// Marks the session as canceled, cancels the recognizer and dismisses the keyboard.
- (void)cancelButtonClick {
    NSLog(@"cancel");

    // Record the cancellation so the delegate callbacks (onVolumeChanged:, onError:)
    // take their "canceled" branches; startButtonClick resets this flag to NO.
    self.isCanceled = YES;

    [_iFlySpeechRecognizer cancel];
    [_textView resignFirstResponder];
}

回调方法

#pragma mark - IFlySpeechRecognizerDelegate

/**
 Volume callback.

 Shows the current volume in the pop-up view, or removes the pop-up view
 when the session has been canceled.

 @param volume Current volume, 0-30.
 */
- (void)onVolumeChanged:(int)volume {
    if (self.isCanceled) {
        [_popUpView removeFromSuperview];
        return;
    }

    NSString *vol = [NSString stringWithFormat:@"音量：%d", volume];
    [_popUpView showText:vol];
}

/**
 Callback invoked when recognition begins.

 Shows a "recording" notice in the pop-up view.
 */
- (void)onBeginOfSpeech {
    NSLog(@"onBeginOfSpeech");
    [_popUpView showText:@"正在录音"];
}

/**
 Callback invoked when recording stops.

 Shows a "recording stopped" notice in the pop-up view.
 */
- (void)onEndOfSpeech {
    NSLog(@"onEndOfSpeech");
    [_popUpView showText:@"停止录音"];
}

/**
 Callback invoked when dictation ends (called whether or not dictation succeeded).

 Builds a status message and shows it in the pop-up view.

 @param error Result of the session: error.errorCode is 0 when dictation
              succeeded, and any other value when it failed.
 */
- (void)onError:(IFlySpeechError *)error {
    NSLog(@"%s", __func__);

    NSString *text;
    if (self.isCanceled) {
        text = @"识别取消";
    } else if (error.errorCode == 0) {
        // Success code: tell an empty result apart from a real one.
        if (self.result.length == 0) {
            text = @"无识别结果";
        } else {
            text = @"识别成功";
        }
    } else {
        text = [NSString stringWithFormat:@"发生错误：%d %@", error.errorCode, error.errorDesc];
        NSLog(@"%@", text);
    }

    // Surface the outcome to the user, as the other delegate callbacks do;
    // without this the message built above would be discarded.
    [_popUpView showText:text];
}

/**
 Dictation result callback for the UI-less recognizer.

 Concatenates the keys of the first element of results, stores the text view
 content plus that raw string in self.result, and appends the text that
 ISRDataHelper extracts from the raw string to the text view.

 @param results Dictation results; only the first element is read, and it is
                enumerated as a dictionary.
 @param isLast  YES when this is the last callback of the session.
 */
- (void)onResults:(NSArray *)results isLast:(BOOL)isLast {
    NSMutableString *resultString = [[NSMutableString alloc] init];

    // firstObject is nil for an empty array, where results[0] would throw;
    // enumerating a nil dictionary simply performs no iterations.
    NSDictionary *dic = results.firstObject;
    for (NSString *key in dic) {
        [resultString appendFormat:@"%@", key];
    }

    self.result = [NSString stringWithFormat:@"%@%@", self.textView.text, resultString];

    NSString *resultFromJson = [ISRDataHelper stringFromJson:resultString];
    self.textView.text = [NSString stringWithFormat:@"%@%@", _textView.text, resultFromJson];

    if (isLast) {
        NSLog(@"听说结果（json）:%@测试", self.result);
    }

    NSLog(@"_result=%@", _result);
    NSLog(@"resultFromJson=%@", resultFromJson);
    NSLog(@"isLast=%d,_textView.text=%@", isLast, _textView.text);
}

/**
 Callback invoked when dictation is canceled.

 Only logs the cancellation.
 */
- (void)onCancel {
    NSLog(@"识别取消");
}

对比百度语音识别的SDK，科大讯飞的SDK更新比较快；百度语音识别的SDK最新版本还是15年更新的，并且给的示例Demo用的全是自动内存管理，文档上的集成方式也不一样，当时集成百度语音识别的时候也出现了各种问题。相对而言，还是科大讯飞的SDK使用起来简单粗暴，官方下载的Demo看起来也比较有条理。不过百度语音识别可以边说话边识别出结果，而科大讯飞的必须要说完之后才能把结果全部返回。

0 0