iOS语音书写功能(语音转文本)
来源:互联网 发布:越南看电视的软件 编辑:程序博客网 时间:2024/05/04 10:41
Demo下载地址
最近在项目开发中,需要实现将语音识别并转换成文本的功能。研究了下科大讯飞的SDK,附上Demo分享给大家。
研发前先得做一些准备。
1、注册科大讯飞开发者帐号(http://www.xfyun.cn)
2、下载开发平台(iOS、或android,或其他)所需要的SDK(SDK包含:说明文档、SDK即iflyMSC.framework、Demo)
3、项目中添加SDK(添加时,先将SDK复制粘贴到项目文件,再通过addframe的方法添加到项目引用),及相关联的framework
添加方法:TARGETS-Build Phases-Link Binary With Libraries-"+"-Choose frameworks and libraries to add-add other,或选择对应的framework-add
4、使用时要添加对应的头文件
特别说明:
1、使用SDK关联的APPID存在于下载的Demo中,如果SDK有替换的话APPID应该跟着一起替换。
2、添加其他framework:
libz.tbd
libc++.tbd
CoreGraphics.framework
QuartzCore.framework
AddressBook.framework
CoreLocation.framework
UIKit.framework
AudioToolbox.framework
Foundation.framework
SystemConfiguration.framework
AVFoundation.framework
CoreTelephony.framework
3、Bitcode属性设置为NO(TARGETS-Build Settings-Build Options-Enable Bitcode-NO)
4、在使用前,务必在AppDelegate的方法
"- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {}"中进行初始化操作。
5、需要有网络的情况下才能使用。
如图
下载的科大讯飞SDK文件
Demo中的APPID
添加SDK,及添加关联framework
设置Bitcode属性为 NO
语音转文本实现代码
// .h file (VoiceConversion.h)
#import <Foundation/Foundation.h>

// SDK headers
#import "iflyMSC.framework/Headers/IFlyMSC.h"
#import "iflyMSC.framework/Headers/IFlySpeechUtility.h"
#import "iflyMSC/IFlySpeechConstant.h"

#pragma mark - Configuration class
/**************************************************************************/

/**
 * Holder for the dictation settings that VoiceConversion pushes to the
 * recognizer before each session. Use +sharedInstance to read or change them.
 */
@interface IATConfig : NSObject

/// Returns the shared configuration object.
+ (IATConfig *)sharedInstance;

/// Accent identifiers accepted by the `accent` property.
+ (NSString *)mandarin;
+ (NSString *)cantonese;
+ (NSString *)henanese;

/// Language identifiers accepted by the `language` property.
+ (NSString *)chinese;
+ (NSString *)english;

/// Sample-rate values accepted by the `sampleRate` property.
+ (NSString *)lowSampleRate;
+ (NSString *)highSampleRate;

/// Values accepted by the `dot` property (punctuation on / off).
+ (NSString *)isDot;
+ (NSString *)noDot;

/** The following parameters take effect only once they are applied to IFlySpeechRecognizer. */

// String-typed properties are `copy` so that a caller handing in an
// NSMutableString cannot change the configuration behind our back.

/// Maximum recording duration (presumably milliseconds -- confirm against the SDK docs).
@property (nonatomic, copy) NSString *speechTimeout;
/// Trailing-silence endpoint (VAD end of speech).
@property (nonatomic, copy) NSString *vadEos;
/// Leading-silence endpoint (VAD beginning of speech).
@property (nonatomic, copy) NSString *vadBos;
/// Recognition language; see +chinese / +english.
@property (nonatomic, copy) NSString *language;
/// Dialect, applied only when `language` is Chinese; see +mandarin / +cantonese / +henanese.
@property (nonatomic, copy) NSString *accent;
/// Whether the result contains punctuation; see +isDot / +noDot.
@property (nonatomic, copy) NSString *dot;
/// Audio sample rate; see +lowSampleRate / +highSampleRate.
@property (nonatomic, copy) NSString *sampleRate;

/** The following parameters do not need to be set by callers. */

/// Whether the SDK's built-in recognition view is used.
@property (nonatomic, assign) BOOL haveView;
@property (nonatomic, copy) NSArray *accentIdentifer;
@property (nonatomic, copy) NSArray *accentNickName;

@end

/**************************************************************************/
#pragma mark - Speech dictation class

/**
 * Block-based wrapper around the iFlytek speech recognizer that turns
 * microphone input into text.
 */
@interface VoiceConversion : NSObject

/// Initializes the speech SDK. Call once at application launch, before any
/// other method of this class.
+ (void)VoiceInitialize;

/**
 * Starts recording and recognizing. Every block may be nil.
 *
 * @param startListening Called before this method returns; `isStart` is NO
 *                       when the recognizer could not be started.
 * @param begin          Called when the recognizer reports the beginning of speech.
 * @param end            Called when the recognizer reports the end of speech.
 * @param error          Called when the session ends; `isSuccess` is YES when
 *                       the SDK error code is 0.
 * @param result         Called with the recognized text once the final
 *                       result has arrived.
 * @param volume         Called with the current input volume.
 */
- (void)voiceStart:(void (^)(BOOL isStart))startListening
       speechBegin:(void (^)(void))begin
         speechEnd:(void (^)(void))end
       speechError:(void (^)(BOOL isSuccess))error
      speechResult:(void (^)(NSString *text))result
      speechVolume:(void (^)(int volume))volume;

/// Cancels the current recognition session.
- (void)voiceCancel;

/// Stops recording.
- (void)voiceStop;

@end
// .m file (VoiceConversion.m)
#import "VoiceConversion.h"

#pragma mark - Configuration class
/**************************************************************************/

static NSString *const PUTONGHUA = @"mandarin";
static NSString *const YUEYU = @"cantonese";
static NSString *const HENANHUA = @"henanese";
static NSString *const ENGLISH = @"en_us";
static NSString *const CHINESE = @"zh_cn";

@implementation IATConfig

- (instancetype)init {
    self = [super init];
    if (self) {
        [self defaultSetting];
    }
    return self;
}

/// Returns the shared configuration object, created on first use.
+ (IATConfig *)sharedInstance {
    static IATConfig *instance = nil;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        instance = [[IATConfig alloc] init];
    });
    return instance;
}

/// Resets every setting to its default value.
- (void)defaultSetting {
    _speechTimeout = @"30000";
    _vadEos = @"3000";
    _vadBos = @"3000";
    _dot = @"1";
    _sampleRate = @"16000";
    _language = CHINESE;
    _accent = PUTONGHUA;
    _haveView = NO; // no built-in recognition view by default
    _accentNickName = @[@"粤语", @"普通话", @"河南话", @"英文"];
}

+ (NSString *)mandarin {
    return PUTONGHUA;
}

+ (NSString *)cantonese {
    return YUEYU;
}

+ (NSString *)henanese {
    return HENANHUA;
}

+ (NSString *)chinese {
    return CHINESE;
}

+ (NSString *)english {
    return ENGLISH;
}

+ (NSString *)lowSampleRate {
    return @"8000";
}

+ (NSString *)highSampleRate {
    return @"16000";
}

+ (NSString *)isDot {
    return @"1";
}

+ (NSString *)noDot {
    return @"0";
}

@end

/**************************************************************************/
#pragma mark - Speech dictation class

// The AppID is tied to the SDK build it was issued with; replace both together.
static NSString *const VoiceAPPID = @"572016e4";
// Network timeout handed to the recognizer.
static NSString *const VoiceTimeOut = @"20000";
// Sentence-final full stop that is stripped from the end of the final text.
static NSString *const VoiceTrailingFullStop = @"。";

@interface VoiceConversion () <IFlySpeechRecognizerDelegate>

/// Text accumulated from the partial results of the current session.
@property (nonatomic, strong) NSMutableString *resultText;
@property (nonatomic, strong) IFlySpeechRecognizer *iFlySpeechRecognizer;

@property (nonatomic, copy) void (^beginSpeech)(void);
@property (nonatomic, copy) void (^endSpeech)(void);
@property (nonatomic, copy) void (^errorSpeech)(BOOL isSuccess);
@property (nonatomic, copy) void (^resultSpeech)(NSString *text);
@property (nonatomic, copy) void (^volumeSpeech)(int volume);

@end

@implementation VoiceConversion

#pragma mark Initialization

/// Initializes the speech SDK; call once at application launch.
+ (void)VoiceInitialize {
    // SDK log level; the log is stored in the working path set below.
    [IFlySetting setLogFile:LVL_ALL];

    // Also print the SDK log to the console.
    [IFlySetting showLogcat:YES];

    // SDK working path.
    NSArray *paths = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
    NSString *cachePath = [paths objectAtIndex:0];
    [IFlySetting setLogFilePath:cachePath];

    // The AppID identifies the application and must be passed at initialization.
    // Initialization is asynchronous, so do it at app launch (see the SDK
    // demo's MSCAppDelegate.m). Using the service before initialization
    // usually fails with error code 10111.
    NSString *initString = [[NSString alloc] initWithFormat:@"appid=%@", VoiceAPPID];
    [IFlySpeechUtility createUtility:initString];
}

#pragma mark Lifecycle and accessors

- (void)dealloc {
    // Read the ivar directly: going through the lazy getter here would fetch
    // and reconfigure the shared recognizer even if this object never used it.
    IFlySpeechRecognizer *recognizer = _iFlySpeechRecognizer;

    // The recognizer is a shared instance. Only tear the session down when it
    // still reports to this object, so a session started by another owner is
    // left alone.
    if (recognizer != nil && recognizer.delegate == self) {
        [recognizer cancel];
        // NOTE(review): the SDK header is not visible here; if `delegate` is
        // an unretained (assign) reference, clearing it prevents a dangling
        // pointer after this object is gone -- confirm.
        recognizer.delegate = nil;
    }
}

- (NSMutableString *)resultText {
    if (!_resultText) {
        _resultText = [[NSMutableString alloc] init];
    }
    return _resultText;
}

- (IFlySpeechRecognizer *)iFlySpeechRecognizer {
    if (_iFlySpeechRecognizer == nil) {
        _iFlySpeechRecognizer = [IFlySpeechRecognizer sharedInstance];
        [_iFlySpeechRecognizer setParameter:@"" forKey:[IFlySpeechConstant PARAMS]];

        // Dictation mode.
        [_iFlySpeechRecognizer setParameter:@"iat" forKey:[IFlySpeechConstant IFLY_DOMAIN]];
    }
    return _iFlySpeechRecognizer;
}

/// Makes this object the recognizer's delegate and applies the IATConfig settings.
- (void)initializeVoice {
    IFlySpeechRecognizer *recognizer = self.iFlySpeechRecognizer;
    recognizer.delegate = self;

    IATConfig *config = [IATConfig sharedInstance];

    // Maximum recording duration.
    [recognizer setParameter:config.speechTimeout forKey:[IFlySpeechConstant SPEECH_TIMEOUT]];
    // Trailing endpoint.
    [recognizer setParameter:config.vadEos forKey:[IFlySpeechConstant VAD_EOS]];
    // Leading endpoint.
    [recognizer setParameter:config.vadBos forKey:[IFlySpeechConstant VAD_BOS]];
    // Network timeout.
    [recognizer setParameter:VoiceTimeOut forKey:[IFlySpeechConstant NET_TIMEOUT]];
    // Sample rate; 16K is recommended.
    [recognizer setParameter:config.sampleRate forKey:[IFlySpeechConstant SAMPLE_RATE]];

    BOOL isChinese = [config.language isEqualToString:[IATConfig chinese]];
    BOOL isEnglish = [config.language isEqualToString:[IATConfig english]];
    if (isChinese || isEnglish) {
        // Language.
        [recognizer setParameter:config.language forKey:[IFlySpeechConstant LANGUAGE]];
    }
    if (isChinese) {
        // Dialect; only meaningful for Chinese.
        [recognizer setParameter:config.accent forKey:[IFlySpeechConstant ACCENT]];
    }

    // Whether the result contains punctuation.
    [recognizer setParameter:config.dot forKey:[IFlySpeechConstant ASR_PTT]];
}

#pragma mark Dictation

/// Starts recording and recognizing; see VoiceConversion.h for the callbacks.
- (void)voiceStart:(void (^)(BOOL isStart))startListening
       speechBegin:(void (^)(void))begin
         speechEnd:(void (^)(void))end
       speechError:(void (^)(BOOL isSuccess))error
      speechResult:(void (^)(NSString *text))result
      speechVolume:(void (^)(int volume))volume {
    [self.resultText setString:@""];

    // Store the callbacks (the properties are `copy`).
    self.beginSpeech = begin;
    self.endSpeech = end;
    self.errorSpeech = error;
    self.resultSpeech = result;
    self.volumeSpeech = volume;

    // Apply the configuration, then drop any session that is still running.
    [self initializeVoice];
    [self.iFlySpeechRecognizer cancel];

    // Audio source: microphone.
    [self.iFlySpeechRecognizer setParameter:IFLY_AUDIO_SOURCE_MIC forKey:@"audio_source"];

    // Result format: JSON.
    [self.iFlySpeechRecognizer setParameter:@"json" forKey:[IFlySpeechConstant RESULT_TYPE]];

    // Save the recording in the SDK working path (library/cache when no
    // working path has been set).
    [self.iFlySpeechRecognizer setParameter:@"asr.pcm" forKey:[IFlySpeechConstant ASR_AUDIO_PATH]];

    BOOL isStart = [self.iFlySpeechRecognizer startListening];
    if (startListening) {
        // A failed start may mean the previous request has not finished yet;
        // concurrent sessions are not supported.
        startListening(isStart);
    }
}

/// Cancels the current recognition session.
- (void)voiceCancel {
    [self.iFlySpeechRecognizer cancel];
}

/// Stops recording.
- (void)voiceStop {
    [self.iFlySpeechRecognizer stopListening];
}

#pragma mark IFlySpeechRecognizerDelegate

/**
 Recognition result callback.

 @param results Recognition result; only the first element is read.
 @param isLast  YES when this is the final result of the session.
 */
- (void)onResults:(NSArray *)results isLast:(BOOL)isLast {
    // -firstObject is nil for an empty array, where results[0] would throw.
    id firstResult = results.firstObject;
    if ([firstResult isKindOfClass:[NSDictionary class]]) {
        // The JSON text is rebuilt from the dictionary's keys.
        // NOTE(review): presumably the SDK delivers the JSON as key(s) -- confirm.
        NSMutableString *json = [[NSMutableString alloc] init];
        for (NSString *key in (NSDictionary *)firstResult) {
            [json appendFormat:@"%@", key];
        }

        NSString *fragment = [[self class] stringFromJson:json];
        if (fragment.length > 0) {
            [self.resultText appendString:fragment];
        }
    }

    // Partial result: keep listening. Cancelling here, as was done before,
    // would end the session before the remaining results are delivered.
    if (!isLast) {
        return;
    }

    // Strip a trailing full stop only. Searching backwards for the last "。"
    // and cutting there would also drop any text that follows it.
    NSMutableString *text = self.resultText;
    if ([text hasSuffix:VoiceTrailingFullStop]) {
        NSUInteger stopLength = VoiceTrailingFullStop.length;
        [text deleteCharactersInRange:NSMakeRange(text.length - stopLength, stopLength)];
    }

    if (self.resultSpeech) {
        // Hand out an immutable snapshot; resultText is reused by the next session.
        self.resultSpeech([text copy]);
    }

    [self voiceCancel];
}

/**
 Session end callback.

 @param error error.errorCode == 0 means the session ended normally; any other
              value means an error occurred.
 */
- (void)onError:(IFlySpeechError *)error {
    if (self.errorSpeech) {
        BOOL isSuccess = (0 == error.errorCode);
        self.errorSpeech(isSuccess);
    }
}

/** End of recording callback. */
- (void)onEndOfSpeech {
    if (self.endSpeech) {
        self.endSpeech();
    }
}

/** Beginning of recognition callback. */
- (void)onBeginOfSpeech {
    if (self.beginSpeech) {
        self.beginSpeech();
    }
}

/** Volume callback; volume is in the range 0-30. */
- (void)onVolumeChanged:(int)volume {
    if (self.volumeSpeech) {
        self.volumeSpeech(volume);
    }
}

#pragma mark Parsing
/**************************************************************************/

/**
 Parses a command-word result. Each line that contains "confidence=",
 " grammar=" and "input=" yields one output line "<input> 置信度<confidence>".

 @param params Raw result text, one entry per line.
 @return The formatted lines, or @"" as soon as a line with id "nomatch" is found.
 */
+ (NSString *)stringFromAsr:(NSString *)params {
    NSMutableString *resultString = [[NSMutableString alloc] init];
    NSCharacterSet *whitespace = [NSCharacterSet whitespaceAndNewlineCharacterSet];

    for (NSString *line in [params componentsSeparatedByString:@"\n"]) {
        NSRange idRange = [line rangeOfString:@"id="];
        NSRange nameRange = [line rangeOfString:@"name="];
        NSRange confidenceRange = [line rangeOfString:@"confidence="];
        NSRange grammarRange = [line rangeOfString:@" grammar="];
        NSRange inputRange = [line rangeOfString:@"input="];

        if (confidenceRange.location == NSNotFound ||
            grammarRange.location == NSNotFound ||
            inputRange.location == NSNotFound) {
            continue;
        }

        // Check for "nomatch": the id value sits between "id=" and "name=".
        if (idRange.location != NSNotFound) {
            NSUInteger idStart = NSMaxRange(idRange);
            // Without "name=" after the id the value cannot be delimited; the
            // unsigned subtraction below would wrap and throw, so skip the line.
            if (nameRange.location == NSNotFound || nameRange.location < idStart) {
                continue;
            }
            NSRange idValueRange = NSMakeRange(idStart, nameRange.location - idStart);
            NSString *idValue = [[line substringWithRange:idValueRange]
                stringByTrimmingCharactersInSet:whitespace];
            if ([idValue isEqualToString:@"nomatch"]) {
                return @"";
            }
        }

        // Confidence value: between "confidence=" and " grammar=".
        NSUInteger confidenceStart = NSMaxRange(confidenceRange);
        if (grammarRange.location < confidenceStart) {
            continue; // fields out of order; same wrap-around hazard as above
        }
        NSRange scoreRange = NSMakeRange(confidenceStart, grammarRange.location - confidenceStart);
        NSString *score = [line substringWithRange:scoreRange];

        // Input text: everything after "input=".
        NSString *inputString = [line substringFromIndex:NSMaxRange(inputRange)];

        [resultString appendFormat:@"%@ 置信度%@\n", inputString, score];
    }

    return resultString;
}

/**
 Flattens result["ws"][i]["cw"][j] of a recognition JSON document into one
 array of candidate-word dictionaries. Anything that does not have the
 expected shape is skipped, so malformed input yields an empty array instead
 of an exception.
 */
+ (NSArray *)candidateWordsFromJson:(NSString *)params {
    NSMutableArray *candidates = [[NSMutableArray alloc] init];

    // The text must be UTF-8 encodable; NSJSONSerialization throws on nil data.
    NSData *jsonData = [params dataUsingEncoding:NSUTF8StringEncoding];
    if (jsonData == nil) {
        return candidates;
    }

    id root = [NSJSONSerialization JSONObjectWithData:jsonData options:kNilOptions error:nil];
    if (![root isKindOfClass:[NSDictionary class]]) {
        return candidates;
    }

    id words = [(NSDictionary *)root objectForKey:@"ws"];
    if (![words isKindOfClass:[NSArray class]]) {
        return candidates;
    }

    for (id word in (NSArray *)words) {
        if (![word isKindOfClass:[NSDictionary class]]) {
            continue;
        }
        id wordCandidates = [(NSDictionary *)word objectForKey:@"cw"];
        if (![wordCandidates isKindOfClass:[NSArray class]]) {
            continue;
        }
        for (id candidate in (NSArray *)wordCandidates) {
            if ([candidate isKindOfClass:[NSDictionary class]]) {
                [candidates addObject:candidate];
            }
        }
    }
    return candidates;
}

/**
 Parses dictation JSON and concatenates every "w" value.

 Example params:
 {"sn":1,"ls":true,"bg":0,"ed":0,"ws":[{"bg":0,"cw":[{"w":"白日","sc":0}]},{"bg":0,"cw":[{"w":"依山","sc":0}]},{"bg":0,"cw":[{"w":"尽","sc":0}]},{"bg":0,"cw":[{"w":"黄河入海流","sc":0}]},{"bg":0,"cw":[{"w":"。","sc":0}]}]}

 @return The concatenated text (empty when nothing could be parsed), or nil
         when params is nil.
 */
+ (NSString *)stringFromJson:(NSString *)params {
    if (params == nil) {
        return nil;
    }

    NSMutableString *text = [[NSMutableString alloc] init];
    for (NSDictionary *candidate in [self candidateWordsFromJson:params]) {
        id word = [candidate objectForKey:@"w"];
        // -appendString: throws on nil, so entries without a string "w" are skipped.
        if ([word isKindOfClass:[NSString class]]) {
            [text appendString:word];
        }
    }
    return text;
}

/**
 Parses grammar-recognition JSON into one line per candidate word:
 "<w> 置信度:<sc>".

 @return The formatted lines (empty when nothing could be parsed), or nil
         when params is nil.
 */
+ (NSString *)stringFromABNFJson:(NSString *)params {
    if (params == nil) {
        return nil;
    }

    NSMutableString *text = [[NSMutableString alloc] init];
    for (NSDictionary *candidate in [self candidateWordsFromJson:params]) {
        id word = [candidate objectForKey:@"w"];
        if (![word isKindOfClass:[NSString class]]) {
            continue; // -appendString: throws on nil
        }
        id score = [candidate objectForKey:@"sc"];

        [text appendString:word];
        [text appendFormat:@" 置信度:%@", score];
        [text appendString:@"\n"];
    }
    return text;
}

/**************************************************************************/

@end
使用
// Initialization method (class method of VoiceConversion)

/// Initializes the speech SDK: log settings, working path, then the AppID.
+ (void)VoiceInitialize {
    // The SDK working path is the caches directory.
    NSArray *cacheDirectories =
        NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
    NSString *workingPath = cacheDirectories[0];

    // The AppID identifies the application and must be passed at initialization.
    NSString *utilityParameters = [NSString stringWithFormat:@"appid=%@", VoiceAPPID];

    // SDK log level; the log is stored in the working path set below.
    [IFlySetting setLogFile:LVL_ALL];

    // Also print the SDK log to the console.
    [IFlySetting showLogcat:YES];

    [IFlySetting setLogFilePath:workingPath];

    // Initialization is asynchronous, so it belongs at app launch (see the SDK
    // demo's MSCAppDelegate.m). Using the service before initialization
    // usually fails with error code 10111.
    [IFlySpeechUtility createUtility:utilityParameters];
}

// Initialization call (in the AppDelegate)

- (BOOL)application:(UIApplication *)application
    didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
    // Bring the speech SDK up before any recognizer is used.
    [VoiceConversion VoiceInitialize];

    return YES;
}
#import "VoiceConversion.h"

@interface ViewController ()

/// Speech-to-text helper, created on first use.
@property (nonatomic, strong) VoiceConversion *voiceConversion;
/// Shows the recording state, the input volume and the recognized text.
@property (nonatomic, strong) UILabel *messageLabel;

@end

@implementation ViewController

- (void)viewDidLoad {
    [super viewDidLoad];

    // Three navigation bar buttons drive the recognizer: start, stop, cancel.
    self.navigationItem.rightBarButtonItems = @[
        [self barItemWithTitle:@"start" action:@selector(startItemClick:)],
        [self barItemWithTitle:@"stop" action:@selector(stopItemClick:)],
        [self barItemWithTitle:@"cancel" action:@selector(cancelItemClick:)],
    ];
    self.title = @"科大讯飞语音";

    [self setUI];
}

- (void)didReceiveMemoryWarning {
    [super didReceiveMemoryWarning];
}

#pragma mark - Views

/// Builds a "done"-style bar button that sends `action` to this controller.
- (UIBarButtonItem *)barItemWithTitle:(NSString *)title action:(SEL)action {
    return [[UIBarButtonItem alloc] initWithTitle:title
                                            style:UIBarButtonItemStyleDone
                                           target:self
                                           action:action];
}

/// Creates the message label and adds it to the view.
- (void)setUI {
    if ([self respondsToSelector:@selector(setEdgesForExtendedLayout:)]) {
        self.edgesForExtendedLayout = UIRectEdgeNone;
    }

    CGFloat margin = 10.0;
    CGFloat labelWidth = CGRectGetWidth(self.view.bounds) - margin * 2;

    UILabel *label = [[UILabel alloc] initWithFrame:CGRectMake(margin, margin, labelWidth, 40.0)];
    label.textAlignment = NSTextAlignmentCenter;
    label.backgroundColor = [UIColor colorWithWhite:0.5 alpha:0.3];

    self.messageLabel = label;
    [self.view addSubview:label];
}

#pragma mark - Actions

/// Starts a recognition session and mirrors its progress in the label.
- (void)startItemClick:(UIBarButtonItem *)item {
    // The blocks are stored by voiceConversion, which this controller owns, so
    // they must not capture the controller strongly.
    __weak typeof(self) weakSelf = self;

    void (^didStart)(BOOL) = ^(BOOL isStart) {
        NSLog(@"1 start");
        weakSelf.messageLabel.text = isStart ? @"正在录音" : @"启动识别服务失败,请稍后重试";
    };
    void (^didBegin)(void) = ^{
        NSLog(@"2 begin");
    };
    void (^didEnd)(void) = ^{
        NSLog(@"3 end");
    };
    void (^didFinish)(BOOL) = ^(BOOL isSuccess) {
        NSLog(@"4 error");
    };
    void (^didRecognize)(NSString *) = ^(NSString *text) {
        NSLog(@"5 result");
        weakSelf.messageLabel.text = text;
    };
    void (^didChangeVolume)(int) = ^(int volume) {
        NSLog(@"6 volume");
        weakSelf.messageLabel.text = [NSString stringWithFormat:@"音量:%d", volume];
    };

    [self.voiceConversion voiceStart:didStart
                         speechBegin:didBegin
                           speechEnd:didEnd
                         speechError:didFinish
                        speechResult:didRecognize
                        speechVolume:didChangeVolume];
}

/// Stops recording.
- (void)stopItemClick:(UIBarButtonItem *)item {
    [self.voiceConversion voiceStop];
    self.messageLabel.text = @"停止录音";
}

/// Cancels the recognition session.
- (void)cancelItemClick:(UIBarButtonItem *)item {
    [self.voiceConversion voiceCancel];
    self.messageLabel.text = @"取消识别";
}

#pragma mark - Getter

- (VoiceConversion *)voiceConversion {
    if (_voiceConversion == nil) {
        _voiceConversion = [[VoiceConversion alloc] init];
    }
    return _voiceConversion;
}

@end
- iOS语音书写功能(语音转文本)
- iOS语音书写功能(语音转文本)
- iOS语音书写功能(语音转文本)
- 语音转文本
- iOS中 语音识别功能/语音转文字教程详解
- iOS 语音识别转文字
- iOS中 语音识别功能/语音转文字教程详解 韩俊强的博客
- iOS10 新增语音转文字功能(Speech)
- iOS 使用科大讯飞技术实现语音转文字(语音听写)
- .android 语音(声音)转文字
- IBM watson API解析1-Speech To Text(语音转文本)
- Speech 框架构建语音转文本应用详解
- iOS 语音合成,语音播报功能(系统)
- iOS语音功能介绍
- iOS语音功能介绍
- iOS语音功能介绍
- iOS语音功能介绍
- 基于Unity3D的语音转文字功能的实现
- coreData(2)
- 用kaldi平台跑timit实例
- 杨毅:不够优秀就不要腆着脸继续占便宜
- 原生js实现下拉到底事件
- iOS去掉导航栏底部黑线
- iOS语音书写功能(语音转文本)
- 二进制
- 十进制改为十六进制
- 安卓 .9.png图片写聊天对话框的 详细教程
- 个人博客系统--项目实战
- ios tableView编辑模式下cell中创建多个功能
- Tapestry框架使用方法技巧总结
- URLSession慢慢整理
- Timesten_降低permsize 报6226错误