iOS语音书写功能(语音转文本)

来源:互联网 发布:越南看电视的软件 编辑:程序博客网 时间:2024/05/04 10:41

Demo下载地址

最近在项目开发中,需要实现将语音识别并转换成文本的功能。研究了下科大讯飞的SDK,附上Demo分享给大家。

研发前先得做一些准备。

1、注册科大讯飞开发者帐号(http://www.xfyun.cn)

2、下载开发平台(iOS、或android,或其他)所需要的SDK(SDK包含:说明文档、SDK即iflyMSC.framework、Demo)

3、在项目中添加SDK(添加时,先将SDK复制到项目文件夹中,再通过下述添加framework的方法将其加入项目引用),以及相关联的framework

添加方法:TARGETS-Build Phases-Link Binary With Libraries-"+"-Choose frameworks and libraries to add-add other,或选择对应的framework-add

4、使用时要添加对应的头文件


特别说明:

1、使用SDK关联的APPID存在于下载的Demo中,如果SDK有替换的话APPID应该跟着一起替换。

2、添加其他framework  

 libz.tbd

 libc++.tbd

 CoreGraphics.framework

 QuartzCore.framework 

 AddressBook.framework 

 CoreLocation.framework 

 UIKit.framework 

 AudioToolbox.framework 

 Foundation.framework 

 SystemConfiguration.framework

 AVFoundation.framework

 CoreTelephony.framework

3、Bitcode属性设置为NO(TARGETS-Build Settings-Build Options-Enable Bitcode-NO)

4、在使用前,务必在AppDelegate的方法"- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {}"中进行初始化操作。

5、需要有网络的情况下才能使用。


如图

下载的科大讯飞SDK文件


Demo中的APPID


添加SDK,及添加关联framework



设置Bitcode属性为 NO




语音转文本实现代码

.h文件
#import <Foundation/Foundation.h>

// SDK headers
#import "iflyMSC.framework/Headers/IFlyMSC.h"
#import "iflyMSC.framework/Headers/IFlySpeechUtility.h"
#import "iflyMSC/IFlySpeechConstant.h"

#pragma mark - 初始化参数类
/**************************************************************************/

/**
 Shared configuration values that VoiceConversion passes to the speech
 recognizer before each dictation session.

 All values are strings because the recognizer takes its parameters as strings.
 */
@interface IATConfig : NSObject

/// Returns the single shared configuration object (created on first use).
+ (IATConfig *)sharedInstance;

/// Accent identifier for Mandarin.
+ (NSString *)mandarin;
/// Accent identifier for Cantonese.
+ (NSString *)cantonese;
/// Accent identifier for the Henan dialect.
+ (NSString *)henanese;
/// Language identifier for Chinese.
+ (NSString *)chinese;
/// Language identifier for English.
+ (NSString *)english;
/// The lower of the two offered sample rates.
+ (NSString *)lowSampleRate;
/// The higher of the two offered sample rates.
+ (NSString *)highSampleRate;
/// Value that requests punctuation in the result.
+ (NSString *)isDot;
/// Value that suppresses punctuation in the result.
+ (NSString *)noDot;

/** The following parameters have to be applied through the iFlySpeechRecognizer. */

/// Maximum recording time.
@property (nonatomic, copy) NSString *speechTimeout;
/// Trailing end-point (VAD_EOS) setting.
@property (nonatomic, copy) NSString *vadEos;
/// Leading end-point (VAD_BOS) setting.
@property (nonatomic, copy) NSString *vadBos;
/// Recognition language; compare with +chinese / +english.
@property (nonatomic, copy) NSString *language;
/// Dialect; only applied when the language is Chinese.
@property (nonatomic, copy) NSString *accent;
/// Whether punctuation is returned; see +isDot / +noDot.
@property (nonatomic, copy) NSString *dot;
/// Audio sample rate; see +lowSampleRate / +highSampleRate.
@property (nonatomic, copy) NSString *sampleRate;

/** The following parameters do not need to be set; they can be ignored. */

@property (nonatomic, assign) BOOL haveView;
@property (nonatomic, copy) NSArray *accentIdentifer;
@property (nonatomic, copy) NSArray *accentNickName;

@end

/**************************************************************************/

#pragma mark - 语音听写类

/**
 Block-based wrapper around the speech recognizer for voice dictation
 (speech to text).
 */
@interface VoiceConversion : NSObject

/// Initializes the speech SDK. Call once, at application launch.
+ (void)VoiceInitialize;

/**
 Starts recording and recognizing.

 @param startListening Called with the outcome of starting the recognizer;
                       NO means the session could not be started.
 @param begin          Called when the recognizer reports the beginning of speech.
 @param end            Called when the recognizer reports the end of speech.
 @param error          Called when the session finishes; isSuccess is YES when
                       the reported error code is 0.
 @param result         Called with the recognized text once the last result
                       has arrived.
 @param volume         Called with the current input volume (0-30).
 */
- (void)voiceStart:(void (^)(BOOL isStart))startListening
       speechBegin:(void (^)(void))begin
         speechEnd:(void (^)(void))end
       speechError:(void (^)(BOOL isSuccess))error
      speechResult:(void (^)(NSString *text))result
      speechVolume:(void (^)(int volume))volume;

/// Cancels the current recognition session.
- (void)voiceCancel;

/// Stops recording.
- (void)voiceStop;

@end

.m文件
#import "VoiceConversion.h"

#pragma mark - 初始化参数类
/**************************************************************************/

static NSString *const PUTONGHUA = @"mandarin";
static NSString *const YUEYU     = @"cantonese";
static NSString *const HENANHUA  = @"henanese";
static NSString *const ENGLISH   = @"en_us";
static NSString *const CHINESE   = @"zh_cn";

@implementation IATConfig

/// Creates a configuration object populated with the default settings.
- (instancetype)init
{
    self = [super init];
    if (self)
    {
        [self defaultSetting];
    }
    return self;
}

/// Returns the single shared configuration object, creating it on first use.
+ (IATConfig *)sharedInstance
{
    static IATConfig *instance = nil;
    static dispatch_once_t predict;
    dispatch_once(&predict, ^{
        instance = [[IATConfig alloc] init];
    });
    return instance;
}

/// Resets every setting to its default value.
- (void)defaultSetting
{
    _speechTimeout = @"30000";
    _vadEos = @"3000";
    _vadBos = @"3000";
    _dot = @"1";
    _sampleRate = @"16000";
    _language = CHINESE;
    _accent = PUTONGHUA;
    _haveView = NO; // no built-in recognition UI by default
    _accentNickName = @[@"粤语", @"普通话", @"河南话", @"英文"];
}

+ (NSString *)mandarin
{
    return PUTONGHUA;
}

+ (NSString *)cantonese
{
    return YUEYU;
}

+ (NSString *)henanese
{
    return HENANHUA;
}

+ (NSString *)chinese
{
    return CHINESE;
}

+ (NSString *)english
{
    return ENGLISH;
}

+ (NSString *)lowSampleRate
{
    return @"8000";
}

+ (NSString *)highSampleRate
{
    return @"16000";
}

+ (NSString *)isDot
{
    return @"1";
}

+ (NSString *)noDot
{
    return @"0";
}

@end

/**************************************************************************/

#pragma mark - 语音听写类

static NSString *const VoiceAPPID   = @"572016e4";
static NSString *const VoiceTimeOut = @"20000";

@interface VoiceConversion () <IFlySpeechRecognizerDelegate>

/// Text accumulated from the partial results of the current session.
@property (nonatomic, strong) NSMutableString *resultText;
@property (nonatomic, strong) IFlySpeechRecognizer *iFlySpeechRecognizer;

@property (nonatomic, copy) void (^beginSpeech)(void);
@property (nonatomic, copy) void (^endSpeech)(void);
@property (nonatomic, copy) void (^errorSpeech)(BOOL isSuccess);
@property (nonatomic, copy) void (^resultSpeech)(NSString *text);
@property (nonatomic, copy) void (^volumeSpeech)(int volume);

@end

@implementation VoiceConversion

#pragma mark 初始化------------

/// Initializes the speech SDK. Call once, at application launch.
+ (void)VoiceInitialize
{
    // Set the SDK log level; the log is saved under the working path set below.
    [IFlySetting setLogFile:LVL_ALL];

    // Also print the log to the console.
    [IFlySetting showLogcat:YES];

    // Set the SDK working path.
    NSArray *paths = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
    NSString *cachePath = [paths objectAtIndex:0];
    [IFlySetting setLogFilePath:cachePath];

    // The app id identifies the application, is unique, and must be passed in at
    // initialization. Initialization is asynchronous and can be done at app launch;
    // see MSCAppDelegate.m in the SDK demo. Using the service before initialization
    // usually returns error code 10111.
    NSString *initString = [[NSString alloc] initWithFormat:@"appid=%@", VoiceAPPID];
    [IFlySpeechUtility createUtility:initString];
}

#pragma mark 实例化------------

- (void)dealloc
{
    // Access the ivar directly: the lazy getter would create and reconfigure the
    // shared recognizer just to cancel it. Only touch the recognizer if this
    // object is still its delegate, so another owner's session is not interrupted.
    // NOTE(review): the SDK header is not visible here; if its delegate property is
    // not weak, leaving it set would leave a dangling pointer - hence the reset.
    if (_iFlySpeechRecognizer.delegate == self)
    {
        [_iFlySpeechRecognizer cancel];
        _iFlySpeechRecognizer.delegate = nil;
    }
}

/// Lazily created buffer for the recognized text.
- (NSMutableString *)resultText
{
    if (!_resultText)
    {
        _resultText = [[NSMutableString alloc] init];
    }

    return _resultText;
}

/// Lazily fetches the shared recognizer and switches it to dictation mode.
- (IFlySpeechRecognizer *)iFlySpeechRecognizer
{
    if (_iFlySpeechRecognizer == nil)
    {
        _iFlySpeechRecognizer = [IFlySpeechRecognizer sharedInstance];

        [_iFlySpeechRecognizer setParameter:@"" forKey:[IFlySpeechConstant PARAMS]];
        // Dictation mode.
        [_iFlySpeechRecognizer setParameter:@"iat" forKey:[IFlySpeechConstant IFLY_DOMAIN]];
    }

    return _iFlySpeechRecognizer;
}

/// Registers self as the recognizer delegate and applies the IATConfig settings.
- (void)initializeVoice
{
    IFlySpeechRecognizer *recognizer = self.iFlySpeechRecognizer;
    recognizer.delegate = self;

    IATConfig *instance = [IATConfig sharedInstance];

    // Maximum recording time.
    [recognizer setParameter:instance.speechTimeout forKey:[IFlySpeechConstant SPEECH_TIMEOUT]];
    // Trailing end-point.
    [recognizer setParameter:instance.vadEos forKey:[IFlySpeechConstant VAD_EOS]];
    // Leading end-point.
    [recognizer setParameter:instance.vadBos forKey:[IFlySpeechConstant VAD_BOS]];
    // Network timeout.
    [recognizer setParameter:VoiceTimeOut forKey:[IFlySpeechConstant NET_TIMEOUT]];

    // Sample rate; 16K is recommended.
    [recognizer setParameter:instance.sampleRate forKey:[IFlySpeechConstant SAMPLE_RATE]];

    if ([instance.language isEqualToString:[IATConfig chinese]])
    {
        // Language.
        [recognizer setParameter:instance.language forKey:[IFlySpeechConstant LANGUAGE]];
        // Dialect (only meaningful for Chinese).
        [recognizer setParameter:instance.accent forKey:[IFlySpeechConstant ACCENT]];
    }
    else if ([instance.language isEqualToString:[IATConfig english]])
    {
        [recognizer setParameter:instance.language forKey:[IFlySpeechConstant LANGUAGE]];
    }

    // Whether punctuation is returned.
    [recognizer setParameter:instance.dot forKey:[IFlySpeechConstant ASR_PTT]];
}

#pragma mark 语音听写方法------------

/**
 Starts recording and recognizing.

 @param startListening Called with the value returned by the recognizer's
                       startListening; NO means the session could not be started.
 @param begin          Called when the recognizer reports the beginning of speech.
 @param end            Called when the recognizer reports the end of speech.
 @param error          Called when the session finishes; isSuccess is YES when
                       the reported error code is 0.
 @param result         Called with the accumulated text once the last result
                       has arrived.
 @param volume         Called with the current input volume (0-30).
 */
- (void)voiceStart:(void (^)(BOOL isStart))startListening
       speechBegin:(void (^)(void))begin
         speechEnd:(void (^)(void))end
       speechError:(void (^)(BOOL isSuccess))error
      speechResult:(void (^)(NSString *text))result
      speechVolume:(void (^)(int volume))volume
{
    [self.resultText setString:@""];

    // Store the callbacks (the properties are declared copy).
    self.beginSpeech = begin;
    self.endSpeech = end;
    self.errorSpeech = error;
    self.resultSpeech = result;
    self.volumeSpeech = volume;

    // Apply the configuration.
    [self initializeVoice];

    [self.iFlySpeechRecognizer cancel];

    // Use the microphone as the audio source.
    [self.iFlySpeechRecognizer setParameter:IFLY_AUDIO_SOURCE_MIC forKey:@"audio_source"];

    // Ask for JSON results.
    [self.iFlySpeechRecognizer setParameter:@"json" forKey:[IFlySpeechConstant RESULT_TYPE]];

    // Save the recording in the SDK working path (library/cache when no working
    // path has been set).
    [self.iFlySpeechRecognizer setParameter:@"asr.pcm" forKey:[IFlySpeechConstant ASR_AUDIO_PATH]];

    BOOL isStart = [self.iFlySpeechRecognizer startListening];
    if (startListening)
    {
        // A failed start may mean the previous request has not finished yet;
        // concurrent sessions are not supported.
        startListening(isStart);
    }
}

/// Cancels the current recognition session.
- (void)voiceCancel
{
    [self.iFlySpeechRecognizer cancel];
}

/// Stops recording.
- (void)voiceStop
{
    [self.iFlySpeechRecognizer stopListening];
}

#pragma mark IFlySpeechRecognizerDelegate------------

/**
 Recognition result callback.

 @param results The recognition result; the keys of its first element are the
                JSON fragments.
 @param isLast  YES when this is the final result of the session.
 */
- (void)onResults:(NSArray *)results isLast:(BOOL)isLast
{
    NSMutableString *jsonText = [[NSMutableString alloc] init];
    // firstObject instead of results[0]: an empty array must not throw.
    NSDictionary *fragments = results.firstObject;
    for (NSString *key in fragments)
    {
        [jsonText appendFormat:@"%@", key];
    }

    NSString *parsedText = [[self class] stringFromJson:jsonText];
    if (parsedText.length > 0)
    {
        [self.resultText appendString:parsedText];
    }

    // Partial results keep accumulating; the session is only finished (and the
    // caller only notified) once the last result has arrived.
    if (!isLast)
    {
        return;
    }

    if (self.resultSpeech)
    {
        // Strip one trailing full stop. Only a trailing one: cutting at the last
        // full stop anywhere would also drop any text that follows it.
        NSString *fullStop = @"。";
        if ([self.resultText hasSuffix:fullStop])
        {
            NSRange tail = NSMakeRange(self.resultText.length - fullStop.length, fullStop.length);
            [self.resultText deleteCharactersInRange:tail];
        }

        // Hand out an immutable snapshot, not the internal mutable buffer.
        self.resultSpeech([self.resultText copy]);
    }

    [self voiceCancel];
}

/**
 Session finished callback.

 @param error error.errorCode == 0 means the session ended normally; any other
              value means an error occurred.
 */
- (void)onError:(IFlySpeechError *)error
{
    if (self.errorSpeech)
    {
        BOOL isSuccess = (0 == error.errorCode);
        self.errorSpeech(isSuccess);
    }
}

/** Recording stopped callback. */
- (void)onEndOfSpeech
{
    if (self.endSpeech)
    {
        self.endSpeech();
    }
}

/** Recognition started callback. */
- (void)onBeginOfSpeech
{
    if (self.beginSpeech)
    {
        self.beginSpeech();
    }
}

/** Volume callback; volume is 0-30. */
- (void)onVolumeChanged:(int)volume
{
    if (self.volumeSpeech)
    {
        self.volumeSpeech(volume);
    }
}

#pragma mark 解析方法------------
/**************************************************************************/

/**
 Parses the result returned for command-word recognition.

 Each line that contains "confidence=", " grammar=" and "input=" contributes one
 "<input> 置信度<confidence>" line to the output. Returns an empty string as soon
 as a line whose id is "nomatch" is found. Lines whose fields are not in the
 expected order are skipped.
 */
+ (NSString *)stringFromAsr:(NSString *)params
{
    NSMutableString *resultString = [[NSMutableString alloc] init];
    NSCharacterSet *blanks = [NSCharacterSet whitespaceAndNewlineCharacterSet];

    for (NSString *line in [params componentsSeparatedByString:@"\n"])
    {
        NSRange idRange = [line rangeOfString:@"id="];
        NSRange nameRange = [line rangeOfString:@"name="];
        NSRange confidenceRange = [line rangeOfString:@"confidence="];
        NSRange grammarRange = [line rangeOfString:@" grammar="];
        NSRange inputRange = [line rangeOfString:@"input="];

        if (confidenceRange.length == 0 || grammarRange.length == 0 || inputRange.length == 0)
        {
            continue;
        }

        // Check for "nomatch". The id value sits between "id=" and "name="; without
        // a following "name=" the length computation would underflow.
        if (idRange.length != 0 && nameRange.length != 0 && NSMaxRange(idRange) <= nameRange.location)
        {
            NSUInteger idStart = NSMaxRange(idRange);
            NSString *rawId = [line substringWithRange:NSMakeRange(idStart, nameRange.location - idStart)];
            NSString *idValue = [rawId stringByTrimmingCharactersInSet:blanks];
            if ([idValue isEqualToString:@"nomatch"])
            {
                return @"";
            }
        }

        // Confidence value: the text between "confidence=" and " grammar=".
        NSUInteger confidenceStart = NSMaxRange(confidenceRange);
        if (grammarRange.location < confidenceStart)
        {
            // Fields out of order; extracting would underflow.
            continue;
        }
        NSString *score = [line substringWithRange:NSMakeRange(confidenceStart, grammarRange.location - confidenceStart)];

        // Input text: everything after "input=".
        NSString *inputString = [line substringFromIndex:NSMaxRange(inputRange)];

        [resultString appendFormat:@"%@ 置信度%@\n", inputString, score];
    }

    return resultString;
}

/**
 Collects every candidate-word dictionary ("ws" -> "cw" entries) of a result JSON.

 Only entries whose "w" value is a string are returned; invalid JSON or an
 unexpected structure yields an empty array.
 */
+ (NSArray *)candidateWordsFromJson:(NSString *)params
{
    NSMutableArray *candidates = [[NSMutableArray alloc] init];

    // The data has to be UTF-8.
    NSData *dataJSON = [params dataUsingEncoding:NSUTF8StringEncoding];
    if (dataJSON == nil)
    {
        return candidates;
    }

    id root = [NSJSONSerialization JSONObjectWithData:dataJSON options:kNilOptions error:nil];
    if (![root isKindOfClass:[NSDictionary class]])
    {
        return candidates;
    }

    id words = [(NSDictionary *)root objectForKey:@"ws"];
    if (![words isKindOfClass:[NSArray class]])
    {
        return candidates;
    }

    for (id word in (NSArray *)words)
    {
        if (![word isKindOfClass:[NSDictionary class]])
        {
            continue;
        }

        id choices = [(NSDictionary *)word objectForKey:@"cw"];
        if (![choices isKindOfClass:[NSArray class]])
        {
            continue;
        }

        for (id choice in (NSArray *)choices)
        {
            if ([choice isKindOfClass:[NSDictionary class]]
                && [[(NSDictionary *)choice objectForKey:@"w"] isKindOfClass:[NSString class]])
            {
                [candidates addObject:choice];
            }
        }
    }

    return candidates;
}

/**
 Parses dictation results in JSON format.

 Example params:
 {"sn":1,"ls":true,"bg":0,"ed":0,"ws":[{"bg":0,"cw":[{"w":"白日","sc":0}]},{"bg":0,"cw":[{"w":"依山","sc":0}]},{"bg":0,"cw":[{"w":"尽","sc":0}]},{"bg":0,"cw":[{"w":"黄河入海流","sc":0}]},{"bg":0,"cw":[{"w":"。","sc":0}]}]}

 @return The concatenated "w" values, an empty string when nothing could be
         parsed, or nil when params is nil.
 */
+ (NSString *)stringFromJson:(NSString *)params
{
    if (params == nil)
    {
        return nil;
    }

    NSMutableString *tempStr = [[NSMutableString alloc] init];
    for (NSDictionary *candidate in [self candidateWordsFromJson:params])
    {
        [tempStr appendString:[candidate objectForKey:@"w"]];
    }

    return tempStr;
}

/**
 Parses the result returned for grammar (ABNF) recognition.

 @return One "<word> 置信度:<score>" line per candidate word, or nil when
         params is nil.
 */
+ (NSString *)stringFromABNFJson:(NSString *)params
{
    if (params == nil)
    {
        return nil;
    }

    NSMutableString *tempStr = [[NSMutableString alloc] init];
    for (NSDictionary *candidate in [self candidateWordsFromJson:params])
    {
        [tempStr appendString:[candidate objectForKey:@"w"]];
        [tempStr appendFormat:@" 置信度:%@", [candidate objectForKey:@"sc"]];
        [tempStr appendString:@"\n"];
    }

    return tempStr;
}

/**************************************************************************/

@end


使用

初始化方法
/// Initializes the speech SDK.
+ (void)VoiceInitialize
{
    // SDK log level; the log file ends up in the working path configured below.
    [IFlySetting setLogFile:LVL_ALL];

    // Mirror the log to the console as well.
    [IFlySetting showLogcat:YES];

    // SDK working path: the caches directory.
    NSString *cacheDirectory = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES)[0];
    [IFlySetting setLogFilePath:cacheDirectory];

    // The app id is the application's unique identity and has to be supplied at
    // initialization. Initialization is asynchronous and may be performed at app
    // launch (see MSCAppDelegate.m in the SDK demo). Using the service without
    // initializing first usually yields error code 10111.
    NSString *appIdParameter = [NSString stringWithFormat:@"appid=%@", VoiceAPPID];
    [IFlySpeechUtility createUtility:appIdParameter];
}

初始化调用
- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions
{
    // Initialize the speech SDK as soon as the application has launched.
    [VoiceConversion VoiceInitialize];

    return YES;
}


#import "VoiceConversion.h"

@interface ViewController ()

/// Speech-to-text helper, created on first access.
@property (nonatomic, strong) VoiceConversion *voiceConversion;
/// Label showing the recognition state, the input volume and the final text.
@property (nonatomic, strong) UILabel *messageLabel;

@end

@implementation ViewController

- (void)viewDidLoad
{
    [super viewDidLoad];

    // Three navigation bar buttons: start, stop and cancel.
    UIBarButtonItem *startButton = [self barButtonItemWithTitle:@"start" action:@selector(startItemClick:)];
    UIBarButtonItem *stopButton = [self barButtonItemWithTitle:@"stop" action:@selector(stopItemClick:)];
    UIBarButtonItem *cancelButton = [self barButtonItemWithTitle:@"cancel" action:@selector(cancelItemClick:)];
    self.navigationItem.rightBarButtonItems = @[startButton, stopButton, cancelButton];

    self.title = @"科大讯飞语音";

    [self setUI];
}

- (void)didReceiveMemoryWarning
{
    [super didReceiveMemoryWarning];
}

#pragma mark - 视图

/// Builds a "done"-style bar button that sends the given action to self.
- (UIBarButtonItem *)barButtonItemWithTitle:(NSString *)title action:(SEL)action
{
    return [[UIBarButtonItem alloc] initWithTitle:title
                                            style:UIBarButtonItemStyleDone
                                           target:self
                                           action:action];
}

/// Creates the message label and adds it to the view.
- (void)setUI
{
    if ([self respondsToSelector:@selector(setEdgesForExtendedLayout:)])
    {
        self.edgesForExtendedLayout = UIRectEdgeNone;
    }

    const CGFloat margin = 10.0;
    CGRect labelFrame = CGRectMake(margin, margin, CGRectGetWidth(self.view.bounds) - margin * 2, 40.0);

    UILabel *label = [[UILabel alloc] initWithFrame:labelFrame];
    label.backgroundColor = [UIColor colorWithWhite:0.5 alpha:0.3];
    label.textAlignment = NSTextAlignmentCenter;

    self.messageLabel = label;
    [self.view addSubview:label];
}

#pragma mark - 响应

/// Starts a dictation session and mirrors its progress in the message label.
- (void)startItemClick:(UIBarButtonItem *)item
{
    // The callbacks only hold a weak reference to the controller.
    __weak typeof(self) weakSelf = self;

    [self.voiceConversion voiceStart:^(BOOL isStart) {
        NSLog(@"1 start");

        weakSelf.messageLabel.text = isStart ? @"正在录音" : @"启动识别服务失败,请稍后重试";
    } speechBegin:^{
        NSLog(@"2 begin");
    } speechEnd:^{
        NSLog(@"3 end");
    } speechError:^(BOOL isSuccess) {
        NSLog(@"4 error");
    } speechResult:^(NSString *text) {
        NSLog(@"5 result");

        weakSelf.messageLabel.text = text;
    } speechVolume:^(int volume) {
        NSLog(@"6 volume");

        weakSelf.messageLabel.text = [NSString stringWithFormat:@"音量:%d", volume];
    }];
}

/// Stops recording.
- (void)stopItemClick:(UIBarButtonItem *)item
{
    [self.voiceConversion voiceStop];

    self.messageLabel.text = @"停止录音";
}

/// Cancels the recognition session.
- (void)cancelItemClick:(UIBarButtonItem *)item
{
    [self.voiceConversion voiceCancel];

    self.messageLabel.text = @"取消识别";
}

#pragma mark - getter

- (VoiceConversion *)voiceConversion
{
    if (_voiceConversion == nil)
    {
        _voiceConversion = [[VoiceConversion alloc] init];
    }

    return _voiceConversion;
}

@end






1 1
原创粉丝点击