文章标题

来源：互联网发布：ip代理切换软件编辑：程序博客网时间：2024/05/18 00:28
根据8K16bit语音分离结果重组.wav文件字节的方法

废话不多说，直接上代码
@Override    public void saveSplitVoiceByBatch(String originFileId, String jobInfoJson,            String formatFileId) {        try {            if (StringUtils.isNotBlank(originFileId)                    && StringUtils.isNotBlank(jobInfoJson)) {                // 对引擎返回的字节数组进行序列化                List<VoiceData> voiceDataList = JSON.parseArray(jobInfoJson,                        VoiceData.class);                for (VoiceData voiceData : voiceDataList) {                    Integer voiceDataLength = 0;                    for (VoiceDataDTO voiceD : voiceData.getSegs()) {                        voiceDataLength += voiceD.getLength();                    }                    // 设置数组中所有字节长度                    voiceData.setLength(voiceDataLength);                }                // 通过对应的集合中的字节总长度进行排序                Collections.sort(voiceDataList, new Comparator<VoiceData>() {                    @Override                    public int compare(VoiceData data1, VoiceData data2) {                        if (data1.getLength() > data2.getLength()) {                            return -1;                        } else if (data1.getLength() < data2.getLength()) {                            return 1;                        } else {// 相等                            return 0;                        }                    }                });                //获取源文件的文件信息                AttachFile attachFile = attachFileDao.select(formatFileId);                //当分离的语音文件大于等于1份                if (voiceDataList.size() >= 1) {                    //获取总长度第一大的集合                    List<VoiceDataDTO> voiceA = voiceDataList.get(0).getSegs();                    //获取总长度第二大的集合                    List<VoiceDataDTO> voiceB = voiceDataList.get(1).getSegs();                    //得到源文件的字节数组长度                    byte[] waveFile = getVoiceFromFdfs(attachFile);                    // 通过引擎返回的分离长度信息和源文件字节数组对A语音文件进行组装                    InputStream inputStreamA = splitVoiceInfo(waveFile, voiceA);                   
 //将新合成的A语音文件进行上传                    String attachFileIdA = uploadSplitVoice(attachFile, "A",                            inputStreamA);                    String attachFileIdB = "";                    //当分离的语音文件大于1份,只取前2份引擎分离字节数组字节总长度最大的                    if (voiceDataList.size() > 1) {                        // 通过引擎返回的分离长度信息和源文件字节数组对A语音文件进行组装                        InputStream inputStreamB = splitVoiceInfo(waveFile, voiceB);                        //将新合成的A语音文件进行上传                        attachFileIdB = uploadSplitVoice(attachFile, "B", inputStreamB);                    }                    //通过源文件Id，将新合成的A、B语音文件主键更新在对应的任务表中                    regTaskDao.updateBatchSplit(originFileId, attachFileIdA,                            attachFileIdB);                    logger.info("形成新的A、B语音处理结束，A、B语音对应的文件主键分别为：A:"                            + attachFileIdA + "    B:" + attachFileIdB);                } else {                    regTaskDao.updateBatchSplit(originFileId, formatFileId, "");                }            } else {                logger.info("引擎未返回有效数据");            }        } catch (Exception e) {            e.printStackTrace();        }    }    /**     * 对分离后的语音文件进行上传并返回对应的文件主键     *      * @param attachFile     *            原语音文件     * @param inputStream     *            原语音文件输入流     * @return 上传文件主键     */    public String uploadSplitVoice(AttachFile attachFile, String fileName, InputStream inputStream) {        AttachFile attach = new AttachFile();        String attachFileId = null;        try {            attach.setFileSize((long) (inputStream.available()));            attach.setOriginalName(attachFile.getOriginalName().substring(0,                    attachFile.getOriginalName().lastIndexOf("."))                    + "."                    
+ fileName                    + "说话人语音"                    + attachFile.getOriginalName().substring( attachFile.getOriginalName().lastIndexOf(".")));            attachFileId = attachFileService.uploadAndConfirm(attach, inputStream);            inputStream.close();        } catch (IOException e) {            e.printStackTrace();        }        return attachFileId;    }    /**     * 通过语音文件获取语音文件的字节数组     *      * @param attachFile     *            原语音文件     * @return byte[] 语音文件对应的字符数组     */    public byte[] getVoiceFromFdfs(AttachFile attachFile) {        FdfsFile fdfsFile = (FdfsFile) attachFile;        byte[] waveFile = null;        ByteArrayOutputStream out = new ByteArrayOutputStream();        try {            fdfsClient.downloadFile(out, fdfsFile.getStoreLocation());            waveFile = out.toByteArray();        } catch (Exception e) {        }        return waveFile;    }    /**     *      *      * @param waveFile     *            原始语音字节数组     * @param voiceData     *            引擎处理后的语音片段字节长度     * @return InputStream 输入流     */    public InputStream splitVoiceInfo(byte[] waveFile,            List<VoiceDataDTO> voiceData) {        byte[] bytes = new byte[0];        byte[] data = new byte[0];        byte[] dataHead = new byte[44];        int dataLength = 0;        int byteLength = 0;        // 语音文件格式头部        dataHead = Arrays.copyOfRange(waveFile, 0, 44);        // 8k16bit的语音        int lenPerMs = 8000 * 16 / 8 / 1000;        // B说话人语音        for (VoiceDataDTO voice : voiceData) {            if (voice.getEnd() * lenPerMs <= waveFile.length) {                bytes = new byte[(voice.getEnd() - voice.getBegin()) * lenPerMs];                bytes = Arrays.copyOfRange(waveFile, voice.getBegin()                        * lenPerMs, voice.getEnd() * lenPerMs);                dataLength = data.length;                byteLength = bytes.length;                byte[] tempData = Arrays.copyOf(data, dataLength + byteLength);// 扩容                System.arraycopy(bytes, 
0, tempData, dataLength, byteLength);// 将第二个数组与第一个数组合并                data = tempData;            }        }        logger.info("准备        进入语音头部信息转换：");        dataHead = changeHead(dataHead, data);        InputStream inputStream = new ByteArrayInputStream(dataHead);        return inputStream;    }    /**     * 对语音文件头信息进行组合     */    public byte[] changeHead(byte[] headBytes, byte[] data) {        logger.debug("进入语音头部信息转换：");        // 合并语音头和语音数据        int validLen = headBytes.length + data.length;        byte[] validBytes = new byte[validLen];        System.arraycopy(headBytes, 0, validBytes, 0, headBytes.length);        System.arraycopy(data, 0, validBytes, headBytes.length, data.length);        byte[] orgPreSize = new byte[4];        orgPreSize = Arrays.copyOfRange(validBytes, 4, 8);        byte[] orgNexSize = new byte[4];        orgNexSize = Arrays.copyOfRange(validBytes, 40, 44);        byte[] orgDataType = new byte[4];        orgDataType = Arrays.copyOfRange(validBytes, 36, 40);        logger.info("原始头部信息中： " + CommonUtils.lBytesToInt(orgPreSize) + " ,"                + CommonUtils.lBytesToInt(orgNexSize) + " ,"                + new String(orgDataType));        // 修改头部信息        int dataPreSize = validLen - 8;        int dataNextSize = validLen - 44;        String dataType = "data";        byte[] dataPreSizeBytes = CommonUtils.toLH(dataPreSize);        byte[] dataNextSizeBytes = CommonUtils.toLH(dataNextSize);        byte[] dataTypeBytes = dataType.getBytes();        System.arraycopy(dataPreSizeBytes, 0, validBytes, 4,                dataPreSizeBytes.length);        System.arraycopy(dataNextSizeBytes, 0, validBytes, 40,                dataNextSizeBytes.length);        System.arraycopy(dataTypeBytes, 0, validBytes, 36, dataTypeBytes.length);        byte[] nowPreSize = new byte[4];        nowPreSize = Arrays.copyOfRange(validBytes, 4, 8);        byte[] nowNexSize = new byte[4];        nowNexSize = Arrays.copyOfRange(validBytes, 40, 44);        byte[] 
nowDataType = new byte[4];        nowDataType = Arrays.copyOfRange(validBytes, 36, 40);        logger.info("新头部信息中： " + CommonUtils.lBytesToInt(nowPreSize) + " ,"                + CommonUtils.lBytesToInt(nowNexSize) + " ,"                + new String(nowDataType));        return validBytes;    }/** * 接收引擎处理后的数组中的语音片段 */public class VoiceData {    /**     * 开始字节长度     */    private List<VoiceDataDTO> segs;    /**     * 结束字节长度     */    private Integer spk;    /**     * 数据字节总长度     */    private Integer length;    /**     * @return the segs     */    public List<VoiceDataDTO> getSegs() {        return segs;    }    /**     * @param segs the segs to set     */    public void setSegs(List<VoiceDataDTO> segs) {        this.segs = segs;    }    /**     * @return the spk     */    public Integer getSpk() {        return spk;    }    /**     * @param spk the spk to set     */    public void setSpk(Integer spk) {        this.spk = spk;    }    /**     * @return the length     */    public Integer getLength() {        return length;    }    /**     * @param length the length to set     */    public void setLength(Integer length) {        this.length = length;    }}/** * 接收引擎处理后的语音片段 */public class VoiceDataDTO {    /**     * 开始字节长度     */    private Integer begin;    /**     * 结束字节长度     */    private Integer end;    /**     * 类型     */    private String type;    /**     * 字节长度     */    private Integer length;    /**     * @return the begin     */    public Integer getBegin() {        return begin;    }    /**     * @param begin the begin to set     */    public void setBegin(Integer begin) {        this.begin = begin;    }    /**     * @return the end     */    public Integer getEnd() {        return end;    }    /**     * @param end the end to set     */    public void setEnd(Integer end) {        this.end = end;    }    /**     * @return the type     */    public String getType() {        return type;    }    /**     * @param type the type to set     */    public void 
setType(String type) {        this.type = type;    }    /**     * @return the length     */    public Integer getLength() {        return this.end - this.begin;    }    /**     * @param length the length to set     */    public void setLength(Integer length) {        this.length = length;    }}
wav头文件格式：

8KHz采样、16比特量化的线性PCM语音信号的WAVE文件头格式表（共44字节）
偏移地址 字节数 数据类型 内容 文件头定义
00H 4 char “RIFF” char riff_id[4]=”RIFF”
04H 4 long int 文件总长-8 long int size0=文件总长-8
08H 8 char “WAVEfmt ” char wave_fmt[8]
10H 4 long int 10 00 00 00H(PCM) long int size1=0x10
14H 2 int 01 00H int fmttag=0x01
16H 2 int 声道数（1或2） int channel=1或2
18H 4 long int 采样率 long int samplespersec
1CH 4 long int 每秒播放字节数 long int bytepersec
20H 2 int 采样一次占字节数 int blockalign=声道数*量化数/8
22H 2 int 量化数 int bitpersamples=8或16
24H 4 char “data” char data_id[4]=“data”
28H 4 long int 采样数据字节数 long int size2=文件总长-44
2CH 到文尾 char 采样数据　
阅读全文
0 0