• 微软(TTS)文本转语音服务API实现


    本文基于博客《java实现微软文本转语音(TTS)经验总结》(${简简单单}的博客,CSDN)实现,首先感谢原博主提供源码。本人在其基础上添加了一些详细的注释,方便大家更好地理解和使用——毕竟我用原文的代码调试了一下午才调通,所以把一些细节问题给大家标注出来,免得浪费大家的时间。下面直接开始代码吧!

    首先大家需要去微软官网获取到密钥,方便调用时可以使用,大家注意看下图,我们一定要注意给我们分配到的区域,我这里是分配到eastus ,就是east us(美国东部)的意思,大家一定需要注意一下,后面会使用到的,然后终结点里面的地址就是我们获取token的地址

    下面我们准备几个类,方便后面使用,大家把代码都复制到自己项目中,不要有遗漏:

    1. package com.daoversal.util;
    /**
     * Minimal growable byte buffer: chunks are appended with {@code cat} and the
     * accumulated content is read back with {@code getArray}.
     */
    public class ByteArray {
        /** Backing storage; only the first {@code length} bytes hold appended content. */
        private byte[] data;
        /** Number of content bytes currently stored in {@code data}. */
        private int length;

        /** Creates an empty buffer backed by a zero-length array. */
        public ByteArray() {
            this.length = 0;
            this.data = new byte[0];
        }

        /**
         * Wraps the given array without copying it; the whole array counts as content.
         *
         * @param ba initial content, used directly as the backing array
         */
        public ByteArray(byte[] ba) {
            this.data = ba;
            this.length = ba.length;
        }

        /**
         * Appends {@code length} bytes of {@code second}, starting at {@code offset}.
         *
         * @param second source array
         * @param offset index of the first byte to copy from {@code second}
         * @param length number of bytes to copy
         */
        public void cat(byte[] second, int offset, int length) {
            final int used = this.length;
            byte[] target = data;
            if (used + length > data.length) {
                // Not enough room: allocate twice the larger of (current capacity, chunk size)
                // and carry the existing content over.
                target = new byte[Math.max(data.length, length) << 1];
                System.arraycopy(data, 0, target, 0, used);
            }
            System.arraycopy(second, offset, target, used, length);
            data = target;
            this.length = used + length;
        }

        /**
         * Appends the whole of {@code second}.
         *
         * @param second source array
         */
        public void cat(byte[] second) {
            cat(second, 0, second.length);
        }

        /**
         * Returns the content as an array whose length equals {@code getLength()}.
         * If the backing array has spare capacity it is first replaced by a trimmed copy;
         * in both cases the returned array is the buffer's own backing array, not a
         * defensive copy.
         *
         * @return the backing array, trimmed to the content length
         */
        public byte[] getArray() {
            if (length != data.length) {
                byte[] trimmed = new byte[length];
                System.arraycopy(data, 0, trimmed, 0, length);
                data = trimmed;
            }
            return data;
        }

        /**
         * @return number of bytes appended so far
         */
        public int getLength() {
            return length;
        }
    }
    1. package com.daoversal.util;
    2. import javax.net.ssl.HttpsURLConnection;
    3. import java.net.URL;
    /** Small factory helper for creating HTTPS connection objects. */
    public class HttpsConnection {
        /**
         * Creates an {@link HttpsURLConnection} for the given URL. The connection object is
         * only created here; callers configure it and call {@code connect()} themselves.
         *
         * @param connectingUrl absolute URL; must use the https scheme
         * @return the connection object for {@code connectingUrl}
         * @throws java.net.MalformedURLException if {@code connectingUrl} is not a valid URL
         * @throws IllegalArgumentException if the URL does not yield an HTTPS connection
         *         (previously this surfaced as an opaque ClassCastException)
         * @throws Exception any other failure while creating the connection object
         */
        public static HttpsURLConnection getHttpsConnection(String connectingUrl) throws Exception {
            URL url = new URL(connectingUrl);
            java.net.URLConnection connection = url.openConnection();
            if (!(connection instanceof HttpsURLConnection)) {
                throw new IllegalArgumentException("Not an https URL: " + connectingUrl);
            }
            return (HttpsURLConnection) connection;
        }
    }
    1. package com.daoversal.util;
    2. import lombok.extern.slf4j.Slf4j;
    3. import org.w3c.dom.Document;
    4. import org.w3c.dom.Element;
    5. import javax.xml.parsers.DocumentBuilder;
    6. import javax.xml.parsers.DocumentBuilderFactory;
    7. import javax.xml.parsers.ParserConfigurationException;
    8. import javax.xml.transform.OutputKeys;
    9. import javax.xml.transform.Transformer;
    10. import javax.xml.transform.TransformerException;
    11. import javax.xml.transform.TransformerFactory;
    12. import javax.xml.transform.dom.DOMSource;
    13. import javax.xml.transform.stream.StreamResult;
    14. import java.io.StringWriter;
    15. @Slf4j
    16. public class XmlDom {
    17. public static String createDom(String locale, String genderName, String voiceName, String textToSynthesize){
    18. Document doc = null;
    19. Element speak, voice;
    20. try {
    21. DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    22. DocumentBuilder builder = dbf.newDocumentBuilder();
    23. doc = builder.newDocument();
    24. if (doc != null){
    25. speak = doc.createElement("speak");
    26. speak.setAttribute("version", "1.0");
    27. speak.setAttribute("xml:lang", "en-US");
    28. voice = doc.createElement("voice");
    29. voice.setAttribute("xml:lang", locale);
    30. voice.setAttribute("xml:gender", genderName);
    31. voice.setAttribute("name", voiceName);
    32. voice.appendChild(doc.createTextNode(textToSynthesize));
    33. speak.appendChild(voice);
    34. doc.appendChild(speak);
    35. }
    36. } catch (ParserConfigurationException e) {
    37. log.error("Create ssml document failed: {}",e.getMessage());
    38. return null;
    39. }
    40. return transformDom(doc);
    41. }
    42. private static String transformDom(Document doc){
    43. StringWriter writer = new StringWriter();
    44. try {
    45. TransformerFactory tf = TransformerFactory.newInstance();
    46. Transformer transformer;
    47. transformer = tf.newTransformer();
    48. transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    49. transformer.transform(new DOMSource(doc), new StreamResult(writer));
    50. } catch (TransformerException e) {
    51. log.error("Transform ssml document failed: {}",e.getMessage());
    52. return null;
    53. }
    54. return writer.getBuffer().toString().replaceAll("\n|\r", "");
    55. }
    56. }

     下面这个类我给大家重点讲一下,大家先去下面的官方文档核对自己需要的参数:Text to speech API reference (REST) - Speech service - Azure AI services | Microsoft Learn

    AUDIO_24KHZ_48KBITRATE_MONO_MP3 :音频输出格式(不是语言类型),这个不太重要,哪种格式效果好就用哪种,可选值去下图(或上面的官方文档)里找:

    ACCESS_TOKEN_URI :就是本文章的第一张图里面,里面获取token的地址,直接将地址复制进来就好了。

    API_KEY :自己的api key,就是密钥。

     TTS_SERVICE_URI : 这个地址一定要对应分配的区域才行,不然会报权限错误(HTTP 401),例如:

    Synthesis tts speech failed Server returned HTTP response code: 401 for URL: https://.........

    我这里是 east us(美国东部),所以就使用美国东部里面的地址即可。

    1. package com.daoversal.util;
    /** Configuration constants for the text-to-speech integration. */
    public class TtsConst {
        /**
         * Subscription key of the speech resource.
         * The value below is a placeholder and must be replaced with your own key.
         */
        public static final String API_KEY = "自己的密钥";

        /**
         * URL used to obtain an access token: the endpoint address shown on the page where
         * the key is issued. The value below is a placeholder and must be replaced.
         */
        public static final String ACCESS_TOKEN_URI = "token获取地址";

        /**
         * URL of the synthesis service. It has to be the address for the region the
         * resource was assigned to (here: eastus).
         */
        public static final String TTS_SERVICE_URI = "https://eastus.tts.speech.microsoft.com/cognitiveservices/v1/";

        /**
         * Audio output format requested from the service (the original author reports this
         * one gave the best result). Other formats are listed at
         * https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-text-to-speech?tabs=streaming
         */
        public static final String AUDIO_24KHZ_48KBITRATE_MONO_MP3 = "audio-24khz-48kbitrate-mono-mp3";
    }

    下面参数给大家讲一下:

    textToSynthesize : 传入的合成语音文本内容

    locale:语言类型,大家可以参考,中文在嵌入式语音里面,大家可以在两个页面找到自己需要的语言。

    Embedded Speech - Speech service - Azure AI services | Microsoft Learn

    Language support - Speech service - Azure AI services | Microsoft Learn

    gender:为发声人性别,Male表示男性

     voiceName :发声者名称,大家可以去下图找出对应的,比如中文的话可以用 zh-CN-YunxiNeural(本文最后的测试代码用的就是它):

    1. package com.daoversal.util;
    2. import lombok.extern.slf4j.Slf4j;
    3. import org.apache.commons.lang3.StringUtils;
    4. import org.springframework.stereotype.Component;
    5. import javax.annotation.Resource;
    6. import javax.net.ssl.HttpsURLConnection;
    7. import java.io.DataOutputStream;
    8. import java.io.InputStream;
    9. @Slf4j
    10. @Component
    11. public class TtsService {
    12. @Resource
    13. private Authentication authentication;
    14. /**
    15. * 合成音频
    16. * @param textToSynthesize 传入需要翻译的文本
    17. * @param locale 要合成的语言类型
    18. * @param gender 性别
    19. * @param voiceName 发音者名称
    20. * @return
    21. */
    22. public byte[] genAudioBytes(String textToSynthesize, String locale, String gender, String voiceName) {
    23. String accessToken = authentication.genAccessToken();
    24. if (StringUtils.isEmpty(accessToken)) {
    25. return new byte[0];
    26. }
    27. try {
    28. HttpsURLConnection webRequest = HttpsConnection.getHttpsConnection(TtsConst.TTS_SERVICE_URI);
    29. webRequest.setRequestProperty("Host", "eastus.tts.speech.microsoft.com");
    30. webRequest.setRequestProperty("Content-Type", "application/ssml+xml");
    31. webRequest.setRequestProperty("X-Microsoft-OutputFormat", TtsConst.AUDIO_24KHZ_48KBITRATE_MONO_MP3);
    32. webRequest.setRequestProperty("Authorization", "Bearer " + accessToken);
    33. webRequest.setRequestProperty("Ocp-Apim-Subscription-Key", TtsConst.API_KEY);
    34. webRequest.setRequestProperty("User-Agent", "Mozilla/5.0");
    35. webRequest.setRequestProperty("Accept", "*/*");
    36. webRequest.setDoInput(true);
    37. webRequest.setDoOutput(true);
    38. webRequest.setConnectTimeout(5000);
    39. webRequest.setReadTimeout(300000);
    40. webRequest.setRequestMethod("POST");
    41. String body = XmlDom.createDom(locale, gender, voiceName, textToSynthesize);
    42. if (StringUtils.isEmpty(body)) {
    43. return new byte[0];
    44. }
    45. byte[] bytes = body.getBytes();
    46. webRequest.setRequestProperty("content-length", String.valueOf(bytes.length));
    47. webRequest.connect();
    48. DataOutputStream dop = new DataOutputStream(webRequest.getOutputStream());
    49. dop.write(bytes);
    50. dop.flush();
    51. dop.close();
    52. InputStream inSt = webRequest.getInputStream();
    53. ByteArray ba = new ByteArray();
    54. int rn2 = 0;
    55. int bufferLength = 4096;
    56. byte[] buf2 = new byte[bufferLength];
    57. while ((rn2 = inSt.read(buf2, 0, bufferLength)) > 0) {
    58. ba.cat(buf2, 0, rn2);
    59. }
    60. inSt.close();
    61. webRequest.disconnect();
    62. return ba.getArray();
    63. } catch (Exception e) {
    64. log.error("Synthesis tts speech failed {}", e.getMessage());
    65. }
    66. return null;
    67. }
    68. }
    1. package com.daoversal.util;
    2. import lombok.extern.slf4j.Slf4j;
    3. import org.apache.commons.lang3.StringUtils;
    4. import org.redisson.api.RedissonClient;
    5. import org.springframework.beans.factory.annotation.Autowired;
    6. import org.springframework.stereotype.Component;
    7. import javax.net.ssl.HttpsURLConnection;
    8. import java.io.BufferedReader;
    9. import java.io.DataOutputStream;
    10. import java.io.InputStream;
    11. import java.io.InputStreamReader;
    12. import java.util.concurrent.TimeUnit;
    13. /**
    14. * 此类获取token,每次调用都需要使用到token的
    15. * token的有效期是10分钟,但是不建议大家10分钟调一次,免得使用了失效的token
    16. */
    17. @Component
    18. @Slf4j
    19. public class Authentication {
    20. @Autowired
    21. private RedissonClient redisson;
    22. public String genAccessToken() {
    23. InputStream inSt;
    24. HttpsURLConnection webRequest;
    25. try {
    26. //先从redis里面取缓存的token,如果没有就远程拉取,有的话就直接使用,大家可根据自己的业务调整
    27. Object ob = redisson.getBucket("accessToken").get();
    28. String accessToken = ob == null ? null : ob.toString();
    29. if (StringUtils.isEmpty(accessToken)) {
    30. webRequest = HttpsConnection.getHttpsConnection(TtsConst.ACCESS_TOKEN_URI);
    31. webRequest.setDoInput(true);
    32. webRequest.setDoOutput(true);
    33. webRequest.setConnectTimeout(5000);
    34. webRequest.setReadTimeout(5000);
    35. webRequest.setRequestMethod("POST");
    36. byte[] bytes = new byte[0];
    37. webRequest.setRequestProperty("content-length", String.valueOf(bytes.length));
    38. //api的key,取微软官网获取
    39. webRequest.setRequestProperty("Ocp-Apim-Subscription-Key", TtsConst.API_KEY);
    40. webRequest.connect();
    41. DataOutputStream dop = new DataOutputStream(webRequest.getOutputStream());
    42. dop.write(bytes);
    43. dop.flush();
    44. dop.close();
    45. inSt = webRequest.getInputStream();
    46. InputStreamReader in = new InputStreamReader(inSt);
    47. BufferedReader bufferedReader = new BufferedReader(in);
    48. StringBuilder strBuffer = new StringBuilder();
    49. String line = null;
    50. while ((line = bufferedReader.readLine()) != null) {
    51. strBuffer.append(line);
    52. }
    53. bufferedReader.close();
    54. in.close();
    55. inSt.close();
    56. webRequest.disconnect();
    57. accessToken = strBuffer.toString();
    58. //获取到了token,缓存到redis里面,5分钟失效
    59. redisson.getBucket("accessToken").set(accessToken,5L, TimeUnit.MINUTES);
    60. //设置accessToken的过期时间为5分钟
    61. log.info("New tts access token {}", accessToken);
    62. }
    63. return accessToken;
    64. } catch (Exception e) {
    65. log.error("Generate tts access token failed {}", e.getMessage());
    66. }
    67. return null;
    68. }
    69. }

    最后就是调用了,大家可以测试了:

    1. package com.daoversal.web;
    2. import com.daoversal.framework.http.Response;
    3. import com.daoversal.task.DvWeekCountTask;
    4. import com.daoversal.task.RechargeTask;
    5. import com.daoversal.task.UserGradeCountTask;
    6. import com.daoversal.task.WindControlMsgTask;
    7. import com.daoversal.util.TtsService;
    8. import io.swagger.annotations.Api;
    9. import io.swagger.annotations.ApiOperation;
    10. import okhttp3.*;
    11. import org.springframework.boot.configurationprocessor.json.JSONException;
    12. import org.springframework.boot.configurationprocessor.json.JSONObject;
    13. import org.springframework.web.bind.annotation.GetMapping;
    14. import org.springframework.web.bind.annotation.PostMapping;
    15. import org.springframework.web.bind.annotation.RequestMapping;
    16. import org.springframework.web.bind.annotation.RestController;
    17. import javax.annotation.Resource;
    18. import javax.servlet.http.HttpServletResponse;
    19. import java.io.*;
    20. /**
    21. *

    22. * 套餐价值释放记录表 前端控制器
    23. *

    24. *
    25. * @author HayDen
    26. * @since 03 22 10:44:13
    27. */
    28. @RestController
    29. @RequestMapping("/test")
    30. @Api(value = "test")
    31. public class TestController {
    32. @Resource
    33. private TtsService testService;
    34. @PostMapping("/ttsService")
    35. @ApiOperation(value = "获取ttsService", httpMethod = "POST" )
    36. public void ttsService(String text) {
    37. // byte[] bte = testService.genAudioBytes(res,"en-US","Male","en-US-JennyNeural");
    38. byte[] bte = testService.genAudioBytes(text,"zh-CN","Male","zh-CN-YunxiNeural");
    39. String value = "hllo.mp3";
    40. convertByteArrayToFile(bte,value);
    41. System.out.println("213213123");
    42. }
    43. /**
    44. * 此文件是将byte[] 转换成文件存储到指定路径的
    45. * @param arr
    46. * @param value
    47. */
    48. public static void convertByteArrayToFile(byte[] arr,String value) {
    49. try (
    50. BufferedInputStream bis = new BufferedInputStream(new ByteArrayInputStream(arr));
    51. //这里是转换以后的文件存储的路径
    52. FileOutputStream fileOutputStream = new FileOutputStream("/Users/recovery/Downloads/"+value);
    53. BufferedOutputStream bos = new BufferedOutputStream(fileOutputStream)
    54. ) {
    55. int data;
    56. while ((data = bis.read()) != -1) {
    57. bos.write(data);
    58. }
    59. bos.flush();
    60. } catch (IOException e) {
    61. e.printStackTrace();
    62. }
    63. }
    64. }

    最后大家需要注意一下:如果你选的语言是英文 en-US,但是输入的文本是中文,它是不会自动翻译的(TTS 只负责朗读,不负责翻译),所以大家一定要注意语言类型要和文本一致,不要弄错了。如果有疑问可以留言哦,我看到肯定会毫无保留地给大家说明。

  • 相关阅读:
    NSSCTF做题第9页(2)
    SpringBoot实现定时任务
    面试经典150题——Day22
    Flutter学习笔记 --单一子元素组件
    MySQL高级SQL语句
    【新闻稿】Solv 与 zCloak 联合开发跨境贸易场景下可编程数字凭证项目,获得新加坡、加纳两国央行支持...
    LAS Spark 在 TPC-DS 的优化揭秘
    Matter Test-Harness自动化测试系统
    锁的基础说明
    (附源码)springboot太原学院贫困生申请管理系统 毕业设计 101517
  • 原文地址:https://blog.csdn.net/qq_38935605/article/details/133136466