使用实时语音合成
前提条件
- 确保已按照配置CPP环境(Linux)配置完毕。
- 请参考SDK(websocket)获取最新版本SDK包。
初始化Client
初始化RttsClient,其参数包括AuthInfo、WebsocketService和HttpConfig,构造方式见下方示例代码。
请求参数
请求类为RttsRequest,详见表 RttsRequest。
通过set方法可以设置具体参数,详见表 RttsRequest设置参数
方法名称 | 是否必选 | 参数类型 | 描述 |
---|---|---|---|
SetAudioFormat | 否 | String | 设置语音格式,默认pcm。 |
SetAudioProperty | 否 | String | 设置语音合成特征字符串,{language}_{speaker}_{domain},即“语种_人员标识_领域”。默认chinese_xiaoyan_common。详见API文档。 |
SetSampleRate | 否 | String | 设置采样率:8000、16000,默认8000。 |
SetPitch | 否 | Integer | 设置音高,-500~500,默认0。 |
SetVolume | 否 | Integer | 设置音量,0~100,默认50。 |
SetSpeed | 否 | Integer | 设置语速,-500~500,默认0。 |
SetSubtitle | 否 | String | 设置字幕,部分发音人支持字幕时间戳,详见API文档。 |
示例代码
如下示例仅供参考,最新代码请前往SDK(websocket)章节获取并运行。
#include "RttsClient.h"
#include "RttsRequest.h"
#include "gflags/gflags.h"

#include <chrono>
#include <fstream>
#include <memory>
#include <string>
#include <thread>
#include <vector>

// NOTE(review): LOG(...) and google::InitGoogleLogging come from glog, which is
// presumably pulled in through the SDK headers above -- confirm.

// refer to https://support.huaweicloud.com/api-sis/sis_03_0115.html
// auth info
// Hard-coding the AK and SK used for authentication, or storing them in
// plaintext, is a serious security risk. Keep them encrypted in a configuration
// file or in environment variables and decrypt them only when they are used.
DEFINE_string(ak, "", "access key");
DEFINE_string(sk, "", "secrect key");
// region, for example cn-east-3, cn-north-4
DEFINE_string(region, "cn-north-4", "project region, such as cn-north-4");
// projectId, refer to https://support.huaweicloud.com/api-sis/sis_03_0008.html
DEFINE_string(projectId, "", "project id");
// FLAGS_endpoint is passed to AuthInfo below, so the flag has to be defined here.
// NOTE(review): an empty value is assumed to make the SDK derive the endpoint
// from the region -- confirm against AuthInfo.
DEFINE_string(endpoint, "", "service endpoint");

DEFINE_string(text, "华为致力于把数字世界带入每个人每个家庭每个组织,构建万物互联的智能世界。", "Text to be synthesized");
DEFINE_string(audioFormat, "pcm", "audio format, such pcm");
DEFINE_string(property, "chinese_xiaoyan_common", "");
DEFINE_string(audioPath, "test.pcm", "audio saved path");
DEFINE_string(sampleRate, "16000", "sample rate of audio");
DEFINE_string(subtitle, "", "subtitle info");
DEFINE_int32(volume, 50, "");
DEFINE_int32(speed, 0, "");
DEFINE_int32(pitch, 0, "");
DEFINE_int32(readTimeOut, 20000,
             "read time out, default 20s. Increase this value appropriately according to the length of the text");
DEFINE_int32(connectTimeOut, 20000, "connecting time out, default 20s");
DEFINE_bool(isSaved, true, "save the audio as a local file");

// Listener that logs every callback and, when saving is enabled, buffers the
// received audio chunks and writes them to `filePath` once the end response
// arrives.
class CallBack : public RttsListener {
public:
    void OnConnect() {
        LOG(INFO) << "rtts Connect success";
    }

    void OnStart(std::string text) {
        LOG(INFO) << "rtts receive start response " << text;
    }

    // Buffers one chunk of audio data (only when saving is enabled).
    void OnResp(std::string binaryData) {
        if (isSaved) {
            dataContents.push_back(binaryData);
        }
        LOG(INFO) << "rtts receive data " << binaryData.size();
    }

    void onResponseSubtitle(std::string message) {
        LOG(INFO) << message;
    }

    // Writes all buffered chunks to `filePath` in arrival order.
    void OnEnd(std::string text) {
        LOG(INFO) << "rtts receive end response " << text;
        if (!isSaved) {
            return;
        }
        std::ofstream fout(filePath, std::ios::binary);
        if (!fout.is_open()) {
            LOG(ERROR) << "filePath " << filePath << " is invalid";
            return;
        }
        for (const std::string& chunk : dataContents) {
            fout.write(chunk.data(), static_cast<std::streamsize>(chunk.size()));
        }
        fout.close();
        // Everything buffered has been handed to the stream; drop it so a reused
        // listener does not prepend stale audio to the next file.
        dataContents.clear();
        // Report success only if every write and the final flush succeeded.
        if (fout.fail()) {
            LOG(ERROR) << "failed to write audio to " << filePath;
            return;
        }
        LOG(INFO) << "success to save file in " << filePath;
    }

    void OnClose() {
        LOG(INFO) << "rtts receive Close";
    }

    void OnError(std::string text) {
        LOG(ERROR) << "rtts receive error " << text;
    }

    void SetFilePath(std::string fPath) {
        filePath = fPath;
    }

    void SetSaved(bool saved) {
        isSaved = saved;
    }

private:
    std::string filePath;                   // destination of the synthesized audio
    bool isSaved = false;                   // whether audio is buffered and written to disk
    std::vector<std::string> dataContents;  // audio chunks in arrival order
};

// Builds the auth info, connection config, listener and request from the
// command-line flags, then sends one synthesis request.
void RttsTest() {
    // 1. config parameter
    // 1.1 init authInfo
    speech::huawei_asr::AuthInfo authInfo(FLAGS_ak, FLAGS_sk, FLAGS_region, FLAGS_projectId, FLAGS_endpoint);

    // 1.2 config Connect parameter
    speech::huawei_asr::HttpConfig httpConfig;
    httpConfig.SetReadTimeout(FLAGS_readTimeOut);
    httpConfig.SetConnectTimeout(FLAGS_connectTimeOut);

    // 1.3 config callback, callback functions are optional, if not set, it will use the functions in RttsListener
    speech::huawei_asr::WebsocketService::ptr websocketServicePtr =
        websocketpp::lib::make_shared<speech::huawei_asr::WebsocketService>();
    CallBack callback;
    callback.SetSaved(FLAGS_isSaved);
    callback.SetFilePath(FLAGS_audioPath);
    websocketServicePtr->SetTtsCallBack(&callback);

    // 1.4 config request parameter
    speech::huawei_tts::RttsRequest request(FLAGS_text);
    request.SetAudioFormat(FLAGS_audioFormat);
    request.SetVolume(FLAGS_volume);
    request.SetSpeed(FLAGS_speed);
    request.SetPitch(FLAGS_pitch);
    request.SetSampleRate(FLAGS_sampleRate);
    request.SetAudioProperty(FLAGS_property);
    request.SetSubtitle(FLAGS_subtitle);

    // 2. init client
    // Owned by unique_ptr so the client is released even if Synthesis throws.
    // It is declared after `callback`, so it is destroyed first, matching the
    // original delete-before-return order.
    std::unique_ptr<speech::huawei_tts::RttsClient> rttsClient(
        new speech::huawei_tts::RttsClient(authInfo, websocketServicePtr, httpConfig));

    // 3. send request
    rttsClient->Synthesis(request);

    // wait for save file, if SetSaved(false) is used on the listener or no listener is set, it can be removed.
    std::this_thread::sleep_for(std::chrono::milliseconds(2000));
}

int main(int argc, char *argv[]) {
    // Defaults are assigned before parsing so the command line can still override them.
    FLAGS_alsologtostderr = true;
    FLAGS_log_dir = "./logs";
    gflags::ParseCommandLineFlags(&argc, &argv, true);
    google::InitGoogleLogging(argv[0]);
    RttsTest();
    return 0;
}
编译脚本
以下编译脚本仅供参考,您可以根据实际业务需求,对RttsDemo.cpp进行定制修改。
# Build the demo in an out-of-tree build directory and run it from there.
cd "${project_dir}"
# -p keeps the script re-runnable: a plain `mkdir build && cd build` skips the
# cd when build/ already exists, so the following commands would run in the
# wrong directory.
mkdir -p build && cd build
# The demo sets its log directory to ./logs, relative to where it is run.
mkdir -p logs
cmake ..
make -j
./RttsDemo --ak=yourAk --sk=yourSk --region=yourRegion --projectId=yourProjectId --isSaved=true --audioPath=test.pcm