使用实时语音合成
前提条件
- 确保已按照配置CPP环境(Linux)配置完毕。
- 请参考SDK(websocket)获取最新版本SDK包。
初始化Client
初始化RttsClient,其参数包括AuthInfo、WebsocketService和HttpConfig,构造方式详见下文示例代码。
请求参数
请求类为RttsRequest,详见表 RttsRequest。
通过set方法可以设置具体参数,详见表 RttsRequest设置参数
| 方法名称 | 是否必选 | 参数类型 | 描述 |
|---|---|---|---|
| SetAudioFormat | 否 | String | 设置语音格式,默认pcm。 |
| SetAudioProperty | 否 | String | 设置语音合成特征字符串,`{language}_{speaker}_{domain}`,即“语种_人员标识_领域”。默认chinese_xiaoyan_common。详见API文档。 |
| SetSampleRate | 否 | String | 设置采样率:8000、16000,默认8000。 |
| SetPitch | 否 | Integer | 设置音高,-500~500,默认0。 |
| SetVolume | 否 | Integer | 设置音量,0~100,默认50。 |
| SetSpeed | 否 | Integer | 设置语速,-500~500,默认0。 |
| SetSubtitle | 否 | String | 设置字幕,部分发音人支持字幕时间戳,详见API文档。 |
示例代码
如下示例仅供参考,最新代码请前往SDK(websocket)章节获取并运行。
#include "RttsClient.h"
#include "RttsRequest.h"
#include "gflags/gflags.h"
// Command-line flags of the demo (gflags).
// refer to https://support.huaweicloud.com/api-sis/sis_03_0115.html
// auth info
// Hard-coding the AK/SK in source code or storing them in plaintext is a serious security risk.
// Keep them encrypted in a configuration file or in environment variables and decrypt them when used.
DEFINE_string(ak, "", "access key");
DEFINE_string(sk, "", "secrect key");
// region, for example cn-east-3, cn-north-4
DEFINE_string(region, "cn-north-4", "project region, such as cn-north-4");
// projectId, refer to https://support.huaweicloud.com/api-sis/sis_03_0008.html
DEFINE_string(projectId, "", "project id");
// endpoint, passed to AuthInfo in RttsTest(); without this definition FLAGS_endpoint is undeclared.
// NOTE(review): an empty value presumably lets the SDK derive the endpoint from the region - confirm against AuthInfo.
DEFINE_string(endpoint, "", "service endpoint");
// text to synthesize and audio output settings
DEFINE_string(text, "华为致力于把数字世界带入每个人每个家庭每个组织,构建万物互联的智能世界。", "Text to be synthesized");
DEFINE_string(audioFormat, "pcm", "audio format, such pcm");
DEFINE_string(property, "chinese_xiaoyan_common", "");
DEFINE_string(audioPath, "test.pcm", "audio saved path");
DEFINE_string(sampleRate, "16000", "sample rate of audio");
DEFINE_string(subtitle, "", "subtitle info");
DEFINE_int32(volume, 50, "");
DEFINE_int32(speed, 0, "");
DEFINE_int32(pitch, 0, "");
// timeouts, in milliseconds according to the flag descriptions
DEFINE_int32(readTimeOut, 20000, "read time out, default 20s. Increase this value appropriately according to the length of the text");
DEFINE_int32(connectTimeOut, 20000, "connecting time out, default 20s");
DEFINE_bool(isSaved, true, "save the audio as a local file");
class CallBack : public RttsListener {
public:
void OnConnect() {
LOG(INFO) << "rtts Connect success";
}
void OnStart(std::string text) {
LOG(INFO) << "rtts receive start response " << text;
}
void OnResp(std::string binaryData) {
if (isSaved) {
dataContents.push_back(binaryData);
}
LOG(INFO) << "rtts receive data " << binaryData.size();
}
void onResponseSubtitle(std::string message) {
LOG(INFO) << message;
}
void OnEnd(std::string text) {
LOG(INFO) << "rtts receive end response " << text;
if (isSaved) {
std::ofstream fout(filePath, std::ios::binary);
if (!fout.is_open()) {
LOG(INFO) << "filePath " << filePath << " is invalid";
return;
}
for (int i = 0; i < dataContents.size(); i++) {
fout.write(dataContents[i].data(), dataContents[i].size());
}
fout.close();
LOG(INFO) << "success to save file in " << filePath;
}
}
void OnClose() {
LOG(INFO) << "rtts receive Close";
}
void OnError(std::string text) {
LOG(INFO) << "rtts receive error" << text;
}
void SetFilePath(std::string fPath) {
filePath = fPath;
}
void SetSaved(bool saved) {
isSaved = saved;
}
private:
std::string filePath;
bool isSaved = false;
std::vector<std::string> dataContents;
};
// Runs one real-time speech synthesis request configured from the command-line flags.
//
// Builds the auth and connection settings, registers a CallBack on the
// websocket service, sends the text for synthesis and then waits a fixed 2 s
// so the callback can finish saving the audio file.
void RttsTest() {
    // 1. config parameter
    // 1.1 init authInfo
    speech::huawei_asr::AuthInfo authInfo(FLAGS_ak, FLAGS_sk, FLAGS_region, FLAGS_projectId, FLAGS_endpoint);

    // 1.2 config Connect parameter
    speech::huawei_asr::HttpConfig httpConfig;
    httpConfig.SetReadTimeout(FLAGS_readTimeOut);
    httpConfig.SetConnectTimeout(FLAGS_connectTimeOut);

    // 1.3 config callback, callback functions are optional, if not set, it will use the functions in RttsListener
    speech::huawei_asr::WebsocketService::ptr websocketServicePtr =
        websocketpp::lib::make_shared<speech::huawei_asr::WebsocketService>();
    CallBack callback;
    callback.SetSaved(FLAGS_isSaved);
    callback.SetFilePath(FLAGS_audioPath);
    // The service only receives a raw pointer; `callback` must stay alive until this function returns.
    websocketServicePtr->SetTtsCallBack(&callback);

    // 1.4 config request parameter
    speech::huawei_tts::RttsRequest request(FLAGS_text);
    request.SetAudioFormat(FLAGS_audioFormat);
    request.SetVolume(FLAGS_volume);
    request.SetSpeed(FLAGS_speed);
    request.SetPitch(FLAGS_pitch);
    request.SetSampleRate(FLAGS_sampleRate);
    request.SetAudioProperty(FLAGS_property);
    request.SetSubtitle(FLAGS_subtitle);

    // 2. init client
    // Automatic storage instead of new/delete: the client is released even if
    // Synthesis() throws, and it is still destroyed before the request,
    // callback and service declared above.
    speech::huawei_tts::RttsClient rttsClient(authInfo, websocketServicePtr, httpConfig);

    // 3. send request
    rttsClient.Synthesis(request);

    // wait for save file, if setSaved false in rttsListener or don't use rttsListener, it can be removed.
    std::this_thread::sleep_for(std::chrono::milliseconds(2000));
}
// Program entry point: sets the logging defaults, parses the command-line
// flags, initializes glog and runs the synthesis demo.
int main(int argc, char *argv[]) {
    // Logging defaults, assigned before the command line is parsed.
    FLAGS_log_dir = "./logs";
    FLAGS_alsologtostderr = true;

    gflags::ParseCommandLineFlags(&argc, &argv, true);
    google::InitGoogleLogging(argv[0]);

    RttsTest();
    return 0;
}
编译脚本
以下编译脚本仅供参考,您可以根据实际业务需求,对RttsDemo.cpp进行定制修改。
# Build the demo out of tree and run it.
cd "${project_dir}"
# -p keeps the script re-runnable: with a plain mkdir an existing build/ would
# skip the cd and the build would run in the project root.
mkdir -p build && cd build
# The demo sets its log directory to ./logs.
mkdir -p logs
cmake ..
# Bound the parallelism to the number of available cores.
make -j"$(nproc)"
./RttsDemo --ak=yourAk --sk=yourSk --region=yourRegion --projectId=yourProjectId --isSaved=true --audioPath=test.pcm