更新时间:2024-12-04 GMT+08:00
分享

使用实时语音合成

前提条件

初始化Client

初始化RttsClient,其参数包括AuthInfo,详见表1 AuthInfo。

表1 AuthInfo

参数名称

是否必选

参数类型

描述

ak

String

用户的ak,可参考AK/SK认证

sk

String

用户的sk,可参考AK/SK认证

projectId

String

项目ID,同region一一对应,参考获取项目ID

region

String

区域,如cn-north-4,参考终端节点

endpoint

String

终端节点,参考地区和终端节点。一般使用默认即可。

请求参数

请求类为RttsRequest,详见表2 RttsRequest。

表2 RttsRequest

参数名称

是否必选

参数类型

描述

text

String

待合成文本。

通过set方法可以设置具体参数,详见表3 RttsRequest设置参数。

表3 RttsRequest设置参数

方法名称

是否必选

参数类型

描述

SetAudioFormat

String

设置语音格式,默认pcm。

SetAudioProperty

String

设置语音合成特征字符串,格式为{language}_{speaker}_{domain},即“语种_人员标识_领域”,默认chinese_xiaoyan_common。详见API文档。

SetSampleRate

String

设置采样率:8000、16000,默认8000。

SetPitch

Integer

设置音高,-500~500,默认0。

SetVolume

Integer

设置音量,0~100,默认50。

SetSpeed

Integer

设置语速,-500~500,默认0。

SetSubtitle

String

设置字幕,部分发音人支持字幕时间戳,详见API文档

示例代码

如下示例仅供参考,最新代码请前往SDK(websocket)章节获取并运行。

#include <chrono>
#include <fstream>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include "RttsClient.h"
#include "RttsRequest.h"
#include "gflags/gflags.h"
// Command-line flags of the demo.
// refer to https://support.huaweicloud.com/api-sis/sis_03_0115.html
// auth info
// Hard-coding the AK and SK in source code or storing them in plaintext is a serious security risk;
// keep them encrypted in a configuration file or environment variable and decrypt them when used.
DEFINE_string(ak, "", "access key");
DEFINE_string(sk, "", "secrect key");
// region, for example cn-east-3, cn-north-4
DEFINE_string(region, "cn-north-4", "project region, such as cn-north-4");
// endpoint of the service; RttsTest passes FLAGS_endpoint to AuthInfo, so the flag must be defined.
// NOTE(review): an empty value presumably lets the SDK pick the default endpoint of the region — confirm against AuthInfo.
DEFINE_string(endpoint, "", "service endpoint");
// projectId, refer to https://support.huaweicloud.com/api-sis/sis_03_0008.html
DEFINE_string(projectId, "", "project id");
// synthesis request parameters
DEFINE_string(text, "华为致力于把数字世界带入每个人每个家庭每个组织,构建万物互联的智能世界。", "Text to be synthesized");
DEFINE_string(audioFormat, "pcm", "audio format, such pcm");
DEFINE_string(property, "chinese_xiaoyan_common", "");
DEFINE_string(audioPath, "test.pcm", "audio saved path");
DEFINE_string(sampleRate, "16000", "sample rate of audio");
DEFINE_string(subtitle, "", "subtitle info");
DEFINE_int32(volume, 50, "");
DEFINE_int32(speed, 0, "");
DEFINE_int32(pitch, 0, "");
// connection parameters, in milliseconds
DEFINE_int32(readTimeOut, 20000, "read time out, default 20s. Increase this value appropriately according to the length of the text");
DEFINE_int32(connectTimeOut, 20000, "connecting time out, default 20s");
// whether the callback writes the received audio to FLAGS_audioPath
DEFINE_bool(isSaved, true, "save the audio as a local file");
class CallBack : public RttsListener {
public:
    void OnConnect() {
        LOG(INFO) << "rtts Connect success";
    }
    void OnStart(std::string text) {
        LOG(INFO) << "rtts receive start response " << text;
    }
    void OnResp(std::string binaryData) {
        if (isSaved) {
            dataContents.push_back(binaryData);
        }
        LOG(INFO) << "rtts receive data " << binaryData.size();
    }
    void onResponseSubtitle(std::string message) {
        LOG(INFO) << message;
    }
    void OnEnd(std::string text) {
        LOG(INFO) << "rtts receive end response " << text;
        if (isSaved) {
            std::ofstream fout(filePath,  std::ios::binary);
            if (!fout.is_open()) {
                LOG(INFO) << "filePath " << filePath << " is invalid";
                return;
            }
            for (int i = 0; i < dataContents.size(); i++) {
                fout.write(dataContents[i].data(), dataContents[i].size());
            }
            fout.close();
            LOG(INFO) << "success to save file in " << filePath;
        }
    }
    void OnClose() {
        LOG(INFO) << "rtts receive Close";
    }
    void OnError(std::string text) {
        LOG(INFO) << "rtts receive error" << text;
    }
    void SetFilePath(std::string fPath) {
        filePath = fPath;
    }
    void SetSaved(bool saved) {
        isSaved = saved;
    }
private:
    std::string filePath;
    bool isSaved = false;
    std::vector<std::string> dataContents;
};
void RttsTest() {
    // 1. config parameter
    // 1.1 init authInfo
    speech::huawei_asr::AuthInfo authInfo(FLAGS_ak, FLAGS_sk, FLAGS_region, FLAGS_projectId, FLAGS_endpoint);
    // 1.2 config Connect parameter
    speech::huawei_asr::HttpConfig httpConfig;
    httpConfig.SetReadTimeout(FLAGS_readTimeOut);
    httpConfig.SetConnectTimeout(FLAGS_connectTimeOut);
    // 1.3 config callback, callback function are optional, if not set, it will use function in RttsListner
    speech::huawei_asr::WebsocketService::ptr websocketServicePtr = websocketpp::lib::make_shared<speech::huawei_asr::WebsocketService>();
    CallBack callback;
    callback.SetSaved(FLAGS_isSaved);
    callback.SetFilePath(FLAGS_audioPath);
    websocketServicePtr->SetTtsCallBack(&callback);
    // 1.4 config request parameter
    speech::huawei_tts::RttsRequest request(FLAGS_text);
    request.SetAudioFormat(FLAGS_audioFormat);
    request.SetVolume(FLAGS_volume);
    request.SetSpeed(FLAGS_speed);
    request.SetPitch(FLAGS_pitch);
    request.SetSampleRate(FLAGS_sampleRate);
    request.SetAudioProperty(FLAGS_property);
    request.SetSubtitle(FLAGS_subtitle);
    // 2. init client
    speech::huawei_tts::RttsClient* rttsClient = new speech::huawei_tts::RttsClient(authInfo, websocketServicePtr, httpConfig);
    // 3. send request
    rttsClient->Synthesis(request);
    // wait for save file, if setSaved false in rttsListener or don't use rttsListener, it can be removed.
    std::this_thread::sleep_for(std::chrono::milliseconds(2000));
    delete rttsClient;
}
/**
 * Program entry point: sets the logging defaults, parses the command-line
 * flags, initializes glog and runs the synthesis demo once.
 */
int main(int argc, char *argv[]) {
    // Logging defaults are assigned before the flags are parsed:
    // log files go to ./logs and are also echoed to stderr.
    FLAGS_log_dir = "./logs";
    FLAGS_alsologtostderr = true;
    // NOTE(review): the third argument is presumably gflags' remove_flags switch — confirm.
    gflags::ParseCommandLineFlags(&argc, &argv, true);
    // glog is initialized with the program name.
    const char* programName = argv[0];
    google::InitGoogleLogging(programName);
    RttsTest();
    return 0;
}

编译脚本

以下编译脚本仅供参考,您可以根据实际业务需求,对RttsDemo.cpp进行定制修改。

# Configure and build in a separate build directory, then run the demo from it.
cd "${project_dir}"
# -p keeps the script re-runnable when the directories already exist.
mkdir -p build && cd build
# The demo sets its log directory to ./logs, relative to the working directory.
mkdir -p logs
cmake ..
make -j
./RttsDemo --ak=yourAk --sk=yourSk --region=yourRegion --projectId=yourProjectId --isSaved=true --audioPath=test.pcm

相关文档