Updated on 2025-09-12 GMT+08:00

WebSocket Handshake Requests

Function

This API is used to synthesize speech as a real-time stream. A connection must be established for each synthesis: after the client sends the text to be synthesized, the server responds with the synthesized results. Only one piece of text can be sent per connection; to synthesize multiple texts, establish a separate connection for each one.

Working Process

As shown in the flowchart below, only one start request needs to be sent for real-time TTS, and the start response, segment response, and end response are received in sequence.

wss-URI

  • wss-URI format

    wss /v1/{project_id}/rtts

  • Parameter descriptions
    Table 1 Parameter descriptions

    Parameter

    Mandatory

    Description

    project_id

    Yes

    Project ID. For details about how to obtain a project ID, see Obtaining a Project ID.

    Table 2 Request header parameters

    Parameter

    Mandatory

    Type

    Description

    X-Auth-Token

    Yes

    String

    User token.

    It is used to obtain the permission to call APIs. For details about how to obtain a user token, see Authentication. The token is the value of X-Subject-Token in the response header.

  • Example request (pseudocode)
    wss://{endpoint}/v1/{project_id}/rtts
    
    Request header:
    X-Auth-Token: MIINRwYJKoZIhvcNAQcCoIINODCCDTQCAQExDTALBglghkgBZQMEAgEwgguVBgkqhkiG...

    Example Python3 request

    # -*- coding: utf-8 -*-
    # This demo is for testing purposes only. You are advised to use the SDK. You need to install websocket-client first by running pip install websocket-client.
    import websocket
    import threading
    import time
    import json
    
    
    def rtts_demo():
        """Synthesize one piece of text over a real-time TTS WebSocket connection.

        Opens the connection with the user token in the ``X-Auth-Token`` header,
        sends a single START request as soon as the connection is open, prints
        every text response and the length of every binary response, and closes
        the connection after a fixed wait.
        """
        url = 'wss://{{endpoint}}/v1/{{project_id}}/rtts'  # Replace endpoint and project_id with actual values.
        text = 'Text to be synthesized'
        token = 'Token of the region corresponding to the user'
        header = {
            'X-Auth-Token': token
        }

        body = {
            'command': 'START',
            'text': text,
            'config': {
                'audio_format': 'pcm',
                'property': 'chinese_xiaoyu_common',
                'sample_rate': '8000'
            }
        }

        def _on_open(ws):
            # Send the START request from the open callback rather than after a
            # fixed delay: a delayed send fails if the handshake is still in progress.
            ws.send(json.dumps(body), opcode=websocket.ABNF.OPCODE_TEXT)

        def _on_message(ws, message):
            # Binary frames are reported by length only; text frames are printed as is.
            if isinstance(message, bytes):
                print('receive data length %d' % len(message))
            else:
                print(message)

        def _on_error(ws, error):
            print(error)

        ws = websocket.WebSocketApp(url, header=header, on_open=_on_open,
                                    on_message=_on_message, on_error=_on_error)
        # Keyword arguments make the keep-alive settings explicit instead of
        # relying on the positional order of run_forever's parameters.
        _thread = threading.Thread(target=ws.run_forever,
                                   kwargs={'ping_interval': 30, 'ping_timeout': 20})
        _thread.start()
        # Wait for the responses, but stop waiting early if the connection has
        # already ended (for example, because the handshake failed).
        _thread.join(timeout=11)
        ws.close()
        _thread.join()
    
    
    # Run the demo only when this file is executed directly as a script.
    if __name__ == '__main__':
        rtts_demo()

    Example Java request

    import okhttp3.OkHttpClient;
    import okhttp3.Request;
    import okhttp3.Response;
    import okhttp3.WebSocket;
    import okhttp3.WebSocketListener;
    import okio.ByteString;
    
    /**
     * This demo is for testing purposes only. You are advised to use the SDK.
     * Before running it, obtain the okhttp and okio JAR files from the SDK and add them to the project.
     */
    public class RttsDemo {
      /** Time to wait for the synthesis responses before closing the connection, in milliseconds. */
      private static final long RESPONSE_WAIT_MILLIS = 10000L;

      /** Close code passed to {@code WebSocket.close} when the demo finishes. */
      private static final int CLOSE_CODE = 1000;

      /**
       * Opens a WebSocket connection, sends a single START request containing the text to be
       * synthesized, waits a fixed time for the responses, and then closes the connection.
       *
       * <p>Failures are printed to standard error; nothing is thrown to the caller.
       */
      public void rttsDemo() {
        // Replace endpoint and project_id with the actual values.
        String url = "wss://{{endpoint}}/v1/{{project_id}}/rtts";
        String token = "Token of the corresponding region";
        String text = "Text to be synthesized";
        OkHttpClient okHttpClient = new OkHttpClient();
        WebSocket webSocket = null;
        try {
          Request request = new Request.Builder().url(url).header("X-Auth-Token", token).build();
          webSocket = okHttpClient.newWebSocket(request, new MyListener());
          // The text is escaped so that quotes, backslashes, or line breaks in it
          // cannot produce an invalid JSON request.
          webSocket.send("{\"command\": \"START\", \"text\":\"" + escapeJson(text)
              + "\", \"config\": {\"audio_format\": \"pcm\", \"property\": \"chinese_xiaoyu_common\"}}");
          Thread.sleep(RESPONSE_WAIT_MILLIS);
        } catch (InterruptedException e) {
          // Restore the interrupt flag so callers can still observe the interruption.
          Thread.currentThread().interrupt();
          e.printStackTrace();
        } catch (Exception e) {
          e.printStackTrace();
        } finally {
          if (webSocket != null) {
            webSocket.close(CLOSE_CODE, null);
          }
          // Shut down the client's dispatcher executor so that its idle threads do not
          // delay JVM exit after the demo finishes.
          okHttpClient.dispatcher().executorService().shutdown();
        }
      }

      /**
       * Escapes a string so that it can be embedded between double quotes in a JSON document.
       *
       * @param value the raw text
       * @return the text with quotes, backslashes, and control characters escaped
       */
      private static String escapeJson(String value) {
        StringBuilder escaped = new StringBuilder(value.length() + 16);
        for (int i = 0; i < value.length(); i++) {
          char c = value.charAt(i);
          switch (c) {
            case '"':
              escaped.append("\\\"");
              break;
            case '\\':
              escaped.append("\\\\");
              break;
            case '\n':
              escaped.append("\\n");
              break;
            case '\r':
              escaped.append("\\r");
              break;
            case '\t':
              escaped.append("\\t");
              break;
            case '\b':
              escaped.append("\\b");
              break;
            case '\f':
              escaped.append("\\f");
              break;
            default:
              if (c < 0x20) {
                escaped.append(String.format("\\u%04x", (int) c));
              } else {
                escaped.append(c);
              }
          }
        }
        return escaped.toString();
      }

      /**
       * Listener that prints connection events and received messages to the console.
       *
       * <p>Declared static because it does not use any state of the enclosing instance.
       */
      static class MyListener extends WebSocketListener {
        @Override
        public void onOpen(WebSocket webSocket, Response response) {
          System.out.println("connected");
        }

        @Override
        public void onClosed(WebSocket webSocket, int code, String reason) {
          System.out.println("closed");
        }

        @Override
        public void onFailure(WebSocket webSocket, Throwable t, Response response) {
          t.printStackTrace();
        }

        /** Prints a text message as received. */
        @Override
        public void onMessage(WebSocket webSocket, String text) {
          System.out.println(text);
        }

        /** Prints only the length of a binary message. */
        @Override
        public void onMessage(WebSocket webSocket, ByteString bytes) {
          byte[] data = bytes.toByteArray();
          System.out.println("receive data length is " + data.length);
        }
      }

      public static void main(String[] args) {
        RttsDemo rttsDemo = new RttsDemo();
        rttsDemo.rttsDemo();
      }
    }