Streaming
Function
The speech used for streaming recognition must be shorter than one minute. This mode is applicable to dialog recognition.
This API allows you to input speech segments in streaming mode and obtain the final result soon after the recognition work is complete. After a speech segment is input, the Real-Time ASR engine can immediately decode it and extract its features rather than wait until all segments are input. Therefore, after the last speech segment is input, you can get the final result after only a short period of time (the time for processing the last speech segment + the time for generating the final result). The streaming mode shortens the overall time for obtaining the final result and greatly improves user experience.
wss-URI
- Parameter descriptions
Table 1 Parameter descriptions

Parameter
Mandatory
Description
project_id
Yes
Project ID. For details about how to obtain a project ID, see Obtaining a Project ID.
Table 2 Request header parameters

Parameter
Mandatory
Type
Description
X-Auth-Token
Yes
String
User token.
It is used to obtain the permission to call APIs. For details about how to obtain a user token, see Authentication. The token is the value of X-Subject-Token in the response header.
- Example request (pseudocode)
wss://{endpoint}/v1/{project_id}/rasr/short-stream Request header: X-Auth-Token: MIINRwYJKoZIhvcNAQcCoIINODCCDTQCAQExDTALBglghkgBZQMEAgEwgguVBgkqhkiG...
The endpoint is the request URL for calling an API. Endpoints vary according to services and regions. For details, see Endpoints.
- Example Python 3 request
# -*- coding: utf-8 -*-
# This demo is used only for tests. You are advised to use the SDK.
# The websocket-client package must be installed in advance:
#     pip install websocket-client
import websocket
import threading
import time
import json


def rasr_demo():
    """Stream a local audio file to the Real-Time ASR short-stream endpoint.

    Sends a START control frame, then the audio in fixed-size binary chunks,
    then an END control frame, printing every server message as it arrives.
    """
    url = 'wss://{{endpoint}}/v1/{{project_id}}/rasr/short-stream'  # Replace endpoint and project_id with the actual values.
    audio_path = 'Audio path'
    token = 'Token of the region to which the user belongs'
    header = {'X-Auth-Token': token}

    with open(audio_path, 'rb') as audio_file:
        audio_bytes = audio_file.read()

    # START command: declares the audio format and the recognition property.
    start_command = {
        'command': 'START',
        'config': {
            'audio_format': 'pcm8k16bit',
            'property': 'chinese_8k_general'
        }
    }

    def _handle_message(ws, message):
        # Recognition results arrive as text frames; just echo them.
        print(message)

    def _handle_error(ws, error):
        print(error)

    ws = websocket.WebSocketApp(url, header,
                                on_message=_handle_message,
                                on_error=_handle_error)
    # run_forever positional args: (sockopt, sslopt, ping_interval, ping_timeout).
    receiver = threading.Thread(target=ws.run_forever, args=(None, None, 30, 20))
    receiver.start()
    time.sleep(1)  # Give the connection a moment to open before sending.

    ws.send(json.dumps(start_command), opcode=websocket.ABNF.OPCODE_TEXT)

    offset = 0
    chunk_size = 4000
    while offset < len(audio_bytes):
        end = offset + chunk_size
        if end > len(audio_bytes):
            end = len(audio_bytes)
        ws.send(audio_bytes[offset:end], opcode=websocket.ABNF.OPCODE_BINARY)
        offset += chunk_size
        time.sleep(0.05)  # Pace the upload to mimic real-time capture.

    ws.send("{\"command\": \"END\", \"cancel\": \"false\"}",
            opcode=websocket.ABNF.OPCODE_TEXT)
    time.sleep(10)  # Wait for the final recognition result to arrive.
    ws.close()


if __name__ == '__main__':
    rasr_demo()
- Example Java request
import okhttp3.OkHttpClient; import okhttp3.Request; import okhttp3.Response; import okhttp3.WebSocket; import okhttp3.WebSocketListener; import okio.ByteString; import java.net.URL; /** * This demo is used only for tests. You are advised to use the SDK. * The okhttp.jar and okio.jar files have been configured. You can download the JAR files from the SDK. */ public class RasrDemo { public void rasrDemo() { try { // Replace endpoint and projectId with the actual values. String url = "wss://{{endpoint}}/v1/{{project_id}}/rasr/short-stream"; String token = "Token of the corresponding region"; byte[] data = null; //Byte array that stores the audio to be sent OkHttpClient okHttpClient = new OkHttpClient(); Request request = new Request.Builder().url(url).header("X-Auth-Token", token).build(); WebSocket webSocket = okHttpClient.newWebSocket(request, new MyListener()); webSocket.send("{\"command\": \"START\", \"config\": {\"audio_format\": \"pcm8k16bit\", \"property\": \"chinese_8k_general\"}}"); webSocket.send(ByteString.of(data)); webSocket.send("{ \"command\": \"END\", \"cancel\": false}"); Thread.sleep(10000); webSocket.close(1000, null); } catch (Exception e) { e.printStackTrace(); } } class MyListener extends WebSocketListener { @Override public void onOpen(WebSocket webSocket, Response response) { System.out.println("conneected"); } @Override public void onClosed(WebSocket webSocket, int code, String reason) { System.out.println("closed"); } @Override public void onFailure(WebSocket webSocket, Throwable t, Response response) { t.printStackTrace(); } @Override public void onMessage(WebSocket webSocket, String text) { System.out.println(text); } } public static void main(String[] args) { RasrDemo rasrDemo = new RasrDemo(); rasrDemo.rasrDemo(); } }
- JavaScript (Node.js v18.20.2 (LTS) is recommended.)
// Import the ws library of Node.js.
const WebSocket = require('ws');

/**
 * Streams a local audio file to the Real-Time ASR short-stream endpoint:
 * START command, binary audio chunks, then an END command. Closes the
 * socket once the server reports END or ERROR.
 */
function shortStreamDemo(endpoint, audioPath, projectID, token) {
    // Replace endpoint and projectID with the actual values.
    const url = `wss://${endpoint}/v1/${projectID}/rasr/short-stream`;

    // Read the content of the audio file.
    const fs = require('fs');
    const audioBuffer = fs.readFileSync(audioPath);

    // Add the token to the HTTP header.
    const headers = {
        'X-Auth-Token': token,
        // Optionally add the enterprise ID.
        // 'Enterprise-Project-Id': Enterprise ID
    };

    // Create a WebSocket instance with the custom HTTP header.
    const ws = new WebSocket(url, { headers });

    ws.on('open', async () => {
        // START command: declares the audio format and the recognition property.
        const startCommand = {
            command: 'START',
            config: {
                audio_format: 'pcm16k16bit',
                property: 'chinese_16k_general'
            }
        };
        ws.send(JSON.stringify(startCommand));

        // Empty values are not allowed. Recommended range: 2000 to 10000.
        const chunkBytes = 3200;
        let cursor = 0;
        while (cursor < audioBuffer.length) {
            const upper = cursor + chunkBytes;
            const slice = audioBuffer.slice(cursor, upper > audioBuffer.length ? audioBuffer.length : upper);
            ws.send(slice, { binary: true });
            cursor += chunkBytes;
            await new Promise(resolve => setTimeout(resolve, 100)); // Simulate a delay (unit: ms).
        }

        const endCommand = JSON.stringify({ command: 'END', cancel: 'false' });
        ws.send(endCommand);
    });

    ws.on('message', (data) => {
        if (data instanceof Buffer) {
            // Convert the Buffer to a UTF-8 encoded string.
            const messageString = data.toString('utf8');
            console.log('Received (converted from Buffer):', messageString);
            const type = JSON.parse(messageString).resp_type;
            // Close once the server signals completion or an error.
            if (type === 'END' || type === 'ERROR') {
                ws.close();
            }
        }
    });

    ws.on('error', (error) => {
        console.error('WebSocket Error:', error);
    });
}

// Supply real values for endpoint, audioPath, projectID, and token before running.
shortStreamDemo(endpoint, audioPath, projectID, token);
Feedback
Was this page helpful?
Provide feedback. Thank you very much for your feedback. We will continue working to improve the documentation. See the reply and handling status in My Cloud VOC.
For any further questions, feel free to contact us through the chatbot.
Chatbot