更新时间:2025-09-08 GMT+08:00
快速入门
初始化EMS客户端
本示例用于初始化EMS客户端配置并启动EMS服务。
# Import modules
import os
import torch
import torch_npu
from ems import Ems, EmsConfig, EmsException, CcConfig, CcKvOption, KvBufferWrapper

# Build the Context Caching configuration that EmsConfig consumes below.
# NOTE(review): these argument values mirror the Context Caching example in
# this guide; adapt them to your deployment and confirm the CcConfig
# parameters against the SDK reference.
cc_config = CcConfig(rank_id=8, device_id=0, model_id="llama2-13b")

# Initialize Ems
config = EmsConfig(cc_config=cc_config)
try:
    Ems.init(config)
except EmsException as e:
    print(f"exception: {e}.")
    exit(1)
更多关于EMS客户端初始化的内容请参考初始化章节。
读写Context Caching
本示例通过初始化并获取Context Caching配置,保存和加载显存数据。
- 初始化Ems,并向Context Caching注册一个KVCache结构。
import os
import torch
import torch_npu
from ems import Ems, EmsConfig, EmsException, CcConfig_v1, CcKvOption, KvBufferWrapper

# Initialize the cc configuration
# NOTE(review): the client-initialization example in this guide imports
# CcConfig rather than CcConfig_v1 — confirm which name your SDK version exports.
cc_config = CcConfig_v1(rank_id=8, device_id=0, model_id="llama2-13b")

# Initialize Ems
config = EmsConfig(cc_config=cc_config)
try:
    Ems.init(config)
except EmsException as e:
    print(f"exception: {e}.")
    exit(1)

# Get the context caching object
cc = Ems.get_cc()
if cc is None:
    print("cc is None.")
    exit(1)

# Register with Context Caching.
# `kvcache` is not created in this snippet; prepare it before this point.
# Expected kvcache shape: [layers, k_v_index, GPU_blocks, Block_size, heads, head_size]
try:
    # Register through the object returned by Ems.get_cc() (bound to `cc` above).
    cc.register_kvcache(kvcache)
    print("register_kvcache: success")
except EmsException as e:
    print(f"register_kvcache failed: {e}")
- hash语义下的保存和加载显存数据,参数:slot_mapping + hashes + offsets。
# Timeout option passed to the save and load requests below
option = CcKvOption(timeout=5000)
block_size = 4

# Save
slot_mapping = [0, 1, 2, 3, 4, 5, 6, 7]
hashes = [0xABCD, 0x1234]
offsets = [4, 4]
try:
    cc_result = cc.save(slot_mapping=slot_mapping, hashes=hashes, offsets=offsets, option=option)
except EmsException as e:
    print(f"failed to save, {e}.")
    exit(1)

# Read back the saved data: this step must call load, not save again
try:
    cc_result = cc.load(slot_mapping=slot_mapping, hashes=hashes, offsets=offsets, option=option)
except EmsException as e:
    print(f"failed to load, {e}.")
    exit(1)
- token语义下的保存和加载显存数据,参数:slot_mapping + token_ids + mask。
# Timeout option passed to the save and load requests below
option = CcKvOption(timeout=5000)
block_size = 4

# Save
slot_mapping = [0, 1, 2, 3, 4, 5]
token_ids = [101, 102, 103, 104, 105, 106]
mask = [0, 0, 0, 0, 1, 1]
try:
    cc_result = cc.save(slot_mapping=slot_mapping, token_ids=token_ids, mask=mask, option=option)
except EmsException as e:
    print(f"failed to save, {e}.")
    exit(1)

# Read back the saved data: this step must call load, not save again
try:
    cc_result = cc.load(slot_mapping=slot_mapping, token_ids=token_ids, mask=mask, option=option)
except EmsException as e:
    print(f"failed to load, {e}.")
    exit(1)