大Bitmap初始化
Bitmap,即位图类型,开源Redis直接使⽤STRING类型表达,因此可能会产⽣超⼤的STRING数据,进⽽在某些场景下出现⼤KEY的性能问题。GeminiDB Redis的Bitmap类型采⽤的是特殊编码的格式,内部采⽤分片算法,可以规避产⽣⼀个超⼤的STRING数据,并且可以⽀持更⾼效的随机位数的插入和删除操作。
但是,实际应⽤场景中,我们可能会从其它地⽅获取⼀个超⼤的Bitmap数据,⽽这些数据通常会⽤STRING类型来表达。 对于⼀个超⼤的Bitmap数据,例如 64 MB,如果直接使⽤ SET 命令插入GeminiDB Redis,会执⾏较⻓时间,并且对其它正常访问产⽣⼲扰,造成时延抖动。 因此我们提供了⼀套平滑的插入⽅案,其原理是,对于超⼤的初始数据,我们先将其拆分为较⼩的字串(例如1MB),然后⾸次插入仍然采⽤SET命令,然后通过⼀个GETBIT的只读命令将其转化为Bitmap类型,后续的字串,通过APPEND命令进⾏插入即可。
注意事项
- 该功能需升级到特定版本,您可以提工单联系客服咨询实例版本是否支持该功能。
- 由于APPEND命令对顺序有要求,因此整个流程要避免出现APPEND乱序(并发APPEND的场景)。
- 可以使⽤PIPELINE模式加速,PIPELINE本⾝也是保证执⾏顺序的,因此不会有乱序的问题。
- 拆分的粒度可以根据实际情况选择,拆的越细,产生的时延毛刺就越小,但是初始化时间就越长,通常建议256KB-1MB左右的值。
代码参考
- C++
#include <string> #include <vector> #include "hiredis/hiredis.h" constexpr std::size_t kBitmapSubSize = 1024 * 1024; // 1 MB void SmoothInitBitmap(std::string bitmap) { // Split bitmap std::vector<std::string> sub_bitmaps; std::size_t pos = 0; while (pos < bitmap.size()) { sub_bitmaps.emplace_back(bitmap.substr(pos, kBitmapSubSize)); pos += kBitmapSubSize; } std::string key = "BITMAP_KEY"; // Connect to redis redisContext* redis = redisConnect("127.0.0.1", 6666); redisReply* reply = nullptr; // First part use 'SET' command reply = (redisReply*)redisCommand(redis, "SET %b %b", key.data(), key.size(), sub_bitmaps[0].data(), sub_bitmaps[0].size()); freeReplyObject(reply); // Use 'GETBIT' to transform to bitmap format reply = (redisReply*)redisCommand(redis, "GETBIT %b 0", key.data(), key.size()); freeReplyObject(reply); // Use 'APPEND' for remaining bitmap data for (auto i = 1u; i < sub_bitmaps.size(); ++i) { reply = (redisReply*)redisCommand(redis, "APPEND %b %b", key.data(), key.size(), sub_bitmaps[i].data(), sub_bitmaps[i].size()); freeReplyObject(reply); } } int main() { std::string bitmap ="123457890abcdef123457890abcdef123457890abcdef123457890abcdef123457890abcdef123456"; SmoothInitBitmap(bitmap); }
- JAVA(Jedis)
package nosql.cloud.huawei.jedis; import redis.clients.jedis.Jedis; import java.nio.ByteBuffer; import java.util.BitSet; public class BitMapOperation { private Jedis jedis; public BitMapOperation(Jedis jedis) { this.jedis = jedis; } /** * SetBit operation especially for big bitmap * * @param key key * @param value value * @param groupLength groupLength (Unit: byte) */ public void setBitGrouped(byte[] key, BitSet value, int groupLength) { if (value.isEmpty()) { jedis.set(key, new byte[0]); return; } byte[] byteArray = disposeBitMap(value); // round count int round = byteArray.length % groupLength == 0 ? byteArray.length / groupLength : byteArray.length / groupLength + 1; // last round length int lastPacketLength = byteArray.length % groupLength == 0 ? groupLength : byteArray.length % groupLength; if (round == 1) { // if only one round byte[] lastPacketByte = new byte[lastPacketLength]; System.arraycopy(byteArray, 0, lastPacketByte, 0, lastPacketLength); // set and getBit setAndGetBit(key, lastPacketByte); return; } byte[] packetByte = new byte[groupLength]; byte[] lastPacketByte = new byte[lastPacketLength]; for (int i = 0; i < round; i++) { if (i == 0) { // first set System.arraycopy(byteArray, i * groupLength, packetByte, 0, groupLength); // set and getBit setAndGetBit(key, packetByte); } else if (i != round - 1) { // regular append System.arraycopy(byteArray, i * groupLength, packetByte, 0, groupLength); jedis.append(key, packetByte); } else { // last append System.arraycopy(byteArray, i * groupLength, lastPacketByte, 0, lastPacketLength); jedis.append(key, lastPacketByte); } } } private byte[] disposeBitMap(BitSet bitSet) { // get words and count the number of word(Long) long[] words = bitSet.toLongArray(); int n = words.length; if (n == 0) return new byte[0]; for (int i = 0; i < n; i++) { // reverse words[i] = reverseLong(words[i]); } return longToBytes(words); } public static byte[] longToBytes(long[] longArray) { ByteBuffer buffer = ByteBuffer.allocate(longArray.length * 8); for (long value : longArray) { buffer.putLong(value); } return buffer.array(); } public void setAndGetBit(byte[] key, byte[] value) { jedis.set(key, value); jedis.getbit(key, 0); } public static long reverseLong(long n) { n = (n >>> 32) | (n << 32); n = ((n & 0xFFFF0000FFFF0000L) >>> 16) | ((n & 0x0000FFFF0000FFFFL) << 16); n = ((n & 0xFF00FF00FF00FF00L) >>> 8) | ((n & 0x00FF00FF00FF00FFL) << 8); n = ((n & 0xF0F0F0F0F0F0F0F0L) >>> 4) | ((n & 0x0F0F0F0F0F0F0F0FL) << 4); n = ((n & 0xCCCCCCCCCCCCCCCCL) >>> 2) | ((n & 0x3333333333333333L) << 2); n = ((n & 0xAAAAAAAAAAAAAAAAL) >>> 1) | ((n & 0x5555555555555555L) << 1); return n; } }
- Python
import redis import random import string from bitmap import BitMap # pip install bitmap # 参数 max_bytes = 1024 * 1024 * 64 # 构造一个64MB的bitmap max_bits = max_bytes * 8 # 一个byte可以存储8个bit,对应大概5亿多元素 # 这个方案不需要python内置的bitmap类型 # index_list 存储了所有要设置为1的下标 index_list = [] for i in range(1000000): index_list.append(random.randint(0, max_bits - 1)) # 使用bytearray构造位图 byte_array = bytearray(max_bytes) for i in index_list: index = i // 8 offset = i % 8 byte_array[index] |= (1 << (7 - offset)) # 转化成bytes类型,用于后续的redis操作 bitmap_str = bytes(byte_array) # 连接到redis r = redis.Redis(host='127.0.0.1', port=6379) r.execute_command("auth a") key = "BITMAP_KEY" # 分割参数 bitmap_pos = 0 bitmap_sub_size = 256 * 1024 # 调整分片大小 step = bitmap_sub_size - 1 # 处理第一部分 first_part = bitmap_str[bitmap_pos : bitmap_pos + step] r.execute_command("SET", key, first_part) r.execute_command("GETBIT", key, 0) # 使用getbit进行bitmap编码优化 # 处理剩余的部分 bitmap_pos += step while bitmap_pos < len(bitmap_str) : rest_part = bitmap_str[bitmap_pos : bitmap_pos + step] r.execute_command("APPEND", key, rest_part) bitmap_pos += step # 下面是测试验证的代码,注意会比较耗时,因为需要执行100w次getbit进行验证 # 注意,最后一个bitcount命令是O(n)命令,会产生百毫秒的毛刺,请勿随意在生产环境使用 # 构造一个python内置的bitmap类型进行数据验证(可选) bm = BitMap(max_bits) for i in index_list: bm.set(i) print('BitMap.count(): ' + str(bm.count())) # 调用redis命令校验是否设置正确 success = True for i in index_list: if r.execute_command("GETBIT", key, i) != 1: print('GETBIT check error, pos is' + str(i)) success = False if success: print('GETBIT check success') print("Bitcount: " + str(r.execute_command("BITCOUNT", key)))