大Bitmap初始化
Bitmap,即位图类型,开源Redis直接使⽤STRING类型表达,因此可能会产⽣超⼤的STRING数据,进⽽在某些场景下出现⼤KEY的性能问题。GeminiDB Redis的Bitmap类型采⽤的是特殊编码的格式,内部采⽤分片算法,可以规避产⽣⼀个超⼤的STRING数据,并且可以⽀持更⾼效的随机位数的插入和删除操作。
但是,实际应⽤场景中,我们可能会从其它地⽅获取⼀个超⼤的Bitmap数据,⽽这些数据通常会⽤STRING类型来表达。 对于一个超大的Bitmap数据,例如 64 MB,如果直接使用SET命令插入GeminiDB Redis,会执⾏较⻓时间,并且对其它正常访问产⽣⼲扰,造成时延抖动。 因此我们提供了⼀套平滑的插入⽅案,其原理是,对于超⼤的初始数据,我们先将其拆分为较⼩的字串(例如1MB),然后⾸次插入仍然采⽤SET命令,然后通过⼀个GETBIT的只读命令将其转化为Bitmap类型,后续的字串,通过APPEND命令进⾏插入即可。
注意事项
代码参考
#include <string>
#include <vector>
#include "hiredis/hiredis.h"
constexpr std::size_t kBitmapSubSize = 1024 * 1024;  // 1 MB
void SmoothInitBitmap(std::string bitmap) {
 // Split bitmap
 std::vector<std::string> sub_bitmaps;
 std::size_t pos = 0;
 while (pos < bitmap.size()) {
   sub_bitmaps.emplace_back(bitmap.substr(pos, kBitmapSubSize));
   pos += kBitmapSubSize;
}
 std::string key = "BITMAP_KEY";
 // Connect to redis
 redisContext* redis = redisConnect("127.0.0.1", 6666);
 redisReply* reply = nullptr;
 // First part use 'SET' command
 reply = (redisReply*)redisCommand(redis, "SET %b %b", key.data(), key.size(), sub_bitmaps[0].data(), sub_bitmaps[0].size());
 freeReplyObject(reply);
 // Use 'GETBIT' to transform to bitmap format
 reply = (redisReply*)redisCommand(redis, "GETBIT %b 0", key.data(), key.size());
 freeReplyObject(reply);
 // Use 'APPEND' for remaining bitmap data
 for (auto i = 1u; i < sub_bitmaps.size(); ++i) {
   reply = (redisReply*)redisCommand(redis, "APPEND %b %b", key.data(), key.size(), sub_bitmaps[i].data(), sub_bitmaps[i].size());
   freeReplyObject(reply);
}
}
int main() {
 std::string bitmap ="123457890abcdef123457890abcdef123457890abcdef123457890abcdef123457890abcdef123456";
 SmoothInitBitmap(bitmap);
}
   package nosql.cloud.huawei.jedis;
import redis.clients.jedis.Jedis;
import java.nio.ByteBuffer;
import java.util.BitSet;
public class BitMapOperation {
   private Jedis jedis;
   public BitMapOperation(Jedis jedis) {
       this.jedis = jedis;
  }
   /**
    * SetBit operation especially for big bitmap
    *
    * @param key         key
    * @param value       value
    * @param groupLength groupLength (Unit: byte)
    */
   public void setBitGrouped(byte[] key, BitSet value, int groupLength) {
       if (value.isEmpty()) {
           jedis.set(key, new byte[0]);
           return;
      }
       byte[] byteArray = disposeBitMap(value);
       // round count
       int round = byteArray.length % groupLength == 0 ? byteArray.length / groupLength : byteArray.length / groupLength + 1;
       // last round length
       int lastPacketLength = byteArray.length % groupLength == 0 ? groupLength : byteArray.length % groupLength;
       if (round == 1) {
           // if only one round
           byte[] lastPacketByte = new byte[lastPacketLength];
           System.arraycopy(byteArray, 0, lastPacketByte, 0, lastPacketLength);
           // set and getBit
           setAndGetBit(key, lastPacketByte);
           return;
      }
       byte[] packetByte = new byte[groupLength];
       byte[] lastPacketByte = new byte[lastPacketLength];
       for (int i = 0; i < round; i++) {
           if (i == 0) {
               // first set
               System.arraycopy(byteArray, i * groupLength, packetByte, 0, groupLength);
               // set and getBit
               setAndGetBit(key, packetByte);
          } else if (i != round - 1) {
               // regular append
               System.arraycopy(byteArray, i * groupLength, packetByte, 0, groupLength);
               jedis.append(key, packetByte);
          } else {
               // last append
               System.arraycopy(byteArray, i * groupLength, lastPacketByte, 0, lastPacketLength);
               jedis.append(key, lastPacketByte);
          }
      }
  }
   private byte[] disposeBitMap(BitSet bitSet) {
       // get words and count the number of word(Long)
       long[] words = bitSet.toLongArray();
       int n = words.length;
       if (n == 0)
           return new byte[0];
       for (int i = 0; i < n; i++) {
           // reverse
           words[i] = reverseLong(words[i]);
      }
       return longToBytes(words);
  }
   public static byte[] longToBytes(long[] longArray) {
       ByteBuffer buffer = ByteBuffer.allocate(longArray.length * 8);
       for (long value : longArray) {
           buffer.putLong(value);
      }
       return buffer.array();
  }
   public void setAndGetBit(byte[] key, byte[] value) {
       jedis.set(key, value);
       jedis.getbit(key, 0);
  }
   public static long reverseLong(long n) {
       n = (n >>> 32) | (n << 32);
       n = ((n & 0xFFFF0000FFFF0000L) >>> 16) | ((n & 0x0000FFFF0000FFFFL) << 16);
       n = ((n & 0xFF00FF00FF00FF00L) >>> 8) | ((n & 0x00FF00FF00FF00FFL) << 8);
       n = ((n & 0xF0F0F0F0F0F0F0F0L) >>> 4) | ((n & 0x0F0F0F0F0F0F0F0FL) << 4);
       n = ((n & 0xCCCCCCCCCCCCCCCCL) >>> 2) | ((n & 0x3333333333333333L) << 2);
       n = ((n & 0xAAAAAAAAAAAAAAAAL) >>> 1) | ((n & 0x5555555555555555L) << 1);
       return n;
  }
}
   import redis
import random
import string
from bitmap import BitMap  # pip install bitmap
# 参数
max_bytes = 1024 * 1024 * 64 # 构造一个64MB的bitmap
max_bits = max_bytes * 8 # 一个byte可以存储8个bit,对应大概5亿多元素
# 这个方案不需要python内置的bitmap类型
# index_list 存储了所有要设置为1的下标
index_list = []
for i in range(1000000):
    index_list.append(random.randint(0, max_bits - 1))
# 使用bytearray构造位图
byte_array = bytearray(max_bytes)
for i in index_list:
    index = i // 8
    offset = i % 8
    byte_array[index] |= (1 << (7 - offset))
# 转化成bytes类型,用于后续的redis操作
bitmap_str = bytes(byte_array)
# 连接到redis
r = redis.Redis(host='127.0.0.1', port=6379)
r.execute_command("auth a")
key = "BITMAP_KEY"
# 分割参数
bitmap_pos = 0
bitmap_sub_size = 256 * 1024 # 调整分片大小
step = bitmap_sub_size - 1
# 处理第一部分
first_part = bitmap_str[bitmap_pos : bitmap_pos + step]
r.execute_command("SET", key, first_part)
r.execute_command("GETBIT", key, 0) # 使用getbit进行bitmap编码优化
# 处理剩余的部分
bitmap_pos += step
while bitmap_pos < len(bitmap_str) :
    rest_part = bitmap_str[bitmap_pos : bitmap_pos + step]
    r.execute_command("APPEND", key, rest_part)
    bitmap_pos += step
# 下面是测试验证的代码,注意会比较耗时,因为需要执行100w次getbit进行验证
# 注意,最后一个bitcount命令是O(n)命令,会产生百毫秒的毛刺,请勿随意在生产环境使用
# 构造一个python内置的bitmap类型进行数据验证(可选)
bm = BitMap(max_bits)
for i in index_list:
    bm.set(i)
print('BitMap.count(): ' + str(bm.count()))
# 调用redis命令校验是否设置正确
success = True
for i in index_list:
    if r.execute_command("GETBIT", key, i) != 1:
        print('GETBIT check error, pos is' + str(i))
        success = False
if success:
    print('GETBIT check success')
print("Bitcount: " + str(r.execute_command("BITCOUNT", key)))
    
  