Updated on 2024-08-06 GMT+08:00

Large Bitmap Initialization

The open-source Redis uses string bitmaps, which may create super large strings and affect the performance of big keys in some scenarios. GeminiDB Redis API uses bitmaps in a special encoding format. The internal sharding algorithm prevents super large strings from being created and allows you to insert and delete a random number of bits efficiently.

However, in practice, a super large bitmap of the string type may be obtained from other sources. For example, it takes a long time to use the SET command to insert a super large bitmap (64 MB) into GeminiDB Redis API. Other normal accesses may be interfered with and a jitter may be created because of latency. To address these issues, we provide a smooth insertion solution. A super large bitmap is split into smaller strings (for example, 1 MB). The SET command is used for the first insertion, and then a GETBIT read-only command is used to convert the strings to bitmaps. The subsequent character strings are inserted by running the APPEND command.

Precautions

  • This function is available to a specific version. You can submit a service ticket to contact customer service to check whether your instance version supports this function.
  • The APPEND command has requirements on the sequence. Therefore, APPEND disorder must be avoided in the entire process (in concurrent APPEND scenarios).
  • PIPELINE acceleration and PIPELINE can ensure the execution sequence.
  • The finer the splitting granularity, the shorter the glitch, the longer the initialization time. Generally, the recommended granularity is 256 KB to 1 MB. You can decide splitting granularity based on the site requirements.

Code Reference

  • C++
    #include <string>
    #include <vector>
    
    #include "hiredis/hiredis.h"
    constexpr std::size_t kBitmapSubSize = 1024 * 1024;  // 1 MB
    void SmoothInitBitmap(std::string bitmap) {
     // Split bitmap
     std::vector<std::string> sub_bitmaps;
     std::size_t pos = 0;
     while (pos < bitmap.size()) {
       sub_bitmaps.emplace_back(bitmap.substr(pos, kBitmapSubSize));
       pos += kBitmapSubSize;
    }
     std::string key = "BITMAP_KEY";
     // Connect to redis
     redisContext* redis = redisConnect("127.0.0.1", 6666);
     redisReply* reply = nullptr;
     // First part use 'SET' command
     reply = (redisReply*)redisCommand(redis, "SET %b %b", key.data(), key.size(), sub_bitmaps[0].data(), sub_bitmaps[0].size());
     freeReplyObject(reply);
     // Use 'GETBIT' to transform to bitmap format
     reply = (redisReply*)redisCommand(redis, "GETBIT %b 0", key.data(), key.size());
     freeReplyObject(reply);
     // Use 'APPEND' for remaining bitmap data
     for (auto i = 1u; i < sub_bitmaps.size(); ++i) {
       reply = (redisReply*)redisCommand(redis, "APPEND %b %b", key.data(), key.size(), sub_bitmaps[i].data(), sub_bitmaps[i].size());
       freeReplyObject(reply);
    }
    }
    int main() {
     std::string bitmap ="123457890abcdef123457890abcdef123457890abcdef123457890abcdef123457890abcdef123456";
     SmoothInitBitmap(bitmap);
    }
  • JAVA(Jedis)
    package nosql.cloud.huawei.jedis;
    
    import redis.clients.jedis.Jedis;
    
    import java.nio.ByteBuffer;
    import java.util.BitSet;
    
    public class BitMapOperation {
       private Jedis jedis;
    
       public BitMapOperation(Jedis jedis) {
           this.jedis = jedis;
      }
    
       /**
        * SetBit operation especially for big bitmap
        *
        * @param key         key
        * @param value       value
        * @param groupLength groupLength (Unit: byte)
        */
       public void setBitGrouped(byte[] key, BitSet value, int groupLength) {
           if (value.isEmpty()) {
               jedis.set(key, new byte[0]);
               return;
          }
    
           byte[] byteArray = disposeBitMap(value);
    
           // round count
           int round = byteArray.length % groupLength == 0 ? byteArray.length / groupLength : byteArray.length / groupLength + 1;
           // last round length
           int lastPacketLength = byteArray.length % groupLength == 0 ? groupLength : byteArray.length % groupLength;
    
           if (round == 1) {
               // if only one round
               byte[] lastPacketByte = new byte[lastPacketLength];
               System.arraycopy(byteArray, 0, lastPacketByte, 0, lastPacketLength);
               // set and getBit
               setAndGetBit(key, lastPacketByte);
               return;
          }
    
           byte[] packetByte = new byte[groupLength];
           byte[] lastPacketByte = new byte[lastPacketLength];
           for (int i = 0; i < round; i++) {
               if (i == 0) {
                   // first set
                   System.arraycopy(byteArray, i * groupLength, packetByte, 0, groupLength);
                   // set and getBit
                   setAndGetBit(key, packetByte);
              } else if (i != round - 1) {
                   // regular append
                   System.arraycopy(byteArray, i * groupLength, packetByte, 0, groupLength);
                   jedis.append(key, packetByte);
              } else {
                   // last append
                   System.arraycopy(byteArray, i * groupLength, lastPacketByte, 0, lastPacketLength);
                   jedis.append(key, lastPacketByte);
              }
          }
      }
    
       private byte[] disposeBitMap(BitSet bitSet) {
           // get words and count the number of word(Long)
           long[] words = bitSet.toLongArray();
           int n = words.length;
           if (n == 0)
               return new byte[0];
           for (int i = 0; i < n; i++) {
               // reverse
               words[i] = reverseLong(words[i]);
          }
           return longToBytes(words);
      }
    
       public static byte[] longToBytes(long[] longArray) {
           ByteBuffer buffer = ByteBuffer.allocate(longArray.length * 8);
           for (long value : longArray) {
               buffer.putLong(value);
          }
           return buffer.array();
      }
    
       public void setAndGetBit(byte[] key, byte[] value) {
           jedis.set(key, value);
           jedis.getbit(key, 0);
      }
    
       public static long reverseLong(long n) {
           n = (n >>> 32) | (n << 32);
           n = ((n & 0xFFFF0000FFFF0000L) >>> 16) | ((n & 0x0000FFFF0000FFFFL) << 16);
           n = ((n & 0xFF00FF00FF00FF00L) >>> 8) | ((n & 0x00FF00FF00FF00FFL) << 8);
           n = ((n & 0xF0F0F0F0F0F0F0F0L) >>> 4) | ((n & 0x0F0F0F0F0F0F0F0FL) << 4);
           n = ((n & 0xCCCCCCCCCCCCCCCCL) >>> 2) | ((n & 0x3333333333333333L) << 2);
           n = ((n & 0xAAAAAAAAAAAAAAAAL) >>> 1) | ((n & 0x5555555555555555L) << 1);
           return n;
      }
    }
  • Python
    import redis
    import random
    import string
    from bitmap import BitMap  # pip install bitmap
    # Parameters
    max_bytes = 1024 * 1024 * 64 # Construct a 64 MB bitmap.
    max_bits = max_bytes * 8 # A byte consists of eight bits (over 500 million characters).
    # Python built-in bitmaps are not required.
    # index_list All subscripts that are to be set to 1 are stored.
    index_list = []
    for i in range(1000000):
        index_list.append(random.randint(0, max_bits - 1))
    # Create a bitmap in a byte array.
    byte_array = bytearray(max_bytes)
    for i in index_list:
        index = i // 8
        offset = i % 8
        byte_array[index] |= (1 << (7 - offset))
    # Convert the bitmap to bytes for subsequent operations.
    bitmap_str = bytes(byte_array)
    # Connect to Redis.
    r = redis.Redis(host='127.0.0.1', port=6379)
    r.execute_command("auth a")
    key = "BITMAP_KEY"
    #Separate parameters.
    bitmap_pos = 0
    bitmap_sub_size = 256 * 1024 # Adjust the splitting granularity.
    step = bitmap_sub_size - 1
    # Process the first part.
    first_part = bitmap_str[bitmap_pos : bitmap_pos + step]
    r.execute_command("SET", key, first_part)
    r.execute_command("GETBIT", key, 0) # Run GETBIT to optimize bitmap code.
    # Process the remaining part.
    bitmap_pos += step
    while bitmap_pos < len(bitmap_str) :
        rest_part = bitmap_str[bitmap_pos : bitmap_pos + step]
        r.execute_command("APPEND", key, rest_part)
        bitmap_pos += step
    # The following is the test and verification code. Executing the code takes a long time as the GETBIT command will be executed for 1 million times.
    # The BITCOUNT command with time complexity of O(N) generates 100-millisecond glitches. Do not use this command in the production environment.
    # (Optional) Construct a Python built-in bitmap data verification.
    bm = BitMap(max_bits)
    for i in index_list:
        bm.set(i)
    print('BitMap.count(): ' + str(bm.count()))
    # Call the Redis command to check whether the settings are correct.
    success = True
    for i in index_list:
        if r.execute_command("GETBIT", key, i) != 1:
            print('GETBIT check error, pos is' + str(i))
            success = False
    
    if success:
        print('GETBIT check success')
    
    print("Bitcount: " + str(r.execute_command("BITCOUNT", key)))