Large Bitmap Initialization
The open-source Redis uses string bitmaps, which may create super large strings and affect the performance of big keys in some scenarios. GeminiDB Redis API uses bitmaps in a special encoding format. The internal sharding algorithm prevents super large strings from being created and allows you to insert and delete a random number of bits efficiently.
However, in practice, you may obtain a super large bitmap of the string type from another source. Inserting such a bitmap (for example, 64 MB) into GeminiDB Redis API with a single SET command takes a long time, and the resulting latency can interfere with other normal accesses and cause jitter. To address these issues, we provide a smooth insertion solution: split the super large bitmap into smaller substrings (for example, 1 MB each). Insert the first substring with the SET command, then run a read-only GETBIT command to convert the stored string into the bitmap encoding. Insert the remaining substrings, in order, by running the APPEND command.
Precautions
- This function is available only in specific versions. To check whether your instance version supports it, submit a service ticket to contact customer service.
- The APPEND command is order-sensitive: substrings must be appended in their original order. Out-of-order APPEND operations (for example, in concurrent APPEND scenarios) must therefore be avoided throughout the entire process.
- PIPELINE can be used for acceleration, and PIPELINE ensures the execution sequence.
- The smaller the substrings are (recommended: 256 KB to 1 MB), the less the latency varies, but the longer initialization takes.
Code Reference
- C++
#include <string> #include <vector> #include "hiredis/hiredis.h" constexpr std::size_t kBitmapSubSize = 1024 * 1024; // 1 MB void SmoothInitBitmap(std::string bitmap) { // Split bitmap std::vector<std::string> sub_bitmaps; std::size_t pos = 0; while (pos < bitmap.size()) { sub_bitmaps.emplace_back(bitmap.substr(pos, kBitmapSubSize)); pos += kBitmapSubSize; } std::string key = "BITMAP_KEY"; // Connect to redis redisContext* redis = redisConnect("127.0.0.1", 6666); redisReply* reply = nullptr; // First part use 'SET' command reply = (redisReply*)redisCommand(redis, "SET %b %b", key.data(), key.size(), sub_bitmaps[0].data(), sub_bitmaps[0].size()); freeReplyObject(reply); // Use 'GETBIT' to transform to bitmap format reply = (redisReply*)redisCommand(redis, "GETBIT %b 0", key.data(), key.size()); freeReplyObject(reply); // Use 'APPEND' for remaining bitmap data for (auto i = 1u; i < sub_bitmaps.size(); ++i) { reply = (redisReply*)redisCommand(redis, "APPEND %b %b", key.data(), key.size(), sub_bitmaps[i].data(), sub_bitmaps[i].size()); freeReplyObject(reply); } } int main() { std::string bitmap ="123457890abcdef123457890abcdef123457890abcdef123457890abcdef123457890abcdef123456"; SmoothInitBitmap(bitmap); }
- JAVA(Jedis)
package nosql.cloud.huawei.jedis;

import redis.clients.jedis.Jedis;

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.BitSet;

/**
 * Writes a {@link BitSet} to a key in fixed-size groups of bytes: the first
 * group is written with SET followed by a GETBIT, and every remaining group is
 * written with APPEND in order.
 */
public class BitMapOperation {
    private final Jedis jedis;

    public BitMapOperation(Jedis jedis) {
        this.jedis = jedis;
    }

    /**
     * SetBit operation especially for big bitmap.
     *
     * <p>An empty {@code value} is stored as an empty byte array with a single SET.
     *
     * @param key         key
     * @param value       value
     * @param groupLength maximum number of bytes sent per command (unit: byte); must be positive
     *                    when {@code value} is not empty
     * @throws IllegalArgumentException if {@code value} is not empty and {@code groupLength <= 0}
     */
    public void setBitGrouped(byte[] key, BitSet value, int groupLength) {
        if (value.isEmpty()) {
            jedis.set(key, new byte[0]);
            return;
        }
        // Checked after the empty case so that calls that used to succeed keep succeeding.
        if (groupLength <= 0) {
            throw new IllegalArgumentException("groupLength must be positive, got " + groupLength);
        }

        byte[] byteArray = disposeBitMap(value);

        // First group: SET, then GETBIT.
        int from = 0;
        int to = Math.min(groupLength, byteArray.length);
        setAndGetBit(key, Arrays.copyOfRange(byteArray, from, to));

        // Remaining groups: APPEND in order. The last group may be shorter.
        while (to < byteArray.length) {
            from = to;
            // Widen to long so that a very large groupLength cannot overflow the end index.
            to = (int) Math.min((long) from + groupLength, (long) byteArray.length);
            jedis.append(key, Arrays.copyOfRange(byteArray, from, to));
        }
    }

    /**
     * Converts the bit set into bytes: each 64-bit word has its bit order reversed and is then
     * written big-endian, so bit 0 of the bit set becomes the most significant bit of byte 0.
     */
    private byte[] disposeBitMap(BitSet bitSet) {
        long[] words = bitSet.toLongArray();
        if (words.length == 0) {
            return new byte[0];
        }
        for (int i = 0; i < words.length; i++) {
            words[i] = reverseLong(words[i]);
        }
        return longToBytes(words);
    }

    /**
     * Serializes the longs into a byte array, 8 bytes per long, in {@link ByteBuffer}'s default
     * (big-endian) byte order.
     *
     * @param longArray values to serialize
     * @return array of {@code longArray.length * 8} bytes
     */
    public static byte[] longToBytes(long[] longArray) {
        ByteBuffer buffer = ByteBuffer.allocate(longArray.length * 8);
        for (long value : longArray) {
            buffer.putLong(value);
        }
        return buffer.array();
    }

    /**
     * Stores {@code value} under {@code key} with SET and then issues GETBIT on offset 0.
     *
     * @param key   key
     * @param value bytes to store
     */
    public void setAndGetBit(byte[] key, byte[] value) {
        jedis.set(key, value);
        jedis.getbit(key, 0);
    }

    /**
     * Reverses the order of all 64 bits of {@code n}.
     *
     * @param n value to reverse
     * @return {@code n} with bit 0 swapped with bit 63, bit 1 with bit 62, and so on
     */
    public static long reverseLong(long n) {
        return Long.reverse(n);
    }
}
- Python
import redis
import random
import string
from bitmap import BitMap  # pip install bitmap

# Parameters
max_bytes = 1024 * 1024 * 64  # Construct a 64 MB bitmap.
max_bits = max_bytes * 8      # A byte consists of eight bits (over 500 million bits).

# index_list stores the offsets of all bits that are to be set to 1.
# The offsets are random, so duplicates are possible.
index_list = [random.randint(0, max_bits - 1) for _ in range(1000000)]

# Create the bitmap in a byte array. Within each byte, bit offset 0 is the
# most significant bit.
byte_array = bytearray(max_bytes)
for i in index_list:
    index = i // 8
    offset = i % 8
    byte_array[index] |= (1 << (7 - offset))

# Convert the bitmap to bytes for subsequent operations.
bitmap_str = bytes(byte_array)

# Connect to Redis.
r = redis.Redis(host='127.0.0.1', port=6379)
# Command name and argument are passed separately instead of relying on the
# client splitting a single "auth a" string on whitespace.
r.execute_command("AUTH", "a")

key = "BITMAP_KEY"

# Splitting parameters.
bitmap_sub_size = 256 * 1024  # Adjust the splitting granularity.
# Python slice ends are exclusive, so the step is the full chunk size.
step = bitmap_sub_size

# Process the first part.
first_part = bitmap_str[:step]
r.execute_command("SET", key, first_part)
r.execute_command("GETBIT", key, 0)  # Run GETBIT to convert the value to the bitmap encoding.

# Process the remaining parts, strictly in order.
for bitmap_pos in range(step, len(bitmap_str), step):
    rest_part = bitmap_str[bitmap_pos : bitmap_pos + step]
    r.execute_command("APPEND", key, rest_part)

# The following is the test and verification code. Executing the code takes a long time as the GETBIT command will be executed for 1 million times.
# The BITCOUNT command with time complexity of O(N) generates 100-millisecond glitches. Do not use this command in the production environment.

# (Optional) Build a reference bitmap locally with the bitmap package for data verification.
bm = BitMap(max_bits)
for i in index_list:
    bm.set(i)
print('BitMap.count(): ' + str(bm.count()))

# Call the Redis command to check whether the settings are correct.
success = True
for i in index_list:
    if r.execute_command("GETBIT", key, i) != 1:
        print('GETBIT check error, pos is' + str(i))
        success = False
if success:
    print('GETBIT check success')
print("Bitcount: " + str(r.execute_command("BITCOUNT", key)))
Feedback
Was this page helpful?
Provide feedback. Thank you very much for your feedback. We will continue working to improve the documentation. See the reply and handling status in My Cloud VOC.
For any further questions, feel free to contact us through the chatbot.
Chatbot