您现在的位置是：网站首页 > 程序设计 > redis

redis源码分析——5、dict实现

2021年9月10日 00:25 共3437人围观

redis redis源码

简介dics是一种常用的数据结构，而C语言没有提供内置类型，本文一起看看redis中dict的实现

1. 怎么样自己实现一个dict
2. redis中dict的实现
3. 基本操作

1. 怎么样自己实现一个dict

不像C++、java等高级语言内置了map，C语言并没有提供dict库，所以如果想使用dict就需要自己实现。那么实现一个dict有方法呢？

数组法
这也是最容易实现的一种方法，简单说就是开辟一个长度为N的大数组（通常N是一个质数），然后通过一个哈希函数计算key的哈希值d，然后用d%N得到key对于的数组下边。设想一下，如果数组开的足够大，而且哈希函数足够散列，我们可以保证不同的key的对应的下标绝不会冲突，那么这样就可以实现一个dict了。
上述介绍实现dict的方法在实际中有很大的限制，比如我们不知道数组开大多大合适，即使开了一百万，那么随着业务的增长，这一百万也不够。其次没有一个哈希函数能够保证不同key的哈希值不重复。
鉴于此，我们在用数组实现dict的时候数组长度一般是动态的，根据元素个数/数组长度这一指标来判断扩缩容，其次我们找一个尽量能打散的哈希函数。最后，对于冲突的key，我们可以简单的使用拉链法解决冲突。
红黑树法
C++中的map底层实现就是红黑树。红黑树的核心就是尽量保证二叉树是左右平衡的，所以红黑树的实现较为复杂，需要区分各种情况旋转。不需要哈希函数，只需要实现key的比较函数就可以了，所以也就不存在key冲突。
跳表法
如果想实现红黑树的效果，但是又不想实现红黑树，那么跳表是个不错的选择，它的效率几乎和红黑树等价。redis中zset底层就使用的了调表，具体实现后续再说。

2. redis中dict的实现

redis中dict的实现用了上述第一种方法也就是数组拉链法，对于哈希函数，redis选用了Murmurhash2。下面看看结构定义。

  typedef struct dictEntry { 
      void *key; 
      union { 
          void *val; 
          uint64_t u64; 
          int64_t s64; 
          double d; 
      } v; 
      struct dictEntry *next; 
  } dictEntry; /* entry可以认为是一个节点，除了存放value以外还存放了key和next，这是因为如果发生了冲突，我们需要进一步比较key */ 

  typedef struct dictType { 
      uint64_t (*hashFunction)(const void *key); 
      void *(*keyDup)(void *privdata, const void *key); 
      void *(*valDup)(void *privdata, const void *obj); 
      int (*keyCompare)(void *privdata, const void *key1, const void *key2); 
      void (*keyDestructor)(void *privdata, void *key); 
      void (*valDestructor)(void *privdata, void *obj); 
  } dictType; /* 这里定义一些接口，可以实现不同的处理函数达，这样就达到了接口的效果 */ 

  /* This is our hash table structure. Every dictionary has two of this as we 
   * implement incremental rehashing, for the old to the new table. */ 
  typedef struct dictht { 
      dictEntry **table; /* 这儿为什么是指针的指针？ 其实是指针数组，每个元素存放的entry链表的头指针 */ 
      unsigned long size; 
      unsigned long sizemask; 
      unsigned long used; 
  } dictht; /* 定义了数组，table其实是个指针数组，根据下标可以访问到key*/ 

  typedef struct dict { 
      dictType *type;   /* 自定义各种字典的实现接口 */ 
      void *privdata; /* 自定义数据字段 */ 
      dictht ht[2]; /* 2个table，主要是在rehash时候用*/ 
      long rehashidx; /* rehashing not in progress if rehashidx == -1 */ 
      unsigned long iterators; /* number of iterators currently running */ 
  } dict;

3. 基本操作

公共操作
根据key查找数组中的index

    /* Returns the index of a free slot that can be populated with 
     * a hash entry for the given 'key'. 
     * If the key already exists, -1 is returned 
     * and the optional output parameter may be filled. 
     * 
     * Note that if we are in the process of rehashing the hash table, the 
     * index is always returned in the context of the second (new) hash table. */ 
    /* 根据key计算其所应该在的槽位，如果key已经存在，则返回-1  
     * 函数同时传入key和哈希值两个参数，这是因为可能出现冲突，仅仅靠哈希值还不能判断是否存在，必须还要对比key是否相同 
     * 如果传入的existing不为空，则existing返回已经存在的结点地址 
     */ 
    static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing) 
    { 
        unsigned long idx, table; 
        dictEntry *he; 
        if (existing) *existing = NULL; 

        /* Expand the hash table if needed */ 
        if (_dictExpandIfNeeded(d) == DICT_ERR) 
            return -1; 
        for (table = 0; table <= 1; table++) { 
            idx = hash & d->ht[table].sizemask; 
            /* Search if this slot does not already contain the given key */ 
            he = d->ht[table].table[idx]; 
            while(he) { 
                if (key==he->key || dictCompareKeys(d, key, he->key)) { 
                    if (existing) *existing = he; 
                    return -1; 
                } 
                he = he->next; 
            } 
            if (!dictIsRehashing(d)) break; 
        } 
        return idx; 
    }

扩容

    /* Expand the hash table if needed */ 
    static int _dictExpandIfNeeded(dict *d) 
    { 
        /* Incremental rehashing already in progress. Return. */ 
        if (dictIsRehashing(d)) return DICT_OK; 

        /* If the hash table is empty expand it to the initial size. */ 
        /* 如果数组还是空，则直接扩容到DICT_HT_INITIAL_SIZE，默认是4 */ 
        if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE); 

        /* If we reached the 1:1 ratio, and we are allowed to resize the hash 
         * table (global setting) or we should avoid it but the ratio between 
         * elements/buckets is over the "safe" threshold, we resize doubling 
         * the number of buckets. */ 
        /* 触发扩容有几个条件 
         * 如果“填充率”>=1，也就是used>=size时应该扩容，当然也要看当前容不容许扩容 
         * 如果“填充率”>dict_force_resize_ratio（默认是5），也就是说平均冲突率大于5的时候就强制扩容 
         */ 
        if (d->ht[0].used >= d->ht[0].size && 
            (dict_can_resize || 
             d->ht[0].used/d->ht[0].size > dict_force_resize_ratio)) 
        { 
            return dictExpand(d, d->ht[0].used*2); 
        } 
        return DICT_OK; 
    }

rehash

    /* 几乎所有的dict操作函数中都有_dictRehashStep调用，这其实是将扩容过程分摊了，否组扩容可能会消耗很长时间 */ 
    static void _dictRehashStep(dict *d) { 
        if (d->iterators == 0) dictRehash(d,1); 
    } 

    /* Performs N steps of incremental rehashing. Returns 1 if there are still 
     * keys to move from the old to the new hash table, otherwise 0 is returned. 
     * 
     * Note that a rehashing step consists in moving a bucket (that may have more 
     * than one key as we use chaining) from the old to the new hash table, however 
     * since part of the hash table may be composed of empty spaces, it is not 
     * guaranteed that this function will rehash even a single bucket, since it 
     * will visit at max N*10 empty buckets in total, otherwise the amount of 
     * work it does would be unbound and the function may block for a long time. */ 
    int dictRehash(dict *d, int n) { 
        int empty_visits = n*10; /* Max number of empty buckets to visit. */ 
        if (!dictIsRehashing(d)) return 0; 

        while(n-- && d->ht[0].used != 0) { 
            dictEntry *de, *nextde; 

            /* Note that rehashidx can't overflow as we are sure there are more 
             * elements because ht[0].used != 0 */ 
            assert(d->ht[0].size > (unsigned long)d->rehashidx); 
            while(d->ht[0].table[d->rehashidx] == NULL) { /* 找到非空结点 */ 
                d->rehashidx++; 
                if (--empty_visits == 0) return 1; /* 这一行在3.0是没有的，这么写是防止while卡在空结点上 */ 
            } 
            de = d->ht[0].table[d->rehashidx]; 
            /* Move all the keys in this bucket from the old to the new hash HT */ 
            while(de) { 
                uint64_t h; 

                nextde = de->next; 
                /* Get the index in the new hash table */ 
                h = dictHashKey(d, de->key) & d->ht[1].sizemask; /* 将该槽下的所有结点插入到新table的对应槽位 */ 
                de->next = d->ht[1].table[h]; 
                d->ht[1].table[h] = de; 
                d->ht[0].used--; 
                d->ht[1].used++; 
                de = nextde; 
            } 
            d->ht[0].table[d->rehashidx] = NULL; 
            d->rehashidx++; 
        } 

        /* Check if we already rehashed the whole table... */ 
        /* 如果h[0]的元素为空，这说明rehash完成，则将h[0]释放，且h[0]接管h[1]*/ 
        if (d->ht[0].used == 0) { 
            zfree(d->ht[0].table); 
            d->ht[0] = d->ht[1]; 
            _dictReset(&d->ht[1]); 
            d->rehashidx = -1; 
            return 0; 
        } 

        /* More to rehash... */ 
        return 1; 
    }

创建

  /* Create a new hash table */ 
  dict *dictCreate(dictType *type, 
          void *privDataPtr) 
  { 
      dict *d = zmalloc(sizeof(*d)); 

      _dictInit(d,type,privDataPtr); 
      return d; 
  } 

  /* Initialize the hash table */ 
  int _dictInit(dict *d, dictType *type, 
          void *privDataPtr) 
  { 
      _dictReset(&d->ht[0]); /* 所有成员置为0或者null*/ 
      _dictReset(&d->ht[1]); 
      d->type = type; 
      d->privdata = privDataPtr; 
      d->rehashidx = -1; 
      d->iterators = 0; 
      return DICT_OK; 
  }

插入

  /* 根据key新建一个entry，如果key已经存在，则返回null */ 
  dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing) 
  { 
      long index; 
      dictEntry *entry; 
      dictht *ht; 

      if (dictIsRehashing(d)) _dictRehashStep(d); 

      /* Get the index of the new element, or -1 if 
       * the element already exists. */ 
      /* 对于新加结点，如果key已经存在，则直接返回NULL*/ 
      if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1) 
          return NULL; 

      /* Allocate the memory and store the new entry. 
       * Insert the element in top, with the assumption that in a database 
       * system it is more likely that recently added entries are accessed 
       * more frequently. */ 
      /* 根据就近访问原则，新节点插入到头 */ 
      ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0]; 
      entry = zmalloc(sizeof(*entry)); 
      entry->next = ht->table[index]; 
      ht->table[index] = entry; 
      ht->used++; 

      /* Set the hash entry fields. */ 
      /* 设置结点的key */ 
      dictSetKey(d, entry, key); 
      return entry; 
  } 

  /* Add an element to the target hash table */ 
  int dictAdd(dict *d, void *key, void *val) 
  { 
      /* 首先根据key新建了一个entry */ 
      dictEntry *entry = dictAddRaw(d,key,NULL); 

      if (!entry) return DICT_ERR; 
      /* 给该entry设置value */ 
      dictSetVal(d, entry, val); 
      return DICT_OK; 
  }

删除

  /* Remove an element, returning DICT_OK on success or DICT_ERR if the 
   * element was not found. */ 
  int dictDelete(dict *ht, const void *key) { 
      return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR; 
  } 

  /* Search and remove an element. This is an helper function for 
   * dictDelete() and dictUnlink(), please check the top comment 
   * of those functions. */ 
  /* 根据key删除元素 */ 
  static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) { 
      uint64_t h, idx; 
      dictEntry *he, *prevHe; 
      int table; 

      if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL; /* dict为空，短路返回*/ 

      if (dictIsRehashing(d)) _dictRehashStep(d); 
      h = dictHashKey(d, key); /* 计算hash值 */ 

      for (table = 0; table <= 1; table++) { /* 分别变量h[0]和h[1] */ 
          idx = h & d->ht[table].sizemask; /* 根据key的hash值计算出key在hash tale中的槽位 */ 
          he = d->ht[table].table[idx]; /* 得到同hash 值的entry列表 */ 
          prevHe = NULL; 
          while(he) { 
              if (key==he->key || dictCompareKeys(d, key, he->key)) { /* 找到了key */ 
                  /* Unlink the element from the list */ 
                  if (prevHe) 
                      prevHe->next = he->next; /* 链表的删除逻辑 */ 
                  else 
                      d->ht[table].table[idx] = he->next; 
                  if (!nofree) { 
                      dictFreeKey(d, he); 
                      dictFreeVal(d, he); 
                      zfree(he); 
                  } 
                  d->ht[table].used--; 
                  return he; 
              } 
              prevHe = he; 
              he = he->next; 
          } 
          if (!dictIsRehashing(d)) break; 
      } 
      return NULL; /* not found */ 
  }

上一篇：用C++写了个协程库，欢迎star

下一篇： redis源码分析——9、AOF持久化

redis源码分析——7、对象object

redis源码分析——4、压缩列表ziplist实现

redis源码分析——3、简单动态字符串

redis源码分析——2、读写应答

redis源码分析——1、网络框架

Redis的RESP协议

redis源码分析——8、RDB持久化

redis源码分析——9、AOF持久化

redis源码分析——10、主从同步

Redis4.0新特性之-大KEY删除

redis源码分析——6、跳表skiplist的实现