Java源码阅读--HashMap

今天简单的看了一下JDK1.8中HashMap的源代码。简单的记录一下。JDK1.7没看,这里就不展开了,后文贴的链接写的比较好。


HashMap的几个字段和节点定义

  • loadFactor:装载因子,默认值为0.75,它决定了bucket填充程度;
  • threshold:扩容阈值,决定了HashMap能够放进去的数据量。初始值由tableSizeFor计算得到(为2^k幂),table分配之后为 capacity * loadFactor。一旦 t = (待放入的元素数目 / loadFactor + 1.0f) > threshold 或者总的元素个数 > threshold的时候,那么就需要调整HashMap中table的大小。
  • transient Node<K,V>[] table;: 节点数组,不同的hash的key可能映射到table中的同一个index处。其内部组织形式可以根据节点的个数组织为链表或者红黑树。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
// The default initial capacity - MUST be a power of two.
static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16

// The maximum capacity, used if a higher value is implicitly specified
static final int MAXIMUM_CAPACITY = 1 << 30;

// The load factor used when none specified in constructor.
static final float DEFAULT_LOAD_FACTOR = 0.75f;

// The bin count threshold for using a tree rather than list for a bin.
static final int TREEIFY_THRESHOLD = 8;

//The bin count threshold for untreeifying a (split) bin during a resize operation.
static final int UNTREEIFY_THRESHOLD = 6;

// The smallest table capacity for which bins may be treeified.
static final int MIN_TREEIFY_CAPACITY = 64;

/**
* The table, initialized on first use, and resized as
* necessary. When allocated, length is always a power of two.
* (We also tolerate length zero in some operations to allow
* bootstrapping mechanics that are currently not needed.)
*/
// Bucket array; keys with different hashes may map to the same index.
// Its length is always a power of two.
transient Node<K,V>[] table;

/**
* Holds cached entrySet(). Note that AbstractMap fields are used
* for keySet() and values().
*/
transient Set<Map.Entry<K,V>> entrySet;

/**
* The number of key-value mappings contained in this map.
*/
transient int size;

/**
* The number of times this HashMap has been structurally modified
* Structural modifications are those that change the number of mappings in
* the HashMap or otherwise modify its internal structure (e.g.,
* rehash). This field is used to make iterators on Collection-views of
* the HashMap fail-fast. (See ConcurrentModificationException).
*/
// Used for fail-fast detection during iteration.
transient int modCount;

/**
* this field holds the initial array capacity, or zero signifying DEFAULT_INITIAL_CAPACITY
*/
int threshold;

// Load factor.
final float loadFactor;

// Singly-linked list node used inside a bucket.
// Defined in HashMap.java.
static class Node<K,V> implements Map.Entry<K,V> {
final int hash;
final K key;
V value;
Node<K,V> next;
// ...
}

// Red-black tree node, used once a bin has been treeified.
// Defined in HashMap.
static final class TreeNode<K,V> extends LinkedHashMap.Entry<K,V> {
TreeNode<K,V> parent; // red-black tree links
TreeNode<K,V> left;
TreeNode<K,V> right;
TreeNode<K,V> prev; // needed to unlink next upon deletion
boolean red;
//...
}

/**
* HashMap.Node subclass for normal LinkedHashMap entries.
*/
// Defined in LinkedHashMap.
static class Entry<K,V> extends HashMap.Node<K,V> {
Entry<K,V> before, after;
Entry(int hash, K key, V value, Node<K,V> next) {
super(hash, key, value, next);
}
}

HashMap构造函数

  • JDK 1.8.0_121
  • 重载了4个构造函数.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
/**
* Constructor with explicit initial capacity and load factor.
*/
public HashMap(int initialCapacity, float loadFactor) {
if (initialCapacity < 0)
throw new IllegalArgumentException("Illegal initial capacity: " +
initialCapacity);
if (initialCapacity > MAXIMUM_CAPACITY)
initialCapacity = MAXIMUM_CAPACITY;
if (loadFactor <= 0 || Float.isNaN(loadFactor))
throw new IllegalArgumentException("Illegal load factor: " +
loadFactor);

// Note: the table array is NOT allocated here.
this.loadFactor = loadFactor;
// tableSizeFor always sets threshold to the smallest power of two
// >= initialCapacity, e.g.
// 4740 -> 8192
// 9971 -> 16384
this.threshold = tableSizeFor(initialCapacity);
}

/* Explicit capacity, default load factor. */
public HashMap(int initialCapacity) {
this(initialCapacity, DEFAULT_LOAD_FACTOR);
}

/* Default initial capacity and default load factor. */
public HashMap() {
this.loadFactor = DEFAULT_LOAD_FACTOR; // all other fields defaulted
}

// Default load factor; initial size chosen to hold all of m's mappings.
public HashMap(Map<? extends K, ? extends V> m) {
this.loadFactor = DEFAULT_LOAD_FACTOR;
putMapEntries(m, false);
}


// Computes the initial table capacity for a requested capacity.
/**
 * Returns the smallest power of two >= cap, clamped to the range
 * [1, MAXIMUM_CAPACITY]. (Non-positive cap yields 1.)
 */
static final int tableSizeFor(int cap) {
    // -1 >>> k keeps exactly the low (32 - k) bits set, which is the
    // all-ones smear of (cap - 1) — equivalent to the classic chain of
    // n |= n >>> 1/2/4/8/16 shifts.
    int n = -1 >>> Integer.numberOfLeadingZeros(cap - 1);
    if (n < 0) {
        return 1; // cap <= 0: fall back to the minimum table size
    }
    return (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
}

table的初始化

  • 上面的构造函数中并没有看到table字段的初始化。那么table的初始化在哪里?
  • 简而言之, 使用Map来初始化HashMap的时候,会调用resize()和putVal()方法。resize()方法会进行真正的Node[]的创建。putVal()初次调用的时候也会调用resize()。所以Node[] table真正的初始化是在resize()中。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
// Construct this HashMap from another map's contents.
public HashMap(Map<? extends K, ? extends V> m) {
this.loadFactor = DEFAULT_LOAD_FACTOR;
putMapEntries(m, false);
}

// 放入多个pair.
final void putMapEntries(Map<? extends K, ? extends V> m, boolean evict) {
int s = m.size();
if (s > 0) {
// 根据loadFactor进行tableSizeFor调整.
}
else if (s > threshold)
resize(); // 更新capacity和threshold,重新进行内存分配.

// 放入map中的元素.
for (Map.Entry<? extends K, ? extends V> e : m.entrySet()) {
K key = e.getKey();
V value = e.getValue();
// put元素的时候会先计算key对应的hash.
putVal(hash(key), key, value, false, evict);
}
}
}

// All key-value insertions ultimately go through this method.
/**
* 1. If the table is null/empty, call resize() to allocate it first;
* 2. Compute the bucket index from the key's hash;
* 3. If the bucket is empty, wrap the key-value pair via newNode and
*    place it there; otherwise go to step 4;
* 4. On a collision:
*    - if the key already exists, replace the old value with the new
*      value and return oldValue;
*    - if the bucket head is a TreeNode, insert via the red-black
*      tree's putTreeVal;
*    - otherwise append to the list; if the chain length then exceeds
*      TREEIFY_THRESHOLD - 1, convert the list into a red-black tree;
* 5. After insertion, if size exceeds threshold, call resize() to grow.
*/

final V putVal(int hash, K key, V value, boolean onlyIfAbsent, boolean evict) {
Node<K,V>[] tab; Node<K,V> p; int n, i;
// When table is null, resize() allocates and returns the new Node[].
if ((tab = table) == null || (n = tab.length) == 0)
n = (tab = resize()).length;
// n is always a power of two, so (n - 1) & hash masks the hash into
// [0, n-1]; the bitwise AND is much cheaper than a modulo.
if ((p = tab[i = (n - 1) & hash]) == null)
tab[i] = newNode(hash, key, value, null);
else {
Node<K,V> e; K k;
if (p.hash == hash &&
((k = p.key) == key || (key != null && key.equals(k))))
e = p;
else if (p instanceof TreeNode)
e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
else {
for (int binCount = 0; ; ++binCount) {
if ((e = p.next) == null) {
p.next = newNode(hash, key, value, null);
if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
treeifyBin(tab, hash);
break;
}
if (e.hash == hash &&
((k = e.key) == key || (key != null && key.equals(k))))
break;
p = e;
}
}
if (e != null) { // existing mapping for key
V oldValue = e.value;
if (!onlyIfAbsent || oldValue == null)
e.value = value;
afterNodeAccess(e);
return oldValue;
}
}
++modCount;
if (++size > threshold)
resize();
afterNodeInsertion(evict);
return null;
}

// Resize (or initially allocate) the HashMap's table.
/**
* 1. The HashMap(Map) constructor path ends up calling resize() first;
* 2. Putting an element may also trigger it depending on capacity and
*    threshold, so the first call to putVal() also calls resize().
*/

/**
* On each doubling, newCapacity and newThreshold are twice their old
* values.
* index = hash & (n - 1)   // n is a power of two.
* After doubling, the mask just gains one extra high bit, so each node
* either keeps its old index or moves to (old index + oldCapacity,
* where oldCapacity is half of the new n). This cleverly avoids
* recomputing hashes, and since that extra hash bit is 0 or 1 with
* equal probability, the resize splits each bin evenly between the two
* target buckets.
*/
final Node<K,V>[] resize() {
Node<K,V>[] oldTab = table;
int oldCap = (oldTab == null) ? 0 : oldTab.length;
int oldThr = threshold;
int newCap, newThr = 0;
if (oldCap > 0) {
if (oldCap >= MAXIMUM_CAPACITY) {
threshold = Integer.MAX_VALUE;
return oldTab;
}
else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
oldCap >= DEFAULT_INITIAL_CAPACITY)
newThr = oldThr << 1; // double threshold
}
else if (oldThr > 0) // initial capacity was placed in threshold
newCap = oldThr;
// The constructors never allocate table, so oldTab == null on first call.
else { // zero initial threshold signifies using defaults
newCap = DEFAULT_INITIAL_CAPACITY;
newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
}
if (newThr == 0) {
float ft = (float)newCap * loadFactor;
newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
(int)ft : Integer.MAX_VALUE);
}
threshold = newThr;
@SuppressWarnings({"rawtypes","unchecked"})
// The Node[] is actually created here.
Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
// table is initialized here.
table = newTab;
if (oldTab != null) {
// Redistribute the old entries into newTab.
//...
}
return newTab;
}

Key的hashCode计算

  • key的hash值的计算如下:(h = key.hashCode()) ^ (h >>> 16),将高位bit散布到低位。这是JDK8采用了红黑树平衡后的新的计算方式。保持简单的同时尽可能的用到了高位bit。
  • hash的作用就是高16位不变,低16位和高16位做异或运算,来达到减少碰撞的目的。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/**
 * Spreads (XORs) the upper 16 bits of key.hashCode() into the lower
 * 16. Table indexing masks the hash with (length - 1), so hashes that
 * differ only above the mask would otherwise always collide; this
 * shift-XOR is the cheapest way to fold the high bits into the index
 * calculation. A null key hashes to 0.
 */
static final int hash(Object key) {
    if (key == null) {
        return 0;
    }
    int h = key.hashCode();
    return h ^ (h >>> 16);
}

获取节点的值

  • static final class TreeNode<K,V> extends LinkedHashMap.Entry<K,V>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/** Returns the value mapped to key, or null if the key is absent. */
public V get(Object key) {
    Node<K,V> node = getNode(hash(key), key);
    return (node == null) ? null : node.value;
}

/**
 * Implements Map.get and related methods: locates the node for the
 * given (pre-computed) hash and key, or returns null.
 */
final Node<K,V> getNode(int hash, Object key) {
    Node<K,V>[] tab = table;
    if (tab == null || tab.length == 0)
        return null;
    // Head node of the bucket this hash maps to.
    Node<K,V> first = tab[(tab.length - 1) & hash];
    if (first == null)
        return null;
    // Always check the first node before anything else.
    K k = first.key;
    if (first.hash == hash && (k == key || (key != null && key.equals(k))))
        return first;
    Node<K,V> e = first.next;
    if (e == null)
        return null;
    // Treeified bin: search the red-black tree.
    if (first instanceof TreeNode)
        return ((TreeNode<K,V>)first).getTreeNode(hash, key);
    // Otherwise walk the linked list.
    for (; e != null; e = e.next) {
        k = e.key;
        if (e.hash == hash && (k == key || (key != null && key.equals(k))))
            return e;
    }
    return null;
}

参考