1 | //! Collection types. |
2 | //! |
3 | //! Rust's standard collection library provides efficient implementations of the |
4 | //! most common general purpose programming data structures. By using the |
5 | //! standard implementations, it should be possible for two libraries to |
6 | //! communicate without significant data conversion. |
7 | //! |
8 | //! To get this out of the way: you should probably just use [`Vec`] or [`HashMap`]. |
9 | //! These two collections cover most use cases for generic data storage and |
10 | //! processing. They are exceptionally good at doing what they do. All the other |
11 | //! collections in the standard library have specific use cases where they are |
12 | //! the optimal choice, but these cases are borderline *niche* in comparison. |
13 | //! Even when `Vec` and `HashMap` are technically suboptimal, they're probably a |
14 | //! good enough choice to get started. |
15 | //! |
16 | //! Rust's collections can be grouped into four major categories: |
17 | //! |
18 | //! * Sequences: [`Vec`], [`VecDeque`], [`LinkedList`] |
19 | //! * Maps: [`HashMap`], [`BTreeMap`] |
20 | //! * Sets: [`HashSet`], [`BTreeSet`] |
21 | //! * Misc: [`BinaryHeap`] |
22 | //! |
23 | //! # When Should You Use Which Collection? |
24 | //! |
25 | //! These are fairly high-level and quick break-downs of when each collection |
26 | //! should be considered. Detailed discussions of strengths and weaknesses of |
27 | //! individual collections can be found on their own documentation pages. |
28 | //! |
29 | //! ### Use a `Vec` when: |
30 | //! * You want to collect items up to be processed or sent elsewhere later, and |
31 | //! don't care about any properties of the actual values being stored. |
32 | //! * You want a sequence of elements in a particular order, and will only be |
33 | //! appending to (or near) the end. |
34 | //! * You want a stack. |
35 | //! * You want a resizable array. |
36 | //! * You want a heap-allocated array. |
37 | //! |
38 | //! ### Use a `VecDeque` when: |
39 | //! * You want a [`Vec`] that supports efficient insertion at both ends of the |
40 | //! sequence. |
41 | //! * You want a queue. |
42 | //! * You want a double-ended queue (deque). |
43 | //! |
44 | //! ### Use a `LinkedList` when: |
45 | //! * You want a [`Vec`] or [`VecDeque`] of unknown size, and can't tolerate |
46 | //! amortization. |
47 | //! * You want to efficiently split and append lists. |
48 | //! * You are *absolutely* certain you *really*, *truly*, want a doubly linked |
49 | //! list. |
50 | //! |
51 | //! ### Use a `HashMap` when: |
52 | //! * You want to associate arbitrary keys with an arbitrary value. |
53 | //! * You want a cache. |
54 | //! * You want a map, with no extra functionality. |
55 | //! |
56 | //! ### Use a `BTreeMap` when: |
57 | //! * You want a map sorted by its keys. |
58 | //! * You want to be able to get a range of entries on-demand. |
59 | //! * You're interested in what the smallest or largest key-value pair is. |
60 | //! * You want to find the largest or smallest key that is smaller or larger |
61 | //! than something. |
62 | //! |
63 | //! ### Use the `Set` variant of any of these `Map`s when: |
64 | //! * You just want to remember which keys you've seen. |
65 | //! * There is no meaningful value to associate with your keys. |
66 | //! * You just want a set. |
67 | //! |
68 | //! ### Use a `BinaryHeap` when: |
69 | //! |
70 | //! * You want to store a bunch of elements, but only ever want to process the |
71 | //! "biggest" or "most important" one at any given time. |
72 | //! * You want a priority queue. |
73 | //! |
74 | //! # Performance |
75 | //! |
76 | //! Choosing the right collection for the job requires an understanding of what |
77 | //! each collection is good at. Here we briefly summarize the performance of |
78 | //! different collections for certain important operations. For further details, |
79 | //! see each type's documentation, and note that the names of actual methods may |
80 | //! differ from the tables below on certain collections. |
81 | //! |
82 | //! Throughout the documentation, we will follow a few conventions. For all |
83 | //! operations, the collection's size is denoted by *n*. If another collection is |
84 | //! involved in the operation, it contains *m* elements. Operations which have an |
85 | //! *amortized* cost are suffixed with a `*`. Operations with an *expected* |
86 | //! cost are suffixed with a `~`. |
87 | //! |
88 | //! All amortized costs are for the potential need to resize when capacity is |
89 | //! exhausted. If a resize occurs it will take *O*(*n*) time. Our collections never |
90 | //! automatically shrink, so removal operations aren't amortized. Over a |
91 | //! sufficiently large series of operations, the average cost per operation will |
92 | //! deterministically equal the given cost. |
93 | //! |
94 | //! Only [`HashMap`] has expected costs, due to the probabilistic nature of hashing. |
95 | //! It is theoretically possible, though very unlikely, for [`HashMap`] to |
96 | //! experience worse performance. |
97 | //! |
98 | //! ## Sequences |
99 | //! |
100 | //! | | get(i) | insert(i) | remove(i) | append | split_off(i) | |
101 | //! |----------------|------------------------|-------------------------|------------------------|-----------|------------------------| |
102 | //! | [`Vec`] | *O*(1) | *O*(*n*-*i*)* | *O*(*n*-*i*) | *O*(*m*)* | *O*(*n*-*i*) | |
103 | //! | [`VecDeque`] | *O*(1) | *O*(min(*i*, *n*-*i*))* | *O*(min(*i*, *n*-*i*)) | *O*(*m*)* | *O*(min(*i*, *n*-*i*)) | |
104 | //! | [`LinkedList`] | *O*(min(*i*, *n*-*i*)) | *O*(min(*i*, *n*-*i*)) | *O*(min(*i*, *n*-*i*)) | *O*(1) | *O*(min(*i*, *n*-*i*)) | |
105 | //! |
106 | //! Note that where ties occur, [`Vec`] is generally going to be faster than [`VecDeque`], and |
107 | //! [`VecDeque`] is generally going to be faster than [`LinkedList`]. |
108 | //! |
109 | //! ## Maps |
110 | //! |
111 | //! For Sets, all operations have the cost of the equivalent Map operation. |
112 | //! |
113 | //! | | get | insert | remove | range | append | |
114 | //! |--------------|---------------|---------------|---------------|---------------|--------------| |
115 | //! | [`HashMap`] | *O*(1)~ | *O*(1)~* | *O*(1)~ | N/A | N/A | |
116 | //! | [`BTreeMap`] | *O*(log(*n*)) | *O*(log(*n*)) | *O*(log(*n*)) | *O*(log(*n*)) | *O*(*n*+*m*) | |
117 | //! |
118 | //! # Correct and Efficient Usage of Collections |
119 | //! |
120 | //! Of course, knowing which collection is the right one for the job doesn't |
121 | //! instantly permit you to use it correctly. Here are some quick tips for |
122 | //! efficient and correct usage of the standard collections in general. If |
123 | //! you're interested in how to use a specific collection in particular, consult |
124 | //! its documentation for detailed discussion and code examples. |
125 | //! |
126 | //! ## Capacity Management |
127 | //! |
128 | //! Many collections provide several constructors and methods that refer to |
129 | //! "capacity". These collections are generally built on top of an array. |
130 | //! Optimally, this array would be exactly the right size to fit only the |
131 | //! elements stored in the collection, but for the collection to do this would |
132 | //! be very inefficient. If the backing array was exactly the right size at all |
133 | //! times, then every time an element is inserted, the collection would have to |
134 | //! grow the array to fit it. Due to the way memory is allocated and managed on |
135 | //! most computers, this would almost surely require allocating an entirely new |
136 | //! array and copying every single element from the old one into the new one. |
137 | //! Hopefully you can see that this wouldn't be very efficient to do on every |
138 | //! operation. |
139 | //! |
140 | //! Most collections therefore use an *amortized* allocation strategy. They |
141 | //! generally let themselves have a fair amount of unoccupied space so that they |
142 | //! only have to grow on occasion. When they do grow, they allocate a |
143 | //! substantially larger array to move the elements into so that it will take a |
144 | //! while for another grow to be required. While this strategy is great in |
145 | //! general, it would be even better if the collection *never* had to resize its |
146 | //! backing array. Unfortunately, the collection doesn't have enough |
147 | //! information to do this on its own. Therefore, it is up to us programmers to give |
148 | //! it hints. |
149 | //! |
150 | //! Any `with_capacity` constructor will instruct the collection to allocate |
151 | //! enough space for the specified number of elements. Ideally this will be for |
152 | //! exactly that many elements, but some implementation details may prevent |
153 | //! this. See collection-specific documentation for details. In general, use |
154 | //! `with_capacity` when you know exactly how many elements will be inserted, or |
155 | //! at least have a reasonable upper-bound on that number. |
156 | //! |
157 | //! When anticipating a large influx of elements, the `reserve` family of |
158 | //! methods can be used to hint to the collection how much room it should make |
159 | //! for the coming items. As with `with_capacity`, the precise behavior of |
160 | //! these methods will be specific to the collection of interest. |
161 | //! |
162 | //! For optimal performance, collections will generally avoid shrinking |
163 | //! themselves. If you believe that a collection will not soon contain any more |
164 | //! elements, or just really need the memory, the `shrink_to_fit` method prompts |
165 | //! the collection to shrink the backing array to the minimum size capable of |
166 | //! holding its elements. |
167 | //! |
168 | //! Finally, if ever you're interested in what the actual capacity of the |
169 | //! collection is, most collections provide a `capacity` method to query this |
170 | //! information on demand. This can be useful for debugging purposes, or for |
171 | //! use with the `reserve` methods. |
172 | //! |
173 | //! ## Iterators |
174 | //! |
175 | //! [Iterators][crate::iter] |
176 | //! are a powerful and robust mechanism used throughout Rust's |
177 | //! standard libraries. Iterators provide a sequence of values in a generic, |
178 | //! safe, efficient and convenient way. The contents of an iterator are usually |
179 | //! *lazily* evaluated, so that only the values that are actually needed are |
180 | //! ever actually produced, and no allocation need be done to temporarily store |
181 | //! them. Iterators are primarily consumed using a `for` loop, although many |
182 | //! functions also take iterators where a collection or sequence of values is |
183 | //! desired. |
184 | //! |
185 | //! All of the standard collections provide several iterators for performing |
186 | //! bulk manipulation of their contents. The three primary iterators almost |
187 | //! every collection should provide are `iter`, `iter_mut`, and `into_iter`. |
188 | //! Some of these are not provided on collections where it would be unsound or |
189 | //! unreasonable to provide them. |
190 | //! |
191 | //! `iter` provides an iterator of immutable references to all the contents of a |
192 | //! collection in the most "natural" order. For sequence collections like [`Vec`], |
193 | //! this means the items will be yielded in increasing order of index starting |
194 | //! at 0. For ordered collections like [`BTreeMap`], this means that the items |
195 | //! will be yielded in sorted order. For unordered collections like [`HashMap`], |
196 | //! the items will be yielded in whatever order the internal representation made |
197 | //! most convenient. This is great for reading through all the contents of the |
198 | //! collection. |
199 | //! |
200 | //! ``` |
201 | //! let vec = vec![1, 2, 3, 4]; |
202 | //! for x in vec.iter() { |
203 | //! println!("vec contained {x:?}" ); |
204 | //! } |
205 | //! ``` |
206 | //! |
207 | //! `iter_mut` provides an iterator of *mutable* references in the same order as |
208 | //! `iter`. This is great for mutating all the contents of the collection. |
209 | //! |
210 | //! ``` |
211 | //! let mut vec = vec![1, 2, 3, 4]; |
212 | //! for x in vec.iter_mut() { |
213 | //! *x += 1; |
214 | //! } |
215 | //! ``` |
216 | //! |
217 | //! `into_iter` transforms the actual collection into an iterator over its |
218 | //! contents by-value. This is great when the collection itself is no longer |
219 | //! needed, and the values are needed elsewhere. Using `extend` with `into_iter` |
220 | //! is the main way that contents of one collection are moved into another. |
221 | //! `extend` automatically calls `into_iter`, and takes any <code>T: [IntoIterator]</code>. |
222 | //! Calling `collect` on an iterator itself is also a great way to convert one |
223 | //! collection into another. Both of these methods should internally use the |
224 | //! capacity management tools discussed in the previous section to do this as |
225 | //! efficiently as possible. |
226 | //! |
227 | //! ``` |
228 | //! let mut vec1 = vec![1, 2, 3, 4]; |
229 | //! let vec2 = vec![10, 20, 30, 40]; |
230 | //! vec1.extend(vec2); |
231 | //! ``` |
232 | //! |
233 | //! ``` |
234 | //! use std::collections::VecDeque; |
235 | //! |
236 | //! let vec = [1, 2, 3, 4]; |
237 | //! let buf: VecDeque<_> = vec.into_iter().collect(); |
238 | //! ``` |
239 | //! |
240 | //! Iterators also provide a series of *adapter* methods for performing common |
241 | //! transformations on sequences. Among the adapters are functional favorites like `map`, |
242 | //! `fold`, `skip` and `take`. Of particular interest to collections is the |
243 | //! `rev` adapter, which reverses any iterator that supports this operation. Most |
244 | //! collections provide reversible iterators as the way to iterate over them in |
245 | //! reverse order. |
246 | //! |
247 | //! ``` |
248 | //! let vec = vec![1, 2, 3, 4]; |
249 | //! for x in vec.iter().rev() { |
250 | //! println!("vec contained {x:?}" ); |
251 | //! } |
252 | //! ``` |
253 | //! |
254 | //! Several other collection methods also return iterators to yield a sequence |
255 | //! of results but avoid allocating an entire collection to store the result in. |
256 | //! This provides maximum flexibility as |
257 | //! [`collect`][crate::iter::Iterator::collect] or |
258 | //! [`extend`][crate::iter::Extend::extend] can be called to |
259 | //! "pipe" the sequence into any collection if desired. Otherwise, the sequence |
260 | //! can be looped over with a `for` loop. The iterator can also be discarded |
261 | //! after partial use, preventing the computation of the unused items. |
262 | //! |
263 | //! ## Entries |
264 | //! |
265 | //! The `entry` API is intended to provide an efficient mechanism for |
266 | //! manipulating the contents of a map conditionally on the presence of a key or |
267 | //! not. The primary motivating use case for this is to provide efficient |
268 | //! accumulator maps. For instance, if one wishes to maintain a count of the |
269 | //! number of times each key has been seen, they will have to perform some |
270 | //! conditional logic on whether this is the first time the key has been seen or |
271 | //! not. Normally, this would require a lookup (e.g. `get`) followed by an `insert`, |
272 | //! effectively duplicating the search effort on each insertion. |
273 | //! |
274 | //! When a user calls `map.entry(key)`, the map will search for the key and |
275 | //! then yield a variant of the `Entry` enum. |
276 | //! |
277 | //! If a `Vacant(entry)` is yielded, then the key *was not* found. In this case |
278 | //! the only valid operation is to `insert` a value into the entry. When this is |
279 | //! done, the vacant entry is consumed and converted into a mutable reference to |
280 | //! the value that was inserted. This allows for further manipulation of the |
281 | //! value beyond the lifetime of the search itself. This is useful if complex |
282 | //! logic needs to be performed on the value regardless of whether the value was |
283 | //! just inserted. |
284 | //! |
285 | //! If an `Occupied(entry)` is yielded, then the key *was* found. In this case, |
286 | //! the user has several options: they can `get`, `insert` or `remove` the |
287 | //! value of the occupied entry. Additionally, they can convert the occupied |
288 | //! entry into a mutable reference to its value, providing symmetry to the |
289 | //! vacant `insert` case. |
290 | //! |
291 | //! ### Examples |
292 | //! |
293 | //! Here are the two primary ways in which `entry` is used. First, a simple |
294 | //! example where the logic performed on the values is trivial. |
295 | //! |
296 | //! #### Counting the number of times each character in a string occurs |
297 | //! |
298 | //! ``` |
299 | //! use std::collections::btree_map::BTreeMap; |
300 | //! |
301 | //! let mut count = BTreeMap::new(); |
302 | //! let message = "she sells sea shells by the sea shore" ; |
303 | //! |
304 | //! for c in message.chars() { |
305 | //! *count.entry(c).or_insert(0) += 1; |
306 | //! } |
307 | //! |
308 | //! assert_eq!(count.get(&'s' ), Some(&8)); |
309 | //! |
310 | //! println!("Number of occurrences of each character" ); |
311 | //! for (char, count) in &count { |
312 | //! println!("{char}: {count}" ); |
313 | //! } |
314 | //! ``` |
315 | //! |
316 | //! When the logic to be performed on the value is more complex, we may simply |
317 | //! use the `entry` API to ensure that the value is initialized and perform the |
318 | //! logic afterwards. |
319 | //! |
320 | //! #### Tracking the inebriation of customers at a bar |
321 | //! |
322 | //! ``` |
323 | //! use std::collections::btree_map::BTreeMap; |
324 | //! |
325 | //! // A client of the bar. They have a blood alcohol level. |
326 | //! struct Person { blood_alcohol: f32 } |
327 | //! |
328 | //! // All the orders made to the bar, by client ID. |
329 | //! let orders = vec![1, 2, 1, 2, 3, 4, 1, 2, 2, 3, 4, 1, 1, 1]; |
330 | //! |
331 | //! // Our clients. |
332 | //! let mut blood_alcohol = BTreeMap::new(); |
333 | //! |
334 | //! for id in orders { |
335 | //! // If this is the first time we've seen this customer, initialize them |
336 | //! // with no blood alcohol. Otherwise, just retrieve them. |
337 | //! let person = blood_alcohol.entry(id).or_insert(Person { blood_alcohol: 0.0 }); |
338 | //! |
339 | //! // Reduce their blood alcohol level. It takes time to order and drink a beer! |
340 | //! person.blood_alcohol *= 0.9; |
341 | //! |
342 | //! // Check if they're sober enough to have another beer. |
343 | //! if person.blood_alcohol > 0.3 { |
344 | //! // Too drunk... for now. |
345 | //! println!("Sorry {id}, I have to cut you off" ); |
346 | //! } else { |
347 | //! // Have another! |
348 | //! person.blood_alcohol += 0.1; |
349 | //! } |
350 | //! } |
351 | //! ``` |
352 | //! |
353 | //! # Insert and complex keys |
354 | //! |
355 | //! If we have a more complex key, calls to `insert` will update the value |
356 | //! stored under an equal key, but will not replace the key itself. For example: |
357 | //! |
358 | //! ``` |
359 | //! use std::cmp::Ordering; |
360 | //! use std::collections::BTreeMap; |
361 | //! use std::hash::{Hash, Hasher}; |
362 | //! |
363 | //! #[derive(Debug)] |
364 | //! struct Foo { |
365 | //! a: u32, |
366 | //! b: &'static str, |
367 | //! } |
368 | //! |
369 | //! // we will compare `Foo`s by their `a` value only. |
370 | //! impl PartialEq for Foo { |
371 | //! fn eq(&self, other: &Self) -> bool { self.a == other.a } |
372 | //! } |
373 | //! |
374 | //! impl Eq for Foo {} |
375 | //! |
376 | //! // we will hash `Foo`s by their `a` value only. |
377 | //! impl Hash for Foo { |
378 | //! fn hash<H: Hasher>(&self, h: &mut H) { self.a.hash(h); } |
379 | //! } |
380 | //! |
381 | //! impl PartialOrd for Foo { |
382 | //! fn partial_cmp(&self, other: &Self) -> Option<Ordering> { self.a.partial_cmp(&other.a) } |
383 | //! } |
384 | //! |
385 | //! impl Ord for Foo { |
386 | //! fn cmp(&self, other: &Self) -> Ordering { self.a.cmp(&other.a) } |
387 | //! } |
388 | //! |
389 | //! let mut map = BTreeMap::new(); |
390 | //! map.insert(Foo { a: 1, b: "baz" }, 99); |
391 | //! |
392 | //! // We already have a Foo with an a of 1, so this will be updating the value. |
393 | //! map.insert(Foo { a: 1, b: "xyz" }, 100); |
394 | //! |
395 | //! // The value has been updated... |
396 | //! assert_eq!(map.values().next().unwrap(), &100); |
397 | //! |
398 | //! // ...but the key hasn't changed. b is still "baz", not "xyz". |
399 | //! assert_eq!(map.keys().next().unwrap().b, "baz" ); |
400 | //! ``` |
401 | |
402 | #![stable (feature = "rust1" , since = "1.0.0" )] |
403 | |
404 | #[stable (feature = "rust1" , since = "1.0.0" )] |
405 | // FIXME(#82080) The deprecation here is only theoretical, and does not actually produce a warning. |
406 | #[deprecated (note = "moved to `std::ops::Bound`" , since = "1.26.0" )] |
407 | #[doc (hidden)] |
408 | pub use crate::ops::Bound; |
409 | |
410 | #[stable (feature = "rust1" , since = "1.0.0" )] |
411 | pub use alloc_crate::collections::{binary_heap, btree_map, btree_set}; |
412 | #[stable (feature = "rust1" , since = "1.0.0" )] |
413 | pub use alloc_crate::collections::{linked_list, vec_deque}; |
414 | #[stable (feature = "rust1" , since = "1.0.0" )] |
415 | pub use alloc_crate::collections::{BTreeMap, BTreeSet, BinaryHeap}; |
416 | #[stable (feature = "rust1" , since = "1.0.0" )] |
417 | pub use alloc_crate::collections::{LinkedList, VecDeque}; |
418 | |
419 | #[stable (feature = "rust1" , since = "1.0.0" )] |
420 | #[doc (inline)] |
421 | pub use self::hash_map::HashMap; |
422 | #[stable (feature = "rust1" , since = "1.0.0" )] |
423 | #[doc (inline)] |
424 | pub use self::hash_set::HashSet; |
425 | |
426 | #[stable (feature = "try_reserve" , since = "1.57.0" )] |
427 | pub use alloc_crate::collections::TryReserveError; |
428 | #[unstable ( |
429 | feature = "try_reserve_kind" , |
430 | reason = "Uncertain how much info should be exposed" , |
431 | issue = "48043" |
432 | )] |
433 | pub use alloc_crate::collections::TryReserveErrorKind; |
434 | |
435 | mod hash; |
436 | |
437 | #[stable (feature = "rust1" , since = "1.0.0" )] |
438 | pub mod hash_map { |
439 | //! A hash map implemented with quadratic probing and SIMD lookup. |
440 | #[stable (feature = "rust1" , since = "1.0.0" )] |
441 | pub use super::hash::map::*; |
442 | |
443 | #[stable (feature = "hashmap_build_hasher" , since = "1.7.0" )] |
444 | pub use crate::hash::random::DefaultHasher; |
445 | #[stable (feature = "hashmap_build_hasher" , since = "1.7.0" )] |
446 | pub use crate::hash::random::RandomState; |
447 | } |
448 | |
449 | #[stable (feature = "rust1" , since = "1.0.0" )] |
450 | pub mod hash_set { |
451 | //! A hash set implemented as a `HashMap` where the value is `()`. |
452 | #[stable (feature = "rust1" , since = "1.0.0" )] |
453 | pub use super::hash::set::*; |
454 | } |
455 | |