| 1 | // This file is part of ICU4X. For terms of use, please see the file |
| 2 | // called LICENSE at the top level of the ICU4X source tree |
| 3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
| 4 | |
| 5 | use crate::codepointtrie::CodePointMapRange; |
| 6 | |
| 7 | /// This is an iterator that coalesces adjacent ranges in an iterator over code |
| 8 | /// point ranges |
| 9 | pub(crate) struct RangeListIteratorCoalescer<I, T> { |
| 10 | iter: I, |
| 11 | peek: Option<CodePointMapRange<T>>, |
| 12 | } |
| 13 | |
| 14 | impl<I, T: Eq> RangeListIteratorCoalescer<I, T> |
| 15 | where |
| 16 | I: Iterator<Item = CodePointMapRange<T>>, |
| 17 | { |
| 18 | pub fn new(iter: I) -> Self { |
| 19 | Self { iter, peek: None } |
| 20 | } |
| 21 | } |
| 22 | |
| 23 | impl<I, T: Eq> Iterator for RangeListIteratorCoalescer<I, T> |
| 24 | where |
| 25 | I: Iterator<Item = CodePointMapRange<T>>, |
| 26 | { |
| 27 | type Item = CodePointMapRange<T>; |
| 28 | |
| 29 | fn next(&mut self) -> Option<Self::Item> { |
| 30 | // Get the initial range we're working with: either a leftover |
| 31 | // range from last time, or the next range |
| 32 | let mut ret = if let Some(peek) = self.peek.take() { |
| 33 | peek |
| 34 | } else if let Some(next) = self.iter.next() { |
| 35 | next |
| 36 | } else { |
| 37 | // No ranges, exit early |
| 38 | return None; |
| 39 | }; |
| 40 | |
| 41 | // Keep pulling ranges |
| 42 | #[allow (clippy::while_let_on_iterator)] |
| 43 | // can't move the iterator, also we want it to be explicit that we're not draining the iterator |
| 44 | while let Some(next) = self.iter.next() { |
| 45 | if *next.range.start() == ret.range.end() + 1 && next.value == ret.value { |
| 46 | // Range has no gap, coalesce |
| 47 | ret.range = *ret.range.start()..=*next.range.end(); |
| 48 | } else { |
| 49 | // Range has a gap, return what we have so far, update |
| 50 | // peek |
| 51 | self.peek = Some(next); |
| 52 | return Some(ret); |
| 53 | } |
| 54 | } |
| 55 | |
| 56 | // Ran out of elements, exit |
| 57 | Some(ret) |
| 58 | } |
| 59 | } |
| 60 | |
| 61 | #[cfg (test)] |
| 62 | mod tests { |
| 63 | use core::fmt::Debug; |
| 64 | use icu::collections::codepointinvlist::CodePointInversionListBuilder; |
| 65 | use icu::collections::codepointtrie::TrieValue; |
| 66 | use icu::properties::maps::{self, CodePointMapDataBorrowed}; |
| 67 | use icu::properties::sets::{self, CodePointSetDataBorrowed}; |
| 68 | use icu::properties::{GeneralCategory, Script}; |
| 69 | |
| 70 | fn test_set(data: CodePointSetDataBorrowed<'static>, name: &str) { |
| 71 | let mut builder = CodePointInversionListBuilder::new(); |
| 72 | let mut builder_complement = CodePointInversionListBuilder::new(); |
| 73 | |
| 74 | for range in data.iter_ranges() { |
| 75 | builder.add_range32(&range) |
| 76 | } |
| 77 | |
| 78 | for range in data.iter_ranges_complemented() { |
| 79 | builder_complement.add_range32(&range) |
| 80 | } |
| 81 | |
| 82 | builder.complement(); |
| 83 | let set1 = builder.build(); |
| 84 | let set2 = builder_complement.build(); |
| 85 | assert_eq!(set1, set2, "Set {name} failed to complement correctly" ); |
| 86 | } |
| 87 | |
| 88 | fn test_map<T: TrieValue + Debug>( |
| 89 | data: &CodePointMapDataBorrowed<'static, T>, |
| 90 | value: T, |
| 91 | name: &str, |
| 92 | ) { |
| 93 | let mut builder = CodePointInversionListBuilder::new(); |
| 94 | let mut builder_complement = CodePointInversionListBuilder::new(); |
| 95 | |
| 96 | for range in data.iter_ranges_for_value(value) { |
| 97 | builder.add_range32(&range) |
| 98 | } |
| 99 | |
| 100 | for range in data.iter_ranges_for_value_complemented(value) { |
| 101 | builder_complement.add_range32(&range) |
| 102 | } |
| 103 | |
| 104 | builder.complement(); |
| 105 | let set1 = builder.build(); |
| 106 | let set2 = builder_complement.build(); |
| 107 | assert_eq!( |
| 108 | set1, set2, |
| 109 | "Map {name} failed to complement correctly with value {value:?}" |
| 110 | ); |
| 111 | } |
| 112 | |
| 113 | #[test ] |
| 114 | fn test_complement_sets() { |
| 115 | // Stress test the RangeListIteratorComplementer logic by ensuring it works for |
| 116 | // a whole bunch of binary properties |
| 117 | test_set(sets::ascii_hex_digit(), "ASCII_Hex_Digit" ); |
| 118 | test_set(sets::alnum(), "Alnum" ); |
| 119 | test_set(sets::alphabetic(), "Alphabetic" ); |
| 120 | test_set(sets::bidi_control(), "Bidi_Control" ); |
| 121 | test_set(sets::bidi_mirrored(), "Bidi_Mirrored" ); |
| 122 | test_set(sets::blank(), "Blank" ); |
| 123 | test_set(sets::cased(), "Cased" ); |
| 124 | test_set(sets::case_ignorable(), "Case_Ignorable" ); |
| 125 | test_set( |
| 126 | sets::full_composition_exclusion(), |
| 127 | "Full_Composition_Exclusion" , |
| 128 | ); |
| 129 | test_set(sets::changes_when_casefolded(), "Changes_When_Casefolded" ); |
| 130 | test_set(sets::changes_when_casemapped(), "Changes_When_Casemapped" ); |
| 131 | test_set( |
| 132 | sets::changes_when_nfkc_casefolded(), |
| 133 | "Changes_When_NFKC_Casefolded" , |
| 134 | ); |
| 135 | test_set(sets::changes_when_lowercased(), "Changes_When_Lowercased" ); |
| 136 | test_set(sets::changes_when_titlecased(), "Changes_When_Titlecased" ); |
| 137 | test_set(sets::changes_when_uppercased(), "Changes_When_Uppercased" ); |
| 138 | test_set(sets::dash(), "Dash" ); |
| 139 | test_set(sets::deprecated(), "Deprecated" ); |
| 140 | test_set( |
| 141 | sets::default_ignorable_code_point(), |
| 142 | "Default_Ignorable_Code_Point" , |
| 143 | ); |
| 144 | test_set(sets::diacritic(), "Diacritic" ); |
| 145 | test_set(sets::emoji_modifier_base(), "Emoji_Modifier_Base" ); |
| 146 | test_set(sets::emoji_component(), "Emoji_Component" ); |
| 147 | test_set(sets::emoji_modifier(), "Emoji_Modifier" ); |
| 148 | test_set(sets::emoji(), "Emoji" ); |
| 149 | test_set(sets::emoji_presentation(), "Emoji_Presentation" ); |
| 150 | test_set(sets::extender(), "Extender" ); |
| 151 | test_set(sets::extended_pictographic(), "Extended_Pictographic" ); |
| 152 | test_set(sets::graph(), "Graph" ); |
| 153 | test_set(sets::grapheme_base(), "Grapheme_Base" ); |
| 154 | test_set(sets::grapheme_extend(), "Grapheme_Extend" ); |
| 155 | test_set(sets::grapheme_link(), "Grapheme_Link" ); |
| 156 | test_set(sets::hex_digit(), "Hex_Digit" ); |
| 157 | test_set(sets::hyphen(), "Hyphen" ); |
| 158 | test_set(sets::id_continue(), "Id_Continue" ); |
| 159 | test_set(sets::ideographic(), "Ideographic" ); |
| 160 | test_set(sets::id_start(), "Id_Start" ); |
| 161 | test_set(sets::ids_binary_operator(), "Ids_Binary_Operator" ); |
| 162 | test_set(sets::ids_trinary_operator(), "Ids_Trinary_Operator" ); |
| 163 | test_set(sets::join_control(), "Join_Control" ); |
| 164 | test_set(sets::logical_order_exception(), "Logical_Order_Exception" ); |
| 165 | test_set(sets::lowercase(), "Lowercase" ); |
| 166 | test_set(sets::math(), "Math" ); |
| 167 | test_set(sets::noncharacter_code_point(), "Noncharacter_Code_Point" ); |
| 168 | test_set(sets::nfc_inert(), "NFC_Inert" ); |
| 169 | test_set(sets::nfd_inert(), "NFD_Inert" ); |
| 170 | test_set(sets::nfkc_inert(), "NFKC_Inert" ); |
| 171 | test_set(sets::nfkd_inert(), "NFKD_Inert" ); |
| 172 | test_set(sets::pattern_syntax(), "Pattern_Syntax" ); |
| 173 | test_set(sets::pattern_white_space(), "Pattern_White_Space" ); |
| 174 | test_set( |
| 175 | sets::prepended_concatenation_mark(), |
| 176 | "Prepended_Concatenation_Mark" , |
| 177 | ); |
| 178 | test_set(sets::print(), "Print" ); |
| 179 | test_set(sets::quotation_mark(), "Quotation_Mark" ); |
| 180 | test_set(sets::radical(), "Radical" ); |
| 181 | test_set(sets::regional_indicator(), "Regional_Indicator" ); |
| 182 | test_set(sets::soft_dotted(), "Soft_Dotted" ); |
| 183 | test_set(sets::segment_starter(), "Segment_Starter" ); |
| 184 | test_set(sets::case_sensitive(), "Case_Sensitive" ); |
| 185 | test_set(sets::sentence_terminal(), "Sentence_Terminal" ); |
| 186 | test_set(sets::terminal_punctuation(), "Terminal_Punctuation" ); |
| 187 | test_set(sets::unified_ideograph(), "Unified_Ideograph" ); |
| 188 | test_set(sets::uppercase(), "Uppercase" ); |
| 189 | test_set(sets::variation_selector(), "Variation_Selector" ); |
| 190 | test_set(sets::white_space(), "White_Space" ); |
| 191 | test_set(sets::xdigit(), "Xdigit" ); |
| 192 | test_set(sets::xid_continue(), "XID_Continue" ); |
| 193 | test_set(sets::xid_start(), "XID_Start" ); |
| 194 | } |
| 195 | |
| 196 | #[test ] |
| 197 | fn test_complement_maps() { |
| 198 | let gc = maps::general_category(); |
| 199 | let script = maps::script(); |
| 200 | test_map(&gc, GeneralCategory::UppercaseLetter, "gc" ); |
| 201 | test_map(&gc, GeneralCategory::OtherPunctuation, "gc" ); |
| 202 | test_map(&script, Script::Devanagari, "script" ); |
| 203 | test_map(&script, Script::Latin, "script" ); |
| 204 | test_map(&script, Script::Common, "script" ); |
| 205 | } |
| 206 | } |
| 207 | |