| 1 | // Take a look at the license at the top of the repository in the LICENSE file. |
| 2 | |
| 3 | use crate::js::token::{Keyword, Operation, ReservedChar, Token, Tokens}; |
| 4 | use std::vec::IntoIter; |
| 5 | |
/// Generator of successive short names, one character per nesting level.
///
/// A name is rendered as `prepend` (if any), followed by `letter`, followed by
/// the rendering of `lower` (if any). Each call to [`VariableNameGenerator::next`]
/// advances the generator to the following name.
pub(crate) struct VariableNameGenerator<'a> {
    /// Character contributed by this level of the name.
    letter: char,
    /// Generator producing the characters that follow `letter`, if any.
    lower: Option<Box<VariableNameGenerator<'a>>>,
    /// Optional text emitted right before `letter`.
    prepend: Option<&'a str>,
}

impl<'a> VariableNameGenerator<'a> {
    /// Creates a generator whose first name is `prepend` followed by `'a'`
    /// repeated `nb_letter` times (a single `'a'` when `nb_letter` is 0 or 1).
    ///
    /// Only the outermost level carries `prepend`; nested levels have none.
    pub(crate) fn new(prepend: Option<&'a str>, nb_letter: usize) -> VariableNameGenerator<'a> {
        let lower = if nb_letter > 1 {
            Some(Box::new(VariableNameGenerator::new(None, nb_letter - 1)))
        } else {
            None
        };
        VariableNameGenerator {
            letter: 'a',
            lower,
            prepend,
        }
    }

    /// Advances the generator to the next name.
    pub(crate) fn next(&mut self) {
        self.incr_letters();
    }

    /// Renders the current name.
    ///
    /// The result contains nothing but the prefixes and letters of every level,
    /// so its byte length always equals [`VariableNameGenerator::len`].
    // Kept as an inherent method (rather than a `Display` impl) so existing
    // callers keep working unchanged.
    #[allow(clippy::inherent_to_string)]
    pub(crate) fn to_string(&self) -> String {
        let mut name = String::with_capacity(self.len());
        let mut level = Some(self);
        while let Some(generator) = level {
            if let Some(prefix) = generator.prepend {
                name.push_str(prefix);
            }
            name.push(generator.letter);
            level = generator.lower.as_deref();
        }
        name
    }

    /// Returns the length, in bytes, of the name currently produced by
    /// [`VariableNameGenerator::to_string`].
    pub(crate) fn len(&self) -> usize {
        let own = self.prepend.map_or(0, str::len) + 1;
        own + self.lower.as_ref().map_or(0, |lower| lower.len())
    }

    /// Moves `letter` one step along the cycle `a..=z`, `A..=Z`, `0..=9`.
    ///
    /// When the cycle wraps from `'9'` back to `'a'`, the carry is propagated
    /// to `lower`, which is created (starting at `'a'`) if it did not exist yet.
    pub(crate) fn incr_letters(&mut self) {
        match self.letter {
            'z' => self.letter = 'A',
            'Z' => self.letter = '0',
            '9' => {
                self.letter = 'a';
                if let Some(ref mut lower) = self.lower {
                    lower.incr_letters();
                } else {
                    self.lower = Some(Box::new(VariableNameGenerator::new(None, 1)));
                }
            }
            // Inside one of the three ranges the next character is simply the
            // next ASCII code point.
            current => self.letter = ((current as u8) + 1) as char,
        }
    }
}
| 79 | |
| 80 | /// Replace given tokens with others. |
| 81 | /// |
| 82 | /// # Example |
| 83 | /// |
| 84 | /// ```rust |
| 85 | /// extern crate minifier; |
| 86 | /// use minifier::js::{Keyword, Token, replace_tokens_with, simple_minify}; |
| 87 | /// |
| 88 | /// fn main() { |
| 89 | /// let js = r#" |
| 90 | /// function replaceByNull(data, func) { |
| 91 | /// for (var i = 0; i < data.length; ++i) { |
| 92 | /// if func(data[i]) { |
| 93 | /// data[i] = null; |
| 94 | /// } |
| 95 | /// } |
| 96 | /// } |
| 97 | /// }"# .into(); |
| 98 | /// let js_minified = simple_minify(js) |
| 99 | /// .apply(|f| { |
| 100 | /// replace_tokens_with(f, |t| { |
| 101 | /// if *t == Token::Keyword(Keyword::Null) { |
| 102 | /// Some(Token::Other("N" )) |
| 103 | /// } else { |
| 104 | /// None |
| 105 | /// } |
| 106 | /// }) |
| 107 | /// }); |
| 108 | /// println!("{}" , js_minified.to_string()); |
| 109 | /// } |
| 110 | /// ``` |
| 111 | /// |
| 112 | /// The previous code will have all its `null` keywords replaced with `N`. In such cases, |
| 113 | /// don't forget to include the definition of `N` in the returned minified javascript: |
| 114 | /// |
| 115 | /// ```js |
| 116 | /// var N = null; |
| 117 | /// ``` |
| 118 | #[inline ] |
| 119 | pub fn replace_tokens_with<'a, 'b: 'a, F: Fn(&Token<'a>) -> Option<Token<'b>>>( |
| 120 | mut tokens: Tokens<'a>, |
| 121 | callback: F, |
| 122 | ) -> Tokens<'a> { |
| 123 | for token: &mut Token<'a> in tokens.0.iter_mut() { |
| 124 | if let Some(t: Token<'b>) = callback(token) { |
| 125 | *token = t; |
| 126 | } |
| 127 | } |
| 128 | tokens |
| 129 | } |
| 130 | |
| 131 | /// Replace a given token with another. |
| 132 | #[inline ] |
| 133 | pub fn replace_token_with<'a, 'b: 'a, F: Fn(&Token<'a>) -> Option<Token<'b>>>( |
| 134 | token: Token<'a>, |
| 135 | callback: &F, |
| 136 | ) -> Token<'a> { |
| 137 | if let Some(t: Token<'b>) = callback(&token) { |
| 138 | t |
| 139 | } else { |
| 140 | token |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | /// When looping over `Tokens`, if you encounter `Keyword::Var`, `Keyword::Let` or |
| 145 | /// `Token::Other` using this function will allow you to get the variable name's |
| 146 | /// position and the variable value's position (if any). |
| 147 | /// |
| 148 | /// ## Note |
| 149 | /// |
| 150 | /// It'll return the value only if there is an `Operation::Equal` found. |
| 151 | /// |
| 152 | /// # Examples |
| 153 | /// |
| 154 | /// ``` |
| 155 | /// extern crate minifier; |
| 156 | /// use minifier::js::{Keyword, get_variable_name_and_value_positions, simple_minify}; |
| 157 | /// |
| 158 | /// fn main() { |
| 159 | /// let source = r#"var x = 1;var z;var y = "2";"# ; |
| 160 | /// let mut result = Vec::new(); |
| 161 | /// |
| 162 | /// let tokens = simple_minify(source); |
| 163 | /// |
| 164 | /// for pos in 0..tokens.len() { |
| 165 | /// match tokens[pos].get_keyword() { |
| 166 | /// Some(k) if k == Keyword::Let || k == Keyword::Var => { |
| 167 | /// if let Some(x) = get_variable_name_and_value_positions(&tokens, pos) { |
| 168 | /// result.push(x); |
| 169 | /// } |
| 170 | /// } |
| 171 | /// _ => {} |
| 172 | /// } |
| 173 | /// } |
| 174 | /// assert_eq!(result, vec![(2, Some(6)), (10, None), (14, Some(22))]); |
| 175 | /// } |
| 176 | /// ``` |
| 177 | pub fn get_variable_name_and_value_positions<'a>( |
| 178 | tokens: &'a Tokens<'a>, |
| 179 | pos: usize, |
| 180 | ) -> Option<(usize, Option<usize>)> { |
| 181 | if pos >= tokens.len() { |
| 182 | return None; |
| 183 | } |
| 184 | let mut tmp = pos; |
| 185 | match tokens[pos] { |
| 186 | Token::Keyword(Keyword::Let) | Token::Keyword(Keyword::Var) => { |
| 187 | tmp += 1; |
| 188 | } |
| 189 | Token::Other(_) if pos > 0 => { |
| 190 | let mut pos = pos - 1; |
| 191 | while pos > 0 { |
| 192 | if tokens[pos].is_comment() || tokens[pos].is_white_character() { |
| 193 | pos -= 1; |
| 194 | } else if tokens[pos] == Token::Char(ReservedChar::Comma) |
| 195 | || tokens[pos] == Token::Keyword(Keyword::Let) |
| 196 | || tokens[pos] == Token::Keyword(Keyword::Var) |
| 197 | { |
| 198 | break; |
| 199 | } else { |
| 200 | return None; |
| 201 | } |
| 202 | } |
| 203 | } |
| 204 | _ => return None, |
| 205 | } |
| 206 | while tmp < tokens.len() { |
| 207 | if tokens[tmp].is_other() { |
| 208 | let mut tmp2 = tmp + 1; |
| 209 | while tmp2 < tokens.len() { |
| 210 | if tokens[tmp2] == Token::Operation(Operation::Equal) { |
| 211 | tmp2 += 1; |
| 212 | while tmp2 < tokens.len() { |
| 213 | let token = &tokens[tmp2]; |
| 214 | if token.is_string() |
| 215 | || token.is_other() |
| 216 | || token.is_regex() |
| 217 | || token.is_number() |
| 218 | || token.is_floating_number() |
| 219 | { |
| 220 | return Some((tmp, Some(tmp2))); |
| 221 | } else if !tokens[tmp2].is_comment() && !tokens[tmp2].is_white_character() { |
| 222 | break; |
| 223 | } |
| 224 | tmp2 += 1; |
| 225 | } |
| 226 | break; |
| 227 | } else if matches!( |
| 228 | tokens[tmp2].get_char(), |
| 229 | Some(ReservedChar::Comma) | Some(ReservedChar::SemiColon) |
| 230 | ) { |
| 231 | return Some((tmp, None)); |
| 232 | } else if !(tokens[tmp2].is_comment() |
| 233 | || tokens[tmp2].is_white_character() |
| 234 | && tokens[tmp2].get_char() != Some(ReservedChar::Backline)) |
| 235 | { |
| 236 | break; |
| 237 | } |
| 238 | tmp2 += 1; |
| 239 | } |
| 240 | } else { |
| 241 | // We don't care about syntax errors. |
| 242 | } |
| 243 | tmp += 1; |
| 244 | } |
| 245 | None |
| 246 | } |
| 247 | |
| 248 | #[inline ] |
| 249 | fn get_next<'a>(it: &mut IntoIter<Token<'a>>) -> Option<Token<'a>> { |
| 250 | for t: Token<'a> in it { |
| 251 | if t.is_comment() || t.is_white_character() { |
| 252 | continue; |
| 253 | } |
| 254 | return Some(t); |
| 255 | } |
| 256 | None |
| 257 | } |
| 258 | |
| 259 | /// Convenient function used to clean useless tokens in a token list. |
| 260 | /// |
| 261 | /// # Example |
| 262 | /// |
| 263 | /// ```rust,no_run |
| 264 | /// extern crate minifier; |
| 265 | /// |
| 266 | /// use minifier::js::{clean_tokens, simple_minify}; |
| 267 | /// use std::fs; |
| 268 | /// |
| 269 | /// fn main() { |
| 270 | /// let content = fs::read("some_file.js" ).expect("file not found" ); |
| 271 | /// let source = String::from_utf8_lossy(&content); |
| 272 | /// let s = simple_minify(&source); // First we get the tokens list. |
| 273 | /// let s = s.apply(clean_tokens); // We now have a cleaned token list! |
| 274 | /// println!("result: {:?}" , s); |
| 275 | /// } |
| 276 | /// ``` |
| 277 | pub fn clean_tokens(tokens: Tokens<'_>) -> Tokens<'_> { |
| 278 | let mut v = Vec::with_capacity(tokens.len() / 3 * 2); |
| 279 | let mut it = tokens.0.into_iter(); |
| 280 | |
| 281 | loop { |
| 282 | let token = get_next(&mut it); |
| 283 | if token.is_none() { |
| 284 | break; |
| 285 | } |
| 286 | let token = token.unwrap(); |
| 287 | if token.is_white_character() { |
| 288 | continue; |
| 289 | } else if token.get_char() == Some(ReservedChar::SemiColon) { |
| 290 | if v.is_empty() { |
| 291 | continue; |
| 292 | } |
| 293 | if let Some(next) = get_next(&mut it) { |
| 294 | if next != Token::Char(ReservedChar::CloseCurlyBrace) { |
| 295 | v.push(token); |
| 296 | } |
| 297 | v.push(next); |
| 298 | } |
| 299 | continue; |
| 300 | } |
| 301 | v.push(token); |
| 302 | } |
| 303 | v.into() |
| 304 | } |
| 305 | |
| 306 | /// Returns true if the token is a "useful" one (so not a comment or a "useless" |
| 307 | /// character). |
| 308 | pub fn clean_token(token: &Token<'_>, next_token: &Option<&Token<'_>>) -> bool { |
| 309 | !token.is_comment() && { |
| 310 | if let Some(x: ReservedChar) = token.get_char() { |
| 311 | !x.is_white_character() |
| 312 | && (x != ReservedChar::SemiColon |
| 313 | || *next_token != Some(&Token::Char(ReservedChar::CloseCurlyBrace))) |
| 314 | } else { |
| 315 | true |
| 316 | } |
| 317 | } |
| 318 | } |
| 319 | |
| 320 | #[inline ] |
| 321 | fn get_next_except<'a, F: Fn(&Token<'a>) -> bool>( |
| 322 | it: &mut IntoIter<Token<'a>>, |
| 323 | f: &F, |
| 324 | ) -> Option<Token<'a>> { |
| 325 | for t: Token<'a> in it { |
| 326 | if (t.is_comment() || t.is_white_character()) && f(&t) { |
| 327 | continue; |
| 328 | } |
| 329 | return Some(t); |
| 330 | } |
| 331 | None |
| 332 | } |
| 333 | |
| 334 | /// Same as `clean_tokens` except that if a token is considered as not desired, |
| 335 | /// the callback is called. If the callback returns `false` as well, it will |
| 336 | /// be removed. |
| 337 | /// |
| 338 | /// # Example |
| 339 | /// |
| 340 | /// ```rust,no_run |
| 341 | /// extern crate minifier; |
| 342 | /// |
| 343 | /// use minifier::js::{clean_tokens_except, simple_minify, ReservedChar}; |
| 344 | /// use std::fs; |
| 345 | /// |
| 346 | /// fn main() { |
| 347 | /// let content = fs::read("some_file.js" ).expect("file not found" ); |
| 348 | /// let source = String::from_utf8_lossy(&content); |
| 349 | /// let s = simple_minify(&source); // First we get the tokens list. |
| 350 | /// let s = s.apply(|f| { |
| 351 | /// clean_tokens_except(f, |c| { |
| 352 | /// c.get_char() != Some(ReservedChar::Backline) |
| 353 | /// }) |
| 354 | /// }); // We now have a cleaned token list which kept backlines! |
| 355 | /// println!("result: {:?}" , s); |
| 356 | /// } |
| 357 | /// ``` |
| 358 | pub fn clean_tokens_except<'a, F: Fn(&Token<'a>) -> bool>(tokens: Tokens<'a>, f: F) -> Tokens<'a> { |
| 359 | let mut v = Vec::with_capacity(tokens.len() / 3 * 2); |
| 360 | let mut it = tokens.0.into_iter(); |
| 361 | |
| 362 | loop { |
| 363 | let token = get_next_except(&mut it, &f); |
| 364 | if token.is_none() { |
| 365 | break; |
| 366 | } |
| 367 | let token = token.unwrap(); |
| 368 | if token.is_white_character() { |
| 369 | if f(&token) { |
| 370 | continue; |
| 371 | } |
| 372 | } else if token.get_char() == Some(ReservedChar::SemiColon) { |
| 373 | if v.is_empty() { |
| 374 | if !f(&token) { |
| 375 | v.push(token); |
| 376 | } |
| 377 | continue; |
| 378 | } |
| 379 | if let Some(next) = get_next_except(&mut it, &f) { |
| 380 | if next != Token::Char(ReservedChar::CloseCurlyBrace) || !f(&token) { |
| 381 | v.push(token); |
| 382 | } |
| 383 | v.push(next); |
| 384 | } else if !f(&token) { |
| 385 | v.push(token); |
| 386 | } |
| 387 | continue; |
| 388 | } |
| 389 | v.push(token); |
| 390 | } |
| 391 | v.into() |
| 392 | } |
| 393 | |
| 394 | /// Returns true if the token is a "useful" one (so not a comment or a "useless" |
| 395 | /// character). |
| 396 | #[inline ] |
| 397 | pub fn clean_token_except<'a, F: Fn(&Token<'a>) -> bool>( |
| 398 | token: &Token<'a>, |
| 399 | next_token: &Option<&Token<'_>>, |
| 400 | f: &F, |
| 401 | ) -> bool { |
| 402 | if !clean_token(token, next_token) { |
| 403 | !f(token) |
| 404 | } else { |
| 405 | true |
| 406 | } |
| 407 | } |
| 408 | |
| 409 | pub(crate) fn get_array<'a>( |
| 410 | tokens: &'a Tokens<'a>, |
| 411 | array_name: &str, |
| 412 | ) -> Option<(Vec<usize>, usize)> { |
| 413 | let mut ret = Vec::new(); |
| 414 | |
| 415 | let mut looking_for_var = false; |
| 416 | let mut looking_for_equal = false; |
| 417 | let mut looking_for_array_start = false; |
| 418 | let mut getting_values = false; |
| 419 | |
| 420 | for pos in 0..tokens.len() { |
| 421 | if looking_for_var { |
| 422 | match tokens[pos] { |
| 423 | Token::Other(s) => { |
| 424 | looking_for_var = false; |
| 425 | if s == array_name { |
| 426 | looking_for_equal = true; |
| 427 | } |
| 428 | } |
| 429 | ref s => { |
| 430 | looking_for_var = s.is_comment() || s.is_white_character(); |
| 431 | } |
| 432 | } |
| 433 | } else if looking_for_equal { |
| 434 | match tokens[pos] { |
| 435 | Token::Operation(Operation::Equal) => { |
| 436 | looking_for_equal = false; |
| 437 | looking_for_array_start = true; |
| 438 | } |
| 439 | ref s => { |
| 440 | looking_for_equal = s.is_comment() || s.is_white_character(); |
| 441 | } |
| 442 | } |
| 443 | } else if looking_for_array_start { |
| 444 | match tokens[pos] { |
| 445 | Token::Char(ReservedChar::OpenBracket) => { |
| 446 | looking_for_array_start = false; |
| 447 | getting_values = true; |
| 448 | } |
| 449 | ref s => { |
| 450 | looking_for_array_start = s.is_comment() || s.is_white_character(); |
| 451 | } |
| 452 | } |
| 453 | } else if getting_values { |
| 454 | match &tokens[pos] { |
| 455 | Token::Char(ReservedChar::CloseBracket) => { |
| 456 | return Some((ret, pos)); |
| 457 | } |
| 458 | s if s.is_comment() || s.is_white_character() => {} |
| 459 | _ => { |
| 460 | ret.push(pos); |
| 461 | } |
| 462 | } |
| 463 | } else { |
| 464 | match tokens[pos] { |
| 465 | Token::Keyword(Keyword::Let) | Token::Keyword(Keyword::Var) => { |
| 466 | looking_for_var = true; |
| 467 | } |
| 468 | _ => {} |
| 469 | } |
| 470 | } |
| 471 | } |
| 472 | None |
| 473 | } |
| 474 | |
/// Checks that `get_array` reports the position of the closing bracket of each
/// declared array, and finds nothing for non-array or unknown variables.
#[test]
fn check_get_array() {
    // The whitespace of this fixture is significant: the expected positions
    // below only line up if every white character counts as one token, with two
    // of them between the first pair of brackets and five (the newline plus the
    // indentation) before `12`.
    let source = r#"var x = [  ]; var y = ['hello',
    12]; var z = []; var w = 12;"#;

    let tokens = crate::js::token::tokenize(source);

    for &(name, closing_bracket) in &[("x", 9), ("y", 27), ("z", 37)] {
        let ar = get_array(&tokens, name);
        assert!(ar.is_some());
        assert_eq!(ar.unwrap().1, closing_bracket);
    }

    // `w` is declared but is not an array, `W` is not declared at all.
    for name in &["w", "W"] {
        assert!(get_array(&tokens, name).is_none());
    }
}
| 500 | |
/// Checks the name/value positions reported for a declaration list, first on
/// the raw token list and then on the cleaned one.
#[test]
fn check_get_variable_name_and_value_positions() {
    /// Queries every position of `tokens`, resuming right after each name found.
    fn collect_positions<'a>(tokens: &'a Tokens<'a>) -> Vec<(usize, Option<usize>)> {
        let mut found = Vec::new();
        let mut pos = 0;

        while pos < tokens.len() {
            if let Some(x) = get_variable_name_and_value_positions(tokens, pos) {
                found.push(x);
                pos = x.0;
            }
            pos += 1;
        }
        found
    }

    // The extra spaces around the second `=` are significant: the expected
    // positions only line up if every white character counts as one token.
    let source = r#"var x = 1;var y   =   "2",we=4;"#;

    let tokens = crate::js::token::tokenize(source);
    assert_eq!(
        collect_positions(&tokens),
        vec![(2, Some(6)), (10, Some(18)), (20, Some(22))]
    );

    // Once cleaned, no white character is left in the list.
    let tokens = crate::js::clean_tokens(tokens);
    assert_eq!(
        collect_positions(&tokens),
        vec![(1, Some(3)), (6, Some(8)), (10, Some(12))]
    );
}
| 531 | |
/// Checks that every `null` keyword of a cleaned token list gets replaced.
#[test]
fn replace_tokens() {
    /// Maps the `null` keyword to `N` and leaves every other token alone.
    fn null_to_n<'a>(token: &Token<'a>) -> Option<Token<'a>> {
        if *token == Token::Keyword(Keyword::Null) {
            Some(Token::Other("N"))
        } else {
            None
        }
    }

    let source = r#"
var x = ['a', 'b', null, 'd', {'x': null, 'e': null, 'z': 'w'}];
var n = null;
"#;
    let expected_result = "var x=['a','b',N,'d',{'x':N,'e':N,'z':'w'}];var n=N";

    let cleaned = crate::js::simple_minify(source).apply(crate::js::clean_tokens);
    let res = cleaned.apply(|f| replace_tokens_with(f, null_to_n));

    assert_eq!(res.to_string(), expected_result);
}
| 553 | |
/// Checks that tokens can be cleaned and replaced one by one while iterating,
/// using `clean_token` with a peek at the following token.
#[test]
fn check_iterator() {
    let source = r#"
var x = ['a', 'b', null, 'd', {'x': null, 'e': null, 'z': 'w'}];
var n = null;
"#;
    let expected_result = "var x=['a','b',N,'d',{'x':N,'e':N,'z':'w'}];var n=N;";

    let mut iter = crate::js::simple_minify(source).into_iter().peekable();
    let mut kept = Vec::new();
    loop {
        let token = match iter.next() {
            Some(token) => token,
            None => break,
        };
        if !crate::js::clean_token(&token, &iter.peek()) {
            continue;
        }
        if token == Token::Keyword(Keyword::Null) {
            kept.push(Token::Other("N"));
        } else {
            kept.push(token);
        }
    }
    let tokens: Tokens = kept.into();
    assert_eq!(tokens.to_string(), expected_result);
}
| 576 | |