<?php
/**
 * JavaScript Minifier
 *
 * @file
 * @author Paul Copperman <paul.copperman@gmail.com>
 * @license Apache-2.0
 * @license MIT
 * @license GPL-2.0-or-later
 * @license LGPL-2.1-or-later
 */
  12. /**
  13. * This class is meant to safely minify javascript code, while leaving syntactically correct
  14. * programs intact. Other libraries, such as JSMin require a certain coding style to work
  15. * correctly. OTOH, libraries like jsminplus, that do parse the code correctly are rather
  16. * slow, because they construct a complete parse tree before outputting the code minified.
  17. * So this class is meant to allow arbitrary (but syntactically correct) input, while being
  18. * fast enough to be used for on-the-fly minifying.
  19. *
  20. * This class was written with ECMA-262 Edition 3 in mind ("ECMAScript 3"). Parsing features
  21. * new to ECMAScript 5 or later might not be supported. However, Edition 5.1 better reflects
  22. * how actual JS engines worked and work and is simpler and more readable prose. As such,
  23. * the below code will refer to sections of the 5.1 specification.
  24. *
  25. * See <https://www.ecma-international.org/ecma-262/5.1/>.
  26. */
  27. class JavaScriptMinifier {
  28. /* Parsing states.
  29. * The state machine is only necessary to decide whether to parse a slash as division
  30. * operator or as regexp literal.
  31. * States are named after the next expected item. We only distinguish states when the
  32. * distinction is relevant for our purpose.
  33. */
  34. const STATEMENT = 0;
  35. const CONDITION = 1;
  36. const PROPERTY_ASSIGNMENT = 2;
  37. const EXPRESSION = 3;
  38. const EXPRESSION_NO_NL = 4; // only relevant for semicolon insertion
  39. const EXPRESSION_OP = 5;
  40. const EXPRESSION_FUNC = 6;
  41. const EXPRESSION_TERNARY = 7; // used to determine the role of a colon
  42. const EXPRESSION_TERNARY_OP = 8;
  43. const EXPRESSION_TERNARY_FUNC = 9;
  44. const PAREN_EXPRESSION = 10; // expression which is not on the top level
  45. const PAREN_EXPRESSION_OP = 11;
  46. const PAREN_EXPRESSION_FUNC = 12;
  47. const PROPERTY_EXPRESSION = 13; // expression which is within an object literal
  48. const PROPERTY_EXPRESSION_OP = 14;
  49. const PROPERTY_EXPRESSION_FUNC = 15;
  50. /* Token types */
  51. const TYPE_UN_OP = 101; // unary operators
  52. const TYPE_INCR_OP = 102; // ++ and --
  53. const TYPE_BIN_OP = 103; // binary operators
  54. const TYPE_ADD_OP = 104; // + and - which can be either unary or binary ops
  55. const TYPE_HOOK = 105; // ?
  56. const TYPE_COLON = 106; // :
  57. const TYPE_COMMA = 107; // ,
  58. const TYPE_SEMICOLON = 108; // ;
  59. const TYPE_BRACE_OPEN = 109; // {
  60. const TYPE_BRACE_CLOSE = 110; // }
  61. const TYPE_PAREN_OPEN = 111; // ( and [
  62. const TYPE_PAREN_CLOSE = 112; // ) and ]
  63. const TYPE_RETURN = 113; // keywords: break, continue, return, throw
  64. const TYPE_IF = 114; // keywords: catch, for, with, switch, while, if
  65. const TYPE_DO = 115; // keywords: case, var, finally, else, do, try
  66. const TYPE_FUNC = 116; // keywords: function
  67. const TYPE_LITERAL = 117; // all literals, identifiers and unrecognised tokens
  68. const ACTION_GOTO = 201;
  69. const ACTION_PUSH = 202;
  70. const ACTION_POP = 203;
  71. // Sanity limit to avoid excessive memory usage
  72. const STACK_LIMIT = 1000;
  73. /**
  74. * Maximum line length
  75. *
  76. * This is not a strict maximum, but a guideline. Longer lines will be
  77. * produced when literals (e.g. quoted strings) longer than this are
  78. * encountered, or when required to guard against semicolon insertion.
  79. *
  80. * This is a private member (instead of constant) to allow tests to
  81. * set it to 1, to verify ASI and line-breaking behaviour.
  82. */
  83. private static $maxLineLength = 1000;
  84. /**
  85. * Returns minified JavaScript code.
  86. *
  87. * @param string $s JavaScript code to minify
  88. * @return string Minified code
  89. */
  90. public static function minify( $s ) {
  91. // First we declare a few tables that contain our parsing rules
  92. // $opChars : Characters which can be combined without whitespace between them.
  93. $opChars = [
  94. // ECMAScript 5.1 § 7.7 Punctuators
  95. // Unlike the spec, these are individual symbols, not sequences.
  96. '{' => true,
  97. '}' => true,
  98. '(' => true,
  99. ')' => true,
  100. '[' => true,
  101. ']' => true,
  102. '.' => true,
  103. ';' => true,
  104. ',' => true,
  105. '<' => true,
  106. '>' => true,
  107. '=' => true,
  108. '!' => true,
  109. '+' => true,
  110. '-' => true,
  111. '*' => true,
  112. '%' => true,
  113. '&' => true,
  114. '|' => true,
  115. '^' => true,
  116. '~' => true,
  117. '?' => true,
  118. ':' => true,
  119. '/' => true,
  120. // ECMAScript 5.1 § 7.8.4 String Literals
  121. '"' => true,
  122. "'" => true,
  123. ];
  124. // $tokenTypes : Map keywords and operators to their corresponding token type
  125. $tokenTypes = [
  126. // ECMAScript 5.1 § 11.4 Unary Operators
  127. // ECMAScript 5.1 § 11.6 Additive Operators
  128. // UnaryExpression includes PostfixExpression, which includes 'new'.
  129. 'new' => self::TYPE_UN_OP,
  130. 'delete' => self::TYPE_UN_OP,
  131. 'void' => self::TYPE_UN_OP,
  132. 'typeof' => self::TYPE_UN_OP,
  133. '++' => self::TYPE_INCR_OP,
  134. '--' => self::TYPE_INCR_OP,
  135. '+' => self::TYPE_ADD_OP,
  136. '-' => self::TYPE_ADD_OP,
  137. '~' => self::TYPE_UN_OP,
  138. '!' => self::TYPE_UN_OP,
  139. // ECMAScript 5.1 § 11.5 Multiplicative Operators
  140. '*' => self::TYPE_BIN_OP,
  141. '/' => self::TYPE_BIN_OP,
  142. '%' => self::TYPE_BIN_OP,
  143. // ECMAScript 5.1 § 11.7 Bitwise Shift Operators
  144. '<<' => self::TYPE_BIN_OP,
  145. '>>' => self::TYPE_BIN_OP,
  146. '>>>' => self::TYPE_BIN_OP,
  147. // ECMAScript 5.1 § 11.8 Relational Operators
  148. '<' => self::TYPE_BIN_OP,
  149. '>' => self::TYPE_BIN_OP,
  150. '<=' => self::TYPE_BIN_OP,
  151. '>=' => self::TYPE_BIN_OP,
  152. // ECMAScript 5.1 § 11.9 Equality Operators
  153. '==' => self::TYPE_BIN_OP,
  154. '!=' => self::TYPE_BIN_OP,
  155. '===' => self::TYPE_BIN_OP,
  156. '!==' => self::TYPE_BIN_OP,
  157. 'instanceof' => self::TYPE_BIN_OP,
  158. 'in' => self::TYPE_BIN_OP,
  159. // ECMAScript 5.1 § 11.10 Binary Bitwise Operators
  160. '&' => self::TYPE_BIN_OP,
  161. '^' => self::TYPE_BIN_OP,
  162. '|' => self::TYPE_BIN_OP,
  163. // ECMAScript 5.1 § 11.11 Binary Logical Operators
  164. '&&' => self::TYPE_BIN_OP,
  165. '||' => self::TYPE_BIN_OP,
  166. // ECMAScript 5.1 § 11.12 Conditional Operator
  167. // Also known as ternary.
  168. '?' => self::TYPE_HOOK,
  169. ':' => self::TYPE_COLON,
  170. // ECMAScript 5.1 § 11.13 Assignment Operators
  171. '=' => self::TYPE_BIN_OP,
  172. '*=' => self::TYPE_BIN_OP,
  173. '/=' => self::TYPE_BIN_OP,
  174. '%=' => self::TYPE_BIN_OP,
  175. '+=' => self::TYPE_BIN_OP,
  176. '-=' => self::TYPE_BIN_OP,
  177. '<<=' => self::TYPE_BIN_OP,
  178. '>>=' => self::TYPE_BIN_OP,
  179. '>>>=' => self::TYPE_BIN_OP,
  180. '&=' => self::TYPE_BIN_OP,
  181. '^=' => self::TYPE_BIN_OP,
  182. '|=' => self::TYPE_BIN_OP,
  183. // ECMAScript 5.1 § 11.14 Comma Operator
  184. ',' => self::TYPE_COMMA,
  185. // The keywords that disallow LineTerminator before their
  186. // (sometimes optional) Expression or Identifier.
  187. //
  188. // keyword ;
  189. // keyword [no LineTerminator here] Identifier ;
  190. // keyword [no LineTerminator here] Expression ;
  191. //
  192. // See also ECMAScript 5.1:
  193. // - § 12.7 The continue Statement
  194. // - $ 12.8 The break Statement
  195. // - § 12.9 The return Statement
  196. // - § 12.13 The throw Statement
  197. 'continue' => self::TYPE_RETURN,
  198. 'break' => self::TYPE_RETURN,
  199. 'return' => self::TYPE_RETURN,
  200. 'throw' => self::TYPE_RETURN,
  201. // The keywords require a parenthesised Expression or Identifier
  202. // before the next Statement.
  203. //
  204. // keyword ( Expression ) Statement
  205. // keyword ( Identifier ) Statement
  206. //
  207. // See also ECMAScript 5.1:
  208. // - § 12.5 The if Statement
  209. // - § 12.6 Iteration Statements (do, while, for)
  210. // - § 12.10 The with Statement
  211. // - § 12.11 The switch Statement
  212. // - § 12.13 The throw Statement
  213. 'if' => self::TYPE_IF,
  214. 'catch' => self::TYPE_IF,
  215. 'while' => self::TYPE_IF,
  216. 'for' => self::TYPE_IF,
  217. 'switch' => self::TYPE_IF,
  218. 'with' => self::TYPE_IF,
  219. // The keywords followed by an Identifier, Statement,
  220. // Expression, or Block.
  221. //
  222. // var Identifier
  223. // else Statement
  224. // do Statement
  225. // case Expression
  226. // try Block
  227. // finally Block
  228. //
  229. // See also ECMAScript 5.1:
  230. // - § 12.2 Variable Statement
  231. // - § 12.5 The if Statement (else)
  232. // - § 12.6 Iteration Statements (do, while, for)
  233. // - § 12.11 The switch Statement (case)
  234. // - § 12.14 The try Statement
  235. 'var' => self::TYPE_DO,
  236. 'else' => self::TYPE_DO,
  237. 'do' => self::TYPE_DO,
  238. 'case' => self::TYPE_DO,
  239. 'try' => self::TYPE_DO,
  240. 'finally' => self::TYPE_DO,
  241. // ECMAScript 5.1 § 13 Function Definition
  242. 'function' => self::TYPE_FUNC,
  243. // Can be one of:
  244. // - DecimalLiteral (ECMAScript 5.1 § 7.8.3 Numeric Literals)
  245. // - MemberExpression (ECMAScript 5.1 § 11.2 Left-Hand-Side Expressions)
  246. '.' => self::TYPE_BIN_OP,
  247. // Can be one of:
  248. // - Block (ECMAScript 5.1 § 12.1 Block)
  249. // - ObjectLiteral (ECMAScript 5.1 § 11.1 Primary Expressions)
  250. '{' => self::TYPE_BRACE_OPEN,
  251. '}' => self::TYPE_BRACE_CLOSE,
  252. // Can be one of:
  253. // - Parenthesised Identifier or Expression after a
  254. // TYPE_IF or TYPE_FUNC keyword.
  255. // - PrimaryExpression (ECMAScript 5.1 § 11.1 Primary Expressions)
  256. // - CallExpression (ECMAScript 5.1 § 11.2 Left-Hand-Side Expressions)
  257. '(' => self::TYPE_PAREN_OPEN,
  258. ')' => self::TYPE_PAREN_CLOSE,
  259. // Can be one of:
  260. // - ArrayLiteral (ECMAScript 5.1 § 11.1 Primary Expressions)
  261. '[' => self::TYPE_PAREN_OPEN,
  262. ']' => self::TYPE_PAREN_CLOSE,
  263. // Can be one of:
  264. // - End of any statement
  265. // - EmptyStatement (ECMAScript 5.1 § 12.3 Empty Statement)
  266. ';' => self::TYPE_SEMICOLON,
  267. ];
  268. // $model : This is the main table for our state machine. For every state/token pair
  269. // the desired action is defined.
  270. //
  271. // The state pushed onto the stack by ACTION_PUSH will be returned to by ACTION_POP.
  272. //
  273. // A given state/token pair MAY NOT specify both ACTION_POP and ACTION_GOTO.
  274. // In the event of such mistake, ACTION_POP is used instead of ACTION_GOTO.
  275. $model = [
  276. // Statement - This is the initial state.
  277. self::STATEMENT => [
  278. self::TYPE_UN_OP => [
  279. self::ACTION_GOTO => self::EXPRESSION,
  280. ],
  281. self::TYPE_INCR_OP => [
  282. self::ACTION_GOTO => self::EXPRESSION,
  283. ],
  284. self::TYPE_ADD_OP => [
  285. self::ACTION_GOTO => self::EXPRESSION,
  286. ],
  287. self::TYPE_BRACE_OPEN => [
  288. // Use of '{' in statement context, creates a Block.
  289. self::ACTION_PUSH => self::STATEMENT,
  290. ],
  291. self::TYPE_BRACE_CLOSE => [
  292. // Ends a Block
  293. self::ACTION_POP => true,
  294. ],
  295. self::TYPE_PAREN_OPEN => [
  296. self::ACTION_PUSH => self::EXPRESSION_OP,
  297. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  298. ],
  299. self::TYPE_RETURN => [
  300. self::ACTION_GOTO => self::EXPRESSION_NO_NL,
  301. ],
  302. self::TYPE_IF => [
  303. self::ACTION_GOTO => self::CONDITION,
  304. ],
  305. self::TYPE_FUNC => [
  306. self::ACTION_GOTO => self::CONDITION,
  307. ],
  308. self::TYPE_LITERAL => [
  309. self::ACTION_GOTO => self::EXPRESSION_OP,
  310. ],
  311. ],
  312. self::CONDITION => [
  313. self::TYPE_PAREN_OPEN => [
  314. self::ACTION_PUSH => self::STATEMENT,
  315. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  316. ],
  317. ],
  318. // Property assignment - This is an object literal declaration.
  319. // For example: `{ key: value }`
  320. self::PROPERTY_ASSIGNMENT => [
  321. self::TYPE_COLON => [
  322. self::ACTION_GOTO => self::PROPERTY_EXPRESSION,
  323. ],
  324. self::TYPE_BRACE_OPEN => [
  325. self::ACTION_PUSH => self::PROPERTY_ASSIGNMENT,
  326. self::ACTION_GOTO => self::STATEMENT,
  327. ],
  328. self::TYPE_BRACE_CLOSE => [
  329. self::ACTION_POP => true,
  330. ],
  331. ],
  332. self::EXPRESSION => [
  333. self::TYPE_SEMICOLON => [
  334. self::ACTION_GOTO => self::STATEMENT,
  335. ],
  336. self::TYPE_BRACE_OPEN => [
  337. self::ACTION_PUSH => self::EXPRESSION_OP,
  338. self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
  339. ],
  340. self::TYPE_BRACE_CLOSE => [
  341. self::ACTION_POP => true,
  342. ],
  343. self::TYPE_PAREN_OPEN => [
  344. self::ACTION_PUSH => self::EXPRESSION_OP,
  345. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  346. ],
  347. self::TYPE_FUNC => [
  348. self::ACTION_GOTO => self::EXPRESSION_FUNC,
  349. ],
  350. self::TYPE_LITERAL => [
  351. self::ACTION_GOTO => self::EXPRESSION_OP,
  352. ],
  353. ],
  354. self::EXPRESSION_NO_NL => [
  355. self::TYPE_SEMICOLON => [
  356. self::ACTION_GOTO => self::STATEMENT,
  357. ],
  358. self::TYPE_BRACE_OPEN => [
  359. self::ACTION_PUSH => self::EXPRESSION_OP,
  360. self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
  361. ],
  362. self::TYPE_BRACE_CLOSE => [
  363. self::ACTION_POP => true,
  364. ],
  365. self::TYPE_PAREN_OPEN => [
  366. self::ACTION_PUSH => self::EXPRESSION_OP,
  367. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  368. ],
  369. self::TYPE_FUNC => [
  370. self::ACTION_GOTO => self::EXPRESSION_FUNC,
  371. ],
  372. self::TYPE_LITERAL => [
  373. self::ACTION_GOTO => self::EXPRESSION_OP,
  374. ],
  375. ],
  376. self::EXPRESSION_OP => [
  377. self::TYPE_BIN_OP => [
  378. self::ACTION_GOTO => self::EXPRESSION,
  379. ],
  380. self::TYPE_ADD_OP => [
  381. self::ACTION_GOTO => self::EXPRESSION,
  382. ],
  383. self::TYPE_HOOK => [
  384. self::ACTION_PUSH => self::EXPRESSION,
  385. self::ACTION_GOTO => self::EXPRESSION_TERNARY,
  386. ],
  387. self::TYPE_COLON => [
  388. self::ACTION_GOTO => self::STATEMENT,
  389. ],
  390. self::TYPE_COMMA => [
  391. self::ACTION_GOTO => self::EXPRESSION,
  392. ],
  393. self::TYPE_SEMICOLON => [
  394. self::ACTION_GOTO => self::STATEMENT,
  395. ],
  396. self::TYPE_PAREN_OPEN => [
  397. self::ACTION_PUSH => self::EXPRESSION_OP,
  398. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  399. ],
  400. self::TYPE_BRACE_CLOSE => [
  401. self::ACTION_POP => true,
  402. ],
  403. ],
  404. self::EXPRESSION_FUNC => [
  405. self::TYPE_BRACE_OPEN => [
  406. self::ACTION_PUSH => self::EXPRESSION_OP,
  407. self::ACTION_GOTO => self::STATEMENT,
  408. ],
  409. ],
  410. self::EXPRESSION_TERNARY => [
  411. self::TYPE_BRACE_OPEN => [
  412. self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP,
  413. self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
  414. ],
  415. self::TYPE_PAREN_OPEN => [
  416. self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP,
  417. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  418. ],
  419. self::TYPE_FUNC => [
  420. self::ACTION_GOTO => self::EXPRESSION_TERNARY_FUNC,
  421. ],
  422. self::TYPE_LITERAL => [
  423. self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP,
  424. ],
  425. ],
  426. self::EXPRESSION_TERNARY_OP => [
  427. self::TYPE_BIN_OP => [
  428. self::ACTION_GOTO => self::EXPRESSION_TERNARY,
  429. ],
  430. self::TYPE_ADD_OP => [
  431. self::ACTION_GOTO => self::EXPRESSION_TERNARY,
  432. ],
  433. self::TYPE_HOOK => [
  434. self::ACTION_PUSH => self::EXPRESSION_TERNARY,
  435. self::ACTION_GOTO => self::EXPRESSION_TERNARY,
  436. ],
  437. self::TYPE_COMMA => [
  438. self::ACTION_GOTO => self::EXPRESSION_TERNARY,
  439. ],
  440. self::TYPE_PAREN_OPEN => [
  441. self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP,
  442. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  443. ],
  444. self::TYPE_COLON => [
  445. self::ACTION_POP => true,
  446. ],
  447. ],
  448. self::EXPRESSION_TERNARY_FUNC => [
  449. self::TYPE_BRACE_OPEN => [
  450. self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP,
  451. self::ACTION_GOTO => self::STATEMENT,
  452. ],
  453. ],
  454. self::PAREN_EXPRESSION => [
  455. self::TYPE_BRACE_OPEN => [
  456. self::ACTION_PUSH => self::PAREN_EXPRESSION_OP,
  457. self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
  458. ],
  459. self::TYPE_PAREN_OPEN => [
  460. self::ACTION_PUSH => self::PAREN_EXPRESSION_OP,
  461. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  462. ],
  463. self::TYPE_PAREN_CLOSE => [
  464. self::ACTION_POP => true,
  465. ],
  466. self::TYPE_FUNC => [
  467. self::ACTION_GOTO => self::PAREN_EXPRESSION_FUNC,
  468. ],
  469. self::TYPE_LITERAL => [
  470. self::ACTION_GOTO => self::PAREN_EXPRESSION_OP,
  471. ],
  472. ],
  473. self::PAREN_EXPRESSION_OP => [
  474. self::TYPE_BIN_OP => [
  475. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  476. ],
  477. self::TYPE_ADD_OP => [
  478. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  479. ],
  480. self::TYPE_HOOK => [
  481. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  482. ],
  483. self::TYPE_COLON => [
  484. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  485. ],
  486. self::TYPE_COMMA => [
  487. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  488. ],
  489. self::TYPE_SEMICOLON => [
  490. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  491. ],
  492. self::TYPE_PAREN_OPEN => [
  493. self::ACTION_PUSH => self::PAREN_EXPRESSION_OP,
  494. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  495. ],
  496. self::TYPE_PAREN_CLOSE => [
  497. self::ACTION_POP => true,
  498. ],
  499. ],
  500. self::PAREN_EXPRESSION_FUNC => [
  501. self::TYPE_BRACE_OPEN => [
  502. self::ACTION_PUSH => self::PAREN_EXPRESSION_OP,
  503. self::ACTION_GOTO => self::STATEMENT,
  504. ],
  505. ],
  506. // Property expression - The value of a key in an object literal.
  507. self::PROPERTY_EXPRESSION => [
  508. self::TYPE_BRACE_OPEN => [
  509. self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP,
  510. self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
  511. ],
  512. self::TYPE_BRACE_CLOSE => [
  513. self::ACTION_POP => true,
  514. ],
  515. self::TYPE_PAREN_OPEN => [
  516. self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP,
  517. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  518. ],
  519. self::TYPE_FUNC => [
  520. self::ACTION_GOTO => self::PROPERTY_EXPRESSION_FUNC,
  521. ],
  522. self::TYPE_LITERAL => [
  523. self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP,
  524. ],
  525. ],
  526. self::PROPERTY_EXPRESSION_OP => [
  527. self::TYPE_BIN_OP => [
  528. self::ACTION_GOTO => self::PROPERTY_EXPRESSION,
  529. ],
  530. self::TYPE_ADD_OP => [
  531. self::ACTION_GOTO => self::PROPERTY_EXPRESSION,
  532. ],
  533. self::TYPE_HOOK => [
  534. self::ACTION_PUSH => self::PROPERTY_EXPRESSION,
  535. self::ACTION_GOTO => self::EXPRESSION_TERNARY,
  536. ],
  537. self::TYPE_COMMA => [
  538. self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
  539. ],
  540. self::TYPE_BRACE_OPEN => [
  541. self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP,
  542. ],
  543. self::TYPE_BRACE_CLOSE => [
  544. self::ACTION_POP => true,
  545. ],
  546. self::TYPE_PAREN_OPEN => [
  547. self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP,
  548. self::ACTION_GOTO => self::PAREN_EXPRESSION,
  549. ],
  550. ],
  551. self::PROPERTY_EXPRESSION_FUNC => [
  552. self::TYPE_BRACE_OPEN => [
  553. self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP,
  554. self::ACTION_GOTO => self::STATEMENT,
  555. ],
  556. ],
  557. ];
  558. // $semicolon : Rules for when a semicolon insertion is appropriate
  559. $semicolon = [
  560. self::EXPRESSION_NO_NL => [
  561. self::TYPE_UN_OP => true,
  562. self::TYPE_INCR_OP => true,
  563. self::TYPE_ADD_OP => true,
  564. self::TYPE_BRACE_OPEN => true,
  565. self::TYPE_PAREN_OPEN => true,
  566. self::TYPE_RETURN => true,
  567. self::TYPE_IF => true,
  568. self::TYPE_DO => true,
  569. self::TYPE_FUNC => true,
  570. self::TYPE_LITERAL => true
  571. ],
  572. self::EXPRESSION_OP => [
  573. self::TYPE_UN_OP => true,
  574. self::TYPE_INCR_OP => true,
  575. self::TYPE_BRACE_OPEN => true,
  576. self::TYPE_RETURN => true,
  577. self::TYPE_IF => true,
  578. self::TYPE_DO => true,
  579. self::TYPE_FUNC => true,
  580. self::TYPE_LITERAL => true
  581. ]
  582. ];
  583. // $divStates : Contains all states that can be followed by a division operator
  584. $divStates = [
  585. self::EXPRESSION_OP => true,
  586. self::EXPRESSION_TERNARY_OP => true,
  587. self::PAREN_EXPRESSION_OP => true,
  588. self::PROPERTY_EXPRESSION_OP => true
  589. ];
  590. // Here's where the minifying takes place: Loop through the input, looking for tokens
  591. // and output them to $out, taking actions to the above defined rules when appropriate.
  592. $out = '';
  593. $pos = 0;
  594. $length = strlen( $s );
  595. $lineLength = 0;
  596. $newlineFound = true;
  597. $state = self::STATEMENT;
  598. $stack = [];
  599. $last = ';'; // Pretend that we have seen a semicolon yet
  600. while ( $pos < $length ) {
  601. // First, skip over any whitespace and multiline comments, recording whether we
  602. // found any newline character
  603. $skip = strspn( $s, " \t\n\r\xb\xc", $pos );
  604. if ( !$skip ) {
  605. $ch = $s[$pos];
  606. if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
  607. // Multiline comment. Search for the end token or EOT.
  608. $end = strpos( $s, '*/', $pos + 2 );
  609. $skip = $end === false ? $length - $pos : $end - $pos + 2;
  610. }
  611. }
  612. if ( $skip ) {
  613. // The semicolon insertion mechanism needs to know whether there was a newline
  614. // between two tokens, so record it now.
  615. if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
  616. $newlineFound = true;
  617. }
  618. $pos += $skip;
  619. continue;
  620. }
  621. // Handle C++-style comments and html comments, which are treated as single line
  622. // comments by the browser, regardless of whether the end tag is on the same line.
  623. // Handle --> the same way, but only if it's at the beginning of the line
  624. if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
  625. || ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
  626. || ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
  627. ) {
  628. $pos += strcspn( $s, "\r\n", $pos );
  629. continue;
  630. }
  631. // Find out which kind of token we're handling.
  632. // Note: $end must point past the end of the current token
  633. // so that `substr($s, $pos, $end - $pos)` would be the entire token.
  634. // In order words, $end will be the offset of the last relevant character
  635. // in the stream + 1, or simply put: The offset of the first character
  636. // of any next token in the stream.
  637. $end = $pos + 1;
  638. // Handle string literals
  639. if ( $ch === "'" || $ch === '"' ) {
  640. // Search to the end of the string literal, skipping over backslash escapes
  641. $search = $ch . '\\';
  642. do{
  643. // Speculatively add 2 to the end so that if we see a backslash,
  644. // the next iteration will start 2 characters further (one for the
  645. // backslash, one for the escaped character).
  646. // We'll correct this outside the loop.
  647. $end += strcspn( $s, $search, $end ) + 2;
  648. // If the last character in our search for a quote or a backlash
  649. // matched a backslash and we haven't reached the end, keep searching..
  650. } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
  651. // Correction (1): Undo speculative add, keep only one (end of string literal)
  652. $end--;
  653. if ( $end > $length ) {
  654. // Correction (2): Loop wrongly assumed an end quote ended the search,
  655. // but search ended because we've reached the end. Correct $end.
  656. // TODO: This is invalid and should throw.
  657. $end--;
  658. }
  659. // We have to distinguish between regexp literals and division operators
  660. // A division operator is only possible in certain states
  661. } elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
  662. // Regexp literal
  663. for ( ; ; ) {
  664. // Search until we find "/" (end of regexp), "\" (backslash escapes),
  665. // or "[" (start of character classes).
  666. do{
  667. // Speculatively add 2 to ensure next iteration skips
  668. // over backslash and escaped character.
  669. // We'll correct this outside the loop.
  670. $end += strcspn( $s, '/[\\', $end ) + 2;
  671. // If backslash escape, keep searching...
  672. } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
  673. // Correction (1): Undo speculative add, keep only one (end of regexp)
  674. $end--;
  675. if ( $end > $length ) {
  676. // Correction (2): Loop wrongly assumed end slash was seen
  677. // String ended without end of regexp. Correct $end.
  678. // TODO: This is invalid and should throw.
  679. $end--;
  680. break;
  681. }
  682. if ( $s[$end - 1] === '/' ) {
  683. break;
  684. }
  685. // (Implicit else), we must've found the start of a char class,
  686. // skip until we find "]" (end of char class), or "\" (backslash escape)
  687. do{
  688. // Speculatively add 2 for backslash escape.
  689. // We'll substract one outside the loop.
  690. $end += strcspn( $s, ']\\', $end ) + 2;
  691. // If backslash escape, keep searching...
  692. } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
  693. // Correction (1): Undo speculative add, keep only one (end of regexp)
  694. $end--;
  695. if ( $end > $length ) {
  696. // Correction (2): Loop wrongly assumed "]" was seen
  697. // String ended without ending char class or regexp. Correct $end.
  698. // TODO: This is invalid and should throw.
  699. $end--;
  700. break;
  701. }
  702. }
  703. // Search past the regexp modifiers (gi)
  704. while ( $end < $length && ctype_alpha( $s[$end] ) ) {
  705. $end++;
  706. }
  707. } elseif (
  708. $ch === '0'
  709. && ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
  710. ) {
  711. // Hex numeric literal
  712. $end++; // x or X
  713. $len = strspn( $s, '0123456789ABCDEFabcdef', $end );
  714. if ( !$len ) {
  715. return self::parseError(
  716. $s,
  717. $pos,
  718. 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...'
  719. );
  720. }
  721. $end += $len;
  722. } elseif (
  723. ctype_digit( $ch )
  724. || ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
  725. ) {
  726. $end += strspn( $s, '0123456789', $end );
  727. $decimal = strspn( $s, '.', $end );
  728. if ( $decimal ) {
  729. if ( $decimal > 2 ) {
  730. return self::parseError( $s, $end, 'The number has too many decimal points' );
  731. }
  732. $end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
  733. }
  734. $exponent = strspn( $s, 'eE', $end );
  735. if ( $exponent ) {
  736. if ( $exponent > 1 ) {
  737. return self::parseError( $s, $end, 'Number with several E' );
  738. }
  739. $end++;
  740. // + sign is optional; - sign is required.
  741. $end += strspn( $s, '-+', $end );
  742. $len = strspn( $s, '0123456789', $end );
  743. if ( !$len ) {
  744. return self::parseError(
  745. $s,
  746. $pos,
  747. 'No decimal digits after e, how many zeroes should be added?'
  748. );
  749. }
  750. $end += $len;
  751. }
  752. } elseif ( isset( $opChars[$ch] ) ) {
  753. // Punctuation character. Search for the longest matching operator.
  754. while (
  755. $end < $length
  756. && isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
  757. ) {
  758. $end++;
  759. }
  760. } else {
  761. // Identifier or reserved word. Search for the end by excluding whitespace and
  762. // punctuation.
  763. $end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
  764. }
  765. // Now get the token type from our type array
  766. $token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
  767. $type = $tokenTypes[$token] ?? self::TYPE_LITERAL;
  768. if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
  769. // This token triggers the semicolon insertion mechanism of javascript. While we
  770. // could add the ; token here ourselves, keeping the newline has a few advantages.
  771. $out .= "\n";
  772. $state = self::STATEMENT;
  773. $lineLength = 0;
  774. } elseif ( $lineLength + $end - $pos > self::$maxLineLength &&
  775. !isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP ) {
  776. // This line would get too long if we added $token, so add a newline first.
  777. // Only do this if it won't trigger semicolon insertion and if it won't
  778. // put a postfix increment operator on its own line, which is illegal in js.
  779. $out .= "\n";
  780. $lineLength = 0;
  781. // Check, whether we have to separate the token from the last one with whitespace
  782. } elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
  783. $out .= ' ';
  784. $lineLength++;
  785. // Don't accidentally create ++, -- or // tokens
  786. } elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
  787. $out .= ' ';
  788. $lineLength++;
  789. }
  790. if (
  791. $type === self::TYPE_LITERAL
  792. && ( $token === 'true' || $token === 'false' )
  793. && ( $state === self::EXPRESSION || $state === self::PROPERTY_EXPRESSION )
  794. && $last !== '.'
  795. ) {
  796. $token = ( $token === 'true' ) ? '!0' : '!1';
  797. }
  798. $out .= $token;
  799. $lineLength += $end - $pos; // += strlen( $token )
  800. $last = $s[$end - 1];
  801. $pos = $end;
  802. $newlineFound = false;
  803. // Now that we have output our token, transition into the new state.
  804. if ( isset( $model[$state][$type][self::ACTION_PUSH] ) &&
  805. count( $stack ) < self::STACK_LIMIT
  806. ) {
  807. $stack[] = $model[$state][$type][self::ACTION_PUSH];
  808. }
  809. if ( $stack && isset( $model[$state][$type][self::ACTION_POP] ) ) {
  810. $state = array_pop( $stack );
  811. } elseif ( isset( $model[$state][$type][self::ACTION_GOTO] ) ) {
  812. $state = $model[$state][$type][self::ACTION_GOTO];
  813. }
  814. }
  815. return $out;
  816. }
  817. static function parseError( $fullJavascript, $position, $errorMsg ) {
  818. // TODO: Handle the error: trigger_error, throw exception, return false...
  819. return false;
  820. }
  821. }