entityTries.js 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. // entityTries.js
  2. // Builds integer-keyed tries so the decoder never allocates a string object
  3. // during lookup — every key is a plain charCode number.
  4. //
  5. // trie1: Map<code0, entity>
  6. // trie2: Map<code0, Map<code1, entity>>
  7. // trie3: Map<code0, Map<code1, Map<code2, entity>>>
  8. import { ALL_ENTITIES } from './entities.js';
  // Inverted view of ALL_ENTITIES: character sequence → "&name;".
  // Entries are visited in Object.entries() order. When several names share
  // the same character sequence, the name visited last supplies the value,
  // while the key keeps the position of its first occurrence.
  const CHAR_TO_ENTITY = new Map(
    Object.entries(ALL_ENTITIES).map(
      ([entityName, sequence]) => [sequence, `&${entityName};`],
    ),
  );
  // Exported lookup tables keyed by UTF-16 code units (charCodeAt values).
  // Each table holds the sequences of exactly one length:
  //   trie1: code0 → entity string
  //   trie2: code0 → Map(code1 → entity string)
  //   trie3: code0 → Map(code1 → Map(code2 → entity string))
  export const trie1 = new Map();
  export const trie2 = new Map();
  export const trie3 = new Map();

  // Distribute every reverse-map entry into the table matching its length.
  // The tables are independent, so a one-unit sequence never collides with a
  // longer sequence starting with the same unit. Per the original author's
  // note, the consumer is expected to try trie3/trie2 before falling back to
  // trie1 (that lookup code is not part of this file — confirm at the caller).
  for (const [sequence, entityRef] of CHAR_TO_ENTITY) {
    switch (sequence.length) {
      case 1:
        trie1.set(sequence.charCodeAt(0), entityRef);
        break;

      case 2: {
        const head = sequence.charCodeAt(0);
        // Create the second level lazily the first time this head is seen.
        if (!trie2.has(head)) {
          trie2.set(head, new Map());
        }
        trie2.get(head).set(sequence.charCodeAt(1), entityRef);
        break;
      }

      case 3: {
        const head = sequence.charCodeAt(0);
        const middle = sequence.charCodeAt(1);
        if (!trie3.has(head)) {
          trie3.set(head, new Map());
        }
        const secondLevel = trie3.get(head);
        if (!secondLevel.has(middle)) {
          secondLevel.set(middle, new Map());
        }
        secondLevel.get(middle).set(sequence.charCodeAt(2), entityRef);
        break;
      }

      default:
        // Sequences of any other length are skipped. The original note states
        // that HTML5 has no named entity whose character sequence is longer
        // than 3 chars.
        break;
    }
  }