EncodingUtils.h 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  4. * You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #ifndef mozilla_dom_encodingutils_h_
  6. #define mozilla_dom_encodingutils_h_
  7. #include "nsDataHashtable.h"
  8. #include "nsString.h"
  9. class nsIUnicodeDecoder;
  10. class nsIUnicodeEncoder;
  11. namespace mozilla {
  12. namespace dom {
  13. class EncodingUtils // Helper class: every member visible here is static and the constructor is deleted.
  14. {
  15. public:
  16. /**
  17. * Implements the "get an encoding" algorithm from the Encoding spec.
  18. * http://encoding.spec.whatwg.org/#concept-encoding-get
  19. * Given a label, this function returns the corresponding encoding, or
  20. * false if there is none.
  21. * The returned name may not be lowercased due to compatibility with
  22. * our internal implementations.
  23. *
  24. * @param aLabel, incoming label describing charset to be decoded.
  25. * @param aOutEncoding, returning corresponding encoding for label.
  26. * @return false if no encoding was found for label.
  27. * true if valid encoding found.
  28. */
  29. static bool FindEncodingForLabel(const nsACString& aLabel,
  30. nsACString& aOutEncoding);
  31. static bool FindEncodingForLabel(const nsAString& aLabel,
  32. nsACString& aOutEncoding)
  33. { // nsAString overload: passes aLabel through NS_ConvertUTF16toUTF8 and forwards to the nsACString overload.
  34. return FindEncodingForLabel(NS_ConvertUTF16toUTF8(aLabel), aOutEncoding);
  35. }
  36. /**
  37. * Like FindEncodingForLabel(), except that labels which map to
  38. * "replacement" are treated as unknown.
  39. *
  40. * @param aLabel, incoming label describing charset to be decoded.
  41. * @param aOutEncoding, returning corresponding encoding for label.
  42. * @return false if no encoding was found for label.
  43. * true if valid encoding found.
  44. */
  45. static bool FindEncodingForLabelNoReplacement(const nsACString& aLabel,
  46. nsACString& aOutEncoding);
  47. static bool FindEncodingForLabelNoReplacement(const nsAString& aLabel,
  48. nsACString& aOutEncoding)
  49. { // nsAString overload: passes aLabel through NS_ConvertUTF16toUTF8 and forwards to the nsACString overload.
  50. return FindEncodingForLabelNoReplacement(NS_ConvertUTF16toUTF8(aLabel),
  51. aOutEncoding);
  52. }
  53. /**
  54. * Remove any leading and trailing space characters, following the
  55. * definition of space characters from the Encoding spec.
  56. * http://encoding.spec.whatwg.org/#terminology
  57. * Note that nsAString::StripWhitespace() doesn't exactly match the
  58. * definition. It also removes all matching chars in the string,
  59. * not just leading and trailing.
  60. *
  61. * @param aString, string to be trimmed; it is modified in place via its Trim() method.
  62. */
  63. template<class T>
  64. static void TrimSpaceCharacters(T& aString)
  65. {
  66. aString.Trim(" \t\n\f\r"); // Trim set: space, tab, line feed, form feed, carriage return.
  67. }
  68. /**
  69. * Check whether the encoding is ASCII-compatible in the sense that Basic
  70. * Latin encodes to ASCII bytes. (The reverse may not be true!)
  71. *
  72. * @param aPreferredName a preferred encoding label
  73. * @return whether the encoding is ASCII-compatible
  74. */
  75. static bool IsAsciiCompatible(const nsACString& aPreferredName);
  76. /**
  77. * Instantiates a decoder for an encoding. The input must be a
  78. * Gecko-canonical encoding name.
  79. * @param aEncoding a Gecko-canonical encoding name (NOTE(review): presumably must be non-null and null-terminated; confirm)
  80. * @return a decoder
  81. */
  82. static already_AddRefed<nsIUnicodeDecoder>
  83. DecoderForEncoding(const char* aEncoding)
  84. { // const char* overload: wraps aEncoding in an nsDependentCString and forwards to the nsACString overload.
  85. nsDependentCString encoding(aEncoding);
  86. return DecoderForEncoding(encoding);
  87. }
  88. /**
  89. * Instantiates a decoder for an encoding. The input must be a
  90. * Gecko-canonical encoding name.
  91. * @param aEncoding a Gecko-canonical encoding name
  92. * @return a decoder
  93. */
  94. static already_AddRefed<nsIUnicodeDecoder>
  95. DecoderForEncoding(const nsACString& aEncoding);
  96. /**
  97. * Instantiates an encoder for an encoding. The input must be a
  98. * Gecko-canonical encoding name.
  99. * @param aEncoding a Gecko-canonical encoding name (NOTE(review): presumably must be non-null and null-terminated; confirm)
  100. * @return an encoder
  101. */
  102. static already_AddRefed<nsIUnicodeEncoder>
  103. EncoderForEncoding(const char* aEncoding)
  104. { // const char* overload: wraps aEncoding in an nsDependentCString and forwards to the nsACString overload.
  105. nsDependentCString encoding(aEncoding);
  106. return EncoderForEncoding(encoding);
  107. }
  108. /**
  109. * Instantiates an encoder for an encoding. The input must be a
  110. * Gecko-canonical encoding name.
  111. * @param aEncoding a Gecko-canonical encoding name
  112. * @return an encoder
  113. */
  114. static already_AddRefed<nsIUnicodeEncoder>
  115. EncoderForEncoding(const nsACString& aEncoding);
  116. /**
  117. * Finds a Gecko language group string (e.g. x-western) for a Gecko-canonical
  118. * encoding name.
  119. *
  120. * @param aEncoding, a Gecko-canonical encoding name.
  121. * @param aOutGroup, returning corresponding language group.
  122. */
  123. static void LangGroupForEncoding(const nsACString& aEncoding,
  124. nsACString& aOutGroup);
  125. private:
  126. EncodingUtils() = delete; // Default constructor is deleted, so the class cannot be default-constructed.
  127. };
  128. } // namespace dom
  129. } // namespace mozilla
  130. #endif // mozilla_dom_encodingutils_h_