nsCyrillicDetector.h 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #ifndef nsCyrillicDetector_h__
  6. #define nsCyrillicDetector_h__
  7. #include "nsCyrillicClass.h"
  8. // {2002F781-3960-11d3-B3C3-00805F8A6670}
  9. #define NS_RU_PROBDETECTOR_CID \
  10. { 0x2002f781, 0x3960, 0x11d3, { 0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
  11. // {2002F782-3960-11d3-B3C3-00805F8A6670}
  12. #define NS_UK_PROBDETECTOR_CID \
  13. { 0x2002f782, 0x3960, 0x11d3, { 0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
  14. // {2002F783-3960-11d3-B3C3-00805F8A6670}
  15. #define NS_RU_STRING_PROBDETECTOR_CID \
  16. { 0x2002f783, 0x3960, 0x11d3, { 0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
  17. // {2002F784-3960-11d3-B3C3-00805F8A6670}
  18. #define NS_UK_STRING_PROBDETECTOR_CID \
  19. { 0x2002f784, 0x3960, 0x11d3, { 0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
  20. static const uint8_t *gCyrillicCls[5] =
  21. {
  22. CP1251Map,
  23. KOI8Map,
  24. ISO88595Map,
  25. MacCyrillicMap,
  26. IBM866Map
  27. };
  28. static const char * gRussian[5] = {
  29. "windows-1251",
  30. "KOI8-R",
  31. "ISO-8859-5",
  32. "x-mac-cyrillic",
  33. "IBM866"
  34. };
  35. static const char * gUkrainian[5] = {
  36. "windows-1251",
  37. "KOI8-U",
  38. "ISO-8859-5",
  39. "x-mac-cyrillic",
  40. "IBM866"
  41. };
  42. #define NUM_CYR_CHARSET 5
  43. class nsCyrillicDetector
  44. {
  45. public:
  46. nsCyrillicDetector(uint8_t aItems,
  47. const uint8_t ** aCyrillicClass,
  48. const char **aCharsets) {
  49. mItems = aItems;
  50. mCyrillicClass = aCyrillicClass;
  51. mCharsets = aCharsets;
  52. for(unsigned i=0;i<mItems;i++)
  53. mProb[i] = mLastCls[i] =0;
  54. mDone = false;
  55. }
  56. virtual ~nsCyrillicDetector() {}
  57. virtual void HandleData(const char* aBuf, uint32_t aLen);
  58. virtual void DataEnd();
  59. protected:
  60. virtual void Report(const char* aCharset) = 0;
  61. bool mDone;
  62. private:
  63. uint8_t mItems;
  64. const uint8_t ** mCyrillicClass;
  65. const char** mCharsets;
  66. uint32_t mProb[NUM_CYR_CHARSET];
  67. uint8_t mLastCls[NUM_CYR_CHARSET];
  68. };
  69. class nsCyrXPCOMDetector :
  70. public nsCyrillicDetector,
  71. public nsICharsetDetector
  72. {
  73. public:
  74. // nsISupports interface
  75. NS_DECL_ISUPPORTS
  76. nsCyrXPCOMDetector(uint8_t aItems,
  77. const uint8_t ** aCyrillicClass,
  78. const char **aCharsets);
  79. NS_IMETHOD Init(nsICharsetDetectionObserver* aObserver) override;
  80. NS_IMETHOD DoIt(const char* aBuf, uint32_t aLen, bool *oDontFeedMe) override;
  81. NS_IMETHOD Done() override;
  82. protected:
  83. virtual ~nsCyrXPCOMDetector();
  84. virtual void Report(const char* aCharset) override;
  85. private:
  86. nsCOMPtr<nsICharsetDetectionObserver> mObserver;
  87. };
  88. class nsCyrXPCOMStringDetector :
  89. public nsCyrillicDetector,
  90. public nsIStringCharsetDetector
  91. {
  92. public:
  93. // nsISupports interface
  94. NS_DECL_ISUPPORTS
  95. nsCyrXPCOMStringDetector(uint8_t aItems,
  96. const uint8_t ** aCyrillicClass,
  97. const char **aCharsets);
  98. NS_IMETHOD DoIt(const char* aBuf, uint32_t aLen,
  99. const char** oCharset, nsDetectionConfident &oConf) override;
  100. protected:
  101. virtual ~nsCyrXPCOMStringDetector();
  102. virtual void Report(const char* aCharset) override;
  103. private:
  104. nsCOMPtr<nsICharsetDetectionObserver> mObserver;
  105. const char* mResult;
  106. };
  107. class nsRUProbDetector : public nsCyrXPCOMDetector
  108. {
  109. public:
  110. nsRUProbDetector()
  111. : nsCyrXPCOMDetector(5, gCyrillicCls, gRussian) {}
  112. };
  113. class nsRUStringProbDetector : public nsCyrXPCOMStringDetector
  114. {
  115. public:
  116. nsRUStringProbDetector()
  117. : nsCyrXPCOMStringDetector(5, gCyrillicCls, gRussian) {}
  118. };
  119. class nsUKProbDetector : public nsCyrXPCOMDetector
  120. {
  121. public:
  122. nsUKProbDetector()
  123. : nsCyrXPCOMDetector(5, gCyrillicCls, gUkrainian) {}
  124. };
  125. class nsUKStringProbDetector : public nsCyrXPCOMStringDetector
  126. {
  127. public:
  128. nsUKStringProbDetector()
  129. : nsCyrXPCOMStringDetector(5, gCyrillicCls, gUkrainian) {}
  130. };
  131. #endif