nsCyrillicDetector.cpp 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #include "nscore.h"
  6. #include "nsCyrillicProb.h"
  7. #include <stdio.h>
  8. #include "nsCOMPtr.h"
  9. #include "nsISupports.h"
  10. #include "nsICharsetDetector.h"
  11. #include "nsICharsetDetectionObserver.h"
  12. #include "nsIStringCharsetDetector.h"
  13. #include "nsCyrillicDetector.h"
  14. //----------------------------------------------------------------------
  15. // Interface nsISupports [implementation]
  // One NS_IMPL_ISUPPORTS line per XPCOM-facing detector class defined in this
  // file; each names the class and the single interface it is paired with.
  // NOTE(review): presumably expands to the AddRef/Release/QueryInterface
  // boilerplate for that class -- confirm against the macro's definition.
  16. NS_IMPL_ISUPPORTS(nsCyrXPCOMDetector, nsICharsetDetector)
  17. NS_IMPL_ISUPPORTS(nsCyrXPCOMStringDetector, nsIStringCharsetDetector)
  18. void nsCyrillicDetector::HandleData(const char* aBuf, uint32_t aLen)
  19. {
  20. uint8_t cls;
  21. const char* b;
  22. uint32_t i;
  23. if(mDone)
  24. return;
  25. for(i=0, b=aBuf;i<aLen;i++,b++)
  26. {
  27. for(unsigned j=0;j<mItems;j++)
  28. {
  29. if( 0x80 & *b)
  30. cls = mCyrillicClass[j][(*b) & 0x7F];
  31. else
  32. cls = 0;
  33. NS_ASSERTION( cls <= 32 , "illegal character class");
  34. mProb[j] += gCyrillicProb[mLastCls[j]][cls];
  35. mLastCls[j] = cls;
  36. }
  37. }
  38. // We now only based on the first block we receive
  39. DataEnd();
  40. }
  41. //---------------------------------------------------------------------
  42. #define THRESHOLD_RATIO 1.5f
  43. void nsCyrillicDetector::DataEnd()
  44. {
  45. uint32_t max=0;
  46. uint8_t maxIdx=0;
  47. uint8_t j;
  48. if(mDone)
  49. return;
  50. for(j=0;j<mItems;j++) {
  51. if(mProb[j] > max)
  52. {
  53. max = mProb[j];
  54. maxIdx= j;
  55. }
  56. }
  57. if( 0 == max ) // if we didn't get any 8 bits data
  58. return;
  59. #ifdef DEBUG
  60. for(j=0;j<mItems;j++)
  61. printf("Charset %s->\t%d\n", mCharsets[j], mProb[j]);
  62. #endif
  63. this->Report(mCharsets[maxIdx]);
  64. mDone = true;
  65. }
  66. //---------------------------------------------------------------------
  67. nsCyrXPCOMDetector:: nsCyrXPCOMDetector(uint8_t aItems,
  68. const uint8_t ** aCyrillicClass,
  69. const char **aCharsets)
  70. : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets)
  71. {
  72. mObserver = nullptr;
  73. }
  74. //---------------------------------------------------------------------
  75. nsCyrXPCOMDetector::~nsCyrXPCOMDetector()
  76. {
  77. }
  78. //---------------------------------------------------------------------
  79. NS_IMETHODIMP nsCyrXPCOMDetector::Init(
  80. nsICharsetDetectionObserver* aObserver)
  81. {
  82. NS_ASSERTION(mObserver == nullptr , "Init twice");
  83. if(nullptr == aObserver)
  84. return NS_ERROR_ILLEGAL_VALUE;
  85. mObserver = aObserver;
  86. return NS_OK;
  87. }
  88. //----------------------------------------------------------
  89. NS_IMETHODIMP nsCyrXPCOMDetector::DoIt(
  90. const char* aBuf, uint32_t aLen, bool* oDontFeedMe)
  91. {
  92. NS_ASSERTION(mObserver != nullptr , "have not init yet");
  93. if((nullptr == aBuf) || (nullptr == oDontFeedMe))
  94. return NS_ERROR_ILLEGAL_VALUE;
  95. this->HandleData(aBuf, aLen);
  96. *oDontFeedMe = false;
  97. return NS_OK;
  98. }
  99. //----------------------------------------------------------
  100. NS_IMETHODIMP nsCyrXPCOMDetector::Done()
  101. {
  102. NS_ASSERTION(mObserver != nullptr , "have not init yet");
  103. this->DataEnd();
  104. return NS_OK;
  105. }
  106. //----------------------------------------------------------
  107. void nsCyrXPCOMDetector::Report(const char* aCharset)
  108. {
  109. NS_ASSERTION(mObserver != nullptr , "have not init yet");
  110. mObserver->Notify(aCharset, eBestAnswer);
  111. }
  112. //---------------------------------------------------------------------
  113. nsCyrXPCOMStringDetector:: nsCyrXPCOMStringDetector(uint8_t aItems,
  114. const uint8_t ** aCyrillicClass,
  115. const char **aCharsets)
  116. : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets)
  117. {
  118. }
  119. //---------------------------------------------------------------------
  120. nsCyrXPCOMStringDetector::~nsCyrXPCOMStringDetector()
  121. {
  122. }
  123. //---------------------------------------------------------------------
  124. void nsCyrXPCOMStringDetector::Report(const char *aCharset)
  125. {
  126. mResult = aCharset;
  127. }
  128. //---------------------------------------------------------------------
  129. NS_IMETHODIMP nsCyrXPCOMStringDetector::DoIt(const char* aBuf, uint32_t aLen,
  130. const char** oCharset, nsDetectionConfident &oConf)
  131. {
  132. mResult = nullptr;
  133. mDone = false;
  134. this->HandleData(aBuf, aLen);
  135. this->DataEnd();
  136. *oCharset=mResult;
  137. oConf = eBestAnswer;
  138. return NS_OK;
  139. }