nsSemanticUnitScanner.cpp 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #include "nsSemanticUnitScanner.h"
  // XPCOM macro invocation naming this class (nsSemanticUnitScanner), its base
  // class (nsSampleWordBreaker) and the extra interface it exposes
  // (nsISemanticUnitScanner).
  // NOTE(review): presumably expands to the inherited nsISupports methods
  // (AddRef/Release/QueryInterface) -- confirm against nsISupportsImpl.h.
  6. NS_IMPL_ISUPPORTS_INHERITED(nsSemanticUnitScanner, nsSampleWordBreaker, nsISemanticUnitScanner)
  7. nsSemanticUnitScanner::nsSemanticUnitScanner() : nsSampleWordBreaker()
  8. {
  9. /* member initializers and constructor code */
  10. }
  11. nsSemanticUnitScanner::~nsSemanticUnitScanner()
  12. {
  13. /* destructor code */
  14. }
  15. NS_IMETHODIMP nsSemanticUnitScanner::Start(const char *characterSet)
  16. {
  17. // do nothing for now.
  18. return NS_OK;
  19. }
  20. NS_IMETHODIMP nsSemanticUnitScanner::Next(const char16_t *text, int32_t length, int32_t pos, bool isLastBuffer, int32_t *begin, int32_t *end, bool *_retval)
  21. {
  22. // xxx need to bullet proff and check input pointer
  23. // make sure begin, end and _retval is not nullptr here
  24. // if we reach the end, just return
  25. if (pos >= length) {
  26. *begin = pos;
  27. *end = pos;
  28. *_retval = false;
  29. return NS_OK;
  30. }
  31. uint8_t char_class = nsSampleWordBreaker::GetClass(text[pos]);
  32. // if we are in chinese mode, return one han letter at a time
  33. // we should not do this if we are in Japanese or Korean mode
  34. if (kWbClassHanLetter == char_class) {
  35. *begin = pos;
  36. *end = pos+1;
  37. *_retval = true;
  38. return NS_OK;
  39. }
  40. int32_t next;
  41. // find the next "word"
  42. next = NextWord(text, (uint32_t) length, (uint32_t) pos);
  43. // if we don't have enough text to make decision, return
  44. if (next == NS_WORDBREAKER_NEED_MORE_TEXT) {
  45. *begin = pos;
  46. *end = isLastBuffer ? length : pos;
  47. *_retval = isLastBuffer;
  48. return NS_OK;
  49. }
  50. // if what we got is space or punct, look at the next break
  51. if ((char_class == kWbClassSpace) || (char_class == kWbClassPunct)) {
  52. // if the next "word" is not letters,
  53. // call itself recursively with the new pos
  54. return Next(text, length, next, isLastBuffer, begin, end, _retval);
  55. }
  56. // for the rest, return
  57. *begin = pos;
  58. *end = next;
  59. *_retval = true;
  60. return NS_OK;
  61. }