1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
- /* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
- #include "nsSemanticUnitScanner.h"
// XPCOM nsISupports boilerplate for nsSemanticUnitScanner: the class is declared
// here as inheriting from nsSampleWordBreaker and additionally exposing the
// nsISemanticUnitScanner interface.
// NOTE(review): presumably this expands to the AddRef/Release/QueryInterface
// definitions that defer to the base class -- confirm against nsISupportsImpl.h.
- NS_IMPL_ISUPPORTS_INHERITED(nsSemanticUnitScanner, nsSampleWordBreaker, nsISemanticUnitScanner)
- nsSemanticUnitScanner::nsSemanticUnitScanner() : nsSampleWordBreaker()
- {
- /* member initializers and constructor code */
- }
- nsSemanticUnitScanner::~nsSemanticUnitScanner()
- {
- /* destructor code */
- }
- NS_IMETHODIMP nsSemanticUnitScanner::Start(const char *characterSet)
- {
- // do nothing for now.
- return NS_OK;
- }
- NS_IMETHODIMP nsSemanticUnitScanner::Next(const char16_t *text, int32_t length, int32_t pos, bool isLastBuffer, int32_t *begin, int32_t *end, bool *_retval)
- {
- // xxx need to bullet proff and check input pointer
- // make sure begin, end and _retval is not nullptr here
- // if we reach the end, just return
- if (pos >= length) {
- *begin = pos;
- *end = pos;
- *_retval = false;
- return NS_OK;
- }
- uint8_t char_class = nsSampleWordBreaker::GetClass(text[pos]);
- // if we are in chinese mode, return one han letter at a time
- // we should not do this if we are in Japanese or Korean mode
- if (kWbClassHanLetter == char_class) {
- *begin = pos;
- *end = pos+1;
- *_retval = true;
- return NS_OK;
- }
- int32_t next;
- // find the next "word"
- next = NextWord(text, (uint32_t) length, (uint32_t) pos);
- // if we don't have enough text to make decision, return
- if (next == NS_WORDBREAKER_NEED_MORE_TEXT) {
- *begin = pos;
- *end = isLastBuffer ? length : pos;
- *_retval = isLastBuffer;
- return NS_OK;
- }
-
- // if what we got is space or punct, look at the next break
- if ((char_class == kWbClassSpace) || (char_class == kWbClassPunct)) {
- // if the next "word" is not letters,
- // call itself recursively with the new pos
- return Next(text, length, next, isLastBuffer, begin, end, _retval);
- }
- // for the rest, return
- *begin = pos;
- *end = next;
- *_retval = true;
- return NS_OK;
- }
|