123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539 |
- // © 2016 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- /*
- *******************************************************************************
- * Copyright (C) 1997-2015, International Business Machines Corporation and
- * others. All Rights Reserved.
- *******************************************************************************
- *
- * File brkiter.cpp
- *
- * Modification History:
- *
- * Date Name Description
- * 02/18/97 aliu Converted from OpenClass. Added DONE.
- * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
- *****************************************************************************************
- */
- // *****************************************************************************
- // This file was generated from the java source file BreakIterator.java
- // *****************************************************************************
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_BREAK_ITERATION
- #include "unicode/rbbi.h"
- #include "unicode/brkiter.h"
- #include "unicode/udata.h"
- #include "unicode/ures.h"
- #include "unicode/ustring.h"
- #include "unicode/filteredbrk.h"
- #include "bytesinkutil.h"
- #include "ucln_cmn.h"
- #include "cstring.h"
- #include "umutex.h"
- #include "servloc.h"
- #include "locbased.h"
- #include "uresimp.h"
- #include "uassert.h"
- #include "ubrkimpl.h"
- #include "utracimp.h"
- #include "charstr.h"
- // *****************************************************************************
- // class BreakIterator
- // This class implements methods for finding the location of boundaries in text.
- // Instances of BreakIterator maintain a current position and scan over text
- // returning the index of characters where boundaries occur.
- // *****************************************************************************
- U_NAMESPACE_BEGIN
- // -------------------------------------
- BreakIterator*
- BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status)
- {
- char fnbuff[256];
- char ext[4]={'\0'};
- CharString actualLocale;
- int32_t size;
- const char16_t* brkfname = nullptr;
- UResourceBundle brkRulesStack;
- UResourceBundle brkNameStack;
- UResourceBundle *brkRules = &brkRulesStack;
- UResourceBundle *brkName = &brkNameStack;
- RuleBasedBreakIterator *result = nullptr;
- if (U_FAILURE(status))
- return nullptr;
- ures_initStackObject(brkRules);
- ures_initStackObject(brkName);
- // Get the locale
- UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status);
- // Get the "boundaries" array.
- if (U_SUCCESS(status)) {
- brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status);
- // Get the string object naming the rules file
- brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status);
- // Get the actual string
- brkfname = ures_getString(brkName, &size, &status);
- U_ASSERT((size_t)size<sizeof(fnbuff));
- if ((size_t)size>=sizeof(fnbuff)) {
- size=0;
- if (U_SUCCESS(status)) {
- status = U_BUFFER_OVERFLOW_ERROR;
- }
- }
- // Use the string if we found it
- if (U_SUCCESS(status) && brkfname) {
- actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status);
- char16_t* extStart=u_strchr(brkfname, 0x002e);
- int len = 0;
- if (extStart != nullptr){
- len = (int)(extStart-brkfname);
- u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff
- u_UCharsToChars(brkfname, fnbuff, len);
- }
- fnbuff[len]=0; // nul terminate
- }
- }
- ures_close(brkRules);
- ures_close(brkName);
- UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
- if (U_FAILURE(status)) {
- ures_close(b);
- return nullptr;
- }
- // Create a RuleBasedBreakIterator
- result = new RuleBasedBreakIterator(file, uprv_strstr(type, "phrase") != nullptr, status);
- // If there is a result, set the valid locale and actual locale, and the kind
- if (U_SUCCESS(status) && result != nullptr) {
- U_LOCALE_BASED(locBased, *(BreakIterator*)result);
- locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
- actualLocale.data());
- }
- ures_close(b);
- if (U_FAILURE(status) && result != nullptr) { // Sometimes redundant check, but simple
- delete result;
- return nullptr;
- }
- if (result == nullptr) {
- udata_close(file);
- if (U_SUCCESS(status)) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }
- }
- return result;
- }
- // Creates a break iterator for word breaks.
- BreakIterator* U_EXPORT2
- BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
- {
- return createInstance(key, UBRK_WORD, status);
- }
- // -------------------------------------
- // Creates a break iterator for line breaks.
- BreakIterator* U_EXPORT2
- BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
- {
- return createInstance(key, UBRK_LINE, status);
- }
- // -------------------------------------
- // Creates a break iterator for character breaks.
- BreakIterator* U_EXPORT2
- BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status)
- {
- return createInstance(key, UBRK_CHARACTER, status);
- }
- // -------------------------------------
- // Creates a break iterator for sentence breaks.
- BreakIterator* U_EXPORT2
- BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status)
- {
- return createInstance(key, UBRK_SENTENCE, status);
- }
- // -------------------------------------
- // Creates a break iterator for title casing breaks.
- BreakIterator* U_EXPORT2
- BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status)
- {
- return createInstance(key, UBRK_TITLE, status);
- }
- // -------------------------------------
- // Gets all the available locales that has localized text boundary data.
- const Locale* U_EXPORT2
- BreakIterator::getAvailableLocales(int32_t& count)
- {
- return Locale::getAvailableLocales(count);
- }
- // ------------------------------------------
- //
- // Constructors, destructor and assignment operator
- //
- //-------------------------------------------
- BreakIterator::BreakIterator()
- {
- *validLocale = *actualLocale = 0;
- }
- BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) {
- uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
- uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
- }
- BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
- if (this != &other) {
- uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
- uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
- }
- return *this;
- }
- BreakIterator::~BreakIterator()
- {
- }
- // ------------------------------------------
- //
- // Registration
- //
- //-------------------------------------------
- #if !UCONFIG_NO_SERVICE
- // -------------------------------------
- class ICUBreakIteratorFactory : public ICUResourceBundleFactory {
- public:
- virtual ~ICUBreakIteratorFactory();
- protected:
- virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const override {
- return BreakIterator::makeInstance(loc, kind, status);
- }
- };
- ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {}
- // -------------------------------------
- class ICUBreakIteratorService : public ICULocaleService {
- public:
- ICUBreakIteratorService()
- : ICULocaleService(UNICODE_STRING("Break Iterator", 14))
- {
- UErrorCode status = U_ZERO_ERROR;
- registerFactory(new ICUBreakIteratorFactory(), status);
- }
- virtual ~ICUBreakIteratorService();
- virtual UObject* cloneInstance(UObject* instance) const override {
- return ((BreakIterator*)instance)->clone();
- }
- virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const override {
- LocaleKey& lkey = static_cast<LocaleKey&>(const_cast<ICUServiceKey&>(key));
- int32_t kind = lkey.kind();
- Locale loc;
- lkey.currentLocale(loc);
- return BreakIterator::makeInstance(loc, kind, status);
- }
- virtual UBool isDefault() const override {
- return countFactories() == 1;
- }
- };
- ICUBreakIteratorService::~ICUBreakIteratorService() {}
- // -------------------------------------
- // defined in ucln_cmn.h
- U_NAMESPACE_END
- static icu::UInitOnce gInitOnceBrkiter {};
- static icu::ICULocaleService* gService = nullptr;
- /**
- * Release all static memory held by breakiterator.
- */
- U_CDECL_BEGIN
- static UBool U_CALLCONV breakiterator_cleanup() {
- #if !UCONFIG_NO_SERVICE
- if (gService) {
- delete gService;
- gService = nullptr;
- }
- gInitOnceBrkiter.reset();
- #endif
- return true;
- }
- U_CDECL_END
- U_NAMESPACE_BEGIN
- static void U_CALLCONV
- initService() {
- gService = new ICUBreakIteratorService();
- ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup);
- }
- static ICULocaleService*
- getService()
- {
- umtx_initOnce(gInitOnceBrkiter, &initService);
- return gService;
- }
- // -------------------------------------
- static inline UBool
- hasService()
- {
- return !gInitOnceBrkiter.isReset() && getService() != nullptr;
- }
- // -------------------------------------
- URegistryKey U_EXPORT2
- BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status)
- {
- ICULocaleService *service = getService();
- if (service == nullptr) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- }
- return service->registerInstance(toAdopt, locale, kind, status);
- }
- // -------------------------------------
- UBool U_EXPORT2
- BreakIterator::unregister(URegistryKey key, UErrorCode& status)
- {
- if (U_SUCCESS(status)) {
- if (hasService()) {
- return gService->unregister(key, status);
- }
- status = U_MEMORY_ALLOCATION_ERROR;
- }
- return false;
- }
- // -------------------------------------
- StringEnumeration* U_EXPORT2
- BreakIterator::getAvailableLocales()
- {
- ICULocaleService *service = getService();
- if (service == nullptr) {
- return nullptr;
- }
- return service->getAvailableLocales();
- }
- #endif /* UCONFIG_NO_SERVICE */
- // -------------------------------------
- BreakIterator*
- BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status)
- {
- if (U_FAILURE(status)) {
- return nullptr;
- }
- #if !UCONFIG_NO_SERVICE
- if (hasService()) {
- Locale actualLoc("");
- BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status);
- // TODO: The way the service code works in ICU 2.8 is that if
- // there is a real registered break iterator, the actualLoc
- // will be populated, but if the handleDefault path is taken
- // (because nothing is registered that can handle the
- // requested locale) then the actualLoc comes back empty. In
- // that case, the returned object already has its actual/valid
- // locale data populated (by makeInstance, which is what
- // handleDefault calls), so we don't touch it. YES, A COMMENT
- // THIS LONG is a sign of bad code -- so the action item is to
- // revisit this in ICU 3.0 and clean it up/fix it/remove it.
- if (U_SUCCESS(status) && (result != nullptr) && *actualLoc.getName() != 0) {
- U_LOCALE_BASED(locBased, *result);
- locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName());
- }
- return result;
- }
- else
- #endif
- {
- return makeInstance(loc, kind, status);
- }
- }
- // -------------------------------------
- enum { kKeyValueLenMax = 32 };
- BreakIterator*
- BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
- {
- if (U_FAILURE(status)) {
- return nullptr;
- }
- BreakIterator *result = nullptr;
- switch (kind) {
- case UBRK_CHARACTER:
- {
- UTRACE_ENTRY(UTRACE_UBRK_CREATE_CHARACTER);
- result = BreakIterator::buildInstance(loc, "grapheme", status);
- UTRACE_EXIT_STATUS(status);
- }
- break;
- case UBRK_WORD:
- {
- UTRACE_ENTRY(UTRACE_UBRK_CREATE_WORD);
- result = BreakIterator::buildInstance(loc, "word", status);
- UTRACE_EXIT_STATUS(status);
- }
- break;
- case UBRK_LINE:
- {
- char lb_lw[kKeyValueLenMax];
- UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
- uprv_strcpy(lb_lw, "line");
- UErrorCode kvStatus = U_ZERO_ERROR;
- CharString value;
- CharStringByteSink valueSink(&value);
- loc.getKeywordValue("lb", valueSink, kvStatus);
- if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) {
- uprv_strcat(lb_lw, "_");
- uprv_strcat(lb_lw, value.data());
- }
- // lw=phrase is only supported in Japanese and Korean
- if (uprv_strcmp(loc.getLanguage(), "ja") == 0 || uprv_strcmp(loc.getLanguage(), "ko") == 0) {
- value.clear();
- loc.getKeywordValue("lw", valueSink, kvStatus);
- if (U_SUCCESS(kvStatus) && value == "phrase") {
- uprv_strcat(lb_lw, "_");
- uprv_strcat(lb_lw, value.data());
- }
- }
- result = BreakIterator::buildInstance(loc, lb_lw, status);
- UTRACE_DATA1(UTRACE_INFO, "lb_lw=%s", lb_lw);
- UTRACE_EXIT_STATUS(status);
- }
- break;
- case UBRK_SENTENCE:
- {
- UTRACE_ENTRY(UTRACE_UBRK_CREATE_SENTENCE);
- result = BreakIterator::buildInstance(loc, "sentence", status);
- #if !UCONFIG_NO_FILTERED_BREAK_ITERATION
- char ssKeyValue[kKeyValueLenMax] = {0};
- UErrorCode kvStatus = U_ZERO_ERROR;
- int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus);
- if (U_SUCCESS(kvStatus) && kLen > 0 && uprv_strcmp(ssKeyValue,"standard")==0) {
- FilteredBreakIteratorBuilder* fbiBuilder = FilteredBreakIteratorBuilder::createInstance(loc, kvStatus);
- if (U_SUCCESS(kvStatus)) {
- result = fbiBuilder->build(result, status);
- delete fbiBuilder;
- }
- }
- #endif
- UTRACE_EXIT_STATUS(status);
- }
- break;
- case UBRK_TITLE:
- {
- UTRACE_ENTRY(UTRACE_UBRK_CREATE_TITLE);
- result = BreakIterator::buildInstance(loc, "title", status);
- UTRACE_EXIT_STATUS(status);
- }
- break;
- default:
- status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- if (U_FAILURE(status)) {
- return nullptr;
- }
- return result;
- }
- Locale
- BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
- U_LOCALE_BASED(locBased, *this);
- return locBased.getLocale(type, status);
- }
- const char *
- BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
- U_LOCALE_BASED(locBased, *this);
- return locBased.getLocaleID(type, status);
- }
- // This implementation of getRuleStatus is a do-nothing stub, here to
- // provide a default implementation for any derived BreakIterator classes that
- // do not implement it themselves.
- int32_t BreakIterator::getRuleStatus() const {
- return 0;
- }
- // This implementation of getRuleStatusVec is a do-nothing stub, here to
- // provide a default implementation for any derived BreakIterator classes that
- // do not implement it themselves.
- int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) {
- if (U_FAILURE(status)) {
- return 0;
- }
- if (capacity < 1) {
- status = U_BUFFER_OVERFLOW_ERROR;
- return 1;
- }
- *fillInVec = 0;
- return 1;
- }
- BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) {
- U_LOCALE_BASED(locBased, (*this));
- locBased.setLocaleIDs(valid, actual);
- }
- U_NAMESPACE_END
- #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
- //eof
|