1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889 |
- // © 2021 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- #ifndef LSTMBE_H
- #define LSTMBE_H
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_BREAK_ITERATION
- #include "unicode/uniset.h"
- #include "unicode/ures.h"
- #include "unicode/utext.h"
- #include "unicode/utypes.h"
- #include "brkeng.h"
- #include "dictbe.h"
- #include "uvectr32.h"
- U_NAMESPACE_BEGIN
- class Vectorizer; // Forward declaration: only referenced through a pointer (fVectorizer) in this header.
- struct LSTMData; // Forward declaration: opaque here, only passed and stored by pointer.
- /*******************************************************************
- * LSTMBreakEngine
- */
- /**
- * <p>LSTMBreakEngine is a kind of DictionaryBreakEngine that uses an
- * LSTM to determine language-specific breaks.</p>
- *
- * <p>After it is constructed an LSTMBreakEngine may be shared between
- * threads without synchronization.</p>
- */
- class LSTMBreakEngine : public DictionaryBreakEngine {
- public:
- /**
- * <p>Constructor.</p>
- *
- * @param data LSTM model data for this engine. NOTE(review): ownership of
- *             the pointer is not visible in this header; confirm in the
- *             implementation.
- * @param set A UnicodeSet associated with this engine; presumably the set
- *            of characters it handles (TODO confirm against the implementation).
- * @param status Information on any errors encountered.
- */
- LSTMBreakEngine(const LSTMData* data, const UnicodeSet& set, UErrorCode &status);
- /**
- * <p>Virtual destructor.</p>
- */
- virtual ~LSTMBreakEngine();
- /**
- * <p>Returns a pointer to a char16_t string naming this engine.</p>
- *
- * NOTE(review): the source and lifetime of the returned string are not
- * visible in this header; presumably it is tied to the LSTM data. Confirm
- * in the implementation.
- */ virtual const char16_t* name() const;
- protected:
- /**
- * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
- *
- * @param text A UText representing the text
- * @param rangeStart The start of the range of dictionary characters
- * @param rangeEnd The end of the range of dictionary characters
- * @param foundBreaks Output vector (UVector32, passed by reference) that
- *                    receives the break positions
- * @param isPhraseBreaking Flag supplied by the caller; presumably selects
- *                         phrase-based breaking (TODO confirm against the
- *                         implementation)
- * @param status Information on any errors encountered.
- * @return The number of breaks found
- */
- virtual int32_t divideUpDictionaryRange(UText *text,
- int32_t rangeStart,
- int32_t rangeEnd,
- UVector32 &foundBreaks,
- UBool isPhraseBreaking,
- UErrorCode& status) const override;
- private:
- const LSTMData* fData; // LSTM model data; presumably the pointer given to the constructor (ownership not shown here).
- const Vectorizer* fVectorizer; // Vectorizer used by this engine; how it is created and owned is not shown in this header.
- };
- /**
- * Factory entry point returning a LanguageBreakEngine pointer for the given
- * script and LSTM data.
- *
- * NOTE(review): presumably this constructs an LSTMBreakEngine; ownership of
- * the returned engine and of `data` is not visible in this header. Confirm
- * in the implementation.
- *
- * @param script The script the engine is requested for
- * @param data The LSTM model data to use
- * @param status Information on any errors encountered.
- * @return A pointer to a const LanguageBreakEngine
- */ U_CAPI const LanguageBreakEngine* U_EXPORT2 CreateLSTMBreakEngine(
- UScriptCode script, const LSTMData* data, UErrorCode& status);
- /**
- * Returns a pointer to LSTMData derived from a resource bundle.
- *
- * NOTE(review): whether this function takes ownership of `rb`, and whether
- * the result must be released with DeleteLSTMData, is not visible in this
- * header. Confirm in the implementation.
- *
- * @param rb The resource bundle to read from
- * @param status Information on any errors encountered.
- * @return A pointer to const LSTMData
- */ U_CAPI const LSTMData* U_EXPORT2 CreateLSTMData(
- UResourceBundle* rb, UErrorCode& status);
- /**
- * Returns a pointer to LSTMData for the given script.
- *
- * NOTE(review): how the data is located for a script, and what is returned
- * for scripts without LSTM data, is not visible in this header. Confirm in
- * the implementation.
- *
- * @param script The script to look up
- * @param status Information on any errors encountered.
- * @return A pointer to const LSTMData
- */ U_CAPI const LSTMData* U_EXPORT2 CreateLSTMDataForScript(
- UScriptCode script, UErrorCode& status);
- /**
- * Takes a pointer to LSTMData and returns nothing.
- *
- * NOTE(review): presumably releases data obtained from CreateLSTMData or
- * CreateLSTMDataForScript. Confirm in the implementation.
- *
- * @param data The LSTM data pointer to dispose of
- */ U_CAPI void U_EXPORT2 DeleteLSTMData(const LSTMData* data);
- /**
- * Returns a pointer to a char16_t string for the given LSTM data.
- *
- * NOTE(review): presumably the model's name; the lifetime of the returned
- * string is not visible in this header. Confirm in the implementation.
- *
- * @param data The LSTM data to query
- * @return A pointer to a const char16_t string
- */ U_CAPI const char16_t* U_EXPORT2 LSTMDataName(const LSTMData* data);
- U_NAMESPACE_END
- #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
- #endif /* LSTMBE_H */
|