uscript.cpp 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 1997-2014, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. *
  9. * File USCRIPT.C
  10. *
  11. * Modification History:
  12. *
  13. * Date Name Description
  14. * 07/06/2001 Ram Creation.
  15. ******************************************************************************
  16. */
  17. #include "unicode/uchar.h"
  18. #include "unicode/uscript.h"
  19. #include "unicode/uloc.h"
  20. #include "bytesinkutil.h"
  21. #include "charstr.h"
  22. #include "cmemory.h"
  23. #include "cstring.h"
  24. #include "ulocimp.h"
  25. static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
  26. static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
  27. static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
  28. static int32_t
  29. setCodes(const UScriptCode *src, int32_t length,
  30. UScriptCode *dest, int32_t capacity, UErrorCode *err) {
  31. int32_t i;
  32. if(U_FAILURE(*err)) { return 0; }
  33. if(length > capacity) {
  34. *err = U_BUFFER_OVERFLOW_ERROR;
  35. return length;
  36. }
  37. for(i = 0; i < length; ++i) {
  38. dest[i] = src[i];
  39. }
  40. return length;
  41. }
  42. static int32_t
  43. setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
  44. if(U_FAILURE(*err)) { return 0; }
  45. if(1 > capacity) {
  46. *err = U_BUFFER_OVERFLOW_ERROR;
  47. return 1;
  48. }
  49. scripts[0] = script;
  50. return 1;
  51. }
  52. static int32_t
  53. getCodesFromLocale(const char *locale,
  54. UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
  55. UErrorCode internalErrorCode = U_ZERO_ERROR;
  56. char lang[8] = {0};
  57. char script[8] = {0};
  58. int32_t scriptLength;
  59. if(U_FAILURE(*err)) { return 0; }
  60. // Multi-script languages, equivalent to the LocaleScript data
  61. // that we used to load from locale resource bundles.
  62. /*length = */ uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &internalErrorCode);
  63. if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
  64. return 0;
  65. }
  66. if(0 == uprv_strcmp(lang, "ja")) {
  67. return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err);
  68. }
  69. if(0 == uprv_strcmp(lang, "ko")) {
  70. return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err);
  71. }
  72. scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &internalErrorCode);
  73. if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
  74. return 0;
  75. }
  76. if(0 == uprv_strcmp(lang, "zh") && 0 == uprv_strcmp(script, "Hant")) {
  77. return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err);
  78. }
  79. // Explicit script code.
  80. if(scriptLength != 0) {
  81. UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
  82. if(scriptCode != USCRIPT_INVALID_CODE) {
  83. if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) {
  84. scriptCode = USCRIPT_HAN;
  85. }
  86. return setOneCode(scriptCode, scripts, capacity, err);
  87. }
  88. }
  89. return 0;
  90. }
  91. /* TODO: this is a bad API and should be deprecated, ticket #11141 */
  92. U_CAPI int32_t U_EXPORT2
  93. uscript_getCode(const char* nameOrAbbrOrLocale,
  94. UScriptCode* fillIn,
  95. int32_t capacity,
  96. UErrorCode* err){
  97. UBool triedCode;
  98. UErrorCode internalErrorCode;
  99. int32_t length;
  100. if(U_FAILURE(*err)) {
  101. return 0;
  102. }
  103. if(nameOrAbbrOrLocale==nullptr ||
  104. (fillIn == nullptr ? capacity != 0 : capacity < 0)) {
  105. *err = U_ILLEGAL_ARGUMENT_ERROR;
  106. return 0;
  107. }
  108. triedCode = false;
  109. const char* lastSepPtr = uprv_strrchr(nameOrAbbrOrLocale, '-');
  110. if (lastSepPtr==nullptr) {
  111. lastSepPtr = uprv_strrchr(nameOrAbbrOrLocale, '_');
  112. }
  113. // Favor interpretation of nameOrAbbrOrLocale as a script alias if either
  114. // 1. nameOrAbbrOrLocale does not contain -/_. Handles Han, Mro, Nko, etc.
  115. // 2. The last instance of -/_ is at offset 3, and the portion after that is
  116. // longer than 4 characters (i.e. not a script or region code). This handles
  117. // Old_Hungarian, Old_Italic, etc. ("old" is a valid language code)
  118. // 3. The last instance of -/_ is at offset 7, and the portion after that is
  119. // 3 characters. This handles New_Tai_Lue ("new" is a valid language code).
  120. if (lastSepPtr==nullptr
  121. || (lastSepPtr-nameOrAbbrOrLocale == 3 && uprv_strlen(nameOrAbbrOrLocale) > 8)
  122. || (lastSepPtr-nameOrAbbrOrLocale == 7 && uprv_strlen(nameOrAbbrOrLocale) == 11) ) {
  123. /* try long and abbreviated script names first */
  124. UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
  125. if(code!=USCRIPT_INVALID_CODE) {
  126. return setOneCode(code, fillIn, capacity, err);
  127. }
  128. triedCode = true;
  129. }
  130. internalErrorCode = U_ZERO_ERROR;
  131. length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err);
  132. if(U_FAILURE(*err) || length != 0) {
  133. return length;
  134. }
  135. icu::CharString likely;
  136. {
  137. icu::CharStringByteSink sink(&likely);
  138. ulocimp_addLikelySubtags(nameOrAbbrOrLocale, sink, &internalErrorCode);
  139. }
  140. if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) {
  141. length = getCodesFromLocale(likely.data(), fillIn, capacity, err);
  142. if(U_FAILURE(*err) || length != 0) {
  143. return length;
  144. }
  145. }
  146. if(!triedCode) {
  147. /* still not found .. try long and abbreviated script names again */
  148. UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
  149. if(code!=USCRIPT_INVALID_CODE) {
  150. return setOneCode(code, fillIn, capacity, err);
  151. }
  152. }
  153. return 0;
  154. }