JConf.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. /* ----------------------------------------------------------------- */
  2. /* The HMM-Based Singing Voice Synthesis System "Sinsy" */
  3. /* developed by Sinsy Working Group */
  4. /* http://sinsy.sourceforge.net/ */
  5. /* ----------------------------------------------------------------- */
  6. /* */
  7. /* Copyright (c) 2009-2015 Nagoya Institute of Technology */
  8. /* Department of Computer Science */
  9. /* */
  10. /* All rights reserved. */
  11. /* */
  12. /* Redistribution and use in source and binary forms, with or */
  13. /* without modification, are permitted provided that the following */
  14. /* conditions are met: */
  15. /* */
  16. /* - Redistributions of source code must retain the above copyright */
  17. /* notice, this list of conditions and the following disclaimer. */
  18. /* - Redistributions in binary form must reproduce the above */
  19. /* copyright notice, this list of conditions and the following */
  20. /* disclaimer in the documentation and/or other materials provided */
  21. /* with the distribution. */
  22. /* - Neither the name of the Sinsy working group nor the names of */
  23. /* its contributors may be used to endorse or promote products */
  24. /* derived from this software without specific prior written */
  25. /* permission. */
  26. /* */
  27. /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
  28. /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  29. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  30. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  31. /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
  32. /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
  33. /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
  34. /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
  35. /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
  36. /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
  37. /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
  38. /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  39. /* POSSIBILITY OF SUCH DAMAGE. */
  40. /* ----------------------------------------------------------------- */
  41. #include <stdexcept>
  42. #include <limits>
  43. #include <deque>
  44. #include <vector>
  45. #include <iterator>
  46. #include "util_log.h"
  47. #include "util_string.h"
  48. #include "util_converter.h"
  49. #include "StringTokenizer.h"
  50. #include "JConf.h"
  51. #include "Deleter.h"
  52. namespace sinsy
  53. {
  54. namespace
  55. {
  //! label returned by JConf::getSilStr()
  const std::string SIL_STR("sil");
  //! delimiter of the encoding list given to the JConf constructor
  const std::string SEPARATOR(",");
  //! language tag passed to IConvertable::addInfo()
  const std::string LANGUAGE_INFO("JPN");
  //! config key : macron symbol
  const std::string MACRON("MACRON");
  //! config key : vowel reduction symbol
  const std::string VOWEL_REDUCTION("VOWEL_REDUCTION");
  //! config key : phoneme of cl
  const std::string PHONEME_CL("PHONEME_CL");
  //! config key : list of vowels
  const std::string VOWELS("VOWELS");
  //! config key : multibyte char ranges
  const std::string MULTIBYTE_CHAR_RANGE("MULTIBYTE_CHAR_RANGE");
  //! sentinel meaning "no index"
  const size_t INVALID_IDX(std::numeric_limits<size_t>::max());
  //! vowels used when the VOWELS config value is empty
  const std::string DEFAULT_VOWELS("a,i,u,e,o,N");
  //! delimiter of phoneme lists parsed by PhonemeJudge
  const std::string PHONEME_SEPARATOR(",");
  67. class PhonemeJudge
  68. {
  69. public:
  70. //! constructor
  71. PhonemeJudge(const std::string& v, const std::string& b) {
  72. {
  73. StringTokenizer st(v, PHONEME_SEPARATOR);
  74. size_t sz(st.size());
  75. for (size_t i(0); i < sz; ++i) {
  76. std::string phoneme(st.at(i));
  77. cutBlanks(phoneme);
  78. if (!phoneme.empty()) {
  79. this->vowels.insert(phoneme);
  80. }
  81. }
  82. }
  83. {
  84. StringTokenizer st(b, PHONEME_SEPARATOR);
  85. size_t sz(st.size());
  86. for (size_t i(0); i < sz; ++i) {
  87. std::string phoneme(st.at(i));
  88. cutBlanks(phoneme);
  89. if (!phoneme.empty()) {
  90. this->breaks.insert(phoneme);
  91. }
  92. }
  93. }
  94. }
  95. //! destructor
  96. virtual ~PhonemeJudge() {}
  97. //! return whether vowel or not
  98. const std::string& getType(const std::string& phoneme) const {
  99. if (vowels.end() != vowels.find(phoneme)) {
  100. return PhonemeInfo::TYPE_VOWEL;
  101. }
  102. if (breaks.end() != breaks.find(phoneme)) {
  103. return PhonemeInfo::TYPE_BREAK;
  104. }
  105. return PhonemeInfo::TYPE_CONSONANT;
  106. }
  107. private:
  108. //! copy constructor (donot use)
  109. PhonemeJudge(const PhonemeJudge&);
  110. //! assignment operator (donot use)
  111. PhonemeJudge& operator=(const PhonemeJudge&);
  112. //! vowels
  113. std::set<std::string> vowels;
  114. //! breaks such as /cl/
  115. std::set<std::string> breaks;
  116. };
  117. class InfoAdder
  118. {
  119. public:
  120. //! constructor
  121. InfoAdder(sinsy::IConvertable& c, const std::string& cl, const PhonemeJudge& pj) :
  122. convertable(c), clPhoneme(cl), phonemeJudge(pj), waiting(false), vowelReductionIdx(INVALID_IDX), scoreFlag(0), macronFlag(false) {
  123. }
  124. //! destructor
  125. virtual ~InfoAdder() {
  126. reflect();
  127. }
  128. //! set score flag
  129. void setScoreFlag(ScoreFlag f) {
  130. scoreFlag = f;
  131. }
  132. //! set macron flag
  133. void setMacronFlag(bool f) {
  134. macronFlag = f;
  135. }
  136. //! add syllable
  137. void addSyllable(const PhonemeTable::PhonemeList& p, bool vowelReductionFlag) {
  138. if (p.empty()) { // fail safe
  139. WARN_MSG("Cannot add Japanese syllable : no phonemes");
  140. return;
  141. }
  142. bool clFlag = ((1 == p.size()) && (clPhoneme == p[0])) ? true : false;
  143. if (clFlag) { // cl
  144. if (ptrList.empty()) { // first time
  145. ptrList.push_back(new PhonemeTable::PhonemeList(p));
  146. waiting = true;
  147. } else if (ptrList.back()->back() != clPhoneme) { // over second time, and not following cl
  148. ptrList.back()->push_back(clPhoneme);
  149. }
  150. } else { // not cl
  151. if (waiting) { // previous syllable has vowel reduction
  152. PhonemeTable::PhonemeList* prevPhonemes(ptrList.back());
  153. if (INVALID_IDX != vowelReductionIdx) {
  154. prevPhonemes->erase(prevPhonemes->begin() + vowelReductionIdx);
  155. }
  156. std::copy(p.begin(), p.end(), std::back_inserter(*prevPhonemes));
  157. waiting = false;
  158. vowelReductionIdx = INVALID_IDX;
  159. } else {
  160. ptrList.push_back(new PhonemeTable::PhonemeList(p));
  161. }
  162. }
  163. if (vowelReductionFlag) {
  164. if (1 == p.size()) { // vowels, N or cl
  165. WARN_MSG("Vowel reduction symbol was ignored : only one phoneme \"" << p[0] << "\"");
  166. } else {
  167. waiting = true;
  168. vowelReductionIdx = ptrList.back()->size() - 1; // last phoneme ( = vowel)
  169. }
  170. }
  171. }
  172. //! get phonemes of last syllable
  173. const PhonemeTable::PhonemeList* getLastPhonemes() const {
  174. if (ptrList.empty()) {
  175. return NULL;
  176. }
  177. return ptrList.back();
  178. }
  179. //! get phonemes of last syllable
  180. PhonemeTable::PhonemeList* getLastPhonemes() {
  181. if (ptrList.empty()) {
  182. return NULL;
  183. }
  184. return ptrList.back();
  185. }
  186. //! cancel vowel reduction of last syllable
  187. void cancelVowelReductionOfLastSyllable() {
  188. if (ptrList.empty()) {
  189. return;
  190. }
  191. if (waiting) { // previous syllable has vowel reduction
  192. waiting = false;
  193. vowelReductionIdx = INVALID_IDX;
  194. }
  195. }
  196. private:
  197. //! copy constructor (donot use)
  198. InfoAdder(const InfoAdder&);
  199. //! assignment operator (donot use)
  200. InfoAdder& operator=(const InfoAdder&);
  201. //! reflect to convertable
  202. void reflect() {
  203. if (ptrList.empty()) return;
  204. // last syllable has silent vowel
  205. if (waiting) {
  206. if (ptrList.size() <= 1) {
  207. WARN_MSG("Syllable that has vowel reductions needs previous or next syllable");
  208. } else {
  209. PhonemeTable::PhonemeList* lastPhonemes(ptrList.back());
  210. ptrList.pop_back();
  211. if (INVALID_IDX != vowelReductionIdx) {
  212. lastPhonemes->erase(lastPhonemes->begin() + vowelReductionIdx);
  213. }
  214. std::copy(lastPhonemes->begin(), lastPhonemes->end(), std::back_inserter(*(ptrList.back())));
  215. delete lastPhonemes;
  216. }
  217. waiting = false;
  218. vowelReductionIdx = INVALID_IDX;
  219. }
  220. // add
  221. {
  222. std::string info = (macronFlag) ? "1" : "0";
  223. std::string lastPhoneme;
  224. std::vector<PhonemeTable::PhonemeList*>::iterator itr(ptrList.begin());
  225. const std::vector<PhonemeTable::PhonemeList*>::iterator itrEnd(ptrList.end());
  226. for (; itrEnd != itr; ++itr) {
  227. PhonemeTable::PhonemeList& phonemes(**itr);
  228. // same vowel
  229. while (!phonemes.empty()) {
  230. if (phonemes[0] != lastPhoneme) {
  231. break;
  232. }
  233. phonemes.erase(phonemes.begin());
  234. }
  235. if (phonemes.empty()) {
  236. continue;
  237. }
  238. std::vector<PhonemeInfo> phonemeInfos;
  239. phonemeInfos.reserve(phonemes.size());
  240. const std::vector<std::string>::const_iterator iEnd(phonemes.end());
  241. for (std::vector<std::string>::const_iterator i(phonemes.begin()); iEnd != i; ++i) {
  242. const std::string& type(phonemeJudge.getType(*i));
  243. phonemeInfos.push_back(PhonemeInfo(type, *i, scoreFlag));
  244. }
  245. convertable.addInfo(phonemeInfos, LANGUAGE_INFO, info);
  246. if (PhonemeInfo::TYPE_VOWEL == phonemeJudge.getType(phonemes.back())) {
  247. lastPhoneme = phonemes.back();
  248. } else {
  249. lastPhoneme.clear();
  250. }
  251. }
  252. }
  253. // clear
  254. std::for_each(ptrList.begin(), ptrList.end(), Deleter<PhonemeTable::PhonemeList>());
  255. ptrList.clear();
  256. }
  257. //! target
  258. sinsy::IConvertable& convertable;
  259. //! phoneme of cl
  260. const std::string clPhoneme;
  261. //! phoneme type judge
  262. const PhonemeJudge& phonemeJudge;
  263. //! waiting flag
  264. bool waiting;
  265. //! index of vowel reduction
  266. size_t vowelReductionIdx;
  267. //! score flag
  268. ScoreFlag scoreFlag;
  269. //! macron flag
  270. bool macronFlag;
  271. //! phoneme list
  272. std::vector<PhonemeTable::PhonemeList*> ptrList;
  273. };
  274. /*!
  275. convert string of char code to char
  276. */
  277. bool str2char(const std::string& s, unsigned char& c)
  278. {
  279. int tmp(-1);
  280. if ((2 < s.size()) && ('0' == s[0]) && ('x' == s[1])) {
  281. std::string sub(s.substr(2));
  282. std::istringstream iss(sub);
  283. iss >> std::hex >> tmp;
  284. } else {
  285. std::istringstream iss(s);
  286. iss >> tmp;
  287. }
  288. if ((tmp < 0) || (std::numeric_limits<unsigned char>::max() < tmp)) {
  289. ERR_MSG("Config of multibyte char range is wrong format: " << s);
  290. return false;
  291. }
  292. c = static_cast<unsigned char>(tmp);
  293. return true;
  294. }
  295. /*!
  296. set multibyte char range from str to mRange
  297. */
  298. bool setMultibyteCharRange(MultibyteCharRange& mRange, const std::string& str)
  299. {
  300. bool ret(true);
  301. StringTokenizer st1(str, ";");
  302. size_t size1(st1.size());
  303. for (size_t i(0); i < size1; ++i) {
  304. StringTokenizer st2(st1.at(i), ",");
  305. size_t size2(st2.size());
  306. if (3 != size2) {
  307. ERR_MSG("Config of multibyte char range is wrong format: " << str);
  308. return false;
  309. }
  310. size_t size(0);
  311. unsigned char begin(0), end(0);
  312. {
  313. std::istringstream iss(st2.at(0));
  314. iss >> size;
  315. }
  316. if (!str2char(st2.at(1), begin)) {
  317. ret = false;
  318. continue;
  319. // don't return false here to set other ranges
  320. }
  321. if (!str2char(st2.at(2), end)) {
  322. ret = false;
  323. continue;
  324. // don't return false here to set other ranges
  325. }
  326. if (false == mRange.addRange(size, begin, end)) {
  327. ret = false;
  328. continue;
  329. // don't return false here to set other ranges
  330. }
  331. }
  332. return ret;
  333. }
  334. /*!
  335. expand prevInfoAdder to infoAdder
  336. */
  337. bool expand(InfoAdder& prevInfoAdder, InfoAdder& infoAdder, const MacronTable& macronTable, const std::string& clSymbol)
  338. {
  339. if (NULL != infoAdder.getLastPhonemes()) { // fail safe
  340. ERR_MSG("Dst InfoAdder is not empty (Source code is wrong)");
  341. return false;
  342. }
  343. PhonemeTable::PhonemeList* prevPhonemes(prevInfoAdder.getLastPhonemes());
  344. if (!prevPhonemes) {
  345. return false;
  346. }
  347. PhonemeTable::PhonemeList dst1;
  348. PhonemeTable::PhonemeList dst2;
  349. if (macronTable.divide(*prevPhonemes, dst1, dst2)) {
  350. *prevPhonemes = dst1;
  351. infoAdder.addSyllable(dst2, false);
  352. } else {
  353. if (clSymbol == prevPhonemes->back()) { // "cl"
  354. WARN_MSG("Macron cannot follow double consonant(cl)");
  355. // retry after erasing "cl" from phoneme list of previous InfoAdder
  356. prevPhonemes->pop_back();
  357. if (!expand(prevInfoAdder, infoAdder, macronTable, clSymbol)) {
  358. return false;
  359. }
  360. // add "cl"
  361. PhonemeTable::PhonemeList* pl(infoAdder.getLastPhonemes());
  362. if (NULL == pl) {
  363. return false;
  364. }
  365. pl->push_back(clSymbol);
  366. } else { // not "cl"
  367. dst2.push_back(prevPhonemes->back());
  368. infoAdder.addSyllable(dst2, false);
  369. }
  370. }
  371. return true;
  372. }
  373. };
  374. /*!
  375. constructor
  376. @param enc encoding strings (e.g. "utf_8, utf8, utf-8")
  377. */
  378. JConf::JConf(const std::string& enc)
  379. {
  380. StringTokenizer tokeizer(enc, SEPARATOR);
  381. size_t sz(tokeizer.size());
  382. for (size_t i(0); i < sz; ++i) {
  383. std::string e(tokeizer.at(i));
  384. cutBlanks(e);
  385. toLower(e);
  386. encodings.insert(e);
  387. }
  388. }
  389. /*!
  390. destructor
  391. */
  392. JConf::~JConf()
  393. {
  394. }
  395. /*!
  396. read phoneme table and config from files
  397. @param table phoneme table file path
  398. @param conf config file path
  399. @return true if success
  400. */
  401. bool JConf::read(const std::string& table, const std::string& conf, const std::string& macron)
  402. {
  403. if (!phonemeTable.read(table)) {
  404. ERR_MSG("Cannot read phoneme table file : " << table);
  405. return false;
  406. }
  407. if (!config.read(conf)) {
  408. ERR_MSG("Cannot read config file : " << conf);
  409. phonemeTable.clear();
  410. return false;
  411. }
  412. if (!macronTable.read(macron)) {
  413. ERR_MSG("Cannot read macron table file : " << macron);
  414. macronTable.clear();
  415. return false;
  416. }
  417. // set multibyte char ranges
  418. std::string strCharRange(config.get(MULTIBYTE_CHAR_RANGE));
  419. if (!setMultibyteCharRange(multibyteCharRange, strCharRange)) {
  420. ERR_MSG("Failed to set multibyte char ranges");
  421. return false;
  422. }
  423. return true;
  424. }
  425. /*!
  426. convert lyrics to phonemes
  427. */
  428. bool JConf::convert(const std::string& enc, ConvertableList::iterator begin, ConvertableList::iterator end) const
  429. {
  430. // check encoding
  431. if (!checkEncoding(enc)) {
  432. return true; // no relation
  433. }
  434. const std::string macronSymbol(config.get(MACRON));
  435. const std::string clSymbol(config.get(PHONEME_CL));
  436. const std::string vowelReductionSymbol(config.get(VOWEL_REDUCTION));
  437. std::string vowels(config.get(VOWELS));
  438. if (vowels.empty()) {
  439. vowels = DEFAULT_VOWELS;
  440. }
  441. PhonemeJudge phonemeJudge(vowels, clSymbol);
  442. std::vector<InfoAdder*> infoAdderList;
  443. for (ConvertableList::iterator itr(begin); itr != end; ++itr) {
  444. IConvertable& convertable(**itr);
  445. InfoAdder* infoAdder = new InfoAdder(**itr, clSymbol, phonemeJudge);
  446. std::string lyric(convertable.getLyric());
  447. ScoreFlag scoreFlag(analyzeScoreFlags(lyric, &multibyteCharRange));
  448. size_t pos = std::string::npos;
  449. infoAdder->setScoreFlag(scoreFlag);
  450. while (!lyric.empty()) {
  451. if (!vowelReductionSymbol.empty() && (0 == lyric.compare(0, vowelReductionSymbol.size(), vowelReductionSymbol))) { // vowel reduction
  452. WARN_MSG("Vowel reduction symbol appeared at the invalid place");
  453. lyric.erase(0, vowelReductionSymbol.size());
  454. } else if (0 == lyric.compare(0, macronSymbol.size(), macronSymbol)) { // macron
  455. if (NULL != infoAdder->getLastPhonemes()) {
  456. infoAdder->cancelVowelReductionOfLastSyllable();
  457. } else if (infoAdderList.empty()) {
  458. WARN_MSG("Macron have to follow another lyric");
  459. } else {
  460. expand(*(infoAdderList.back()), *infoAdder, macronTable, clSymbol);
  461. }
  462. infoAdder->setMacronFlag(true);
  463. lyric.erase(0, macronSymbol.size());
  464. } else { // others
  465. PhonemeTable::Result result(phonemeTable.find(lyric));
  466. if (!result.isValid()) {
  467. break;
  468. }
  469. lyric.erase(0, result.getMatchedLength());
  470. const PhonemeTable::PhonemeList* phonemes(result.getPhonemeList());
  471. // vowel reduction symbol
  472. bool vl = false;
  473. if (!vowelReductionSymbol.empty() && (0 == lyric.compare(0, vowelReductionSymbol.size(), vowelReductionSymbol))) { // vowel reduction
  474. vl = true;
  475. lyric.erase(0, vowelReductionSymbol.size());
  476. }
  477. // cl
  478. if (!clSymbol.empty() && (1 == phonemes->size()) && (clSymbol == (*phonemes)[0])) {
  479. if (NULL == infoAdder->getLastPhonemes()) { // first phoneme in this note
  480. std::string l(lyric);
  481. while (std::string::npos != (pos = l.find(vowelReductionSymbol))) { // erase vowel reduction symbols
  482. l.erase(pos, vowelReductionSymbol.size());
  483. }
  484. while (std::string::npos != (pos = l.find(macronSymbol))) { // erase macrons
  485. l.erase(pos, macronSymbol.size());
  486. }
  487. if (l.empty()) { // only cl
  488. if (infoAdderList.empty()) {
  489. WARN_MSG("If there is only a phoneme \"cl\" in a note, \"cl\" have to follow vowel");
  490. } else {
  491. expand(*(infoAdderList.back()), *infoAdder, macronTable, clSymbol);
  492. }
  493. }
  494. }
  495. }
  496. infoAdder->addSyllable(*phonemes, vl);
  497. }
  498. }
  499. infoAdderList.push_back(infoAdder);
  500. }
  501. // clear list of InfoAdder
  502. std::for_each(infoAdderList.begin(), infoAdderList.end(), Deleter<InfoAdder>());
  503. infoAdderList.clear();
  504. return true;
  505. }
  506. /*!
  507. get sil string
  508. return sil str
  509. */
  510. std::string JConf::getSilStr() const
  511. {
  512. return SIL_STR;
  513. }
  514. /*!
  515. check encoding
  516. */
  517. bool JConf::checkEncoding(const std::string& enc) const
  518. {
  519. std::string encoding(enc);
  520. toLower(encoding);
  521. Encodings::const_iterator itr(encodings.find(encoding));
  522. return encodings.end() != itr;
  523. }
  524. /*!
  525. get multibyte char range
  526. */
  527. const MultibyteCharRange& JConf::getMultibyteCharRange() const
  528. {
  529. return multibyteCharRange;
  530. }
  531. }; // namespace sinsy