/* HtsEngine.cpp (7.7 KB) */
  1. /* ----------------------------------------------------------------- */
  2. /* The HMM-Based Singing Voice Synthesis System "Sinsy" */
  3. /* developed by Sinsy Working Group */
  4. /* http://sinsy.sourceforge.net/ */
  5. /* ----------------------------------------------------------------- */
  6. /* */
  7. /* Copyright (c) 2009-2015 Nagoya Institute of Technology */
  8. /* Department of Computer Science */
  9. /* */
  10. /* All rights reserved. */
  11. /* */
  12. /* Redistribution and use in source and binary forms, with or */
  13. /* without modification, are permitted provided that the following */
  14. /* conditions are met: */
  15. /* */
  16. /* - Redistributions of source code must retain the above copyright */
  17. /* notice, this list of conditions and the following disclaimer. */
  18. /* - Redistributions in binary form must reproduce the above */
  19. /* copyright notice, this list of conditions and the following */
  20. /* disclaimer in the documentation and/or other materials provided */
  21. /* with the distribution. */
  22. /* - Neither the name of the Sinsy working group nor the names of */
  23. /* its contributors may be used to endorse or promote products */
  24. /* derived from this software without specific prior written */
  25. /* permission. */
  26. /* */
  27. /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
  28. /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  29. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  30. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  31. /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
  32. /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
  33. /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
  34. /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
  35. /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
  36. /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
  37. /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
  38. /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  39. /* POSSIBILITY OF SUCH DAMAGE. */
  40. /* ----------------------------------------------------------------- */
  41. #ifdef HAVE_HTS
  42. #include <math.h>
  43. #include <string.h>
  44. #include <vector>
  45. #include <limits>
  46. #include <stdlib.h>
  47. #include <limits.h>
  48. #include "HtsEngine.h"
  49. #include "LabelStrings.h"
  50. #include "SynthConditionImpl.h"
  51. namespace sinsy
  52. {
  53. /*!
  54. constructor
  55. */
  56. HtsEngine::HtsEngine()
  57. {
  58. init();
  59. }
  60. /*!
  61. destructor
  62. */
  63. HtsEngine::~HtsEngine()
  64. {
  65. clear();
  66. }
  67. /*!
  68. initialize
  69. */
  70. void HtsEngine::init()
  71. {
  72. HTS_Engine_initialize(&engine);
  73. fperiod = 0;
  74. }
  75. /*!
  76. clear
  77. */
  78. void HtsEngine::clear()
  79. {
  80. HTS_Engine_clear(&engine);
  81. }
  82. /*!
  83. reset
  84. */
  85. void HtsEngine::reset()
  86. {
  87. clear();
  88. init();
  89. }
  90. /*!
  91. load voices
  92. */
  93. bool HtsEngine::load(const std::vector<std::string>& voices)
  94. {
  95. size_t i;
  96. char **fn_voices = NULL;
  97. // check
  98. if (voices.size() == 0)
  99. return false;
  100. // get HTS voice file names
  101. fn_voices = (char **) malloc(voices.size() * sizeof(char *));
  102. if (NULL == fn_voices) {
  103. throw std::bad_alloc();
  104. }
  105. for(i = 0; i < voices.size(); i++) {
  106. fn_voices[i] = strdup(voices[i].c_str());
  107. }
  108. // load HTS voices
  109. if(HTS_Engine_load(&engine, fn_voices, voices.size()) != TRUE) {
  110. HTS_Engine_clear(&engine);
  111. for(i = 0; i < voices.size(); i++) {
  112. free(fn_voices[i]);
  113. }
  114. free(fn_voices);
  115. return false;
  116. }
  117. for(i = 0; i < voices.size(); i++) {
  118. free(fn_voices[i]);
  119. }
  120. free(fn_voices);
  121. // save default frame period
  122. fperiod = HTS_Engine_get_fperiod(&engine);
  123. // set audio buffer size (100ms)
  124. HTS_Engine_set_audio_buff_size(&engine, (size_t) ((double) HTS_Engine_get_sampling_frequency(&engine) * 0.100));
  125. // phoneme alignment
  126. HTS_Engine_set_phoneme_alignment_flag(&engine, TRUE);
  127. return true;
  128. }
  129. /*!
  130. synthesize
  131. */
  132. bool HtsEngine::synthesize(const LabelStrings& label, SynthConditionImpl& condition)
  133. {
  134. // check
  135. if (HTS_Engine_get_nvoices(&engine) == 0 || label.size() == 0) {
  136. return false;
  137. }
  138. bool playFlag = condition.playFlag;
  139. bool saveFlag = !condition.saveFilePath.empty();
  140. bool storeFlag = (NULL != condition.waveformBuffer);
  141. // nothing to do
  142. if (!playFlag && !saveFlag && !storeFlag) {
  143. return true;
  144. }
  145. FILE* fp(NULL);
  146. if (saveFlag) {
  147. fp = fopen(condition.saveFilePath.c_str(), "wb");
  148. if (NULL == fp) {
  149. return false;
  150. }
  151. }
  152. size_t x = HTS_Engine_get_audio_buff_size(&engine);
  153. if (playFlag) {
  154. HTS_Engine_set_audio_buff_size(&engine, x); // reset audio device
  155. } else {
  156. HTS_Engine_set_audio_buff_size(&engine, 0);
  157. }
  158. int error = 0; // 0: no error 1: unknown error 2: bad alloc
  159. if(HTS_Engine_synthesize_from_strings(&engine, (char**) label.getData(), label.size()) != TRUE) {
  160. error = 1;
  161. }
  162. if (saveFlag) {
  163. if(0 == error)
  164. HTS_Engine_save_riff(&engine, fp);
  165. fclose(fp);
  166. }
  167. if (storeFlag && 0 == error) {
  168. if(condition.waveformBuffer) {
  169. size_t numSamples = HTS_Engine_get_nsamples(&engine);
  170. condition.waveformBuffer->resize(numSamples);
  171. for (size_t i = 0; i < numSamples; ++i)
  172. (*condition.waveformBuffer)[i] = HTS_Engine_get_generated_speech(&engine, i);
  173. }
  174. }
  175. HTS_Engine_set_audio_buff_size(&engine, x);
  176. HTS_Engine_refresh(&engine);
  177. if (2 == error) {
  178. throw std::bad_alloc();
  179. }
  180. return (0 == error);
  181. }
  182. /*!
  183. stop
  184. */
  185. void HtsEngine::stop()
  186. {
  187. HTS_Engine_set_stop_flag(&engine, true);
  188. }
  189. /*!
  190. reset stop flag
  191. */
  192. void HtsEngine::resetStopFlag()
  193. {
  194. HTS_Engine_set_stop_flag(&engine, false);
  195. }
  196. /*!
  197. set alpha
  198. */
  199. bool HtsEngine::setAlpha(double alpha)
  200. {
  201. if (0 == HTS_Engine_get_nvoices(&engine)) {
  202. return false;
  203. }
  204. HTS_Engine_set_alpha(&engine, alpha);
  205. return true;
  206. }
  207. /*!
  208. set tone
  209. */
  210. bool HtsEngine::setTone(double tone)
  211. {
  212. if (0 == HTS_Engine_get_nvoices(&engine)) {
  213. return false;
  214. }
  215. HTS_Engine_add_half_tone(&engine, tone);
  216. return true;
  217. }
  218. /*!
  219. set speed
  220. */
  221. bool HtsEngine::setSpeed(double speed)
  222. {
  223. if (0 == HTS_Engine_get_nvoices(&engine)) {
  224. return false;
  225. }
  226. if(0.0 == speed) {
  227. return false;
  228. }
  229. HTS_Engine_set_fperiod(&engine, static_cast<size_t>(fperiod / speed));
  230. return true;
  231. }
  232. /*!
  233. set volume
  234. */
  235. bool HtsEngine::setVolume(double volume)
  236. {
  237. if (0 == HTS_Engine_get_nvoices(&engine)) {
  238. return false;
  239. }
  240. HTS_Engine_set_volume(&engine, volume);
  241. return true;
  242. }
  243. /*!
  244. set interpolation weight
  245. */
  246. bool HtsEngine::setInterpolationWeight(size_t index, double weight)
  247. {
  248. if (0 == HTS_Engine_get_nvoices(&engine)) {
  249. return false;
  250. }
  251. if (HTS_Engine_get_nvoices(&engine) <= index) {
  252. return false;
  253. }
  254. HTS_Engine_set_duration_interpolation_weight(&engine, index, weight);
  255. for(size_t i = 0; i < HTS_Engine_get_nstream(&engine); i++) {
  256. HTS_Engine_set_parameter_interpolation_weight(&engine, index, i, weight);
  257. HTS_Engine_set_gv_interpolation_weight(&engine, index, i, weight);
  258. }
  259. return true;
  260. }
  261. }; // namespace sinsy
  262. #endif