unisyn.cpp 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. #include <sekai/UnisynIndex.h>
  2. #include <sekai/MBRSynth.h>
  3. #include <sndfile.h>
  4. #include <assert.h>
  5. #include <string>
  6. #include <fstream>
  7. #include <iostream>
  8. #include <sstream>
  9. #include <boost/algorithm/string/classification.hpp>
  10. #include <boost/algorithm/string/split.hpp>
  11. #include <boost/lexical_cast.hpp>
  12. //must not be hardcoded
  13. std::string voicename = "krb";
  14. std::string voicedir = "/home/isengaara/Hacking/Audio/VoiceSynth/FESTIVAL/festival-czech-voices/voice-czech-"+voicename+"/";
  15. std::string indexfile = "dic/"+voicename+"diph.est";
  16. std::string wavfile = voicename+".wav";
  17. //std::string phofile = "/home/isengaara/Hacking/Audio/VoiceSynth/MBROLA/MBROLASing/Ukazky/Komplet/Prodanka_1-0_2-0.pho";
  18. std::string phofile = "/home/isengaara/Hacking/Audio/VoiceSynth/MBROLA/MBROLASing/Ukazky/Komplet/Prodanka_3-0_4-0.pho";
  19. struct phone
  20. {
  21. std::string name;
  22. float length;
  23. float start;
  24. };
  25. struct diphone
  26. {
  27. std::string basename;
  28. };
  29. std::vector<phone> phones;
  30. UnisynIndex* voiceindex=nullptr;
  31. std::vector<diphone> diphones;
  32. ControlTrack* ctrl=nullptr;
  33. std::string subst(std::string pho)
  34. {
  35. if(pho=="x") return "ch";
  36. if(pho=="D") return "d";
  37. if(pho=="N") return "n";
  38. if(pho=="Z") return "z";
  39. return pho;
  40. }
  41. void readPho(std::string fileName) {
  42. std::ifstream infile(fileName);
  43. float pos=0;
  44. std::string line;
  45. bool firstPitch=true;
  46. float pitchFrq=0;
  47. ctrl = new ControlTrack();
  48. float last_pos2=0;
  49. while (std::getline(infile, line)) {
  50. if(line.length())
  51. {
  52. std::vector<std::string> spl;
  53. boost::split(spl, line, boost::is_any_of("\t "),
  54. boost::token_compress_on);
  55. std::string pho = spl[0];
  56. printf("spl[1]=%s\n",spl[1].c_str());
  57. float length = boost::lexical_cast<float>(spl[1])/1000;
  58. phone p;
  59. p.start = pos;
  60. p.length = length;
  61. p.name = pho;
  62. phones.push_back(p);
  63. for(uint i=2;i<spl.size();i+=2)
  64. {
  65. if(spl[i].size()==0) {
  66. printf("error 1");
  67. break;
  68. }
  69. if(spl[i+1].size()==0) {
  70. printf("error 2");
  71. break;
  72. }
  73. printf("spl[%i]=%s\n",i,spl[i].c_str());
  74. printf("spl[%i]=%s\n",i+1,spl[i+1].c_str());
  75. float percent = boost::lexical_cast<float>(spl[i]);
  76. float frq = boost::lexical_cast<float>(spl[i+1]);
  77. if(firstPitch)
  78. {
  79. ctrl->addPoint(0,frq);
  80. firstPitch=false;
  81. }
  82. else
  83. {
  84. pitchFrq=frq;
  85. }
  86. float pos2=(pos+length*percent/100);
  87. ctrl->addPoint(pos2,frq);
  88. if(last_pos2==pos2)
  89. {
  90. float pos3=pos2+0.00000005;
  91. // printf("pos2=%f pos3=%f\n",pos2,pos3);
  92. ctrl->addPoint(pos3,frq);
  93. }
  94. else
  95. {
  96. // printf("pos2=%f\n",pos2);
  97. ctrl->addPoint(pos,frq);
  98. }
  99. last_pos2=pos;
  100. }
  101. pos+=length;
  102. }
  103. }
  104. ctrl->addPoint(pos,pitchFrq);
  105. ctrl->addPoint(pos+1.0,pitchFrq);
  106. ctrl->fix();
  107. }
  108. //params needed:
  109. // voicepath
  110. // synthExt
  111. // synthDefParams(mbr)
  112. void generate(MBRSynth *synth)
  113. {
  114. float end = 0;
  115. int sz = phones.size();
  116. printf("size=%i\n",sz);
  117. assert(sz>2);
  118. for(uint i=0;i<phones.size()-1;i++)
  119. {
  120. printf("i=%i\n",i);
  121. phone p0 = phones[i];
  122. phone p1 = phones[i+1];
  123. std::string diph = subst(p0.name)+"-"+subst(p1.name);
  124. auto found = voiceindex->getPho(diph);
  125. if(found.basename.size()> 0)
  126. {
  127. printf("found\n");
  128. //printf("%s -> %s\n",diph.c_str(),found.basename.c_str());
  129. diphone d;
  130. d.basename = found.basename;
  131. diphones.push_back(d);
  132. //printf(" pho: %f %f %f\n",p0.start,p1.start,p1.start+p1.length);
  133. //printf(" vox: %f %f %f\n",found.alignment[0],found.alignment[1],found.alignment[2]);
  134. float x[3] = {p0.start,p1.start,p1.start+p1.length};
  135. if(end)
  136. {
  137. float length = x[2]-x[1];
  138. float concat = length/2;
  139. //TODO: lookup concat somewhere
  140. if(concat>0.050) concat=0.050;
  141. printf("concat %f %s\n",concat,diph.c_str());
  142. assert(concat<length);
  143. x[1] = x[2] - concat;
  144. }
  145. end = x[3];
  146. float y[3] = {found.alignment[0],found.alignment[1],found.alignment[2]};
  147. synth->addUnit(found.basename,3,x,y);
  148. //std::string ogg = voicedir + "ogg/"+found.basename+".ogg";
  149. //printf("%s\n",ogg.c_str());
  150. //TODO:: meta info to emit
  151. }
  152. else
  153. {
  154. printf("not found\n");
  155. end=0;
  156. }
  157. }
  158. }
  159. int main()
  160. {
  161. printf("readpho\n");
  162. readPho(phofile);
  163. printf("create unisyn index\n");
  164. voiceindex = new UnisynIndex();
  165. voiceindex->readFromFile(voicedir+indexfile);
  166. printf("create synth\n");
  167. MBRConfig config;
  168. config.type=synthType::FVOX;
  169. config.mbr_period=0;
  170. config.frame_period=0;
  171. config.fft_size=0;
  172. MBRSynth *synth = new MBRSynth(ctrl);
  173. synth->setBasedir(voicedir);
  174. synth->setConfig(&config);
  175. //TODO: load ogg files / shared for UTAU and Unisyn
  176. printf("generate\n");
  177. generate(synth);
  178. printf("run\n");
  179. SF_INFO info = {0};
  180. info.samplerate = 32000;
  181. info.channels = 1;
  182. info.format = SF_FORMAT_WAV | SF_FORMAT_PCM_16;
  183. SNDFILE *sf = sf_open(wavfile.c_str(), SFM_WRITE, &info);
  184. while (1) {
  185. const int size = 1024;
  186. int fill = size * 4;
  187. float buffer_out[size];
  188. if (synth->readData(buffer_out, size, fill) == false) break;
  189. sf_write_float(sf, buffer_out, size);
  190. }
  191. sf_close(sf);
  192. }