/* GloomyGhost / sinsy */
							/* ----------------------------------------------------------------- */
/*           The HMM-Based Singing Voice Synthesis System "Sinsy"    */
/*           developed by Sinsy Working Group                        */
/*           http://sinsy.sourceforge.net/                           */
/* ----------------------------------------------------------------- */
/*                                                                   */
/*  Copyright (c) 2009-2015  Nagoya Institute of Technology          */
/*                           Department of Computer Science          */
/*                                                                   */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/* - Redistributions of source code must retain the above copyright  */
/*   notice, this list of conditions and the following disclaimer.   */
/* - Redistributions in binary form must reproduce the above         */
/*   copyright notice, this list of conditions and the following     */
/*   disclaimer in the documentation and/or other materials provided */
/*   with the distribution.                                          */
/* - Neither the name of the Sinsy working group nor the names of    */
/*   its contributors may be used to endorse or promote products     */
/*   derived from this software without specific prior written       */
/*   permission.                                                     */
/*                                                                   */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
/* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
/* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
/* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
/* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
/* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
/* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
/* POSSIBILITY OF SUCH DAMAGE.                                       */
/* ----------------------------------------------------------------- */

#include <string.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include "sinsy.h"

#include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h>
#include <espeak-ng/encoding.h>
#include <sndfile.h>
#include <samplerate.h>
#include <assert.h>
#include <ringbuffer.h>
#include <unistd.h>

using namespace std;

// File-local (anonymous) namespace. It currently contains no active
// declarations: the only entry, a default-language constant, is commented out.
namespace
{
//const char* DEFAULT_LANGS = "fi";
};

/**
 * Print the program banner, engine credits and command-line help to stdout.
 *
 * The text depends on the HAVE_HTS build flag: with it the HTS engine banner
 * and the HTS-only options (-x, HTS voice file) are shown, without it the
 * eSpeak NG credits and the eSpeak voice option are shown.
 * The option list mirrors the switch in main(): -w, -x (HTS only), -m, -o,
 * -u and -h.
 */
void usage()
{
#ifdef HAVE_HTS
   std::cout << "The HMM-Based Singing Voice Synthesis System \"Sinsy\"\n";
#else
   std::cout << "The Formant-Based Singing Voice Synthesis System \"SinsyNG\"\n";
#endif
   std::cout << "Version 0.92 (http://sinsy.sourceforge.net/)\n"
             << "Copyright (C) 2009-2015 Nagoya Institute of Technology\n"
             << "All rights reserved.\n"
             << "\n";
#ifdef HAVE_HTS
   std::cout << "The HMM-Based Speech Synthesis Engine \"hts_engine API\"\n"
             << "Version 1.10 (http://hts-engine.sourceforge.net/)\n"
             << "Copyright (C) 2001-2015 Nagoya Institute of Technology\n"
             << "              2001-2008 Tokyo Institute of Technology\n"
             << "All rights reserved.\n"
             << "\n"
             << "sinsy - The HMM-based singing voice synthesis system \"Sinsy\"\n";
#else
   std::cout << "The eSpeak NG (Next Generation) Text-to-Speech Synthesis Engine\n"
             << "Copyright (C) 2005-2014 Jonathan Duddington\n"
             << "              2015-2017 Reece H. Dunn\n";
#endif
   std::cout << "\n"
             << "  usage:\n"
             << "    sinsy [ options ] [ infile ]\n"
             << "  options:                                           [def]\n";

#ifdef HAVE_HTS
   std::cout << "    -w langs    : languages                          [  j]\n"
             << "                  j: Japanese                             \n"
             << "                  (Currently, you can set only Japanese)  \n"
             << "    -x dir      : dictionary directory               [N/A]\n"
             << "    -m htsvoice : HTS voice file                     [N/A]\n";
#else
   std::cout << "    -w langs    : languages                          [ en]\n"
             << "    -m voice    : eSpeak NG voice name               [ en]\n";
#endif
   // Options below are accepted by main() in both build configurations.
   std::cout << "    -o file     : filename of output wav audio       [N/A]\n"
             << "    -u file     : score command file                 [N/A]\n"
             << "    -h          : print this message\n"
             << "  infile:\n"
             << "    MusicXML file\n";
   std::cout << std::flush;
}

// Placeholder synthesis callback: it ignores the audio buffer, the sample
// count and the event list it is handed, and always reports 0.
static int SynthCallback(short *wav, int numsamples, espeak_EVENT *events)
{
	(void)wav;
	(void)numsamples;
	(void)events;
	const int result = 0;
	return result;
}

class ECantorix : public sinsy::IScore
{
	int inputSamplerate=0;
	int outputSamplerate=0;
    bool inTie=false;
    size_t tieDuration=0;
    std::string tieLyrics;
    
    float* resample(float* data_in,int input_frames,float ratio,int *output_frames)
	{
		*output_frames = input_frames*ratio;
		float* ret = (float*)malloc(sizeof(float)*output_frames[0]);
		SRC_DATA data;
		data.data_in = data_in;
		data.data_out = ret;
		data.input_frames = input_frames;
		data.output_frames = *output_frames;
		data.src_ratio = ratio;
		src_simple(&data,SRC_SINC_BEST_QUALITY,1);
		return ret;
	}

    
public:
    ECantorix(){
    }
    virtual ~ECantorix() {
    }
    
    void init()
    {
        char *data_path = NULL; // use default path for espeak-ng-data
        espeak_ng_InitializePath(data_path);
        espeak_ng_ERROR_CONTEXT context = NULL;
        espeak_ng_STATUS result = espeak_ng_Initialize(&context);
        if (result != ENS_OK) {
            espeak_ng_PrintStatusCodeMessage(result, stderr, context);
            espeak_ng_ClearErrorContext(&context);
            exit(1);
        }
        sinsy_ng_Init();
        result = espeak_ng_InitializeOutput(ENOUTPUT_MODE_SYNCHRONOUS, 0, NULL);
		espeak_SetSynthCallback(SynthCallback);//dummy synth callback
        inputSamplerate = espeak_ng_GetSampleRate();
    }
    
    void setOutputSamplerate(int fs)
    {
      outputSamplerate = fs;
    }
    
    
    void setVoiceByName(const std::string& voicename)
    {
        espeak_ng_STATUS result = espeak_ng_SetVoiceByName(voicename.c_str());
        if (result != ENS_OK) {
             fprintf(stderr,"ESPEAK_ERROR voice %s not found\n",voicename.c_str());
             exit(1);
        }
    }

    //! set encoding
    virtual bool setEncoding(const std::string& encoding)
    {
        if(encoding=="utf-8") return true;
        fprintf(stderr,"setEncoding %s\n",encoding.c_str());
        return false;
    }

    //! add key mark
    virtual bool addKeyMark(sinsy::ModeType modeType, int fifths)
    {
        fprintf(stderr,"addKeyMark %i %i\n",(int)modeType,fifths);
        return true;
    }

    //! add beat mark (beats/beatType) to end of score: default beat mark is 4/4
    virtual bool addBeatMark(size_t beats, size_t beatType)
    {
        fprintf(stderr,"addBeatMark %i %i\n",(int)beats,(int)beatType);
        return true;
    }

    //! add tempo mark to end of score: default tempo is 100bps
    virtual bool addTempoMark(double tempo)
    {
        fprintf(stderr,"addTempoMark %f\n",tempo);
        return true;
    }

    //! add dynamics mark (sudden changes) to end of score
    virtual bool addSuddenDynamicsMark(sinsy::SuddenDynamicsType suddenDynamicsType)
    {
        fprintf(stderr,"addSuddenDynamicsMark %i\n",(int)suddenDynamicsType);
        return true;
    }

    //! add dynamics mark (gradual changes) to end of score
    virtual bool addGradualDynamicsMark(sinsy::GradualDynamicsType gradualDynamicsType)
    {
        fprintf(stderr,"addGradualDynamicsMark %i\n",(int)gradualDynamicsType);
        return true;
    }

    //! add note to end of score
    virtual bool addNote(size_t duration, const std::string& lyric, size_t pitch, bool accent, bool staccato, sinsy::TieType tieType, sinsy::SlurType slurType, sinsy::SyllabicType syllabicType, bool breath = false)
    {
        if(tieType==sinsy::TIETYPE_BEGIN) inTie=true;
        if(inTie) {
            tieLyrics+=lyric;
            tieDuration+=duration;
        }
        if(!inTie) sinsy_ng_addNote(duration,lyric.c_str(),pitch,accent,staccato,slurType,syllabicType,breath);
        if(tieType==sinsy::TIETYPE_END)
        {
            sinsy_ng_addNote(tieDuration,tieLyrics.c_str(),pitch,accent,staccato,slurType,syllabicType,breath);
            tieDuration=0;
            tieLyrics="";
            inTie=false;
        }
        //fprintf(stderr,"addNote %i [%s] pitch=%i accent=%i staccato=%i tieType=%i slurType=%i syllabicType=%i breath=%i\n",duration,lyric.c_str(),pitch,accent,staccato,tieType,slurType,syllabicType,breath);
        return true;
    }

    //! add rest to end of score
    virtual bool addRest(size_t duration)
    {
        if(inTie)
        {
            fprintf(stderr,"cannot tie rests\n");
            exit(1);
        }
        sinsy_ng_addRest(duration);
       // fprintf(stderr,"addRest %i\n",duration);
        return true;
    }
    
    void saveTo(std::string fileName)
    {
		
		int data_length = 0;
		float* data2=0;
		float* data = sinsy_ng_getAudioData(&data_length);
		int fs=inputSamplerate;
		if(outputSamplerate!=0 && outputSamplerate!=inputSamplerate)
		{
			fs=outputSamplerate;
			int data_length2;
			data2 = data;
			float ratio = outputSamplerate*1.0/inputSamplerate;
			data = resample(data,data_length,ratio,&data_length2);
			fprintf(stderr,"resample %i %i\n",data_length,data_length2);
			data_length = data_length2;
		}
		
		
		SF_INFO info;
		memset(&info,0,sizeof(info));
		info.channels = 1;
        info.format = SF_FORMAT_WAV | SF_FORMAT_PCM_16;
        info.samplerate = fs;
        SNDFILE* sndfile = sf_open(fileName.c_str(),SFM_WRITE,&info);
        sf_write_float(sndfile,data,data_length);
        free(data);
        sf_close(sndfile);
        fprintf(stderr,"saving to %s length=%i\n",fileName.c_str(),data_length);
        if(data2) free(data2);
	}
};

// File-scope synthesizer instance; the CommandHandler command methods and
// handleUScore() operate on this object.
ECantorix ecantorix;

class CommandHandler
{
    vector<string> argv;
void rest()
{
    if(argv.size()>1) ecantorix.addRest(atoi(argv[1].c_str()));
}
void note()
{
    bool breath = false;
    if(argv.size()>9) breath = atoi(argv[9].c_str());
    if(argv.size()>8) ecantorix.addNote(atoi(argv[1].c_str()),argv[2],atoi(argv[3].c_str()),
                (bool)atoi(argv[4].c_str()),(bool)atoi(argv[5].c_str()),
                (sinsy::TieType)atoi(argv[6].c_str()),(sinsy::SlurType)atoi(argv[7].c_str()),(sinsy::SyllabicType)atoi(argv[8].c_str()),breath);
}
void resample()
{
    if(argv.size()>1) ecantorix.setOutputSamplerate(atoi(argv[1].c_str()));
}
void voice()
{
    if(argv.size()>1) ecantorix.setVoiceByName(argv[1]);
}
public:
void parseCMD(const string& input)
{
    
	std::string tmp;
	bool quote = false;
	for(int i=0;i<input.length();i++)
	{
        if(i==0 && input[i]=='#') return;//comment
		if(input[i]=='\"' && quote==false)
		{
			quote=true;
		}
		else if(input[i]=='\"' && quote==true)
		{
			quote=false;
		}
		else if(input[i]==' ' && quote==false)
		{
            if(tmp.length()) argv.push_back(tmp);
			tmp = "";
		}
		else if(i<input.length()-1 && input[i]=='\\')
		{
			i++;
			tmp += input[i];
		}
		else
		{
			tmp += input[i];
		}
	}
	if(tmp.length()) argv.push_back(tmp);
    string cmd = argv[0];
    if(cmd=="note") note();
    if(cmd=="rest") rest();
    if(cmd=="resample") resample();
    if(cmd=="voice") voice();
    argv.clear();
}
};


/**
 * Render a score command file to a WAV file.
 *
 * Every line of `uscore` is passed to CommandHandler::parseCMD(), which feeds
 * the file-scope `ecantorix` object; afterwards the audio is saved to `wav`.
 *
 * @param uscore path of the score command file
 * @param wav    path of the WAV file to write
 * @return 0 on success, -1 if no output path was given or the command file
 *         cannot be opened
 */
int handleUScore(const std::string& uscore,const std::string& wav) {
    if(wav.empty()) {
        std::cout << "[ERROR] no output wav file given for score command file : " << uscore << std::endl;
        return -1;
    }
    std::ifstream input(uscore);
    if(!input) {
        std::cout << "[ERROR] failed to open score command file : " << uscore << std::endl;
        return -1;
    }

    ecantorix.init();
    CommandHandler cmd;
    std::string input_line;
    // Testing the getline() result (not the stream beforehand) avoids
    // processing a phantom empty line after the last real one.
    while(std::getline(input, input_line)) {
        cmd.parseCMD(input_line);
    }
    ecantorix.saveTo(wav);
    return 0;
}

/**
 * Program entry point.
 *
 * Parses the command line (see usage()), then either
 *  - runs a score command file given with -u through handleUScore(), or
 *  - loads the MusicXML input and synthesizes it: with HAVE_HTS through
 *    sinsy::Sinsy::synthesize(), otherwise by pushing the score into a local
 *    ECantorix and saving it to the -o file.
 *
 * @return 0 on success (or after -h), -1 on a usage or loading error,
 *         or the result of handleUScore() in -u mode
 */
int main(int argc, char **argv)
{
   if (argc < 2) {
      usage();
      return -1;
   }

   std::string xml;
   std::string voice;
   #ifdef HAVE_HTS
   std::string config;
   #endif
   std::string wav;
   std::string languages;
   std::string uscore;

   voice = "en";

   for (int i = 1; i < argc; ++i) {
      if ('-' != argv[i][0]) {
         // positional argument: only one input file is accepted
         if (xml.empty()) {
            xml = argv[i];
            continue;
         }
         std::cout << "[ERROR] invalid option : '" << argv[i] << "'" << std::endl;
         usage();
         return -1;
      }

      const char option = argv[i][1];
      std::string* target = NULL; // where the option's value is stored
      switch (option) {
      case 'w' :
         target = &languages;
         break;
      #ifdef HAVE_HTS
      case 'x' :
         target = &config;
         break;
      #endif
      case 'm' :
         target = &voice;
         break;
      case 'o' :
         target = &wav;
         break;
      case 'u' :
         target = &uscore;
         break;
      case 'h' :
         usage();
         return 0;
      default :
         std::cout << "[ERROR] invalid option : '-" << option << "'" << std::endl;
         usage();
         return -1;
      }

      // Every remaining option takes a value; without this check an option
      // given last would read argv[argc], which is a null pointer.
      if (i + 1 >= argc) {
         std::cout << "[ERROR] option '-" << option << "' requires a value" << std::endl;
         usage();
         return -1;
      }
      *target = argv[++i];
   }

   if(uscore.size()) {
       return handleUScore(uscore,wav);
   }

   if(xml.empty() || voice.empty() || wav.empty()) {
      usage();
      return -1;
   }

   sinsy::Sinsy sinsy;

   std::vector<std::string> voices;
   voices.push_back(voice);

   #ifdef HAVE_HTS
   if (!sinsy.setLanguages(languages, config)) {
      std::cout << "[ERROR] failed to set languages : " << languages << ", config dir : " << config << std::endl;
      return -1;
   }

   if (!sinsy.loadVoices(voices)) {
      std::cout << "[ERROR] failed to load voices : " << voice << std::endl;
      return -1;
   }
   #endif

   if (!sinsy.loadScoreFromMusicXML(xml)) {
      std::cout << "[ERROR] failed to load score from MusicXML file : " << xml << std::endl;
      return -1;
   }

   #ifdef HAVE_HTS

   sinsy::SynthCondition condition;

   if (wav.empty()) {
      condition.setPlayFlag();
   } else {
      condition.setSaveFilePath(wav);
   }

   sinsy.synthesize(condition);

   #else

   ECantorix ecantorix;
   ecantorix.init();
   ecantorix.setVoiceByName(voice);
   sinsy.toScore(ecantorix);

   if (wav.empty()) {
      // Direct playback is not implemented
      // (idea: use https://github.com/espeak-ng/pcaudiolib).
   } else {
      ecantorix.saveTo(wav);
   }

   #endif

   return 0;
}