Ceska synteza reci - cz2_mbrola modul do festivalu
Pavel Machek
pavel na suse.cz
Pátek Září 29 21:23:10 CEST 2000
Ahoj!
Pribalen je cz2_mbrola modul do festivalu -- spolu s baliky festival,
mbrola, cz2 (hmm, a zvukovou kartou ;-) nauci vas pocitac mluvit
docela obstojne cesky.
Pavel
; CZ2 mbrola: czech voice for festival. Requires festival, mbrola and
; cz2 database.
;
; Master copy is located at http://atrey.karlin.mff.cuni.cz/~pavel/cz2_mbrola.scm.
; Please send patches to pavel na ucw.cz.
;
; Put this file into lib/voices/czech/cz2_mbrola/festvox/ and put the cz2
; database into lib/voices/czech/cz2_mbrola. (You can get the cz2 database
; by going to http://tcts.fpms.ac.be/synthesis/ and clicking "download",
; "MBROLA binary and voices", "cz2". You'll also need the mbrola binary,
; which can be obtained nearby.)
;
; Then do (set! voice_default 'voice_cz2_mbrola) to use it or put
; (set! voice_default 'voice_cz2_mbrola) into your .festivalrc.
;
;
; Copyright 2000 Pavel Machek <pavel na ucw.cz>
;
; Version 0.6.1
;
; You MAY use this software under terms of GNU GPL, or under following license:
;
; Permission is hereby granted, free of charge, to use and distribute
; this software and its documentation without restriction, including
; without limitation the rights to use, copy, modify, merge, publish,
; distribute, sublicense, and/or sell copies of this work, and to
; permit persons to whom this work is furnished to do so, subject to
; the following conditions:
; 1. The code must retain the above copyright notice, this list of
; conditions and the following disclaimer.
; 2. Original authors' names are not deleted.
;
; EVERYONE
; DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT
; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE
; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
; THIS SOFTWARE.
;
; Festival is buggy and ignores the current locale setting. It also has
; lowercase letters hardcoded to a-z, and similar fatal bugs
; w.r.t. localization. I try to work around them.
;
; FIXME: should create my own _accent_cart_tree and use it
;
; Remember the location registered for this voice under the key cz2_mbrola
; in voice-locations; voice_cz2_mbrola later appends "cz2/cz2" to it to
; build the MBROLA database path.
(set! cz2_mbrola_dir (cdr (assoc 'cz2_mbrola voice-locations)))
; Load the mbrola module before the voice itself is defined.
(require 'mbrola)
(defPhoneSet
  czech
  ;; Feature declarations: a feature name followed by its permitted values.
  (
   (vc + -)                ; + for vowels, - for consonants
   (vlng s l d a 0)        ; vowel length: short, long, diphthong, schwa
   (vheight 1 2 3 -)       ; vowel height: high, mid, low
   (vfront 1 2 3 -)        ; vowel frontness: front, mid, back
   (vrnd + -)              ; lip rounding
   (ctype s f a n l 0)     ; stop, fricative, affricative, nasal, liquid
   (cplace l a p b d v 0)  ; labial, alveolar, palatal, labio-dental,
                           ; dental, velar
   (cvox + -)              ; consonant voicing
  )
  ;; Phones, one per line, with values in the order the features are
  ;; declared above.  The author's own caveat applies: the feature values
  ;; were copied from the Spanish voice and are not set properly for Czech.
  (
   (_    - 0 - - - 0 0 -)
   (a    + l 3 1 - 0 0 -)
   (a:   + l 3 1 - 0 0 -)
   (b    - 0 - - + s l +)
   (ts   - 0 - - + s v +)
   (d    - 0 - - + s a +)
   ("d'" - 0 - - + s a +)
   (e    + l 2 1 - 0 0 -)
   (e:   + l 2 1 - 0 0 -)
   (f    - 0 - - + f b -)
   (g    - 0 - - + s p +)
   (h\   - 0 - - + a a -)
   (i    + l 1 1 - 0 0 -)
   (i:   + l 1 1 - 0 0 -)
   (j    - 0 - - + l a +)
   (k    - 0 - - + s p -)
   (l    - 0 - - + l d +)
   (m    - 0 - - + n l +)
   (n    - 0 - - + n d +)
   ("n'" - 0 - - + n d +)
   (o    + l 3 3 - 0 0 -)
   (o:   + l 3 3 - 0 0 -)
   (p    - 0 - - + s l -)
   (r    - 0 - - + l p +)
   ("r'" - 0 - - + l p +)
   (s    - 0 - - + f a +)
   (S    - 0 - - + f a +)
   (t    - 0 - - + s v +)
   ("t'" - 0 - - + s v +)
   (u    + l 1 3 + 0 0 -)
   (u:   + l 1 3 + 0 0 -)
   (v    - 0 - - + n l +)
   (x    - 0 - - + a a -)
   (z    - 0 - - + f a +)
   (Z    - 0 - - + f a +)
   (tS   - 0 - - + s v +)
  )
)
;; Per-phone duration table, installed as duration_ph_info by
;; voice_cz2_mbrola.  Every entry is (PHONE 0.0 VALUE).
;; NOTE(review): presumably the two numbers are the mean and standard
;; deviation expected by the ZScores duration method -- confirm.
;; The author's own caveat: these figures are also not right for Czech.
(set! czech_el_phone_data
  '(
    (_    0.0 0.250)
    (a    0.0 0.090)
    (a:   0.0 0.150)
    (b    0.0 0.065)
    (ts   0.0 0.065)
    (d    0.0 0.060)
    ("d'" 0.0 0.060)
    (e    0.0 0.090)
    (e:   0.0 0.150)
    (f    0.0 0.100)
    (g    0.0 0.080)
    (h\   0.0 0.135)
    (i    0.0 0.080)
    (i:   0.0 0.150)
    (j    0.0 0.100)
    (k    0.0 0.100)
    (l    0.0 0.080)
    (m    0.0 0.070)
    (n    0.0 0.080)
    ("n'" 0.0 0.080)
    (o    0.0 0.090)
    (o:   0.0 0.150)
    (p    0.0 0.100)
    (r    0.0 0.060)
    ("r'" 0.0 0.070)
    (s    0.0 0.110)
    (S    0.0 0.110)
    (t    0.0 0.085)
    ("t'" 0.0 0.085)
    (u    0.0 0.080)
    (u:   0.0 0.150)
    (v    0.0 0.100)
    (x    0.0 0.135)
    (z    0.0 0.110)
    (Z    0.0 0.110)
    (tS   0.0 0.110)
   ))
;; Duration CART tree, installed as duration_cart_tree by voice_cz2_mbrola.
;; Each node is (QUESTION YES-SUBTREE NO-SUBTREE); each leaf holds one number.
(set! czech_dur_tree
  '
  ((R:SylStructure.parent.R:Syllable.p.syl_break > 1 )    ;; clause initial
   ((R:SylStructure.parent.stress is 1)
    ((1.5))
    ((1.2)))
   ((R:SylStructure.parent.syl_break > 1)                 ;; clause final
    ((R:SylStructure.parent.stress is 1)
     ((2.0))
     ((1.5)))
    ;; anywhere else in the clause
    ((R:SylStructure.parent.stress is 1)
     ((1.2))
     ((1.0))))))
;; Declare "_" as the silence phone.
(PhoneSet.silences '(_))

;; Create the lexicon named "czech" and attach the czech phone set to it.
(lex.create "czech")
(lex.set.phoneset "czech")

;; Three explicit entries.  All share the syllables (p o) (tS i:) (t a tS);
;; the stress mark (1) sits on the first, second and third syllable in turn.
(lex.add.entry
 '("pocitac"  nil (((p o) 1) ((tS i:) 0) ((t a tS) 0))))
(lex.add.entry
 '("pocitaca" nil (((p o) 0) ((tS i:) 1) ((t a tS) 0))))
(lex.add.entry
 '("pocitacb" nil (((p o) 0) ((tS i:) 0) ((t a tS) 1))))
;; Letter-to-sound rules for Czech.
;;
;; Each rule has the form ( LEFT-CONTEXT [ LETTERS ] RIGHT-CONTEXT = PHONES ).
;; Rule order matters: context-dependent rules are listed before the plain
;; rule for the same letter.
;;
;; The accented letters below restore the Czech characters that had been
;; transliterated to ASCII look-alikes (e.g. "c with caron" had become a
;; plain "e"), which made the plain vowels e, i and o unreachable.
;; Upper- and lower-case accented letters are both listed because the
;; downcase done by czech_lts only covers a-z (see the file header).
;; NOTE(review): the voice was written for ISO-8859-2 text; confirm this file
;; is stored in an encoding in which every accented letter is one LTS symbol.
(lts.ruleset
 ;; Name of rule set
 czech_ruleset
 ;; Sets used in the rules
 (
  ;; Letters that soften a preceding d, t or n.
  (MEKCIDLO i í ě I Í Ě)
 )
 ;; Rules
 (
  ( [ a ] = a )
  ( [ á ] = a: )
  ( [ b ] = b )
  ( [ c h ] = x )
  ( [ c ] = ts )
  ( [ Č ] = tS )
  ( [ č ] = tS )
  ( [ d ] MEKCIDLO = "d'" )
  ( [ d ] = d )
  ( [ ď ] = "d'" )
  ( [ Ď ] = "d'" )
  ( [ e ] = e )
  ( [ é ] = e: )
  ( [ ě ] = e )
  ( [ Ě ] = e )
  ( [ f ] = f )
  ( [ g ] = g )
  ( [ h ] = h\ )
  ( [ i ] = i )
  ( [ í ] = i: )
  ( [ Í ] = i: )
  ( [ j ] = j )
  ( [ k ] = k )
  ( [ l ] = l )
  ( [ m ě ] = m "n'" e )
  ( [ m Ě ] = m "n'" e )
  ( [ m ] = m )
  ( [ n ] MEKCIDLO = "n'" )
  ( [ n ] = n )
  ( [ ň ] = "n'" )
  ( [ Ň ] = "n'" )
  ( [ o ] = o )
  ( [ ó ] = o: )
  ( [ Ó ] = o: )
  ( [ p ] ě = p j )
  ( [ p ] Ě = p j )
  ( [ p ] = p )
  ( [ q ] = k v )
  ( [ r ] = r )
  ( [ ř ] = "r'" )
  ( [ Ř ] = "r'" )
  ( [ s ] = s )
  ( [ š ] = S )
  ( [ Š ] = S )
  ( [ t ] MEKCIDLO = "t'" )
  ( [ t ] = t )
  ( [ Ť ] = "t'" )
  ;; Previously produced T, which is not a member of the czech phone set.
  ( [ ť ] = "t'" )
  ( [ u ] = u )
  ( [ ú ] = u: )
  ( [ Ú ] = u: )
  ( [ ů ] = u: )
  ( [ Ů ] = u: )
  ( [ v ] ě = v j )
  ( [ v ] Ě = v j )
  ( [ v ] = v )
  ( [ w ] = v )
  ( [ x ] = k s )
  ( [ y ] = i )
  ( [ ý ] = i: )
  ( [ Ý ] = i: )
  ( [ z ] = z )
  ( [ ž ] = Z )
  ( [ Ž ] = Z )
  ;; Digits are spelled out one at a time, each followed by a silence.
  ( [ 1 ] = j e d n a _ )
  ( [ 2 ] = d v a _ )
  ( [ 3 ] = t "r'" i _ )
  ( [ 4 ] = tS t i "r'" i _ )
  ( [ 5 ] = p j e t _ )
  ( [ 6 ] = S e s t _ )
  ( [ 7 ] = s e d m _ )
  ( [ 8 ] = o s m _ )
  ( [ 9 ] = d e v j e t _ )
  ( [ 0 ] = n u l a _ )
  ;; Punctuation that reaches the rules becomes a silence.
  ( [ "." ] = _ )
  ( [ "?" ] = _ )
  ( [ "-" ] = _ )
  ( [ ":" ] = _ )
  ( [ "," ] = _ )
  ( [ "_" ] = _ )
  ( [ ">" ] = _ )
  ( [ "<" ] = _ )
  ( [ "(" ] = _ )
  ( [ ")" ] = _ )
  ( [ "!" ] = _ )
 ))
(define (czech_lts word features)
  "(czech_lts WORD FEATURES)
Build a Czech pronunciation of WORD with the czech_ruleset letter-to-sound
rules.  FEATURES is accepted but not used.  Returns a lexical entry of the
form (WORD nil SYLLABLES)."
  (let* ((letters (downcase word))
         (phones (lts.apply letters 'czech_ruleset))
         (syllables (lex.syllabify.phstress phones)))
    (list word nil syllables)))
;; Register czech_lts as the letter-to-sound method of the current lexicon.
(lex.set.lts.method 'czech_lts)
;; Phrase-break CART tree driven purely by token-final punctuation.
;; Leaves are BB, B or NB; each node is (QUESTION YES-SUBTREE NO-SUBTREE).
(set! czech_phrase_cart_tree
  '
  ((lisp_token_end_punc in ("?" "." ":"))
   ((BB))
   ((lisp_token_end_punc in ("'" "\"" "," ";"))
    ((B))
    ((n.name is 0)    ;; end of utterance
     ((BB))
     ((NB))))))
(define (czech_token_to_words token name)
  "(czech_token_to_words TOKEN NAME)
Return NAME unchanged as the only word for TOKEN.  This works around
core Festival's bugs with iso-8859-2 text."
  (cons name nil))
;; Accent CART tree for Czech.
;;
;; Each node is (QUESTION YES-SUBTREE NO-SUBTREE).  The ((Accented)) leaves
;; had been commented out, which left two nodes with a question and only
;; one child, i.e. a malformed tree.  They are restored here so every node
;; has both branches.
;;
;; This tree is not selected by voice_cz2_mbrola, which installs
;; f2b_int_accent_cart_tree instead (see the FIXME in the file header).
(set! czech_accent_cart_tree
  '
  ((R:SylStructure.parent.gpos is content)
   ((stress is 1)
    ((Accented))
    ((position_type is single)
     ((Accented))
     ((NONE))))
   ((NONE))))
(define (voice_cz2_mbrola)
  "(voice_cz2_mbrola)
Make the male Czech MBROLA voice (cz2 database) the current voice."
  (voice_reset)

  ;; Language and phone inventory
  (Parameter.set 'Language 'czech)
  (Parameter.set 'PhoneSet 'czech)
  (PhoneSet.select 'czech)

  ;; Tokens are passed through as words untouched
  (set! token_to_words czech_token_to_words)

  ;; Part-of-speech tagging is switched off.  The pos module is deliberately
  ;; not required, and pos_lex_name is deliberately left alone: the original
  ;; author noted that setting it to nil makes the voice say nothing.
  (set! pos_supported nil)
  (set! pos_ngram_name 'english_pos_ngram)

  ;; Lexicon
  (lex.select "czech")

  ;; Phrase breaks come from the punctuation-driven CART tree
  (require 'phrase)
  (set! phrase_cart_tree czech_phrase_cart_tree)
  (Parameter.set 'Phrase_Method 'cart_tree)

  ;; Accents and tones: the f2b trees are used on purpose; the original
  ;; author found they seemed to give better results than the czech tree.
  (require 'tobi)
  (set! int_accent_cart_tree f2b_int_accent_cart_tree)
  (set! int_tone_cart_tree f2b_int_tone_cart_tree)
  (set! postlex_vowel_reduce_cart_tree
        postlex_vowel_reduce_cart_data)

  ;; F0: simple intonation model around a fixed mean
  (set! int_simple_params
        '((f0_mean 120) (f0_std 15)))
  (Parameter.set 'Int_Method 'Simple)
  (Parameter.set 'Int_Target_Method 'Simple)

  ;; Durations: CART tree plus the per-phone table
  (set! duration_ph_info czech_el_phone_data)
  (set! duration_cart_tree czech_dur_tree)
  (Parameter.set 'Duration_Method Duration_Tree_ZScores)
  (Parameter.set 'Duration_Stretch 1.1)

  ;; Waveform generation through the external mbrola program
  (set! us_abs_offset 0.0)
  (set! us_rel_offset 0.0)
  (set! window_factor 1.0)
  (set! us_gain 0.9)
  (Parameter.set 'Synth_Method 'MBROLA_Synth)
  (set! mbrola_progname "mbrola")
  ;; Database path is the voice location followed by cz2/cz2; the format
  ;; string deliberately ends with a space.
  (set! mbrola_database
        (format nil "%s%s " cz2_mbrola_dir "cz2/cz2"))

  ;; Record the selection; this is also the function's return value.
  (set! current-voice 'cz2_mbrola))
;; Announce the voice and its descriptive properties to Festival.
(proclaim_voice
 'cz2_mbrola
 '((language czech)
   (gender male)
   (dialect none)
   (description "This is test czech voice using mbrola.")))

;; Mark this file as loaded so (require 'cz2_mbrola) does not load it twice.
(provide 'cz2_mbrola)
--
I'm pavel na ucw.cz. "In my country we have almost anarchy and I don't care."
Panos Katsaloulis describing me w.r.t. patents at discuss na linmodems.org
Další informace o konferenci Linux