Ceska synteza hlasu znovu, tentokrat festival

Pavel Machek pavel na suse.cz
Sobota Březen 25 23:19:03 CET 2000


Ahoj!

Hral jsem si s festivalem... Mluvi to, ale je problem s hackovanymi
pismeny, protoze festival nevola setlocale() :-((((.

Kdyby nekdo chtel pokracovat... nebo vidi do lingvistiky a chtel to
opravit, tady to je.

(Je potreba funkcni festival, mbrola a cz1 databaze.)

								Pavel

(require 'mbrola)

;; Phone set for the Czech voice.
;; The first list declares eight phone features and their allowed values;
;; the second list gives one row per phone, with the feature values in the
;; same order as the declarations.
;; NOTE(review): the phone names look like the SAMPA-style symbols of the
;; MBROLA cz1 database -- confirm against the cz1 phone inventory.
(defPhoneSet
  czech
  ;;;  Phone Features
  (;; vowel or consonant
   (vc + -)  
   ;; vowel length: short long diphthong schwa
   (vlng s l d a 0)
   ;; vowel height: high mid low
   (vheight 1 2 3 -)
   ;; vowel frontness: front mid back
   (vfront 1 2 3 -)
   ;; lip rounding
   (vrnd + -)
   ;; consonant type: stop fricative affricate nasal liquid
   (ctype s f a n l 0)
   ;; place of articulation: labial alveolar palatal labio-dental
   ;;                         dental velar
   (cplace l a p b d v 0)
   ;; consonant voicing
   (cvox + -)
   )
  ;; Phone set members.
  ;; The feature values are NOT set properly: they were copied from a
  ;; Spanish phone set.  For example short and long vowels (a / a:) carry
  ;; identical features, and every consonant has vrnd set to +.
  ;; Columns: phone vc vlng vheight vfront vrnd ctype cplace cvox
  (
   (_   - 0 - - - 0 0 -)
   (a   + l 3 1 - 0 0 -)
   (a:  + l 3 1 - 0 0 -)
   (b   - 0 - - + s l +)
   (c   - 0 - - + s v +)
   (d   - 0 - - + s a +)
   (D   - 0 - - + s a +)
   (e   + l 2 1 - 0 0 -)
   (e:  + l 2 1 - 0 0 -)
   (f   - 0 - - + f b -)
   (g   - 0 - - + s p +)
   (h   - 0 - - + a a -)
   (i   + l 1 1 - 0 0 -)
   (i:  + l 1 1 - 0 0 -)
   (j   - 0 - - + l a +)
   (k   - 0 - - + s p -)
   (l   - 0 - - + l d +)
   (m   - 0 - - + n l +)
   (n   - 0 - - + n d +)
   (N   - 0 - - + n d +)
   (o   + l 3 3 - 0 0 -)
   (o:  + l 3 3 - 0 0 -)
   (p   - 0 - - + s l -)
   (r   - 0 - - + l p +)
   (r\  - 0 - - + l p +)
   (s   - 0 - - + f a +)
   (S   - 0 - - + f a +)
   (t   - 0 - - + s v +)
   (T   - 0 - - + s v +)
   (u   + l 1 3 + 0 0 -)
   (u:  + l 1 3 + 0 0 -)
   (v   - 0 - - + n l +)
   (x   - 0 - - + a a -)
   (z   - 0 - - + f a +)
   (Z   - 0 - - + f a +)
   (tS  - 0 - - + s v +)
  )
)

;; Per-phone duration data, one row per phone of the czech phone set.
;; voice_cz1_mbrola installs this table as duration_ph_info and selects
;; the Duration_Tree_ZScores duration method.
;; NOTE(review): rows are presumably (phone mean stddev); with a mean of 0.0
;; the third column would then act as the base duration in seconds that the
;; duration tree's value scales -- confirm against the Festival manual.
(set! czech_el_phone_data
; The values below are placeholders and are known not to be right for Czech.
'(
   (_ 0.0 0.250)
   (a 0.0 0.090)
   (a: 0.0 0.150)
   (b 0.0 0.065)
   (c 0.0 0.065)
   (d 0.0 0.060)
   (D 0.0 0.060)
   (e 0.0 0.090)
   (e: 0.0 0.150)
   (f 0.0 0.100)
   (g 0.0 0.080)
   (h 0.0 0.135)
   (i 0.0 0.080)
   (i: 0.0 0.150)
   (j 0.0 0.100)
   (k 0.0 0.100)
   (l 0.0 0.080)
   (m 0.0 0.070)
   (n 0.0 0.080)
   (N 0.0 0.080)
   (o 0.0 0.090)
   (o: 0.0 0.150)
   (p 0.0 0.100)
   (r 0.0 0.060)
   (r\ 0.0 0.070)
   (s 0.0 0.110)
   (S 0.0 0.110)
   (t 0.0 0.085)
   (T 0.0 0.085)
   (u 0.0 0.080)   
   (u: 0.0 0.150)
   (v 0.0 0.100)
   (x 0.0 0.135)
   (z 0.0 0.110)
   (Z 0.0 0.110)
   (tS 0.0 0.110)
))

;; Duration decision tree, installed as duration_cart_tree by
;; voice_cz1_mbrola.  Each node is (question yes-subtree no-subtree); the
;; leaves hold a single number chosen from the position of the segment's
;; syllable (clause initial / clause final / elsewhere) and whether that
;; syllable is stressed.
;; NOTE(review): the leaf value is presumably the factor applied to the
;; per-phone data in czech_el_phone_data -- confirm.
(set! czech_dur_tree
 '
   ((R:SylStructure.parent.R:Syllable.p.syl_break > 1 ) ;; clause initial
    ((R:SylStructure.parent.stress is 1)
     ((1.5))    ;; stressed
     ((1.2)))   ;; unstressed
    ((R:SylStructure.parent.syl_break > 1)   ;; clause final
     ((R:SylStructure.parent.stress is 1)
      ((2.0))   ;; stressed
      ((1.5)))  ;; unstressed
     ;; neither clause initial nor clause final
     ((R:SylStructure.parent.stress is 1)
      ((1.2))   ;; stressed
      ((1.0))))))  ;; unstressed

;; Declare _ as the silence phone of the phone set defined above.
(PhoneSet.silences '(_))

;; Create the "czech" lexicon and bind it to the czech phone set.
(lex.create "czech")
(lex.set.phoneset "czech")

;; Single hand-written entry: "pocitac" as three syllables (po - tSi: - tatS),
;; with the first syllable marked 1 and the others 0.
(lex.add.entry
  '("pocitac" nil ( ((p o) 1) ((tS i:) 0)  ((t a tS) 0) )))

;; Letter-to-sound rules for Czech: one context-free rule per letter,
;; mapping it to phones of the czech phone set.  Rules are tried in order
;; and the first match wins, so every letter must appear only once.
;;
;; The accented letters had been mangled by an encoding round-trip
;; (c-caron had become "e", z-caron "3/4", t-caron "<<", ...), which made
;; plain e, i, o and u map to the wrong phones.  They are restored here.
;; NOTE(review): each accented letter must reach the rules as ONE symbol,
;; so this file is presumably meant to be stored in a single-byte encoding
;; such as ISO-8859-2 -- confirm for the Festival build in use.
(lts.ruleset
;  Name of rule set
 czech_ruleset
;  Sets used in the rules
(
  (LNS l n s )
)
;  Rules
(
 ( [ a ] = a )
 ( [ á ] = a: )
 ( [ b ] = b )
 ( [ c ] = c )
 ( [ č ] = tS )
 ( [ d ] = d )
 ( [ ď ] = D )
 ( [ e ] = e )
 ( [ é ] = e: )
; Left disabled as in the original; while it is off, a word containing
; this letter has no matching rule.
; ( [ ě ] = j e )
 ( [ f ] = f )
 ( [ g ] = g )
 ( [ h ] = h )
 ( [ i ] = i )
 ( [ í ] = i: )
 ( [ j ] = j )
 ( [ k ] = k )
 ( [ l ] = l )
 ( [ m ] = m )
 ( [ n ] = n )
 ( [ ň ] = N )
 ( [ o ] = o )
 ( [ ó ] = o: )
 ( [ p ] = p )
; The phone set has no phone named q; Czech reads the letter q as "kv".
 ( [ q ] = k v )
 ( [ r ] = r )
 ( [ ř ] = r\ )
 ( [ s ] = s )
 ( [ š ] = S )
 ( [ t ] = t )
 ( [ ť ] = T )
 ( [ u ] = u )
 ( [ ú ] = u: )
 ( [ ů ] = u: )
 ( [ v ] = v )
 ( [ w ] = v )
 ( [ x ] = k s )
 ( [ y ] = i )
 ( [ ý ] = i: )
 ( [ z ] = z )
 ( [ ž ] = Z )
; Punctuation inside a word becomes silence.
 ( [ "." ] = _ )
 ( [ "?" ] = _ )
 ( [ "-" ] = _ )
 ( [ ":" ] = _ )
))

(define (czech_lts word features)
  "(czech_lts WORD FEATURES)
Build a Czech pronunciation of WORD with the czech_ruleset letter to sound
rules and return it as a lexical entry (WORD nil SYLLABLES).  FEATURES is
accepted but not used."
  (let* ((lowered (downcase word))
         ;; flat list of phones produced by the letter to sound rules
         (phones (lts.apply lowered 'czech_ruleset))
         ;; the same phones grouped into syllables
         (syllables (lex.syllabify.phstress phones)))
    (list word nil syllables)))

;; Use czech_lts (defined above) as the letter to sound method of the
;; lexicon created above.
(lex.set.lts.method 'czech_lts)

;; Alternative left disabled: pass the rule set itself instead of a function.
;(lex.set.lts.ruleset 'czech_ruleset)

;; Phrase break decision tree, installed as phrase_cart_tree by
;; voice_cz1_mbrola.  The decision is driven purely by the punctuation
;; that ends the token:
;;   ? . :        -> BB
;;   ' " , ;      -> B
;;   no next word -> BB
;;   otherwise    -> NB
;; NOTE(review): BB / B / NB are presumably big break / break / no break.
(set! czech_phrase_cart_tree
'
((lisp_token_end_punc in ("?" "." ":"))
  ((BB))
  ((lisp_token_end_punc in ("'" "\"" "," ";"))
   ((B))
   ((n.name is 0)  ;; end of utterance
    ((BB))
    ((NB))))))

(define (voice_cz1_mbrola)
"(voice_cz1_mbrola)
 Set up the current voice to be a female Czech voice using mbrola.
 Only the phone set, lexicon, phrasing tree and duration data are Czech;
 tokenization, POS tagging, accents and F0 models are borrowed from the
 English voices."
  ;; Phone set
  (voice_reset)
  (Parameter.set 'Language 'czech)
  (Parameter.set 'PhoneSet 'czech)
  (PhoneSet.select 'czech)
  ;; Tokenization rules (English ones are reused)
  (set! token_to_words english_token_to_words)
  ;; POS tagger (English models are reused)
  (require 'pos)
  (set! pos_lex_name "english_poslex")
  (set! pos_ngram_name 'english_pos_ngram)
  (set! pos_supported t)
  (set! guess_pos english_guess_pos)   ;; need this for accents
  ;; Lexicon selection
  (lex.select "czech")
  ;; NOTE(review): postlex_apos_s_check looks like an English-specific
  ;; post-lexical rule -- confirm it is wanted for Czech.
  (set! postlex_rules_hooks (list postlex_apos_s_check))
  ;; Phrase prediction: punctuation-driven tree defined in this file
  (require 'phrase)

  (set! phrase_cart_tree czech_phrase_cart_tree)
  (Parameter.set 'Phrase_Method 'cart_tree)

  ;; Accent and tone prediction (f2b trees are reused)
  (require 'tobi)
  (set! int_tone_cart_tree f2b_int_tone_cart_tree)
  (set! int_accent_cart_tree f2b_int_accent_cart_tree)

  (set! postlex_vowel_reduce_cart_tree 
	postlex_vowel_reduce_cart_data)
  ;; F0 prediction (f2b linear regression models are reused)
  (require 'f2bf0lr)
  (set! f0_lr_start f2b_f0_lr_start)
  (set! f0_lr_mid f2b_f0_lr_mid)
  (set! f0_lr_end f2b_f0_lr_end)
  (Parameter.set 'Int_Method Intonation_Tree)
  ;; NOTE(review): these F0 statistics look copied from another voice;
  ;; confirm that they suit the cz1 speaker.
  (set! int_lr_params
	'((target_f0_mean 105) (target_f0_std 15)
	  (model_f0_mean 170) (model_f0_std 34)))
  (Parameter.set 'Int_Target_Method Int_Targets_LR)
  ;; Duration prediction: tree and per-phone data defined in this file
;  (require 'kddurtreeZ)
  (set! duration_cart_tree czech_dur_tree)
  (set! duration_ph_info czech_el_phone_data)
  (Parameter.set 'Duration_Method Duration_Tree_ZScores)
  (Parameter.set 'Duration_Stretch 1.1)
  ;; Waveform synthesizer: MBROLA.  The disabled lines below are leftovers
  ;; from the ked diphone voice this function was adapted from.
  ;; This assigned the diphone names from their context (_ $ etc)
;  (set! UniSyn_module_hooks (list ked_diphone_const_clusters ))
  ;; NOTE(review): the us_* / window_factor settings are presumably UniSyn
  ;; parameters that MBROLA synthesis does not read -- confirm.
  (set! us_abs_offset 0.0)
  (set! window_factor 1.0)
  (set! us_rel_offset 0.0)
  (set! us_gain 0.9)

  (Parameter.set 'Synth_Method 'MBROLA_Synth)
;  (Parameter.set 'us_sigpr ked_sigpr)
;  (us_db_select ked_db_name)

  ;; Record which voice is now active.
  (set! current-voice 'cz1_mbrola)
)

;; Register the voice and its descriptive attributes with Festival.
(proclaim_voice
 'cz1_mbrola
 '((language czech)
   (gender female)
   (dialect none)
   (description
    "This is test czech voice using mbrola.")))

;; Mark this file as loaded so that (require 'cz1_mbrola) succeeds.
(provide 'cz1_mbrola)

-- 
I'm pavel na ucw.cz. "In my country we have almost anarchy and I don't care."
Panos Katsaloulis describing me w.r.t. patents me at discuss na linmodems.org


Další informace o konferenci Linux