# -*- coding: utf-8 -*-

# finnish-prosody.script

# Copyright (C) 2004  Lauri Karttunen
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.

# This script maps Finnish words into a prosodic representation
# that splits the words into syllables, adds primary and secondary
# stress marks, and organizes the syllables into feet. For example,
# the input "ilmoittautumisesta" 'registering' (Sg. Elative) becomes
#
#     (íl.moit).(tàu.tu.mi).(sès.ta)
#
# where periods mark syllable boundaries, the acute accent on the first
# vowel indicates primary stress, the grave accents mark secondary
# stress, and feet are enclosed in parentheses.

# Note that this script is encoded in utf-8. To run it,
# you should start xfst in utf-8 mode:
#
#     xfst -utf8 -l finnish-prosody.script

# The version of xfst that comes with the Book is not utf8-enabled.
# To check about the availability of a utf8-enabled version of xfst,
# please write to karttunen@parc.com.


# The descriptive generalizations come from Paul Kiparsky's paper
# "Finnish Noun Inflection" in Generative Approaches to Finnic and
# Saami Linguistics, Diane Nelson and Satu Manninen (eds.), pp.109-161,
# CSLI Publications, 2003. Kiparsky writes (p. 111): "Speaking for the
# moment in derivational terms, Finnish stress is assigned by laying down
# binary feet from left to right. Final syllables are not stressed if
# they are light, and only optionally if they are heavy. An important
# phenomenon is the LH` effect: when the left-to-right scansion
# encounters a Light-Heavy sequence, the light syllable is skipped
# with the result that a ternary foot is formed. At the edge of a
# word the LH` effect is superseded by the inviolable requirement that
# a word must have initial stress."

# For an OT account of the same generalizations, see the Finnish OT Prosody
# script.

################################## DATA ##################################

# Test vocabulary: the union of the Finnish word forms that the script
# runs through FinnProsody. Each word is written as a {...} symbol
# sequence and is listed exactly once (union is idempotent, so a
# repeated entry would add nothing).

define FinnWords {kalastelet} | {kalasteleminen} | {ilmoittautuminen} |
                 {järjestelemättömyydestänsä} | {kalastelemme} |
                 {ilmoittautumisesta} | {järjestelmällisyydelläni} |
                 {järjestelmällistämätöntä} | {voimisteluttelemasta} |
                 {opiskelija} | {opettamassa} |
                 {strukturalismi} | {onnittelemanikin} | {mäki} |
                 {perijä} | {repeämä} | {ergonomia} | {puhelimellani} |
                 {matematiikka} | {puhelimistani} | {rakastajattariansa} |
                 {kuningas} | {kainostelijat} | {ravintolat} |
                 {merkonomin} ;


######################### BASIC DEFINITIONS #############################

define HighV [i | u | y];                          # High vowels
define MidV  [e | ö | o];                          # Mid vowels
define LowV  [ä | a];                              # Low vowels
define USV   [LowV | MidV | HighV];                # Any unstressed (plain) vowel

define C [ b | c | d | f | g |
           h | j | k | l | m |
           n | p | q | r | s |
           t | v | w | x | z ];                    # Consonant

# Stressed vowels. A quoted form such as "ä́" denotes one symbol.
define MSV [ á | é | í | ó |
             ú | ý | "ä́" | "ö́" ];                  # Vowel with main (acute) stress
define SSV [ à | è | ì | ò |
             ù | "ỳ" | "ä̀" | "ö̀" ];                # Vowel with secondary (grave) stress
define SV  [SSV | MSV];                            # Stressed vowel of either kind
define V   [SV | USV];                             # Any vowel, stressed or not

define P  [C | V];                                 # Phone: any consonant or vowel
define B  [.#. | [\P]+];                           # Boundary: word edge or a run of non-phones
define E  [ "." | .#. ];                           # Edge: syllable boundary or word edge
define SB [~$"." "." ~$"."];                       # Exactly one syllable boundary

# Syllable shapes. A syllable never contains "." because P covers only
# consonants and vowels.
define Light [C* V];                               # Light: optional onset, then one vowel
define Heavy [Light P+];                           # Heavy: a light syllable plus more phones

define S     [Light | Heavy];                      # Any syllable
define SS    [$SV & S];                            # Syllable containing a stressed vowel
define US    [S - $SV];                            # Syllable with no stressed vowel
define MSS   [$MSV & S];                           # Syllable carrying the main stress

define BF    [S "." S];                            # Binary foot: two syllables

######################### RULES FOR PROSODY #############################

# Insert a syllable boundary between two adjacent vowels that do not
# form a diphthong. [. .] is the single-epenthesis form of the empty
# string, so exactly one "." is inserted per matching position.

define MarkNonDiphthongs [ [. .] -> "." || [MidV | HighV] _ LowV, # i.a, e.a, u.a, ...
                                           i _ [o | ö],           # i.o, i.ö
                                           u _ [e | ö],           # u.e, u.ö
                                           y _ [e | o] ];         # y.e, y.o

# The general syllabification rule has exceptions. In particular, loan
# words such as ate.isti 'atheist' must be partially syllabified in the
# lexicon.

# Close a syllable with "." wherever the following material begins with
# a consonant plus a vowel. @-> is the left-to-right, longest-match
# replace operator, so each syllable extends as far as it can while still
# leaving one consonant as the onset of the next syllable.
define Syllabify [C* V+ C*] @-> ... "." || _ [C V] ;

# Build ternary feet (the light-heavy effect quoted from Kiparsky in the
# header): wrap a binary foot plus a following light syllable in
# parentheses. With "//" the left context is checked on the output side
# and the right context on the input side. The left context requires
# that the match be preceded by the word edge or by an already closed
# foot ("). "), optionally followed by any number of syllable pairs
# ([BF "."]*). The right context requires either a heavy syllable that
# is itself followed by another syllable, or the end of the word.

define TernaryFeet BF "." Light @-> "(" ... ")"
                                    // [{).} | .#.] [BF "."]*  _
                                       ["." Heavy "." S ] | .#. ;

# Parse the remaining unfooted syllables into binary feet, left to right.
# A candidate pair must have a word edge or a syllable boundary on each
# side; a pair inside a ternary foot touches a parenthesis on one side
# and is therefore skipped.

define BinaryFeet BF @-> "(" ... ")" || [.#. | "."] _ [.#. | "."] ;

# Primary stress: put an acute accent on the first vowel of the foot
# that opens the word. The context demands a word-initial "(", so a
# word that does not begin with a foot is left untouched by this rule.

define MainStress a -> á,    e -> é,
                  i -> í,    o -> ó,
                  u -> ú,    y -> ý,
                  ä -> "ä́",  ö -> "ö́"
               || .#. "(" C* _ ;

# Secondary stress: put a grave accent on the first vowel of every foot
# whose first vowel is still a plain vowel. In FinnProsody this rule is
# composed after MainStress, so only the non-initial feet are affected.

define SecondaryStress a -> à,    e -> è,
                       i -> ì,    o -> ò,
                       u -> ù,    y -> "ỳ",
                       ä -> "ä̀",  ö -> "ö̀"
                    || "(" C* _ ;

# Assign an optional secondary stress to an unfooted final syllable
# if it is heavy, i.e. if its first vowel is followed by one or more
# phones (P+, matching the definition of Heavy) before the end of the
# word. This covers final syllables such as CVC, CVV, CVVC and CVCC.
# A footed final syllable ends in ")" and therefore never matches.

define OptFinalStress a (->) à, e (->) è, i (->) ì,
                      o (->) ò, u (->) ù, y (->) "ỳ",
                      ä (->) "ä̀", ö (->) "ö̀" || "." C* _ P+ .#.;

# The complete prosody grammar: the rules composed in their order of
# application. Syllable boundaries come first, then foot structure
# (ternary feet before binary feet), then stress (main stress before
# secondary stress, with the optional final stress last).

define FinnProsody [ MarkNonDiphthongs .o.
                     Syllabify         .o.
                     TernaryFeet       .o.
                     BinaryFeet        .o.
                     MainStress        .o.
                     SecondaryStress   .o.
                     OptFinalStress ];

# Print a progress message.
echo ### Computing [FinnWords .o. FinnProsody]

# Compile the word list composed with the prosody grammar and push the
# resulting network onto the stack.
regex FinnWords .o. FinnProsody;

# List the lower-side strings of that network, i.e. the syllabified,
# footed and stressed forms of the input words.
print lower-words

################################ END ######################################

# Here is the output produced by the script:

# (ón.nit).(tè.le).(mà.ni).kìn
# (ón.nit).(tè.le).(mà.ni).kin
# (ó.pet.ta).(màs.sa)
# (ó.pis).(kè.li.ja)
# (ér.go).(nò.mi.a)
# (íl.moit).(tàu.tu).(mì.nen)
# (íl.moit).(tàu.tu.mi).(sès.ta)
# (vói.mis.te).(lùt.te.le).(màs.ta)
# (strúk.tu.ra).(lìs.mi)
# (rá.kas.ta).(jàt.ta.ri).(àn.sa)
# (rá.vin).(tò.lat)
# (ré.pe).(ä̀.mä)
# (pé.ri.jä)
# (pú.he.li).(mèl.la.ni)
# (pú.he.li).(mìs.ta.ni)
# (mä́.ki)
# (má.te.ma).(tìik.ka)
# (mér.ko).(nò.min)
# (kái.nos).(tè.li).jàt
# (kái.nos).(tè.li).jat
# (ká.las).(tè.let)
# (ká.las).(tè.le).(mì.nen)
# (ká.las.te).(lèm.me)
# (kú.nin).gàs
# (kú.nin).gas
# (jä́r.jes).(tèl.mäl).(lìs.tä.mä).(tö̀n.tä)
# (jä́r.jes).(tèl.mäl.li).(sỳy.del).(lä̀.ni)
# (jä́r.jes).(tè.le).(mä̀t.tö).(mỳy.des).(tä̀n.sä)