# -*- coding: iso-8859-1 -*-

# Lingala script

# Copyright (C) 2003  Lauri Karttunen
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.

# This script generates a lexical analyzer for Lingala, a Bantu language
# spoken along the Zaire river. The data and the analysis come from
# Gregory Stump's book Inflectional Morphology. A Theory of Paradigm
# Structure. Cambridge University Press. Cambridge, UK. 2001. The
# script comes from the paper Computing with Realizational Morphology
# by Lauri Karttunen that appeared in Computational Linguistics and
# Intelligent Text Processing. Alexander Gelbukh (ed.), Lecture Notes
# in Computer Science, Volume 2588, pages 205-216, Springer Verlag,
# Heidelberg. 2003. Please consult the paper for explanations for what
# the script does and why. Thanks to Prof. Michael Gasser for a list of
# Lingala verb stems.

########################################################################

# Verb stems known to the analyzer, one per line with an English gloss.
# The alternatives are listed alphabetically (ignoring accents); since
# union is order-independent, the order has no effect on the network.

define Stems [
      {béb}          # deteriorate
    | {bet}          # hit
    | {bomb}         # hide
    | {bóndel}       # plead
    | {bóngol}       # change
    | {bót}          # be born
    | {búk}          # break
    | {fung}         # open
    | {kabol}        # divide
    | {kang}         # attach
    | {kom}          # write
    | {kóm}          # arrive
    | {kund}         # bury
    | {lakis}        # teach
    | {lí}           # eat
    | {lob}          # speak
    | {luk}          # search
    | {ndim}         # agree
    | {palangan}     # disperse
    | {pangwis}      # wipe
    | {sál}          # make
    | {sepel}        # enjoy
    | {sómb}         # buy
    | {tál}          # look
    | {tambwis}      # drive
    | {támbol}       # walk
    | {tataban}      # be surprised
    | {tún}          # ask
    | {yébis}        # yell
] ;

# Surface alphabet: the 26 plain Latin letters plus the five
# acute-accented vowels. Cleanup deletes every symbol outside this set.
define L [ a | á | e | é | i | í | o | ó | u | ú |
           b | c | d | f | g | h | j | k | l | m | n |
           p | q | r | s | t | v | w | x | y | z ] ;

# Person and number features, each written as Name ":" Value.
define Person1 Per ":" 1 ;
define Person2 Per ":" 2 ;
define Person3 Per ":" 3 ;
define Number  Num ":" [ Sg | Pl ] ;
# define Gender [Gen ":" [[1["0"|1]|2|3|4|5|6|7|8|9] "." [1[4|5]|[1|9|10]a]]];

# Gender values available to the third person. A value is either two
# labels joined by "." or a single label (10, 15). Values sharing the
# same second label are factored together.
define Gender3 Gen ":" [ [ 1 | 1a ] "." 2 |
                         3 "." 4 |
                         [ 5 | 11 | 14 ] "." 6 |
                         7 "." 8 |
                         9a "." 10a |
                         10 |
                         15 ] ;
 
# define Reflexive [No | Yes];

# Passive: -am-
# Causative: -is-
# Reciprocal: -an-
# Applicative: -el-
# Reversive: -ol-
# Iterative: reduplication of the root or root plus other morpheme;
#  the two copies are separated by -a-; the second copy of the root
#  has low tone on the first syllable
# That is, the order of the morphemes is: REV, CAUS, PASS, APPL, RECIP

# alobaloba 'he/she speak SP ITER'
# alíalia   'he eat SP ITER'

# Tense values are nested pairs: Tns ":" Category ":" Subtype.
# MostRem occurs under both Past and Fut; the category prefix keeps the
# two apart.
define PastTense Past ":" [ Rec | Hist | MoreRem | MostRem ] ;
define PresTense Pres ":" [ Cont | Hab1 | Hab2 ] ;
define FutTense  Fut  ":" [ Immed | MostRem ] ;

# define Reflexive [Refl ":" [No|Yes]];
define Tense Tns ":" [ PastTense | PresTense | FutTense ] ;
# define Polarity [Pol ":" [Pos|Neg]];

# Agreement bundle. Third person carries person, number and gender;
# first and second person carry person and number only. The fields are
# separated by a single space symbol.
define Agreement [ Person3 " " Number " " Gender3 |
                   [ Person1 | Person2 ] " " Number ] ;

# Gender 15 is only known to have a singular subject marker: there is no
# plural subject marker and there are no object markers for it (possibly
# missing information). Those combinations are therefore excluded.
#
# Agr uses the placeholder function name Func; the rules of referral
# substitute Sub or Obj for it.

define SubjAgr [ Sub ":" Agreement ] & ~[ $Pl & $15 ] ;
define ObjAgr  [ Obj ":" Agreement ] & ~[ $15 ] ;
define Agr     [ Func ":" Agreement ] ;

#define Features [SubjAgr " " ObjAgr " "  Tense " " Polarity];

# Complete feature bundle of a verb form: subject agreement, object
# agreement and tense, separated by single space symbols.
define Features SubjAgr " " ObjAgr " " Tense ;

# Lexical form of a verb: "<" stem "," features ">". The affix rules use
# "<" and "," as anchors for prefixes and suffixes respectively.
define VerbLex [ "<" Stems "," Features ">" ] ;

# Singular third-person markers common to subjects and objects. They are
# stated over the placeholder bundle Agr (function name Func). Each rule
# inserts its marker directly after "<" whenever a matching Agr bundle
# occurs somewhere to the right.

define RAgr1 [. .] -> {mo} || "<" _ $[ Agr & $Person3 & $Sg & $4 ] ;
define RAgr2 [. .] -> {li} || "<" _ $[ Agr & $Person3 & $Sg & $5 ] ;
define RAgr3 [. .] -> e    || "<" _ $[ Agr & $Person3 & $Sg & $[ 9a "." 10a ] ] ;
define RAgr4 [. .] -> {lo} || "<" _ $[ Agr & $Person3 & $Sg & $[ 10 | 11 ] ] ;
define RAgr5 [. .] -> {bo} || "<" _ $[ Agr & $Person3 & $Sg & $14 ] ;

# Plural markers common to subjects and objects: second person plural and
# the third-person plural genders. As with the singular ones, they are
# stated over the placeholder bundle Agr and insert directly after "<".

define RAgr6  [. .] -> {bo} || "<" _ $[ Agr & $Person2 & $Pl ] ;
define RAgr7  [. .] -> {ba} || "<" _ $[ Agr & $Person3 & $Pl & $2 ] ;
define RAgr8  [. .] -> {mi} || "<" _ $[ Agr & $Person3 & $Pl & $4 ] ;
define RAgr9  [. .] -> {ma} || "<" _ $[ Agr & $Person3 & $Pl & $[ 5 | 6 ] ] ;
define RAgr10 [. .] -> {bi} || "<" _ $[ Agr & $Person3 & $Pl & $7 ] ;
define RAgr11 [. .] -> i    || "<" _ $[ Agr & $Person3 & $Pl & $[ 9a | 10 ] ] ;

# Rule Block 1

# Subject markers specific to the singular: first person, second person,
# and third person with a gender containing 2, 7 or 15. Each marker is
# inserted directly after "<".

define R101 [. .] -> {na} || "<" _ $[ SubjAgr & $Person1 & $Sg ] ;
define R102 [. .] -> o    || "<" _ $[ SubjAgr & $Person2 & $Sg ] ;
define R103 [. .] -> a    || "<" _ $[ SubjAgr & $Person3 & $Sg & $2 ] ;
define R104 [. .] -> e    || "<" _ $[ SubjAgr & $Person3 & $Sg & $7 ] ;
define R105 [. .] -> {ei} || "<" _ $[ SubjAgr & $Person3 & $Sg & $15 ] ;

# Rules of referral for singular subject markers.
# Each rule is the corresponding common singular rule (RAgr1-RAgr5) with
# the placeholder symbol Func replaced by Sub, so that it is triggered by
# the subject agreement features.

define R106 `[RAgr1, Func, Sub];
define R107 `[RAgr2, Func, Sub];
define R108 `[RAgr3, Func, Sub];
define R109 `[RAgr4, Func, Sub];
define R110 `[RAgr5, Func, Sub];

# Subject marker specific to the plural: first person plural inserts
# {to} directly after "<".

define R111 [. .] -> {to} || "<" _ $[ SubjAgr & $Person1 & $Pl ] ;

# Rules of referral for plural subject markers.
# Each rule is the corresponding common plural rule (RAgr6-RAgr11) with
# the placeholder symbol Func replaced by Sub, so that it is triggered by
# the subject agreement features.

define R112 `[RAgr6, Func, Sub];
define R113 `[RAgr7, Func, Sub];
define R114 `[RAgr8, Func, Sub];
define R115 `[RAgr9, Func, Sub];
define R116 `[RAgr10, Func, Sub];
define R117 `[RAgr11, Func, Sub];

# Future marker: the immediate future inserts {ko} directly after "<".
define R201 [. .] -> {ko} || "<" _ $[ Fut ":" Immed ] ;

# Object markers specific to the singular: first person, second person,
# and third person with a gender containing 2 or 7. Each marker is
# inserted directly after "<".

define R301 [. .] -> n    || "<" _ $[ ObjAgr & $Person1 & $Sg ] ;
define R302 [. .] -> {ko} || "<" _ $[ ObjAgr & $Person2 & $Sg ] ;
define R303 [. .] -> {mo} || "<" _ $[ ObjAgr & $Person3 & $Sg & $2 ] ;
define R304 [. .] -> {ei} || "<" _ $[ ObjAgr & $Person3 & $Sg & $7 ] ;

# Rules of referral for singular object markers.
# Each rule is the corresponding common singular rule (RAgr1-RAgr5) with
# the placeholder symbol Func replaced by Obj, so that it is triggered by
# the object agreement features.

define R305 `[RAgr1, Func, Obj];
define R306 `[RAgr2, Func, Obj];
define R307 `[RAgr3, Func, Obj];
define R308 `[RAgr4, Func, Obj];
define R309 `[RAgr5, Func, Obj];

# Object marker specific to the plural: first person plural inserts
# {lo} directly after "<".

define R310 [. .] -> {lo} || "<" _ $[ ObjAgr & $Person1 & $Pl ] ;

# Rules of referral for plural object markers.
# Each rule is the corresponding common plural rule (RAgr6-RAgr11) with
# the placeholder symbol Func replaced by Obj, so that it is triggered by
# the object agreement features.

define R311 `[RAgr6, Func, Obj];
define R312 `[RAgr7, Func, Obj];
define R313 `[RAgr8, Func, Obj];
define R314 `[RAgr9, Func, Obj];
define R315 `[RAgr10, Func, Obj];
define R316 `[RAgr11, Func, Obj];

# Tense rules. Each rule inserts its suffix directly before the ","
# that separates the stem from the features.
#   R401: {ak} for Pres:Hab1, Pres:Hab2, Past:Hist, Past:MostRem
#   R402: a    for Pres:Cont, Fut:Immed
#   R501: i    for Fut:MostRem, Past:Rec, Past:Hist
# NOTE(review): Past:MoreRem is matched by none of these rules and so
# receives no suffix; Pres:Hab1, Pres:Hab2 and Past:MostRem receive only
# {ak}. Confirm against the data that this is intended.
define R401 [. .] -> {ak} || _ "," $[ Pres ":" [ Hab1 | Hab2 ] |
                                      Past ":" [ Hist | MostRem ] ] ;
define R402 [. .] -> a    || _ "," $[ Pres ":" Cont | Fut ":" Immed ] ;

define R501 [. .] -> i    || _ "," $[ Fut ":" MostRem |
                                      Past ":" [ Rec | Hist ] ] ;

# Delete from the lower side every symbol that is not a letter of the
# surface alphabet L, i.e. the features and the auxiliary symbols.

define Cleanup [ [ ? - L ] -> 0 ] ;

# Build the Lingala Verb Transducer
#
# The lexical forms of VerbLex are composed with the rule cascade below;
# Cleanup, applied last, leaves only letters of L on the lower side.
#
# The order of composition fixes the order of the affixes. Every prefix
# rule inserts directly after "<", so a prefix inserted by a later rule
# ends up to the left of the ones inserted before it: the object marker
# is innermost, the future marker comes next, and the subject marker is
# outermost. Every suffix rule inserts directly before ",", so the R401
# suffix precedes the suffix inserted by R402 or R501.

regex VerbLex
        .o.
   R301 .o. R302 .o. R303 .o. R304 .o. R305 .o.   # singular object
   R306 .o. R307 .o. R308 .o. R309
        .o.
   R310 .o. R311 .o. R312 .o. R313 .o. R314 .o.   # plural object
   R315 .o. R316
        .o.
        R201                                      # future
        .o.
   R101 .o. R102 .o. R103 .o. R104 .o. R105 .o.   # singular subject
   R106 .o. R107 .o. R108 .o. R109 .o. R110
        .o.
   R111 .o. R112 .o. R113 .o. R114 .o. R115 .o.   # plural subject
   R116 .o. R117
        .o.
   R401 .o. R402 .o. R501                         # tense
        .o.
       Cleanup ;