# Palindroms.script

# Copyright (C) 2004  Lauri Karttunen
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.

# This script assumes that /usr/dict/words, a 23K English
# word list, is available on the machine. The script
# extracts any palindroms and leaves them on the stack.

# How does it work? We first construct BidirEnglish that
# contains all the words whose reverse is also a word of English,
# for example, "madam" and "dog". We wish to keep "madam" and
# eliminate "dog". Here's what happens:

# m a d a m                                d o g
#                 mark for reduplication
# ^[ [ m a d a m ZZZ ] ^2 ^]               ^[ [ d o g ZZZ ] ^2 ^]
#                 compile-replace
# m a d a m ZZZ m a d a m ZZZ              d o g ZZZ d o g ZZZ
#                 splice in intersection and reverse
# ^[ m a d a m & [ m a d a m ] .r ^]       ^[ d o g & [ d o g ] .r ^]
#                 compile-replace
# m a d a m

# Note the two rounds of 'compile-replace lower'. The first
# reduplicates, the second intersects a word with its inverse.

set retokenize off
set print-space on

define English @txt"/usr/dict/words";

# Intersect English with its reverse. Only take into account
# words that contain at least two characters. (Words like "a"
# and "I" are not interesting here.)

define BidirEnglish [English & English.r & [? ?+]];

echo >> Wrapping with a reduplication operator

# Concatenate ZZZ to mark the end of the word

regex [ BidirEnglish
          .o.
  ?+ @-> "^[" "[" ... ZZZ "]" "^2" "^]" ];

print random-lower

echo >> Reduplicating with  compile-replace

compile-replace lower
print random-lower
lower-side net
define Reduplicated

echo >> Splicing in  reversal and intersection operators

regex [ Reduplicated
             .o.
        ZZZ -> "]" ".r" || _ .#.
             .o.
        ZZZ -> "&" "["
             .o.
        ?+ @-> "^[" ... "^]" ];

print random-lower

echo >> Adding reversing and intersecting with compile-replace
compile-replace lower

lower-side net
sort
print random-words