#!/usr/bin/perl
# -*- coding: iso-8859-1 -*-
#
# pronouncible.pl
#
# Elias Schwerdtfeger, http://tamagothi.wordpress.com/
# 2009-05-25
#
# Apply a set of rewrite rules to a Voynich manuscript transcription to
# make it pronounceable. The motivation is to make the differences between
# the two Currier "languages" audible; it is not serious research. Of
# course, I am convinced that the VMs isn't written language.
#
# But there are people who don't believe in the fact of the two different
# Currier "languages", because it is an abstract statistical result they
# don't understand. It is hard to see in a transcription file, so I want
# to make it obvious by creating a pronounceable version.
#
# Input:  the interlinear transcription file named by the environment
#         variable VOYNICH.
# Output: three files next to the input, "<input>.pro.a.txt",
#         "<input>.pro.b.txt" and "<input>.pro.x.txt", holding the converted
#         lines of pages tagged as language A, as any other language
#         (normally B), and as no language at all, respectively.
#         Progress ("<page> lang=<lang>") is printed to STDOUT.
#
# The umlauts produced by the rules are written as \xE4 / \xF6 / \xFC
# escapes, so the output contains the same ISO-8859-1 bytes regardless of
# the encoding this source file happens to be stored in.

use strict;
use warnings;

# Convert the transcription file named by $ENV{VOYNICH}.
#
# Dies if VOYNICH is not set, if any of the four files cannot be opened,
# or if an output file cannot be closed (i.e. flushed) successfully.
sub main {

    # ATTENTION!
    #
    # I use an environment variable VOYNICH, which points to
    # the interlinear transcription archive. This makes it easier
    # for me to use my hacks on different computers. (This script
    # was written on my old HP Jornada 820e, which is a nice
    # mobile workhorse for me.)
    #
    # Just change the following line to your local convention.
    my $path = $ENV{'VOYNICH'} or die 'No environment var VOYNICH';

    my $outa = $path . ".pro.a.txt";
    my $outb = $path . ".pro.b.txt";
    my $outx = $path . ".pro.x.txt";

    # Three-argument open: the mode can never be hijacked by odd
    # characters in the file name.
    open my $in,  '<', $path or die "Failed to open $path $!";
    open my $ofa, '>', $outa or die "Failed to open $outa $!";
    open my $ofb, '>', $outb or die "Failed to open $outb $!";
    open my $ofx, '>', $outx or die "Failed to open $outx $!";

    # Until the first page header is seen, treat text as "no language"
    # so that a stray early line cannot hit an undefined output handle.
    my $lang = 'X';
    my $of   = $ofx;

    while (my $ln = <$in>) {

        # Skip comments
        next if $ln =~ /^\#/;

        # Parseable information?
        # Determine language and output file from a page header such as
        # "<f1r> {... $L=A ...}".
        if ($ln =~ /^\<(f\d+[rv]\d?)\>\s*\{([^\}]+)\}/) {
            my ($page, $pinf) = ($1, $2);
            print "$page ";
            if ($pinf =~ /\$L=(.)/) {
                $lang = $1;
                $of = ($lang eq 'A') ? $ofa : $ofb;
            }
            else {
                $lang = 'X';
                $of   = $ofx;
            }
            print "lang=$lang\n";
        }

        # Extract locator and text
        chomp $ln;
        next unless $ln =~ /^\<([^\>]+)\>\s*(.*)$/;
        my ($loc, $txt) = ($1, $2);

        # Only Takeshi Takahashi's full transcription
        # (you may set your favorite here)
        next unless $loc =~ /H$/;    # H = Takeshi Takahashi

        my @cnv = map { pronounce_word($_) } split_words($txt);

        # Remove the (now wrong) transcriber code from the locator
        $loc =~ s/\;.$//;

        # Write the converted line to the output file
        my $cln = sprintf("<%s> %s", $loc, join(' ', @cnv));
        print {$of} "$cln\n";
    }

    # Buffered write errors only surface at close time, so check them.
    close $ofx or die "Failed to close $outx $!";
    close $ofb or die "Failed to close $outb $!";
    close $ofa or die "Failed to close $outa $!";
    close $in;

    return;
}

# Normalize the text part of a transcription line and split it into words.
#
# Takes the raw text following the locator; returns the list of words,
# each consisting only of the characters a-z and '*'.
sub split_words {
    my ($txt) = @_;

    $txt =~ s/\{[^\}]*\}//g;    # Remove inline comments
    $txt =~ s/[\,\-\=]/./g;     # Consistent spacing with dots
    $txt =~ s/[^a-z\.\*]//g;    # Remove unwanted transcription stuff
    $txt =~ s/\.+$//g;          # Remove trailing dots

    return split /\.+/, $txt;
}

# Convert one transcribed word into its "pronounceable" form.
#
# Takes a word in lowercase transcription letters and returns the converted
# word in lowercase. The rules are deliberately arbitrary and sound rather
# German, because that is the author's native language... ;-)
#
# The order of the substitutions matters: every rule sees the result of
# all rules before it.
sub pronounce_word {
    my ($w) = @_;

    # The conversions produce uppercase letters to avoid side effects in
    # the sequence of substitutions (the input is all lowercase).

    # in-groups to vowel E
    $w =~ s/iiiin/EULEM/g;
    $w =~ s/iiin/EM/g;
    $w =~ s/iin/EN/g;
    $w =~ s/in/EL/g;

    # il-groups to vowel O
    $w =~ s/iiiil/EULOM/g;
    $w =~ s/iiil/OM/g;
    $w =~ s/iil/ON/g;
    $w =~ s/il/OL/g;

    # ir-groups to vowel A
    $w =~ s/iiiir/EULAM/g;
    $w =~ s/iiir/AM/g;
    $w =~ s/iir/AN/g;
    $w =~ s/ir/AL/g;

    # is-groups to vowel U
    $w =~ s/iiiis/EULUM/g;
    $w =~ s/iiis/UM/g;
    $w =~ s/iis/UN/g;
    $w =~ s/is/UL/g;

    # im-groups to vowel I
    $w =~ s/iiiim/EULIM/g;
    $w =~ s/iiim/IM/g;
    $w =~ s/iim/IN/g;
    $w =~ s/im/IL/g;

    # ee-groups to diphthong AU (\xE4 = a-umlaut)
    $w =~ s/eeee/AUVE/g;
    $w =~ s/eee/\xE4U/g;
    $w =~ s/ee/AU/g;

    # ch-clusters with gallow (\xF6 = o-umlaut, \xFC = u-umlaut).
    # The second rule handles a gallow directly followed by ch/sh/ih; its
    # character class was missing the opening bracket and never matched.
    $w =~ s/([ci])([tkpf])([hoy])/${2}\xF6${1}${3}/g;
    $w =~ s/([tkpf])([cis])h/${1}\xFC${2}h/g;

    # double h in ch-clusters
    $w =~ s/hh/hEH/g;

    # ch-clusters
    $w =~ s/ch/ST/g;
    $w =~ s/sh/SCH/g;
    $w =~ s/ih/TSCH/g;

    # initial and final y
    $w =~ s/^y/AN/g;
    $w =~ s/dy$/LICH/g;
    $w =~ s/y$/UNG/g;
    $w =~ s/y(.)$/${1}EHUNG/g;

    # initial and final s
    $w =~ s/^s/SE/g;
    $w =~ s/s$/ES/g;

    # initial and final d
    $w =~ s/^d/GE/g;
    $w =~ s/d$/KEIT/g;
    $w =~ s/d(.)$/di${1}/g;

    # (rare) double d
    $w =~ s/dd/DETH/g;

    # final m
    $w =~ s/mm$/SCHAFT/g;
    $w =~ s/m$/HEIT/g;

    # initial and final l
    $w =~ s/^l/ME/g;
    $w =~ s/l$/IG/g;

    # (rare) remaining i and e
    $w =~ s/e/I/g;
    $w =~ s/iii/EM/g;
    $w =~ s/ii/EN/g;
    $w =~ s/i/E/g;

    # initial q and qo
    $w =~ s/^qo/UND-/g;
    $w =~ s/^q/ODER-/g;

    # let the gallows sound more German... ;-)
    $w =~ s/p/PFE/g;
    $w =~ s/f/BE/g;
    $w =~ s/t/FE/g;
    $w =~ s/k/THE/g;

    # Some funny, case-insensitive conversions to make the pronunciation
    # easier (for a German).
    # This process may destroy some information or make it really hard
    # to go back to the original text with a set of regular expressions.
    # This is no serious research but an attempt to make it "audible" to
    # Germans.
    $w =~ s/([PFBT]+)([AEIOU])([AEIOU\xE4\xF6\xFC])/${1}${3}/ig;
    $w =~ s/^TSCH/ETSCH/ig;
    $w =~ s/aa/ACHE/ig;
    $w =~ s/([aeio])ung/${1}NUNG/ig;
    $w =~ s/([aeiu])ing/${1}NING/ig;
    $w =~ s/a([aeiou\xE4\xF6\xFC])/EN${1}/ig;
    $w =~ s/ah/AN/ig;
    $w =~ s/oh/ON/ig;
    $w =~ s/ea/ENNA/ig;
    $w =~ s/eo/ENNO/ig;
    $w =~ s/oa/ONNA/ig;
    $w =~ s/auung/AHNUNG/ig;
    $w =~ s/e\xFC/EH\xFC/ig;
    $w =~ s/fung/FANG/ig;
    $w =~ s/chd/CHID/ig;
    $w =~ s/uo/UH/ig;
    $w =~ s/ou/OH/ig;
    $w =~ s/ue/UH/ig;
    $w =~ s/tt/TET/ig;
    $w =~ s/^ge([aeiou])/G${1}/i;
    $w =~ s/st([pfbt]+)/STE${1}/gi;
    $w =~ s/ying$/LING/i;

    # Case-sensitive on purpose: only a 'g' that survived from the
    # transcription is replaced, not the uppercase G produced above.
    $w =~ s/g/LICHT/g;

    # Finally convert to lowercase.
    return lc $w;
}

# Run the conversion when executed as a script; stay silent when the file
# is loaded with require/do so the helpers can be used on their own.
main() unless caller;

# That's all
1;