#!/usr/bin/perl

use warnings;
use strict;

use Getopt::Long;

# Enable Getopt::Long's "bundling" mode for single-character options.
Getopt::Long::Configure(qw(bundling));

# Command-line option values; all start out undefined and are filled in
# by the GetOptions call further down.
my (
    $with_variants,      # --with-variants / -v  (integer)
    $file_names_only,    # --file-names-only / -f (flag)
    $dir,                # --dir / -d            (string)
    $no_implied,         # --no-implied          (flag)
    $variants_str        # --variants            (string)
);

# Print a one-line synopsis to STDERR and terminate with exit status 1.
# Takes no arguments and never returns.
sub usage() {
    my $synopsis = "$0 [-v#] [<options>] <spelling categories> <size>\n";
    print STDERR $synopsis;
    exit 1;
}

# Parse the command-line options into the variables declared above.
# If GetOptions reports a failure, print the synopsis and exit via usage().
# (The original list had a stray comma in front of the "=>" on the
# with-variants entry; every entry is now a plain spec => reference pair.)
GetOptions(
    "dir|d=s"           => \$dir,
    "with-variants|v=i" => \$with_variants,
    "variants=s"        => \$variants_str,
    "file-names-only|f" => \$file_names_only,
    "no-implied"        => \$no_implied,
) or usage();

# Fall back to the directory "final" when --dir was not given.
$dir = "final" unless defined $dir;

# Maps a (lower-cased, hyphenated) spelling category given on the command
# line to the category name used as the prefix of the word-list files.
# The value 'british?' marks the ambiguous plain "en-gb" request, which is
# resolved (or rejected) later when the arguments are processed.
my %spelling_map = (
    # Locale-style aliases.
    'en-us'     => 'american',
    'en-gb'     => 'british?',
    'en-gb-ise' => 'british',
    'en-gb-ize' => 'british_z',
    'en-gb-oed' => 'british_z',
    'en-ca'     => 'canadian',

    # Category names that simply map to themselves.
    (map { ($_, $_) } qw(
        english special american british canadian
        variant_0 variant_1 variant_2
        british_variant_0 british_variant_1
        canadian_variant_0 canadian_variant_1
    )),
);

# Work out which variant levels were requested.
#
# --with-variants N is shorthand for --variants non,0,...,N; the two options
# are mutually exclusive.  The check uses defined() rather than truthiness so
# that "-v0" combined with --variants is rejected as well (previously the
# --variants value was silently overwritten in that case).
die "Cannot specify both --with-variants and --variants"
    if defined $with_variants && defined $variants_str;

if (defined $with_variants) {
    $variants_str = join(',', 'non', 0..$with_variants);
} elsif (!defined $variants_str) {
    # Neither option given: only the non-variant lists are used.
    $variants_str = "non";
}

# Set of selected levels; valid members are "non", "0", "1" and "2".
my %use_variant_level;
foreach my $level (split ',', $variants_str) {
    die "Invalid variant num: $level" unless $level =~ /^(non|[012])$/;
    $use_variant_level{$level} = 1;
}

# Without the "non" level the implied english/special lists are left out too.
$no_implied = 1 unless $use_variant_level{non};

# At least one spelling category plus the size are required.
usage unless @ARGV >= 2;

# The last argument is the maximum list size and must be exactly two digits.
my $SIZE = pop @ARGV;
die "Invalid size: $SIZE\n" unless $SIZE =~ /^\d\d$/;

# Translate every remaining argument into a canonical category name.
# Arguments are matched case-insensitively with '_' treated as '-'.
#
# The "unknown category" check now runs first: previously an unknown name
# left $sp undefined and the 'british?' comparisons emitted "uninitialized
# value" warnings before the real error message was printed.
#
# NOTE(review): because '_' is turned into '-' before the lookup, the keys of
# %spelling_map that contain an underscore (variant_0, british_variant_0, ...)
# can never be matched by a command-line argument - confirm whether that is
# intended.
my @SPS = map {
    my $sp = lc $_;
    $sp =~ tr/_/-/;
    $sp = $spelling_map{$sp};
    die "Unknown spelling category: $_\n" unless defined $sp;
    if ($sp eq 'british?') {
        # Plain en_GB is ambiguous when the non-variant lists are wanted.
        die "Must specify en_GB-ise or en_GB-ize\n" if $use_variant_level{non};
        $sp = 'british';
    }
    $sp;
} @ARGV;

# Set of file-name prefixes (categories) whose lists will be included.
my %SPS;
$SPS{$_} = 1 foreach (@SPS);

# The common lists are implied unless suppressed.
$SPS{english} = 1 unless $no_implied;
$SPS{special} = 1 unless $no_implied;

# Add the variant lists that belong to each requested category and level.
$SPS{variant_0} = 1 if $SPS{american} && $use_variant_level{0};
$SPS{variant_1} = 1 if $SPS{american} && $use_variant_level{1};
$SPS{british_variant_0} = 1 if ($SPS{british} || $SPS{british_z}) && $use_variant_level{0};
$SPS{british_variant_1} = 1 if ($SPS{british} || $SPS{british_z}) && $use_variant_level{1};
$SPS{canadian_variant_0} = 1 if $SPS{canadian} && $use_variant_level{0};
$SPS{canadian_variant_1} = 1 if $SPS{canadian} && $use_variant_level{1};
$SPS{variant_2} = 1 if $use_variant_level{2};

# When the "non" level is not selected, drop the base categories themselves
# so that only their variant lists remain.
unless ($use_variant_level{non}) {
    delete $SPS{$_} foreach (@SPS);
}

# Scan $dir for word-list files whose names start with
# <category>-<type>.<two-digit size>, keep those whose category is selected
# in %SPS and whose size does not exceed $SIZE, and then either print the
# file names (--file-names-only) or collect every line into @words.
opendir my $dh, $dir or die "Unable to open dir $dir: $!\n";
my @files = readdir($dh);
closedir $dh;

my @words;

foreach my $file (sort @files) {
    # The middle capture (list type) is not needed for the selection.
    my ($sp, undef, $size) = $file =~ /^(\w+)-([^.]+)\.(\d\d)/ or next;
    next unless $SPS{$sp};
    next unless $size <= $SIZE;

    if ($file_names_only) {
        print "$file\n";
        next;
    }

    # Three-argument open: the name comes from readdir and must never be
    # interpreted as an open mode or a pipe.
    my $path = "$dir/$file";
    open my $fh, '<', $path or die "Unable to open $path for reading: $!";
    while (my $line = <$fh>) {
        # chomp, not chop: a final line without a trailing newline must not
        # lose its last character.
        chomp $line;
        push @words, $line;
    }
    close $fh;
}

# Print the collected words in sorted order, one per line, each only once.
# The empty string is pre-marked as seen, so empty entries are never printed.
my %already_printed = ('' => 1);
print map { "$_\n" } grep { !$already_printed{$_}++ } sort @words;


                


            
