#!/usr/bin/perl -w
# word frequency look-up

use Getopt::Std;
use strict;
# Command-line options, and the two look-up tables built from the
# frequency file:
#   %rank  word => line number of the word in the frequency file
#   %freq  word => second column of that line, when one is present
my (%opts, %rank, %freq);

%opts = (
    f => undef,    # frequency file
    v => 0,        # verbosity
);

# 'v' has to be declared here, otherwise -v is rejected as an unknown
# option and the diagnostics guarded by $opts{v} can never be enabled.
getopts('f:v', \%opts)
    or die "usage: $0 -f frequency-file [-v] [file ...]\n";
die "please specify frequency file by -f" unless defined $opts{f};

# Three-argument open with a lexical handle: the file name is taken
# literally, so leading/trailing mode characters in it are not interpreted.
open my $freq_fh, '<', $opts{f}
    or die "can't open frequency file $opts{f}: $!";
while (my $line = <$freq_fh>) {
    # Blank lines and '#' comments are skipped silently.
    next if ($line =~ /^\s*$/ or $line =~ /^#/);

    # Anything containing characters other than word characters and
    # whitespace is rejected (reported only when verbose).
    if ($line !~ /^[\w\s]+$/) {
        print STDERR "ignoring a line in '$opts{f}': $line"
            if $opts{v} > 0;
        next;
    }

    # First field is the word, the optional second field its count.
    my ($word, $count) = split ' ', $line;

    # The rank is the physical line number ($.), so skipped lines
    # (blank, comment, rejected) still advance it.
    $rank{$word} = $.;
    $freq{$word} = $count if defined $count;
}
close $freq_fh;

#my ($k);
#print "ranks:\n";
#foreach $k (keys %rank) {
#    printf "%4d $k\n", $rank{$k};
#}
#print "\n\nfreq:\n";
#foreach $k (keys %freq) {
#    printf "%4d $k\n", $freq{$k};
#}
#

# Work out whether the frequency file carried a count column by comparing
# how many words received a count with how many words were ranked:
#   at least 90% have a count  -> counts are present
#   fewer than 10% have one    -> counts are absent
#   anything in between        -> the file is considered malformed
my $ranked_total  = scalar keys %rank;
my $counted_total = scalar keys %freq;
my $f_exists;
if ($counted_total * 10 >= $ranked_total * 9) {
    # assuming this file contains frequency count
    $f_exists = 1;
}
elsif ($counted_total * 10 < $ranked_total) {
    # assuming this file does not contain frequency count
    $f_exists = 0;
}
else {
    die "format error: many words have frequency counts and many others do not.";
}

# Read the remaining command-line files (or standard input) and echo every
# usable line prefixed with the rank of its leading word and, when the
# frequency file supplied counts, the count as well.
while (my $line = <>) {
    # Blank lines and '#' comments are dropped from the output.
    next if ($line =~ /^\s*$/ or $line =~ /^#/);

    my ($word) = $line =~ /^(\w+)/;

    # Test definedness rather than truth: the word "0" is a legitimate
    # key and must not be mistaken for "no word found".
    if (not defined $word) {
        print STDERR "ignoring a line in '$ARGV': $line"
            if $opts{v} > 0;
        next;
    }

    # Words missing from the frequency file get the sentinel rank 9999
    # and a count of 0.
    my $prefix = sprintf "%4d", ($rank{$word} or 9999);
    if ($f_exists) {
        $prefix .= sprintf " %5d", ($freq{$word} or 0);
    }
    print "$prefix $line";
}

