#!/usr/bin/perl -nw
# Name: b5tm
# Author: Chao-Kuei Hung http://www.cyut.edu.tw/~ckhung/
# Release date: Oct 1999
# Purpose: Find potential "trouble-makers" in a text file containing
#     big-5 characters 
# See also: http://www.cyut.edu.tw/~ckhung/b/pl/big5.shtml

# To make sure that this program is not affected by the big-5 problem,
# we insist on writing comments in English.

use strict;
# Lexical scratch variables, declared up front because `use strict` is in
# effect and every variable must be declared before it is used.
my (@line, $ch0, $asc0, $ch1, $asc1);
# $trouble holds the characters to look for.  It is a package variable
# (not a `my` lexical) because it is assigned once in the BEGIN block below
# and then read by the per-line code that runs under the -n switch.
use vars qw($trouble);

BEGIN {
    # Executed once at compile time, before the -n loop reads any input:
    # remove the first command-line argument and keep it as the set of
    # characters to look for.  Whatever remains in @ARGV is left for the
    # implicit input loop.
    $trouble = shift @ARGV;
    if (!defined $trouble) {
        die "usage: b5tm chars-to-avoid [f1 f2 ... fn]";
    }
}

# Scan the line that the -n loop has just read into $_ and report findings.
_b5tm_scan_line($_, $trouble, $ARGV, $.);

# _b5tm_scan_line(TEXT, AVOID, FILE, LINENO)
#
# Walk TEXT byte by byte, pairing big-5 leading bytes (0xa1..0xf9) with the
# byte that follows them.
#
#   TEXT    one line of input, treated as a sequence of single bytes
#   AVOID   string of "trouble-maker" characters; a big-5 character is
#           reported when its trailing byte occurs in this string
#   FILE    name printed at the start of each report line
#   LINENO  line number printed in each report line
#
# Output:
#   STDOUT  one line per big-5 character whose trailing byte is in AVOID:
#           file, line number, the two-byte character, the trailing byte on
#           its own, and both byte values in hex
#   STDERR  "strange character ..."       for a control byte other than
#                                          TAB (0x09) and LF (0x0a)
#           "strange character pair ..."  for a leading byte followed by a
#                                          byte outside 0x40..0x7e and
#                                          0xa1..0xfe
#           "incomplete big-5 character"  for a leading byte that is the
#                                          very last byte of TEXT
#
# Returns nothing.
sub _b5tm_scan_line {
    my ($text, $avoid, $file, $lineno) = @_;
    return unless defined $text;

    my @bytes = split //, $text;
    my $pos   = 0;

    while ($pos < @bytes) {
        my $lead      = $bytes[$pos];
        my $lead_code = ord $lead;

        if ($lead_code < 0xa1 or $lead_code > 0xf9) {
            # Not a big-5 leading byte: it stands alone.  Only complain
            # about control characters other than TAB and LF.
            printf STDERR "strange character 0x%02x\n", $lead_code
                if $lead_code < 0x20
                    and $lead_code != 0x09
                    and $lead_code != 0x0a;
            $pos++;
            next;
        }

        if ($pos == $#bytes) {
            # A leading byte with nothing after it (e.g. a truncated last
            # line without a newline).
            printf STDERR "incomplete big-5 character 0x%02x\n", $lead_code;
            last;
        }

        my $trail      = $bytes[$pos + 1];
        my $trail_code = ord $trail;

        if ($trail_code < 0x40
            or ($trail_code > 0x7e and $trail_code < 0xa1)
            or $trail_code > 0xfe)
        {
            printf STDERR "strange character pair 0x%02x-%02x\n",
                $lead_code, $trail_code;
            # Resynchronise: drop only the leading byte, so the offending
            # byte is examined on its own in the next iteration instead of
            # being reported as the tail of a big-5 character.
            $pos++;
            next;
        }

        # Every piece of data goes through %s so that a '%' in the file
        # name can never be interpreted as a printf conversion.
        printf "%s %4d: %s%s %s %02x %02x\n",
            $file, $lineno, $lead, $trail, $trail, $lead_code, $trail_code
            if index($avoid, $trail) >= 0;

        # Both bytes of a well-formed big-5 character are consumed.
        $pos += 2;
    }

    return;
}

# print "$ARGV $.: $_";

# At the end of each input file, close ARGV explicitly so that the line
# counter $. starts again from 1 for the next file instead of running on
# across files.
close ARGV if eof;

# 好了, 程式結束, 可以用中文寫註解了 :-)
# 以下是一些含有 []{}\ 等碼的中文字: 開現陣程認加久址功因也設許淚
# 可以在 UNIX 的 shell 底下用 b5tm '[]{}\@' b5tm 查看.

