在1980年前,仍然没有任何国际标准如ISO-8859或Unicode来定义如何扩展US-ASCII编码以便非英语国家的用户使用.很多IT 厂商发明了他们自己的编码,并且使用了难以记忆的数目来标识: 
		例如936代表简体中文. 950代表繁体中文. 
		同 Extended Unix Coding ( EUC )编码大不一样的是,下面所有的远东 codepage 都利用了C1控制码 { =80..=9F } 做为首字节, 使用ASCII值 { =40..=7E } 做为第二字节,这样才能包含多达数万个双字节字符,这表明在这种编码之中大于3F的ASCII值不一定代表ASCII字符. 
		Shift-JIS包含日本语 charset JIS X 0201 (每个字符一个字节) 和 JIS X 0208 (每个字符两个字节),所以 JIS X 0201片假名包含一个字节半宽的字符,其剩馀的60个字节被用做7076个汉字以及648个其他全宽字符的首字节.同EUC-JP编码区别的是, Shift-JIS没有包含JIS X 0212中定义的5802个汉字. 
		GBK 扩展了 EUC-CN 编码( GB 2312-80编码,包含 6763 个汉字)到Unicode (GB13000.1-93)中定义的20902个汉字,中国大陆使用的是简体中文zh_CN. 
		UnifiedHangul (UHC) 是韩文 EUC-KR 编码(KS C 5601-1992 编码,包括2350 韩文音节和 4888 个汉字)的超集,包含 8822个附加的韩文音节( 在C1中 ) 
		Big5 是代替EUC-TW (CNS 11643-1992)的繁体中文编码(13072 个繁体 zh_TW 中文字),这些定义都可以在Ken Lunde的 CJK.INF中或者 Unicode 编码表中找到. 
		繁体中文: mount -t vfat /dev/hda1 /mnt/1 -o codepage=950,iocharset=cp936 
有趣的是,由于GBK包含了全部的GB2312/Big5/JIS的内码,所以使用936的Codepage也可以显示Big5的文件名.
 
制作codepage950支持的是台湾的 cosmos先生, 主页为 http://www.cis.nctu.edu.tw:8080/~is84086/Project/kernel_cp950/
制作GBK的cp936支持的是TurboLinux的中文研发小组的 方汉和 陈向阳
 
#!/bin/sh
# Reduce a mapping table to bare "XXXX YYYY" hex pairs on stdout.
#   stage 1: drop every line whose first field contains '#'
#   stage 2: split on the literal "0x" and print fields 2 and 3 glued
#            together, which removes the "0x" prefixes of the first two codes
#   stage 3: keep only rows whose first two columns have the same length and
#            print just those two columns
# ${1:+"$1"} quotes the file name so paths containing spaces or glob
# characters work, while still expanding to nothing (cat reads stdin) when no
# argument is given, exactly as the unquoted $1 used to.
cat ${1:+"$1"} | awk '{if(index($1,"#")==0)print $0}' | awk 'BEGIN{FS="0x"}{print $2 $3}' | awk '{if(length($1)==length($2))print $1,$2}'
 
 
 
#!/usr/bin/perl
# @code maps a byte value (0..255) to its two-digit upper-case hex spelling,
# e.g. $code[0xA1] eq "A1".  These strings serve both as hash keys for the
# mapping tables and as text spliced into the generated C source.
@code = map { sprintf("%02X", $_) } 0 .. 255;
# Load the mapping from STDIN: one whitespace-separated pair of codes per
# line, each split into its first two and next two characters.
#   %table2 is keyed by the bytes of the FIRST column and stores the second;
#   %table  is keyed by the bytes of the SECOND column and stores the first.
# The generators further down index %table2 with charset lead/trail bytes and
# %table with the Unicode page number, so the first column is the charset
# code and the second column is the Unicode value.
while (defined(my $line = <STDIN>)) {
        my ($first, $second) = split ' ', $line;
        my ($hi, $lo);

        ($hi, $lo) = $first =~ /(..)(..)/;
        $table2{$hi}{$lo} = $second;

        ($hi, $lo) = $second =~ /(..)(..)/;
        $table{$hi}{$lo} = $first;
}
# Emit the fixed preamble of the generated C file: banner comment, kernel
# includes and the opening of the charset2uni[] initializer.  The array is
# dimensioned for 89 lead bytes (A1..F9) times 160 trail-byte slots, which is
# what the loops that follow emit (0x40-0x7F plus 0xA0-0xFF).
# The banner names cp950: this script builds the cp950 table (see the
# nls_table initializer it emits later), not cp874.
print <<EOF;
/*
 * linux/fs/nls_cp950.c
 *
 * Charset cp950 translation tables.
 * Generated automatically from the Unicode and charset
 * tables from the Unicode Organization (www.unicode.org).
 * The Unicode to charset table has only exact mappings.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/nls.h>
/* A1 - F9*/
static struct nls_unicode charset2uni[(0xF9-0xA1+1)*(0x100-0x60)] = {
EOF
# Fill charset2uni[]: for every lead byte A1..F9 walk the trail-byte slots
# 0x40-0x7F and then 0xA0-0xFF, printing one {low, high} byte pair per slot.
# Slots with no entry in %table2 are emitted as {0x00, 0x00}.  A line break
# is inserted every 4 slots and a "/* LLTT*/" marker every 16.
for my $lead (0xA1 .. 0xF9) {
        for my $trail (0x40 .. 0x7F, 0xA0 .. 0xFF) {
                my $uni = $table2{$code[$lead]}{$code[$trail]};
                $uni = "0000" unless defined $uni;
                print "\n\t" unless $trail % 4;
                print "/* $code[$lead]$code[$trail]*/\n\t" unless $trail % 0x10;
                my ($uni_hi, $uni_lo) = $uni =~ /(..)(..)/;
                printf("{0x%2s, 0x%2s}, ", $uni_lo, $uni_hi);
        }
}
print "\n};\n\n";
# Emit one 512-byte table "pageXX" for every Unicode high byte 01..FF that
# has at least one entry in %table.  Each of the 256 cells holds the two
# bytes stored for that code point; cells without an entry get 0x3F, 0x3F.
# A range comment closes every group of four cells, and an extra blank line
# is printed before cell 0x80.
for my $page (1 .. 255) {
        next unless defined $table{$code[$page]};

        print "static unsigned char page$code[$page]\[512\] = {\n\t";
        for my $cell (0 .. 255) {
                my $bytes = $table{$code[$page]}{$code[$cell]};
                $bytes = "3F3F" unless defined $bytes;
                printf("/* 0x%02X-0x%02X */\n\t", $cell - 4, $cell - 1)
                        if $cell > 0 && $cell % 4 == 0;
                print "\n\t" if $cell == 0x80;
                my ($first_byte, $second_byte) = $bytes =~ /(..)(..)/;
                printf("0x%2s, 0x%2s, ", $first_byte, $second_byte);
        }
        print "/* 0xFC-0xFF */\n};\n\n";
}
# Emit the 256-entry pointer table, eight entries per line: the name of the
# page table for every high byte that has one, NULL otherwise.  Entry 0 is
# always NULL because no page00 table is generated.
print "static unsigned char *page_uni2charset[256] = {";
for my $page (0 .. 255) {
        print "\n\t" if $page % 8 == 0;
        my $entry = "NULL,   ";
        $entry = "page$code[$page], "
                if $page > 0 && defined $table{$code[$page]};
        print $entry;
}
# Emit the fixed tail of the generated C file: the closing brace of
# page_uni2charset[], a verbatim charset2upper[] table (not referenced by the
# nls_table initializer below), the module use-count helpers, the nls_table
# descriptor named "cp950" and the init/cleanup entry points.
# register_nls()/unregister_nls() are passed &table: the calls previously
# had no argument at all, so nothing identified the table to (un)register.
print <<EOF;
};
static unsigned char charset2upper[256] = {
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
        0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
        0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
        0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
        0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
        0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
        0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */
        0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */
        0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */
        0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
        0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */
        0x00, 0x00, 0x00, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
        0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
        0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
        0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
        0x98, 0x99, 0x9a, 0x00, 0x9c, 0x00, 0x00, 0x00, /* 0x98-0x9f */
        0x00, 0x00, 0x00, 0x00, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
        0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
        0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
        0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
        0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
        0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
        0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0x00, 0x00, /* 0xd0-0xd7 */
        0x00, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf, /* 0xd8-0xdf */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xef, /* 0xe8-0xef */
        0xf0, 0xf1, 0x00, 0x00, 0x00, 0xf5, 0x00, 0xf7, /* 0xf0-0xf7 */
        0xf8, 0xf9, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xff, /* 0xf8-0xff */
};
static void inc_use_count(void)
{
        MOD_INC_USE_COUNT;
}
static void dec_use_count(void)
{
        MOD_DEC_USE_COUNT;
}
static struct nls_table table = {
        "cp950",
        page_uni2charset,
        charset2uni,
        inc_use_count,
        dec_use_count,
        NULL
};
int init_nls_cp950(void)
{
        return register_nls(&table);
}
#ifdef MODULE
int init_module(void)
{
        return init_nls_cp950();
}
void cleanup_module(void)
{
        unregister_nls(&table);
        return;
}
#endif
/*
 * Overrides for Emacs so that we follow Linus's tabbing style.
 * Emacs will notice this stuff at the end of the file and automatically
 * adjust the settings for this buffer only.  This must remain at the end
 * of the file.
 *
---------------------------------------------------------------------------
 * Local variables:
 * c-indent-level: 8
 * c-brace-imaginary-offset: 0
 * c-brace-offset: -8
 * c-argdecl-indent: 8
 * c-label-offset: -8
 * c-continued-statement-offset: 8
 * c-continued-brace-offset: 0
 * End:
 */
EOF
 
 
#!/usr/bin/perl
# @code maps a byte value (0..255) to its two-digit upper-case hex spelling,
# e.g. $code[0x81] eq "81".  These strings serve both as hash keys for the
# mapping tables and as text spliced into the generated C source.
@code = map { sprintf("%02X", $_) } 0 .. 255;
# Load the mapping from STDIN: one whitespace-separated pair of codes per
# line, each split into its first two and next two characters.
#   %table2 is keyed by the bytes of the FIRST column and stores the second;
#   %table  is keyed by the bytes of the SECOND column and stores the first.
# The generators further down index %table2 with charset lead/trail bytes and
# %table with the Unicode page number, so the first column is the charset
# code and the second column is the Unicode value.
while (defined(my $line = <STDIN>)) {
        my ($first, $second) = split ' ', $line;
        my ($hi, $lo);

        ($hi, $lo) = $first =~ /(..)(..)/;
        $table2{$hi}{$lo} = $second;

        ($hi, $lo) = $second =~ /(..)(..)/;
        $table{$hi}{$lo} = $first;
}
# Emit the fixed preamble of the generated C file: banner comment, kernel
# includes and the opening of the charset2uni[] initializer, dimensioned for
# 126 lead bytes (81..FE) times 192 trail-byte slots (0x40..0xFF).
# The text contains nothing to interpolate, so a literal heredoc is used.
print <<'END_OF_HEADER';
/*
 * linux/fs/nls_cp936.c
 *
 * Charset cp936 translation tables.
 * Generated automatically from the Unicode and charset
 * tables from the Unicode Organization (www.unicode.org).
 * The Unicode to charset table has only exact mappings.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/nls.h>
/* 81 - FE*/
static struct nls_unicode charset2uni[(0xFE-0x81+1)*(0x100-0x40)] = {
END_OF_HEADER
# Fill charset2uni[]: for every lead byte 81..FE walk the trail-byte slots
# 0x40..0xFF, printing one {low, high} byte pair per slot.  Slots with no
# entry in %table2 are emitted as {0x00, 0x00}.  A line break is inserted
# every 4 slots and a "/* LLTT*/" marker every 16.
for my $lead (0x81 .. 0xFE) {
        for my $trail (0x40 .. 0xFF) {
                my $uni = $table2{$code[$lead]}{$code[$trail]};
                $uni = "0000" unless defined $uni;
                print "\n\t" unless $trail % 4;
                print "/* $code[$lead]$code[$trail]*/\n\t" unless $trail % 0x10;
                my ($uni_hi, $uni_lo) = $uni =~ /(..)(..)/;
                printf("{0x%2s, 0x%2s}, ", $uni_lo, $uni_hi);
        }
}
print "\n};\n\n";
# Emit one 512-byte table "pageXX" for every Unicode high byte 01..FF that
# has at least one entry in %table.  Each of the 256 cells holds the two
# bytes stored for that code point; cells without an entry get 0x3F, 0x3F.
# A range comment closes every group of four cells, and an extra blank line
# is printed before cell 0x80.
for my $page (1 .. 255) {
        next unless defined $table{$code[$page]};

        print "static unsigned char page$code[$page]\[512\] = {\n\t";
        for my $cell (0 .. 255) {
                my $bytes = $table{$code[$page]}{$code[$cell]};
                $bytes = "3F3F" unless defined $bytes;
                printf("/* 0x%02X-0x%02X */\n\t", $cell - 4, $cell - 1)
                        if $cell > 0 && $cell % 4 == 0;
                print "\n\t" if $cell == 0x80;
                my ($first_byte, $second_byte) = $bytes =~ /(..)(..)/;
                printf("0x%2s, 0x%2s, ", $first_byte, $second_byte);
        }
        print "/* 0xFC-0xFF */\n};\n\n";
}
# Emit the 256-entry pointer table, eight entries per line: the name of the
# page table for every high byte that has one, NULL otherwise.  Entry 0 is
# always NULL because no page00 table is generated.
print "static unsigned char *page_uni2charset[256] = {";
for my $page (0 .. 255) {
        print "\n\t" if $page % 8 == 0;
        my $entry = "NULL,   ";
        $entry = "page$code[$page], "
                if $page > 0 && defined $table{$code[$page]};
        print $entry;
}
# Emit the fixed tail of the generated C file: the closing brace of
# page_uni2charset[], a verbatim charset2upper[] table (not referenced by the
# nls_table initializer below), the module use-count helpers, the nls_table
# descriptor named "cp936" and the init/cleanup entry points.
# register_nls()/unregister_nls() are passed &table: the calls previously
# had no argument at all, so nothing identified the table to (un)register.
print <<EOF;
};
static unsigned char charset2upper[256] = {
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
        0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
        0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
        0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
        0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
        0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
        0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */
        0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */
        0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */
        0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
        0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */
        0x00, 0x00, 0x00, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
        0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
        0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
        0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
        0x98, 0x99, 0x9a, 0x00, 0x9c, 0x00, 0x00, 0x00, /* 0x98-0x9f */
        0x00, 0x00, 0x00, 0x00, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
        0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
        0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
        0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
        0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
        0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
        0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0x00, 0x00, /* 0xd0-0xd7 */
        0x00, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf, /* 0xd8-0xdf */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xef, /* 0xe8-0xef */
        0xf0, 0xf1, 0x00, 0x00, 0x00, 0xf5, 0x00, 0xf7, /* 0xf0-0xf7 */
        0xf8, 0xf9, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xff, /* 0xf8-0xff */
};
static void inc_use_count(void)
{
        MOD_INC_USE_COUNT;
}
static void dec_use_count(void)
{
        MOD_DEC_USE_COUNT;
}
static struct nls_table table = {
        "cp936",
        page_uni2charset,
        charset2uni,
        inc_use_count,
        dec_use_count,
        NULL
};
int init_nls_cp936(void)
{
        return register_nls(&table);
}
#ifdef MODULE
int init_module(void)
{
        return init_nls_cp936();
}
void cleanup_module(void)
{
        unregister_nls(&table);
        return;
}
#endif
/*
 * Overrides for Emacs so that we follow Linus's tabbing style.
 * Emacs will notice this stuff at the end of the file and automatically
 * adjust the settings for this buffer only.  This must remain at the end
 * of the file.
 *
---------------------------------------------------------------------------
 * Local variables:
 * c-indent-level: 8
 * c-brace-imaginary-offset: 0
 * c-brace-offset: -8
 * c-argdecl-indent: 8
 * c-label-offset: -8
 * c-continued-statement-offset: 8
 * c-continued-brace-offset: 0
 * End:
 */
EOF
 
 
 
/*
 * CPI.C: A program to examine MSDOS codepage files (*.cpi)
 * and extract specific codepages.
 * Compiles under Linux & DOS (using BC++ 3.1).
 *
 * Compile: gcc -o cpi cpi.c
 * Call: cpi file.cpi [-c] [-L] [-l] [-a|nnn]
 *
 * Author: Ahmed M. Naas (ahmed@oea.xs4all.nl)
 * Many changes: aeb@cwi.nl  [changed until it would handle all
 *      *.cpi files people have sent me; I have no documentation,
 *      so all this is experimental]
 * Remains to do: DRDOS fonts.
 *
 * Copyright: Public domain.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* Forward declarations; both functions are defined later in this file. */
int handle_codepage(int);
void handle_fontfile(void);
/*
 * PACKED is appended to every member of the header structs below.  With GCC
 * it expands to the packed attribute so that no padding is inserted between
 * members; the structs are filled directly by fread().
 */
#define PACKED __attribute__ ((packed))
/* Use this (instead of the above) to compile under MSDOS */
/*#define PACKED  */
/*
 * Header records of a .cpi file.  Each anonymous struct defines one global
 * object of the same name that is read from (and, for some, written back
 * to) disk as raw bytes, so the member order and sizes are the file format.
 *
 * NOTE(review): several offsets are declared `unsigned long`.  The raw
 * fread() of sizeof(struct) bytes only matches a fixed on-disk layout if
 * that type has the width the file uses (presumably 32 bits - confirm).
 * On targets where `unsigned long` is wider, these records would be misread;
 * consider fixed-width types together with the printf formats that print
 * these members.
 */

/* First record of the file; read by handle_fontfile(). */
struct {
        unsigned char id[8] PACKED;
        unsigned char res[8] PACKED;
        unsigned short num_pointers PACKED;
        unsigned char p_type PACKED;
        unsigned long offset PACKED;
} FontFileHeader;
/* Second record; num_codepages drives the loop in handle_fontfile(). */
struct {
        unsigned short num_codepages PACKED;
} FontInfoHeader;
/* Per-codepage entry header; read at the start of handle_codepage(). */
struct {
        unsigned short size PACKED;
        unsigned long off_nexthdr PACKED;
        unsigned short device_type PACKED; /* screen=1; printer=2 */
        unsigned char device_name[8] PACKED;
        unsigned short codepage PACKED;
        unsigned char res[6] PACKED;
        unsigned long off_font PACKED;
} CPEntryHeader;
/* Precedes the font data of a codepage; `size` is the byte count copied out. */
struct {
        unsigned short reserved PACKED;
        unsigned short num_fonts PACKED;
        unsigned short size PACKED;
} CPInfoHeader;
/* Not referenced by the code visible in this file. */
struct {
        unsigned char height PACKED;
        unsigned char width PACKED;
        unsigned short reserved PACKED;
        unsigned short num_chard PACKED;
} ScreenFontHeader;
/* Not referenced by the code visible in this file. */
struct {
        unsigned short p1 PACKED;
        unsigned short p2 PACKED;
} PrinterFontHeader;
/* Input .cpi stream (opened in main) and the current output .cp stream. */
FILE *in;
FILE *out;

void usage(void);

/*
 * Command-line state, set in main():
 *   opta  -a  extract all codepages
 *   optc  -c  input file is a single codepage
 *   optl  -l  list codepages (also the default with no option)
 *   optL  -L  print raw header info
 *   optx      a codepage number was given on the command line
 */
int opta;
int optc;
int optl;
int optL;
int optx;

/* getopt() state. */
extern int optind;
extern char *optarg;

/* Codepage number requested on the command line; meaningful when optx is set. */
unsigned short codepage;
int main (int argc, char *argv[])
{
        if (argc < 2)
                usage();
        if ((in = fopen(argv[1], "r")) == NULL) {
                printf("\nUnable to open file %s.\n", argv[1]);
                exit(0);
        }
        opta = optc = optl = optL = optx = 0;
        optind = 2;
        if (argc == 2)
                optl = 1;
        else
        while(1) {
            switch(getopt(argc, argv, "alLc")) {
              case 'a':
                opta = 1;
                continue;
              case 'c':
                optc = 1;
                continue;
              case 'L':
                optL = 1;
                continue;
              case 'l':
                optl = 1;
                continue;
              case '?':
              default:
                usage();
              case -1:
                break;
            }
            break;
        }
        if (optind != argc) {
            if (optind != argc-1 || opta)
              usage();
            codepage = atoi(argv[optind]);
            optx = 1;
        }
        if (optc)
          handle_codepage(0);
        else
          handle_fontfile();
        if (optx) {
            printf("no page %d found\n", codepage);
            exit(1);
        }
        fclose(in);
        return (0);
}
/*
 * Read the two file-level headers from `in` and then walk the codepage
 * entries, calling handle_codepage() once per entry announced in
 * FontInfoHeader.num_codepages (stopping early if it returns non-zero).
 * Exits the process on a short read or when the file identifies itself as
 * a DRDOS font file.
 */
void
handle_fontfile(void)
{
        size_t got;
        int i;

        got = fread(&FontFileHeader, 1, sizeof(FontFileHeader), in);
        if (got != sizeof(FontFileHeader)) {
            printf("error reading FontFileHeader - got %d chars\n", (int) got);
            exit (1);
        }

        /*
         * id[] is a fixed 8-byte field with no terminating NUL, so compare
         * exactly the 7 bytes after the first one instead of using strcmp().
         */
        if (memcmp(FontFileHeader.id + 1, "DRFONT ", 7) == 0) {
            printf("this program cannot handle DRDOS font files\n");
            exit(1);
        }

        if (optL)
          printf("FontFileHeader: id=%8.8s res=%8.8s num=%d typ=%c offset=%ld\n\n",
                 (const char *) FontFileHeader.id,
                 (const char *) FontFileHeader.res,
                 FontFileHeader.num_pointers,
                 FontFileHeader.p_type,
                 (long) FontFileHeader.offset);

        got = fread(&FontInfoHeader, 1, sizeof(FontInfoHeader), in);
        if (got != sizeof(FontInfoHeader)) {
            printf("error reading FontInfoHeader - got %d chars\n", (int) got);
            exit (1);
        }

        if (optL)
          printf("FontInfoHeader: num_codepages=%d\n\n",
                 FontInfoHeader.num_codepages);

        /* the argument tells handle_codepage() how many entries still follow */
        for (i = FontInfoHeader.num_codepages; i; i--)
          if (handle_codepage(i-1))
            break;
}
/*
 * Process one codepage entry starting at the current position of `in`.
 *
 * Reads CPEntryHeader, optionally prints it (-L / -l), and, if this entry is
 * selected (-a, matching nnn, or -c), reads CPInfoHeader and copies entry
 * header + info header + font data into "<codepage>.cp".  With -c only the
 * headers are read and reported.  When a specific codepage was requested
 * and has been written, the process exits with status 0.
 *
 * more_to_come: non-zero if the caller expects further entries after this one.
 *
 * Returns 0 after positioning `in` at the next entry header, 1 when there
 * is no further entry to read.  I/O and allocation failures exit(1).
 */
int
handle_codepage(int more_to_come) {
        size_t got;
        char outfile[20];
        unsigned char *fonts;
        long inpos, nexthdr;

        got = fread(&CPEntryHeader, 1, sizeof(CPEntryHeader), in);
        if (got != sizeof(CPEntryHeader)) {
            printf("error reading CPEntryHeader - got %d chars\n", (int) got);
            exit(1);
        }
        if (optL) {
            int t = CPEntryHeader.device_type;
            printf("CPEntryHeader: size=%d dev=%d [%s] name=%8.8s "
                   "codepage=%d\n\t\tres=%6.6s nxt=%ld off_font=%ld\n\n",
                   CPEntryHeader.size,
                   t, (t==1) ? "screen" : (t==2) ? "printer" : "?",
                   (const char *) CPEntryHeader.device_name,
                   CPEntryHeader.codepage,
                   (const char *) CPEntryHeader.res,
                   (long) CPEntryHeader.off_nexthdr,
                   (long) CPEntryHeader.off_font);
        } else if (optl) {
            printf("\nCodepage = %d\n", CPEntryHeader.codepage);
            printf("Device = %.8s\n", (const char *) CPEntryHeader.device_name);
        }
#if 0
        if (CPEntryHeader.size != sizeof(CPEntryHeader)) {
            /* seen 26 and 28, so that the difference below is -2 or 0 */
            if (optl)
              printf("Skipping %d bytes of garbage\n",
                     CPEntryHeader.size - sizeof(CPEntryHeader));
            fseek(in, CPEntryHeader.size - sizeof(CPEntryHeader),
                  SEEK_CUR);
        }
#endif
        /* entry not selected: skip straight to locating the next header */
        if (!opta && (!optx || CPEntryHeader.codepage != codepage) && !optc)
          goto next;

        /* the font may not follow the entry header directly; seek if needed */
        inpos = ftell(in);
        if ((unsigned long) inpos != CPEntryHeader.off_font && !optc) {
            if (optL)
              printf("pos=%ld font at %ld\n", inpos,
                     (long) CPEntryHeader.off_font);
            fseek(in, (long) CPEntryHeader.off_font, SEEK_SET);
        }

        got = fread(&CPInfoHeader, 1, sizeof(CPInfoHeader), in);
        if (got != sizeof(CPInfoHeader)) {
            printf("error reading CPInfoHeader - got %d chars\n", (int) got);
            exit(1);
        }
        if (optl) {
            printf("Number of Fonts = %d\n", CPInfoHeader.num_fonts);
            printf("Size of Bitmap = %d\n", CPInfoHeader.size);
        }
        if (CPInfoHeader.num_fonts == 0)
          goto next;
        if (optc)
          return 0;

        /* codepage is at most 5 digits, but keep the write bounded anyway */
        snprintf(outfile, sizeof(outfile), "%d.cp", CPEntryHeader.codepage);
        if ((out = fopen(outfile, "wb")) == NULL) {
            printf("\nUnable to open file %s.\n", outfile);
            exit(1);
        }
        printf("\nWriting %s\n", outfile);

        /*
         * Zero-filled buffer (never zero-sized): if the file is shorter than
         * the header claims, the tail written out is zeros, not heap garbage.
         */
        fonts = calloc(CPInfoHeader.size ? CPInfoHeader.size : 1, 1);
        if (fonts == NULL) {
            printf("out of memory reading font data for %s\n", outfile);
            exit(1);
        }
        got = fread(fonts, 1, CPInfoHeader.size, in);
        if (got != CPInfoHeader.size)
            printf("warning: short read of font data - got %d of %d chars\n",
                   (int) got, CPInfoHeader.size);

        if (fwrite(&CPEntryHeader, sizeof(CPEntryHeader), 1, out) != 1 ||
            fwrite(&CPInfoHeader, sizeof(CPInfoHeader), 1, out) != 1) {
            printf("error writing %s - headers not written\n", outfile);
            exit(1);
        }
        got = fwrite(fonts, 1, CPInfoHeader.size, out);
        if (got != CPInfoHeader.size) {
            printf("error writing %s - wrote %d chars\n", outfile, (int) got);
            exit(1);
        }
        /* fclose() flushes; a failure here means the file is incomplete */
        if (fclose(out) != 0) {
            printf("error writing %s - close failed\n", outfile);
            exit(1);
        }
        free(fonts);
        if (optx) exit(0);
      next:
        /*
         * It seems that if entry headers and fonts are interspersed,
         * then nexthdr will point past the font, regardless of
         * whether more entries follow.
         * Otherwise, first all entry headers are given, and then
         * all fonts; in this case nexthdr will be 0 in the last entry.
         */
        nexthdr = (long) CPEntryHeader.off_nexthdr;
        if (nexthdr == 0 || nexthdr == -1) {
            if (more_to_come) {
                printf("more codepages expected, but nexthdr=%ld\n",
                       nexthdr);
                exit(1);
            } else
                return 1;
        }
        inpos = ftell(in);
        if (inpos != nexthdr) {
            if (optL)
              printf("pos=%ld nexthdr at %ld\n", inpos, nexthdr);
            if (opta && !more_to_come) {
                printf("no more code pages, but nexthdr != 0\n");
                return 1;
            }
            fseek(in, nexthdr, SEEK_SET);
        }
        return 0;
}
/*
 * Print the command-line help to stdout and terminate with exit status 1.
 * Never returns.
 */
void usage(void)
{
        static const char *const help_lines[] = {
                "\nUsage: cpi code_page_file [-c] [-L] [-l] [-a|nnn]\n",
                " -c: input file is a single codepage\n",
                " -L: print header info (you don't want to see this)\n",
                " -l or no option: list all codepages contained in the file\n",
                " -a: extract all codepages from the file\n",
                " nnn (3 digits): extract codepage nnn from the file\n",
                "Example: cpi ega.cpi 850 \n",
                " will create a file 850.cp containing the requested codepage.\n\n",
        };
        size_t idx;

        for (idx = 0; idx < sizeof(help_lines) / sizeof(help_lines[0]); idx++)
                fputs(help_lines[idx], stdout);
        exit(1);
}
 
 
 
Code Page   Character Set  语种 
708         ASMO-708   阿拉伯字符 (ASMO 708)
720         DOS-720   阿拉伯字符 (DOS)
28596       iso-8859-6   阿拉伯字符 (ISO)
1256        windows-1256  阿拉伯字符 (Windows)
1257        windows-1257  波罗的海字符 (Windows)
852         ibm852   中欧字符 (DOS)
28592       iso-8859-2   中欧字符 (ISO)
1250        windows-1250  中欧字符 (Windows)
936         gb2312   简体中文 (GB2312)
950         big5   繁体中文 (Big5)
862         DOS-862   希伯来字符 (DOS)
866         cp866   西里尔字符 (DOS)
874         windows-874   泰语 (Windows)
932         shift_jis   日语 (Shift-JIS)
949         ks_c_5601-1987  朝鲜语
1251        windows-1251  西里尔字符 (Windows)
1252        iso-8859-1   西欧字符
1253        windows-1253  希腊字符 (Windows)
1254        iso-8859-9   土耳其字符 (Windows)
1255        windows-1255  希伯来字符 (Windows)
1258        windows-1258  越南字符 (Windows)
20866       koi8-r   西里尔字符 (KOI8-R)
21866       koi8-u   西里尔字符 (KOI8-U)
28595       iso-8859-5   西里尔字符 (ISO)
28597       iso-8859-7   希腊字符 (ISO)
28598       iso-8859-8   希伯来字符 (ISO-Visual)
38598       iso-8859-8-i  希伯来字符 (ISO-Logical)
50932       _autodetect   日语 (自动选择)
51932       euc-jp   日语 (EUC)
52936       hz-gb-2312   简体中文 (HZ)
65001       utf-8   Unicode (UTF-8)