Blogger Scott

一个utf8转换程序

据说这是一个可在手机上通用的 UTF-8 转换程序，先记下来。

 1private final String readUnicodeFileUTF8(String filename) {
 2        StringBuffer sb = new StringBuffer(256);
 3        try {
 4            int[] surrogatePair = new int[2];
 5            InputStream is = this.getClass().getResourceAsStream(filename);
 6
 7            int val = 0;
 8            int unicharCount = 0;
 9            while ((val = readNextCharFromStreamUTF8(is))!=-1{
10                unicharCount++;
11                if (val <= 0xFFFF{
12                    // if first value is the Byte Order Mark (BOM), do not add
13                    if (! (unicharCount == 1 && val == 0xFEFF)) {
14                        sb.append((char)val);
15                    }

16                }
 else {
17                    supplementCodePointToSurrogatePair(val, surrogatePair);
18                    sb.append((char)surrogatePair[0]);
19                    sb.append((char)surrogatePair[1]);
20                }

21            }

22            is.close();
23        }
 catch (Exception e) {};
24
25        return new String(sb);
26    }

27   
28    private final static int readNextCharFromStreamUTF8(InputStream is) {
29        int c = -1;
30        if (is==nullreturn c;
31        boolean complete = false;
32       
33        try {
34            int byteVal;
35            int expecting=0;
36            int composedVal=0;
37           
38            while (!complete && (byteVal = is.read()) != -1{
39                if (expecting > 0 && (byteVal & 0xC0== 0x80{  /* 10xxxxxx */
40                    expecting--;
41                    composedVal = composedVal | ((byteVal & 0x3F<< (expecting*6));
42                    if (expecting == 0{
43                        c = composedVal;
44                        complete = true;
45                        //System.out.println("appending: U+" + Integer.toHexString(composedVal) );
46                    }

47                }
 else {
48                    composedVal = 0;
49                    expecting = 0;
50                    if ((byteVal & 0x80== 0{    /* 0xxxxxxx */
51                        // one byte character, no extending byte expected
52                        c = byteVal;
53                        complete = true;
54                        //System.out.println("appending: U+" + Integer.toHexString(byteVal) );
55                    }
 else if ((byteVal & 0xE0== 0xC0{  /* 110xxxxx */
56                        expecting = 1;  // expecting 1 extending byte
57                        composedVal = ((byteVal & 0x1F<< 6);
58                    }
 else if ((byteVal & 0xF0== 0xE0{  /* 1110xxxx */
59                        expecting = 2;  // expecting 2 extending bytes
60                        composedVal = ((byteVal & 0x0F<< 12);
61                    }
 else if ((byteVal & 0xF8== 0xF0{  /* 11110xxx */
62                        expecting = 3;  // expecting 3 extending bytes
63                        composedVal = ((byteVal & 0x07<< 18);
64                    }
 else {
65                        // non conformant utf-8, ignore or catch error
66                    }

67                }

68            }

69           
70        }
 catch (Exception e) {
71            System.out.println(e.toString());
72        }

73       
74        return c;
75    }

76
77    private final static void supplementCodePointToSurrogatePair(int codePoint, int[] surrogatePair) {
78        int high4 = ((codePoint >> 16& 0x1F- 1;
79        int mid6 = ((codePoint >> 10& 0x3F);
80        int low10 = codePoint & 0x3FF;
81
82        surrogatePair[0= (0xD800 | (high4 << 6| (mid6));
83        surrogatePair[1= (0xDC00 | (low10));
84    }

posted on 2009-06-07 16:37 江天部落格 阅读(295) 评论(0)  编辑  收藏 所属分类: AndroidJava


只有注册用户登录后才能发表评论。


网站导航: