汉字与
unicode
编码相互转化
(2006年7月17日
11:07:58
)
一、
概述:
如果项目采用了 GBK 编码,那么汉字转化就不是问题了;但是如果采用了 UTF-8 编码,汉字的处理就相对麻烦一些。
二、
功能实现:
代码如下:
1
//
转为unicode
2
public
static
void
writeUnicode(
final
DataOutputStream out,
3
final
String value)
{
4
try
{
5
final
String unicode
=
gbEncoding(value);
6
final
byte
[] data
=
unicode.getBytes();
7
final
int
dataLength
=
data.length;
8
9
System.out.println(
"
Data Length is:
"
+
dataLength);
10
System.out.println(
"
Data is:
"
+
value);
11
out.writeInt(dataLength);
//
先写出字符串的长度
12
out.write(data,
0
, dataLength);
//
然后写出转化后的字符串
13
}
catch
(IOException e)
{
14
15
}
16
}
17
18
public
static
String gbEncoding(
final
String gbString)
{
19
char
[] utfBytes
=
gbString.toCharArray();
20
String unicodeBytes
=
""
;
21
for
(
int
byteIndex
=
0
; byteIndex
<
utfBytes.length; byteIndex
++
)
{
22
String hexB
=
Integer.toHexString(utfBytes[byteIndex]);
23
if
(hexB.length()
<=
2
)
{
24
hexB
=
"
00
"
+
hexB;
25
}
26
unicodeBytes
=
unicodeBytes
+
"
\\u
"
+
hexB;
27
}
28
//
System.out.println("unicodeBytes is: " + unicodeBytes);
29
return
unicodeBytes;
30
}
31
32
/** */
/**
33
* This method will decode the String to a recognized String in ui.
34
* 功能:将unicod码转为需要的格式(utf-8)
35
*
@author
javajohn
36
*
@param
dataStr
37
*
@return
38
*/
39
public
static
StringBuffer decodeUnicode(
final
String dataStr)
{
40
final
StringBuffer buffer
=
new
StringBuffer();
41
String tempStr
=
""
;
42
String operStr
=
dataStr;
43
if
(operStr
!=
null
&&
operStr.indexOf(
"
\\u
"
)
==
-
1
)
return
buffer.append(operStr);
//
44
if
(operStr
!=
null
&&
!
operStr.equals(
""
)
&&
!
operStr.startsWith(
"
\\u
"
))
{
//
45
tempStr
=
operStr.substring(
0
,operStr.indexOf(
"
\\u
"
));
//
46 operStr = operStr.substring(operStr.indexOf("\\u"),operStr.length());//operStr字符一定是以unicode编码字符打头的字符串
47 }
48
buffer.append(tempStr);
49
while
(operStr
!=
null
&&
!
operStr.equals(
""
)
&&
operStr.startsWith(
"
\\u
"
))
{
//
循环处理,处理对象一定是以unicode编码字符打头的字符串
50
tempStr
=
operStr.substring(
0
,
6
);
51
operStr
=
operStr.substring(
6
,operStr.length());
52
String charStr
=
""
;
53
charStr
=
tempStr.substring(
2
, tempStr.length());
54
char
letter
=
(
char
) Integer.parseInt(charStr,
16
);
//
16进制parse整形字符串。
55
buffer.append(
new
Character(letter).toString());
56
if
(operStr.indexOf(
"
\\u
"
)
==
-
1
)
{
//
57 buffer.append(operStr);
58 }
else
{
//
处理operStr使其打头字符为unicode字符
59
tempStr
=
operStr.substring(
0
,operStr.indexOf(
"
\\u
"
));
60
operStr
=
operStr.substring(operStr.indexOf(
"
\\u
"
),operStr.length());
61
buffer.append(tempStr);
62
}
63
}
64
return
buffer;
65
}
三、
结尾: