本站不再更新,欢迎光临 java开发技术网
随笔-230  评论-230  文章-8  trackbacks-0

把最近工作中所写的代码贴一下,以备后用,如能给予你帮助我万分高兴

package com.easylotto.omas.util;


import java.util.*;
import java.io.*;
import org.apache.commons.lang.*;
/**
 * Replaces HTML character references in a string with the characters they denote,
 * e.g. {@code &lt;}, {@code &gt;}, {@code &quot;}, {@code &#229;}, {@code &#x418;}
 * and {@code &#27700;}.
 *
 * <p>Three reference forms are recognised, each of which must be terminated by
 * {@code ';'}: named ({@code &name;}, looked up in {@link #charTable}), decimal
 * ({@code &#NNN;}) and hexadecimal ({@code &#xHHH;} or {@code &#XHHH;}). Anything that
 * does not match one of these forms is copied to the output unchanged.
 *
 * @author Zhao Xueqing
 * @author peidw (modified 2008-06-20)
 */
public class HTMLDecoder {

  /**
   * Entity name (without the leading {@code '&'} and trailing {@code ';'}) to decoded
   * character. Populated once by the static initializer below. The map itself is
   * mutable and public, so entries added or removed by callers affect
   * {@link #decode(String)}.
   */
  public static final HashMap<String, Character> charTable;

  /**
   * Decodes every well-formed character reference in {@code s}.
   *
   * <p>Numeric references may name any valid Unicode code point (0 to 0x10FFFF); code
   * points above the Basic Multilingual Plane are emitted as a surrogate pair. Unknown
   * entity names, unterminated references and numeric values that are out of range are
   * left in the output exactly as they appeared in the input.
   *
   * @param s the text to decode; may be {@code null}
   * @return the decoded text, or {@code null} if {@code s} is {@code null}
   */
  public static String decode(String s) {
    if (s == null) {
      return null;
    }
    int length = s.length();
    StringBuilder out = new StringBuilder(length);
    int pos = 0;
    while (pos < length) {
      char c = s.charAt(pos);
      if (c == '&') {
        int next = decodeReference(s, pos + 1, out);
        if (next >= 0) {
          pos = next;
          continue;
        }
      }
      // Either an ordinary character or an '&' that does not start a valid reference.
      // Only this one character is consumed, so the text after a stray '&' is scanned
      // again and may still contain references of its own.
      out.append(c);
      pos++;
    }
    return out.toString();
  }

  /**
   * Tries to decode one character reference whose body starts at {@code start}, i.e.
   * the index just after the {@code '&'}.
   *
   * @return the index just past the terminating {@code ';'} when a reference was
   *     decoded and appended to {@code out}, or {@code -1} when nothing was appended
   */
  private static int decodeReference(String s, int start, StringBuilder out) {
    if (start >= s.length()) {
      return -1;
    }
    char first = s.charAt(start);
    if (first == '#') {
      return decodeNumericReference(s, start + 1, out);
    }
    if (isLetter(first)) {
      return decodeNamedReference(s, start, out);
    }
    return -1;
  }

  /** Decodes the {@code NNN;} or {@code xHHH;} part that follows {@code "&#"}. */
  private static int decodeNumericReference(String s, int start, StringBuilder out) {
    int length = s.length();
    int digitsStart = start;
    boolean hex = false;
    if (digitsStart < length) {
      char marker = s.charAt(digitsStart);
      if (marker == 'x' || marker == 'X') {
        hex = true;
        digitsStart++;
      }
    }

    int end = digitsStart;
    while (end < length && (hex ? isHexDigit(s.charAt(end)) : isDigit(s.charAt(end)))) {
      end++;
    }
    // At least one digit is required and the digits must be followed by ';'.
    if (end == digitsStart || end >= length || s.charAt(end) != ';') {
      return -1;
    }

    int codePoint;
    try {
      codePoint = Integer.parseInt(s.substring(digitsStart, end), hex ? 16 : 10);
    } catch (NumberFormatException ignored) {
      // The digit run does not fit in an int; treat the reference as plain text.
      return -1;
    }
    if (!Character.isValidCodePoint(codePoint)) {
      return -1;
    }
    out.appendCodePoint(codePoint);
    return end + 1;
  }

  /** Decodes the {@code name;} part of a named reference via {@link #charTable}. */
  private static int decodeNamedReference(String s, int start, StringBuilder out) {
    int length = s.length();
    int end = start + 1;
    while (end < length && isLetterOrDigit(s.charAt(end))) {
      end++;
    }
    if (end >= length || s.charAt(end) != ';') {
      return -1;
    }
    Character decoded = charTable.get(s.substring(start, end));
    if (decoded == null) {
      return -1;
    }
    out.append(decoded.charValue());
    return end + 1;
  }

  /** Returns whether {@code c} is an ASCII letter or an ASCII decimal digit. */
  private static boolean isLetterOrDigit(char c) {
    return isLetter(c) || isDigit(c);
  }

  /** Returns whether {@code c} is an ASCII hexadecimal digit (0-9, a-f, A-F). */
  private static boolean isHexDigit(char c) {
    return isHexLetter(c) || isDigit(c);
  }

  /** Returns whether {@code c} is an ASCII letter (a-z, A-Z). */
  private static boolean isLetter(char c) {
    return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
  }

  /** Returns whether {@code c} is one of the letters used as hex digits (a-f, A-F). */
  private static boolean isHexLetter(char c) {
    return ((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F'));
  }

  /** Returns whether {@code c} is an ASCII decimal digit (0-9). */
  private static boolean isDigit(char c) {
    return (c >= '0') && (c <= '9');
  }

  /**
   * Collapses every run of HTML white space (see {@link #isWhitespace(char)}) into a
   * single space character. Leading and trailing runs are collapsed too, not removed.
   *
   * @param s the text to compact; may be {@code null}
   * @return the compacted text, or {@code null} if {@code s} is {@code null}
   */
  public static String compact(String s) {
    if (s == null) {
      return null;
    }
    int length = s.length();
    StringBuilder out = new StringBuilder(length);
    int pos = 0;
    while (pos < length) {
      char c = s.charAt(pos++);
      if (isWhitespace(c)) {
        // Skip the rest of the run and emit one plain space for all of it.
        while (pos < length && isWhitespace(s.charAt(pos))) {
          pos++;
        }
        c = ' ';
      }
      out.append(c);
    }
    return out.toString();
  }

  /**
   * Returns whether {@code ch} counts as white space here: space, carriage return,
   * line feed, tab, form feed or zero-width space (U+200B). HTML is very particular
   * about what constitutes white space, so this is narrower than
   * {@link Character#isWhitespace(char)}.
   */
  public static boolean isWhitespace(char ch) {
    return (ch == ' ')
        || (ch == '\r')
        || (ch == '\n')
        || (ch == '\t')
        || (ch == '\f')
        || (ch == '\u200b');
  }

  /** Adds one entity to {@link #charTable}; used only by the static initializer. */
  private static void register(String name, int codePoint) {
    charTable.put(name, Character.valueOf((char) codePoint));
  }

  static {
    charTable = new HashMap<String, Character>();

    // Markup-significant characters.
    register("quot", 34);
    register("amp", 38);
    register("apos", 39);
    register("lt", 60);
    register("gt", 62);

    // NOTE(review): nbsp is mapped to a plain space (32), not U+00A0 (160). This looks
    // deliberate (it makes decoded text easier to print), so it is kept - confirm.
    register("nbsp", 32);

    // Latin-1 symbols.
    register("iexcl", 161);
    register("cent", 162);
    register("pound", 163);
    register("curren", 164);
    register("yen", 165);
    register("brvbar", 166);
    register("sect", 167);
    register("uml", 168);
    register("copy", 169);
    register("ordf", 170);
    register("laquo", 171);
    register("not", 172);
    register("shy", 173);
    register("reg", 174);
    register("macr", 175);
    register("deg", 176);
    register("plusmn", 177);
    register("sup2", 178);
    register("sup3", 179);
    register("acute", 180);
    register("micro", 181);
    register("para", 182);
    register("middot", 183);
    register("cedil", 184);
    register("sup1", 185);
    register("ordm", 186);
    register("raquo", 187);
    register("frac14", 188);
    register("frac12", 189);
    register("frac34", 190);
    register("iquest", 191);

    // Latin-1 letters.
    register("Agrave", 192);
    register("Aacute", 193);
    register("Acirc", 194);
    register("Atilde", 195);
    register("Auml", 196);
    register("Aring", 197);
    register("AElig", 198);
    register("Ccedil", 199);
    register("Egrave", 200);
    register("Eacute", 201);
    register("Ecirc", 202);
    register("Euml", 203);
    register("Igrave", 204);
    register("Iacute", 205);
    register("Icirc", 206);
    register("Iuml", 207);
    register("ETH", 208);
    register("Ntilde", 209);
    register("Ograve", 210);
    register("Oacute", 211);
    register("Ocirc", 212);
    register("Otilde", 213);
    register("Ouml", 214);
    register("times", 215);
    register("Oslash", 216);
    register("Ugrave", 217);
    register("Uacute", 218);
    register("Ucirc", 219);
    register("Uuml", 220);
    register("Yacute", 221);
    register("THORN", 222);
    register("szlig", 223);
    register("agrave", 224);
    register("aacute", 225);
    register("acirc", 226);
    register("atilde", 227);
    register("auml", 228);
    register("aring", 229);
    register("aelig", 230);
    register("ccedil", 231);
    register("egrave", 232);
    register("eacute", 233);
    register("ecirc", 234);
    register("euml", 235);
    register("igrave", 236);
    register("iacute", 237);
    register("icirc", 238);
    register("iuml", 239);
    register("eth", 240);
    register("ntilde", 241);
    register("ograve", 242);
    register("oacute", 243);
    register("ocirc", 244);
    register("otilde", 245);
    register("ouml", 246);
    register("divide", 247);
    register("oslash", 248);
    register("ugrave", 249);
    register("uacute", 250);
    register("ucirc", 251);
    register("uuml", 252);
    register("yacute", 253);
    register("thorn", 254);
    register("yuml", 255);

    // Latin Extended and spacing modifiers.
    register("OElig", 338);
    register("oelig", 339);
    register("Scaron", 352);
    register("scaron", 353);
    register("Yuml", 376); // Part of the HTML 4 entity set; absent from the original table.
    register("fnof", 402);
    register("circ", 710);
    register("tilde", 732);

    // Greek capital letters.
    register("Alpha", 913);
    register("Beta", 914);
    register("Gamma", 915);
    register("Delta", 916);
    register("Epsilon", 917);
    register("Zeta", 918);
    register("Eta", 919);
    register("Theta", 920);
    register("Iota", 921);
    register("Kappa", 922);
    register("Lambda", 923);
    register("Mu", 924);
    register("Nu", 925);
    register("Xi", 926);
    register("Omicron", 927);
    register("Pi", 928);
    register("Rho", 929);
    register("Sigma", 931);
    register("Tau", 932);
    register("Upsilon", 933);
    register("Phi", 934);
    register("Chi", 935);
    register("Psi", 936);
    register("Omega", 937);

    // Greek small letters and symbols.
    register("alpha", 945);
    register("beta", 946);
    register("gamma", 947);
    register("delta", 948);
    register("epsilon", 949);
    register("zeta", 950);
    register("eta", 951);
    register("theta", 952);
    register("iota", 953);
    register("kappa", 954);
    register("lambda", 955);
    register("mu", 956);
    register("nu", 957);
    register("xi", 958);
    register("omicron", 959);
    register("pi", 960);
    register("rho", 961);
    register("sigmaf", 962);
    register("sigma", 963);
    register("tau", 964);
    register("upsilon", 965);
    register("phi", 966);
    register("chi", 967);
    register("psi", 968);
    register("omega", 969);
    register("thetasym", 977);
    register("upsih", 978);
    register("piv", 982);

    // General punctuation.
    register("ensp", 8194);
    register("emsp", 8195);
    register("thinsp", 8201);
    register("zwnj", 8204);
    register("zwj", 8205);
    register("lrm", 8206);
    register("rlm", 8207);
    register("ndash", 8211);
    register("mdash", 8212);
    register("lsquo", 8216);
    register("rsquo", 8217);
    register("sbquo", 8218);
    register("ldquo", 8220);
    register("rdquo", 8221);
    register("bdquo", 8222);
    register("dagger", 8224);
    register("Dagger", 8225);
    register("bull", 8226);
    register("hellip", 8230);
    register("permil", 8240);
    register("prime", 8242);
    register("Prime", 8243);
    register("lsaquo", 8249);
    register("rsaquo", 8250);
    register("oline", 8254);
    register("frasl", 8260);
    register("euro", 8364);

    // Letter-like symbols.
    register("image", 8465);
    register("weierp", 8472);
    register("real", 8476);
    register("trade", 8482);
    register("alefsym", 8501);

    // Arrows.
    register("larr", 8592);
    register("uarr", 8593);
    register("rarr", 8594);
    register("darr", 8595);
    register("harr", 8596);
    register("crarr", 8629);
    register("lArr", 8656);
    register("uArr", 8657);
    register("rArr", 8658);
    register("dArr", 8659);
    register("hArr", 8660);

    // Mathematical operators.
    register("forall", 8704);
    register("part", 8706);
    register("exist", 8707);
    register("empty", 8709);
    register("nabla", 8711);
    register("isin", 8712);
    register("notin", 8713);
    register("ni", 8715);
    register("prod", 8719);
    register("sum", 8721);
    register("minus", 8722);
    register("lowast", 8727);
    register("radic", 8730);
    register("prop", 8733);
    register("infin", 8734);
    register("ang", 8736);
    register("and", 8743);
    register("or", 8744);
    register("cap", 8745);
    register("cup", 8746);
    register("int", 8747);
    register("there4", 8756);
    register("sim", 8764);
    register("cong", 8773);
    register("asymp", 8776);
    register("ne", 8800);
    register("equiv", 8801);
    register("le", 8804);
    register("ge", 8805);
    register("sub", 8834);
    register("sup", 8835);
    register("nsub", 8836);
    register("sube", 8838);
    register("supe", 8839);
    register("oplus", 8853);
    register("otimes", 8855);
    register("perp", 8869);
    register("sdot", 8901);

    // Technical and geometric symbols, card suits.
    register("lceil", 8968);
    register("rceil", 8969);
    register("lfloor", 8970);
    register("rfloor", 8971);
    register("lang", 9001);
    register("rang", 9002);
    register("loz", 9674);
    register("spades", 9824);
    register("clubs", 9827);
    register("hearts", 9829);
    register("diams", 9830);
  }

  /**
   * Demo entry point: reads an HTML file, decodes its character references, strips
   * every {@code '?'} from the result and prints it to standard output.
   *
   * <p>Lines are concatenated without their line terminators, and the file is read
   * with the platform default charset.
   *
   * @param args optional; {@code args[0]} is the file to read. When absent, the
   *     original hard-coded path is used.
   * @throws Exception if the file cannot be opened or read
   */
  public static void main(String[] args) throws Exception {
    String path = (args != null && args.length > 0) ? args[0] : "e:\\temp\\统计表.htm";

    StringBuilder html = new StringBuilder();
    InputStream in = new FileInputStream(new File(path));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        html.append(line);
      }
    } finally {
      // Closing the reader also closes the underlying file stream.
      reader.close();
    }

    String decoded = decode(html.toString()).replace("?", "");
    System.out.println(decoded);
  }
}
posted on 2008-06-19 11:33 有猫相伴的日子 阅读(4006) 评论(3)  编辑  收藏 所属分类: jdk

评论:
# re: unicode 编码转换成汉字 2008-10-31 22:13 | wuzhikun
hao   回复  更多评论
  
# re: unicode 编码转换成汉字 2008-10-31 22:14 | wuzhikun
yao   回复  更多评论
  
# re: unicode 编码转换成汉字[未登录] 2012-08-23 23:25 | 菜鸟
不错,正用得上,万分感谢!  回复  更多评论
  

只有注册用户登录后才能发表评论。


网站导航:
 
本站不再更新,欢迎光临 java开发技术网