赛迪网 > IT技术 Java > 技术动态
  IT资讯搜索
 
IT产品搜索
[程序开发][网管世界][网络安全][数据库技术]
[操作系统][嘉宾聊天·在线访谈][活动集锦]
[精彩专题][Symantec专区][订阅IT技术周刊]
[开发论坛][网管论坛][安全论坛][数据库论坛]
[操作系统论坛][Sybase专区][IBM dW技术专区]
[病毒求助][病毒与漏洞播报][文档·源码下载]

从GB2312 到 Unicode转换表制作程式

发布时间:2007.04.10 05:06     来源:和荣笔记    作者:和荣笔记

本文摘自“和荣笔记 - GB2312 字符集与编码对照表”

简体中文有两种常用编码:GB2312 和 Unicode。它们的对应关系可以用下面的程式生成:

/** 
 * GB2312Unicde.java 
 * Copyright (c) 2003 by Dr. Herong Yang, http://www.herongyang.com/ 
 */ 
import java.io.*; 
import java.nio.*; 
import java.nio.charset.*; 
/**
 * Generates a GB2312 / UTF-16BE / UTF-8 cross-reference table.
 *
 * <p>{@link #main(String[])} writes the table to the file {@code gb2312.gb}.
 * The table is organised by "row" and "column" numbers (both 1..94); the
 * two-byte GB code of a cell is {@code 0xA0 + row} followed by
 * {@code 0xA0 + column}. Every helper writes to the shared static stream
 * {@link #out}, which must be set before any {@code write*} method is called.
 */
class GB2312Unicde {
   /** Destination of all output; assigned by {@link #main(String[])}. */
   static OutputStream out = null;

   /** Upper-case hexadecimal digits, indexed by nibble value. */
   static char hexDigit[] = {'0', '1', '2', '3', '4', '5', '6', '7',
                             '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

   /**
    * Inclusive lower bounds of the excluded code ranges, expressed as
    * {@code row*100 + column}. Paired index-by-index with {@link #e_out}.
    */
   static int b_out[] = {201,267,279,293,484,587,625,657,734,782,827,
      874,901,980,5590};

   /**
    * Inclusive upper bounds of the excluded code ranges, expressed as
    * {@code row*100 + column}. Paired index-by-index with {@link #b_out}.
    */
   static int e_out[] = {216,268,280,294,494,594,632,694,748,794,836,
      894,903,994,5594};

   /**
    * Creates {@code gb2312.gb} in the working directory and writes the
    * complete table into it. Any {@link IOException} is reported on
    * standard output.
    *
    * @param args ignored
    */
   public static void main(String[] args) {
      try {
         // Buffered: the writers below emit one byte per call.
         out = new BufferedOutputStream(new FileOutputStream("gb2312.gb"));
         try {
            writeCode();
         } finally {
            // Always flush and release the file, even if writeCode() fails.
            out.close();
         }
      } catch (IOException e) {
         System.out.println(e.toString());
      }
   }

   /**
    * Writes the table for rows 1..94 to {@link #out}.
    *
    * <p>Each row starts with a {@code <p>} title. Rows that are not reserved
    * are followed by a {@code <pre>} block holding two entries per line.
    * When finished, the number of characters that were successfully
    * converted is printed on standard output.
    *
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeCode() throws IOException {
      // GBK is used for decoding; the original author noted that GB2312
      // was not supported by the JDK they were using.
      CharsetDecoder gbdc = Charset.forName("GBK").newDecoder();
      CharsetEncoder uxec = Charset.forName("UTF-16BE").newEncoder();
      CharsetEncoder u8ec = Charset.forName("UTF-8").newEncoder();
      int count = 0;
      for (int i = 1; i <= 94; i++) {
         // writing row title
         writeln();
         writeString("<p>");
         writeNumber(i);
         writeString(" Row: " + rowName(i));
         writeln();
         writeString("</p>");
         writeln();
         if (isReservedRow(i)) {
            continue;
         }
         writeln();
         writeHeader();
         // looping through all characters in one row
         for (int j = 1; j <= 94; j++) {
            if (writeEntry(i, j, gbdc, uxec, u8ec)) {
               count++;
            }
            // two entries per output line
            if (j % 2 == 0) {
               writeln();
            } else {
               writeString("   ");
            }
         }
         writeFooter();
      }
      System.out.println("Number of GB characters worte: " + count);
   }

   /** Tells whether the given row (1..94) is labelled "Reserved". */
   private static boolean isReservedRow(int row) {
      return (row >= 10 && row <= 15) || (row >= 88 && row <= 94);
   }

   /** Returns the title text for the given row (1..94). */
   private static String rowName(int row) {
      if (isReservedRow(row)) {
         return "Reserved";
      }
      if (row <= 9) {
         return "Graphic symbols";
      }
      if (row <= 55) {
         return "Level 1 characters";
      }
      return "Level 2 characters";
   }

   /**
    * Writes one table entry: row/column numbers, the raw GB bytes, their
    * hex form, the UTF-16BE hex and the UTF-8 hex. If the cell is excluded
    * by {@link #validGB(int, int)} or cannot be converted, a placeholder
    * entry made of a full-width blank and {@code null} markers is written.
    *
    * @return {@code true} if a character was converted and written
    */
   private static boolean writeEntry(int row, int col, CharsetDecoder gbdc,
      CharsetEncoder uxec, CharsetEncoder u8ec) throws IOException {
      byte hi = (byte) (0xA0 + row);
      byte lo = (byte) (0xA0 + col);
      CharBuffer cb = null;
      ByteBuffer uxbb = null;
      ByteBuffer u8bb = null;
      if (validGB(row, col)) {
         // getting GB, UTF-16BE, UTF-8 codes
         try {
            cb = gbdc.decode(ByteBuffer.wrap(new byte[] {hi, lo}));
            uxbb = uxec.encode(cb);
            cb.rewind();   // the first encode consumed the buffer
            u8bb = u8ec.encode(cb);
         } catch (CharacterCodingException e) {
            // Not convertible: fall back to the placeholder entry.
            cb = null;
            uxbb = null;
            u8bb = null;
         }
      }
      boolean converted = (cb != null);
      writeNumber(row);
      writeNumber(col);
      writeString(" ");
      if (converted) {
         writeByte(hi);
         writeByte(lo);
         writeString(" ");
         writeHex(hi);
         writeHex(lo);
      } else {
         writeGBSpace();
         writeString(" null");
      }
      writeString(" ");
      writeByteBuffer(uxbb, 2);
      writeString(" ");
      writeByteBuffer(u8bb, 3);
      return converted;
   }

   /**
    * Writes a CR LF line terminator.
    *
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeln() throws IOException {
      out.write(0x0D);
      out.write(0x0A);
   }

   /**
    * Writes a single raw byte.
    *
    * @param b the byte to write
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeByte(byte b) throws IOException {
      out.write(b & 0xFF);
   }

   /**
    * Writes the bytes of {@code b} from index 0 up to its limit as hex
    * digits, then pads with two blanks per missing byte up to {@code l}
    * bytes. A {@code null} buffer is written as the text {@code null},
    * which is counted as two bytes for padding purposes.
    *
    * @param b the buffer to dump, or {@code null}
    * @param l the column width, in bytes
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeByteBuffer(ByteBuffer b, int l)
      throws IOException {
      int i;
      if (b == null) {
         writeString("null");
         i = 2;   // "null" is as wide as two hex-encoded bytes
      } else {
         for (i = 0; i < b.limit(); i++) {
            writeHex(b.get(i));
         }
      }
      for (int j = i; j < l; j++) {
         writeString("  ");
      }
   }

   /**
    * Writes the two bytes 0xA1 0xA1, used as a placeholder where a
    * character would otherwise appear.
    *
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeGBSpace() throws IOException {
      out.write(0xA1);
      out.write(0xA1);
   }

   /**
    * Writes the low 8 bits of every character of {@code s}. A
    * {@code null} string writes nothing.
    *
    * @param s the text to write, or {@code null}
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeString(String s) throws IOException {
      if (s == null) {
         return;
      }
      for (int i = 0; i < s.length(); i++) {
         out.write((int) (s.charAt(i) & 0xFF));
      }
   }

   /**
    * Writes the last two characters of {@code "00"} followed by the
    * decimal form of {@code i}; for 0..99 this is the zero-padded
    * two-digit number.
    *
    * @param i the number to write
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeNumber(int i) throws IOException {
      String s = "00" + String.valueOf(i);
      writeString(s.substring(s.length() - 2, s.length()));
   }

   /**
    * Writes {@code b} as two upper-case hexadecimal digits.
    *
    * @param b the byte to write
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeHex(byte b) throws IOException {
      out.write((int) hexDigit[(b >> 4) & 0x0F]);
      out.write((int) hexDigit[b & 0x0F]);
   }

   /**
    * Opens a {@code <pre>} block and writes the two-column heading line
    * followed by an empty line.
    *
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeHeader() throws IOException {
      writeString("<pre>");
      writeln();
      writeString("Q.W. ");
      writeGBSpace();
      writeString(" GB   Uni. UTF-8 ");
      writeString("   ");
      writeString("Q.W. ");
      writeGBSpace();
      writeString(" GB   Uni. UTF-8 ");
      writeln();
      writeln();
   }

   /**
    * Closes the {@code <pre>} block opened by {@link #writeHeader()}.
    *
    * @throws IOException if writing to {@link #out} fails
    */
   public static void writeFooter() throws IOException {
      writeString("</pre>");
      writeln();
   }

   /**
    * Checks a cell against the excluded ranges in {@link #b_out} and
    * {@link #e_out}.
    *
    * @param i the row number
    * @param j the column number
    * @return {@code false} if {@code i*100 + j} lies inside any excluded
    *         range (bounds inclusive), {@code true} otherwise
    */
   public static boolean validGB(int i,int j) {
      int code = i * 100 + j;
      for (int l = 0; l < b_out.length; l++) {
         if (code >= b_out[l] && code <= e_out[l]) {
            return false;
         }
      }
      return true;
   }
}

程式输出的例表格式如下:

Q.W.   GB Uni. UTF-8 Q.W.   GB Uni. UTF-8

1601 啊 B0A1 554A E5958A 1602 阿 B0A2 963F E998BF

1603 埃 B0A3 57C3 E59F83 1604 挨 B0A4 6328 E68CA8

1605 哎 B0A5 54CE E5938E 1606 唉 B0A6 5509 E59489

1607 哀 B0A7 54C0 E59380 1608 皑 B0A8 7691 E79A91

1609 癌 B0A9 764C E7998C 1610 蔼 B0AA 853C E894BC

1611 矮 B0AB 77EE E79FAE 1612 艾 B0AC 827E E889BE

1613 碍 B0AD 788D E7A28D 1614 爱 B0AE 7231 E788B1

1615 隘 B0AF 9698 E99A98 1616 鞍 B0B0 978D E99E8D

1617 氨 B0B1 6C28 E6B0A8 1618 安 B0B2 5B89 E5AE89

1619 俺 B0B3 4FFA E4BFBA 1620 按 B0B4 6309 E68C89

1621 暗 B0B5 6697 E69A97 1622 岸 B0B6 5CB8 E5B2B8

1623 胺 B0B7 80FA E883BA 1624 案 B0B8 6848 E6A188

1625 肮 B0B9 80AE E882AE 1626 昂 B0BA 6602 E69882

1627 盎 B0BB 76CE E79B8E 1628 凹 B0BC 51F9 E587B9

1629 敖 B0BD 6556 E69596 1630 熬 B0BE 71AC E786AC

1631 翱 B0BF 7FF1 E7BFB1 1632 袄 B0C0 8884 E8A284

1633 傲 B0C1 50B2 E582B2 1634 奥 B0C2 5965 E5A5A5

1635 懊 B0C3 61CA E6878A 1636 澳 B0C4 6FB3 E6BEB3

。。。

(t006)


[ 发表评论 ] 字体[  ] [ 打印 ] [ 进入博客 ] [ 进入论坛 ]  [ 推荐给朋友 ]
  相关文章
· JAVA基础: 中国年历算法和程式(代码) (04-09) · JAVA基础应用:java日期加减法(代码) (04-09)
· JAVA进阶: 优化Derby数据库程序性能 (04-09) · JAVA基础应用: 在Java中使用VC++组件 (04-09)
· JAVA基础: 响应鼠标点击事件的小程序 (04-04) · 使用JMX监控应用程序内、 外部的状况 (04-03)
· JAVA进阶: 如何用Java实现Web服务器 (04-03) · 如何用Java操作Word, Excel, PDF文档 (04-03)
· JAVA高级编程:编写跨平台程序注意事项 (04-03) · JAVA进阶:提高代码可重用性的三个措施 (04-03)
  客户需求反馈表
* 姓  名:
更多资料  了解方案  认识厂商
* 单位名称:
* 联系电话:
* 电子邮件:
  赛迪推荐  
  手机·资费 ·新品·导购·评测·手机资费·宽带
手机搜索  诺基亚 N73 MOTO Z6
  IT产品 ·笔记本·台式机·服务器·打印·投影
IT产品搜索 
  IT技术 ·开发·网管·安全·数据库·操作系统
  信息化 ·热点·专题·访谈·周刊·方案案例
· 北京新规不能霸王硬上弓 网店牌照缓期执行
· 软件外包之变的新台阶: 提高全球交付能力
· ERP案例分析 SaaS带来冲击 IT服务商面临挑战
· 通方期货CRM解决方案 房地产行业CRM解决方案
  IT博客 ·曾剑秋·项立刚·Java学习·网管
  IT技术论坛 ·开发·网管·安全·数据库·系统