URI's UTF8 to GB*
Author
Zhou Renjian
Created at
2005-08-08 15:22
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
/**
 * Helpers for decoding percent-encoded ({@code %XX}) text, as found in URIs
 * embedded in HTML.
 *
 * $Log: HTMLEncoding.java,v $
 * Revision 1.1 2005/08/04 09:46:24 zhourj
 * Implemented replacing UTF-encoded addresses in HTML with GBK.
 *
 */
public class HTMLEncoding {

    /** Number of characters requested from the reader per read call. */
    private static final int READ_BUFFER_SIZE = 1024;

    /** Length of a single percent escape such as {@code %E4}. */
    private static final int ESCAPE_LENGTH = 3;

    /** Length of three consecutive percent escapes such as {@code %E4%B8%AD}. */
    private static final int TRIPLE_LENGTH = 3 * ESCAPE_LENGTH;

    /** Charset declaration searched for (once) by {@link #convertURI2GBK(Reader, Writer)}. */
    private static final String CHARSET_MARKER = "charset=UTF-8"; //$NON-NLS-1$

    /** Text the first {@link #CHARSET_MARKER} occurrence is replaced with. */
    private static final String CHARSET_REPLACEMENT = "charset=utf-8"; //$NON-NLS-1$

    /**
     * Number of trailing characters held back between reads so that neither an
     * escape triple nor the charset marker is split across two read chunks.
     */
    private static final int CARRY_OVER =
            Math.max(TRIPLE_LENGTH, CHARSET_MARKER.length()) - 1;

    /** Upper-case hexadecimal digits, indexed by nibble value. */
    private static final String HEX_DIGITS = "0123456789ABCDEF"; //$NON-NLS-1$

    /**
     * Copies {@code reader} to {@code writer}, replacing the first occurrence
     * of {@code charset=UTF-8} with {@code charset=utf-8} and decoding every
     * group of three percent escapes that forms one three-byte UTF-8 character
     * (for example {@code %E4%B8%AD}) into that character.
     *
     * <p>All input is written out, including inputs shorter than one read
     * chunk and the final characters of the stream. Neither stream is closed
     * or flushed by this method.
     *
     * <p>NOTE(review): the charset replacement only changes letter case; the
     * method name suggests a GBK declaration may have been intended. The
     * original strings are kept unchanged - confirm the intent with callers.
     *
     * @param reader source of the text to convert
     * @param writer destination of the converted text
     * @throws IOException if reading or writing fails
     */
    public static void convertURI2GBK(Reader reader, Writer writer) throws IOException {
        StringBuffer pending = new StringBuffer();
        char[] buf = new char[READ_BUFFER_SIZE];
        boolean isEncodingReplaced = false;
        boolean endOfInput = false;
        while (!endOfInput) {
            int read = reader.read(buf);
            if (read == -1) {
                endOfInput = true;
            } else {
                pending.append(buf, 0, read);
            }
            String current = pending.toString();
            if (!isEncodingReplaced) {
                int markerAt = current.indexOf(CHARSET_MARKER);
                if (markerAt != -1) {
                    current = current.substring(0, markerAt) + CHARSET_REPLACEMENT
                            + current.substring(markerAt + CHARSET_MARKER.length());
                    isEncodingReplaced = true;
                }
            }
            // Until the end of input is reached, keep the tail back: it may be
            // the beginning of an escape triple or of the charset marker.
            int scanLimit = endOfInput ? current.length() : current.length() - CARRY_OVER;
            StringBuffer converted = new StringBuffer(current.length());
            int consumed = decodeUtf8Triples(current, scanLimit, converted);
            writer.write(converted.toString());
            pending.setLength(0);
            pending.append(current.substring(consumed));
        }
    }

    /**
     * Returns the numeric value of a hexadecimal digit.
     *
     * @param c1 character to convert; {@code 0-9}, {@code a-f} and {@code A-F} are recognized
     * @return the digit value (0 to 15), or 0 when {@code c1} is not a hexadecimal digit
     */
    public static byte getCharHex(char c1) {
        int value = hexValue(c1);
        return (byte) (value < 0 ? 0 : value);
    }

    /**
     * Decodes percent escapes in {@code lastAnchor} as UTF-8. Each maximal run
     * of consecutive {@code %XX} escapes is decoded as one byte sequence; a
     * {@code '%'} that is not followed by two hexadecimal digits is copied
     * unchanged.
     *
     * @param lastAnchor text that may contain percent escapes
     * @return the decoded text, or {@code lastAnchor} itself when it contains no {@code '%'}
     * @throws UnsupportedEncodingException if the UTF-8 charset is unavailable
     */
    public static String decodeURI(String lastAnchor) throws UnsupportedEncodingException {
        return decodeEscapeRuns(lastAnchor, "utf-8"); //$NON-NLS-1$
    }

    /**
     * Decodes percent escapes in {@code lastAnchor} as GBK. Each maximal run
     * of consecutive {@code %XX} escapes is decoded as one byte sequence; a
     * {@code '%'} that is not followed by two hexadecimal digits is copied
     * unchanged.
     *
     * @param lastAnchor text that may contain percent escapes
     * @return the decoded text, or {@code lastAnchor} itself when it contains no {@code '%'}
     * @throws UnsupportedEncodingException if the GBK charset is unavailable
     */
    public static String decodeURI2(String lastAnchor) throws UnsupportedEncodingException {
        return decodeEscapeRuns(lastAnchor, "gbk"); //$NON-NLS-1$
    }

    /**
     * Formats a byte as two upper-case hexadecimal digits, treating the byte
     * as unsigned (so {@code (byte) 0xE4} yields {@code "E4"}).
     *
     * @param b byte to format
     * @return a two-character hexadecimal string
     */
    public static String byte2HexStr(byte b) {
        int unsigned = b & 0xFF;
        StringBuffer buf = new StringBuffer(2);
        buf.append(HEX_DIGITS.charAt(unsigned >> 4));
        buf.append(HEX_DIGITS.charAt(unsigned & 0x0F));
        return buf.toString();
    }

    /**
     * Decodes only those groups of three percent escapes that form one
     * three-byte UTF-8 character (for example {@code %E4%B8%AD}); every other
     * character, including single escapes such as {@code %20} and incomplete
     * or malformed groups, is copied unchanged.
     *
     * @param lastAnchor text that may contain percent escapes
     * @return the converted text, or {@code lastAnchor} itself when it contains no {@code '%'}
     * @throws UnsupportedEncodingException if the UTF-8 charset is unavailable
     */
    public static String decodeURI2GBK(String lastAnchor) throws UnsupportedEncodingException {
        if (lastAnchor.indexOf('%') == -1) {
            return lastAnchor;
        }
        StringBuffer buf = new StringBuffer(lastAnchor.length());
        decodeUtf8Triples(lastAnchor, lastAnchor.length(), buf);
        return buf.toString();
    }

    /**
     * Scans {@code text} from index 0 while the index is below {@code scanLimit},
     * appending decoded three-byte UTF-8 escape triples and all other
     * characters to {@code out}. Returns the index of the first character that
     * was not consumed.
     */
    private static int decodeUtf8Triples(String text, int scanLimit, StringBuffer out)
            throws UnsupportedEncodingException {
        int i = 0;
        while (i < scanLimit) {
            byte[] utf8 = readUtf8Triple(text, i);
            if (utf8 == null) {
                out.append(text.charAt(i));
                i++;
            } else {
                out.append(new String(utf8, "utf-8")); //$NON-NLS-1$
                i += TRIPLE_LENGTH;
            }
        }
        return i;
    }

    /**
     * Returns the three bytes escaped at {@code start} when they are shaped
     * like one three-byte UTF-8 character (lead byte {@code 1110xxxx} followed
     * by two {@code 10xxxxxx} bytes), or {@code null} otherwise.
     */
    private static byte[] readUtf8Triple(String text, int start) {
        int first = escapeValue(text, start);
        int second = escapeValue(text, start + ESCAPE_LENGTH);
        int third = escapeValue(text, start + 2 * ESCAPE_LENGTH);
        if (first < 0 || second < 0 || third < 0) {
            return null;
        }
        boolean threeByteShape = (first & 0xF0) == 0xE0
                && (second & 0xC0) == 0x80
                && (third & 0xC0) == 0x80;
        if (!threeByteShape) {
            return null;
        }
        return new byte[] { (byte) first, (byte) second, (byte) third };
    }

    /**
     * Decodes every maximal run of {@code %XX} escapes in {@code text} with
     * the named charset and copies all other characters unchanged.
     */
    private static String decodeEscapeRuns(String text, String charsetName)
            throws UnsupportedEncodingException {
        if (text.indexOf('%') == -1) {
            return text;
        }
        int length = text.length();
        StringBuffer out = new StringBuffer(length);
        // A run can never hold more bytes than there is room for escapes.
        byte[] run = new byte[length / ESCAPE_LENGTH];
        int i = 0;
        while (i < length) {
            int runLength = 0;
            int value;
            while ((value = escapeValue(text, i)) >= 0) {
                run[runLength++] = (byte) value;
                i += ESCAPE_LENGTH;
            }
            if (runLength > 0) {
                out.append(new String(run, 0, runLength, charsetName));
            } else {
                out.append(text.charAt(i));
                i++;
            }
        }
        return out.toString();
    }

    /**
     * Returns the byte value (0 to 255) of the {@code %XX} escape starting at
     * {@code at}, or -1 when there is no complete, valid escape there.
     */
    private static int escapeValue(String text, int at) {
        if (at + ESCAPE_LENGTH > text.length() || text.charAt(at) != '%') {
            return -1;
        }
        int high = hexValue(text.charAt(at + 1));
        int low = hexValue(text.charAt(at + 2));
        if (high < 0 || low < 0) {
            return -1;
        }
        return high * 16 + low;
    }

    /** Returns the value of an ASCII hexadecimal digit, or -1 when {@code c} is not one. */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }
}