String acceptCharset = request.getHeader(ACCEPT_CHARSET);
Charset charset = getBestCharset(acceptCharset!=null ? acceptCharset :
getCharsetFromContentType(request.getHeader(CONTENT_TYPE), content));
/**
 * Picks the charset to use from a comma-separated list of charset names,
 * such as the value of an HTTP {@code Accept-Charset} header.
 *
 * <p>Each entry may carry a quality weight ({@code name;q=0.7}). The supported
 * charset with the highest weight wins; among equal weights the earliest entry
 * wins, so a plain list without weights behaves as "first supported name".
 * Entries with {@code q=0}, the {@code *} wildcard, and names the JVM does not
 * know are skipped.
 *
 * @param acceptCharset comma-separated charset names, or a single name; may be {@code null}
 * @return the selected charset, or {@code CharsetUtil.UTF_8} when the argument is
 *         {@code null} or contains no usable charset
 */
private static Charset getBestCharset(String acceptCharset) {
    if (acceptCharset == null) {
        return CharsetUtil.UTF_8;
    }
    Charset best = null;
    double bestQuality = 0.0;
    StringTokenizer st = new StringTokenizer(acceptCharset, ",");
    while (st.hasMoreTokens()) {
        String token = st.nextToken().trim();
        String name = token;
        double quality = 1.0; // default weight when no q parameter is given
        int semicolon = token.indexOf(';');
        if (semicolon != -1) {
            // Strip parameters: Charset.forName rejects "utf-8;q=0.7" as an illegal name.
            name = token.substring(0, semicolon).trim();
            String params = token.substring(semicolon + 1).trim();
            if (params.startsWith("q=") || params.startsWith("Q=")) {
                try {
                    quality = Double.parseDouble(params.substring(2).trim());
                } catch (NumberFormatException ignored) {
                    // Malformed weight: keep the default of 1.0 rather than dropping the entry.
                }
            }
        }
        // Written as !(a > b) so that a NaN weight is skipped as well.
        if (!(quality > bestQuality)) {
            continue;
        }
        try {
            best = Charset.forName(name);
            bestQuality = quality;
        } catch (IllegalArgumentException ignored) {
            // Illegal or unsupported name (including "*" and ""): try the next entry.
        }
    }
    return best != null ? best : CharsetUtil.UTF_8;
}
/**
 * Determines the name of the charset a request body is encoded in.
 *
 * <p>First looks for a {@code charset=} parameter in the Content-Type value and
 * returns its (lower-cased) value. The value may be quoted and may contain the
 * characters legal in charset names: letters, digits, {@code - _ . : +}.
 * An empty value is treated as absent.
 *
 * <p>If no charset is declared and {@code data} is given, the readable bytes are
 * scanned (decoding {@code %XX} escapes on the fly) and occurrences of the byte
 * values listed in {@code WIN1250} and {@code ISO_8859_2} are counted; the
 * encoding with the higher count is returned, with windows-1250 winning ties.
 *
 * @param ct   the Content-Type header value; may be {@code null}
 * @param data the request body; may be {@code null}, in which case no sniffing is done
 * @return the charset name, {@code "windows-1250"}, {@code "iso-8859-2"}, or
 *         {@code null} when nothing could be determined
 */
private static String getCharsetFromContentType(String ct, ChannelBuffer data) {
    // Locale-independent lower-casing: a default locale such as Turkish would
    // otherwise turn "CHARSET" into a string that does not contain "charset".
    String contentType = ct != null ? ct.toLowerCase(java.util.Locale.ENGLISH) : "";
    int charsetPos = contentType.indexOf("charset=");
    if (charsetPos != -1) {
        int start = charsetPos + 8; // first character after "charset="
        if (start < contentType.length() && contentType.charAt(start) == '"') {
            start++; // the value may be written as a quoted string
        }
        int end = start;
        while (end < contentType.length()) {
            char c = contentType.charAt(end);
            if (!Character.isLetterOrDigit(c) && "-_.:+".indexOf(c) == -1) {
                break;
            }
            end++;
        }
        // Also reached when the name runs to the end of the header value.
        if (end > start) {
            return contentType.substring(start, end);
        }
    }
    if (data != null) {
        int win1250Score = 0;
        int iso88592Score = 0;
        // getByte() takes absolute indices, so offset by the reader index.
        int base = data.readerIndex();
        int length = data.readableBytes();
        for (int i = 0; i < length; i++) {
            int b = data.getByte(base + i) & 0xff;
            if (b == '%' && i + 2 < length) {
                try {
                    b = (getHex(data.getByte(base + i + 1)) << 4)
                            | getHex(data.getByte(base + i + 2));
                } catch (RuntimeException ignored) {
                    // Not a valid %XX escape: score the literal '%' byte instead.
                }
            }
            for (int k = 0; k < WIN1250.length; k++) {
                if (b == WIN1250[k]) {
                    win1250Score++;
                } else if (b == ISO_8859_2[k]) {
                    iso88592Score++;
                }
            }
        }
        if (win1250Score > 0 && win1250Score >= iso88592Score) {
            return "windows-1250";
        }
        if (iso88592Score > 0 && iso88592Score >= win1250Score) {
            return "iso-8859-2";
        }
    }
    return null;
}
/**
 * Converts one ASCII hexadecimal digit, given as a byte, to its numeric value.
 *
 * @param b the byte holding the character code of the digit
 * @return the value 0-15 for '0'-'9', 'a'-'f' or 'A'-'F'
 * @throws RuntimeException if the byte is not a hexadecimal digit
 */
private final static int getHex(byte b) {
    final int code = b & 0xff; // treat the byte as unsigned
    // For codes 0..255 only the ASCII digits and letters a-f/A-F are radix-16 digits.
    final int value = Character.digit((char) code, 16);
    if (value < 0) {
        throw new RuntimeException("Char "+code+" is not valid hex");
    }
    return value;
}
/* ąśźĄŚŹ */
private final static int WIN1250[] = { 185,156,159,165,140,143 };
private final static int ISO_8859_2[] = { 177,182,188,161,166,172 };
poniedziałek, grudnia 10, 2012
Wykrywanie kodowania znaków w requescie HTTP
Subskrybuj:
Komentarze do posta (Atom)
0 komentarzy:
Prześlij komentarz