String acceptCharset = request.getHeader(ACCEPT_CHARSET); Charset charset = getBestCharset(acceptCharset!=null ? acceptCharset : getCharsetFromContentType(request.getHeader(CONTENT_TYPE), content)); private static Charset getBestCharset(String acceptCharset) { if (acceptCharset==null) return CharsetUtil.UTF_8; StringTokenizer st = new StringTokenizer(acceptCharset, ","); while (st.hasMoreTokens()) { try { return Charset.forName(st.nextToken().trim()); } catch (Exception exc) {}; } return CharsetUtil.UTF_8; } private static String getCharsetFromContentType(String ct, ChannelBuffer data) { ct = ct!=null ? ct.toLowerCase() : ""; int charsetPos = ct.indexOf("charset="); if (charsetPos!=-1) { for (int i=charsetPos+8; i < ct.length(); i++) { if (!Character.isLetterOrDigit(ct.charAt(i))) { return ct.substring(charsetPos+8, i); } } } if (data!=null) { int win1250_score = 0; int iso8859_2_score = 0; for (int i=0; i < data.readableBytes(); i++) { int b = (data.getByte(i) & 0xff); if (b == '%') { if (i+2 < data.readableBytes()) { try { b = Integer.rotateLeft(getHex(data.getByte(i+1)), 4) + getHex(data.getByte(i+2)); } catch (Exception parseExc) {} } } for (int k=0; k < WIN1250.length; k++) { if (b == WIN1250[k]) win1250_score++; else if (b == ISO_8859_2[k]) iso8859_2_score++; } } System.out.println(win1250_score+"/"+iso8859_2_score); if (win1250_score > 0 && win1250_score >= iso8859_2_score) return "windows-1250"; if (iso8859_2_score > 0 && iso8859_2_score >= win1250_score) return "iso-8859-2"; } return null; } private final static int getHex(byte b) { int c = b & 0xff; if (c >= '0' && c <= '9') return c-'0'; if (c >= 'a' && c <= 'f') return 10+c-'a'; if (c >= 'A' && c <= 'F') return 10+c-'A'; throw new RuntimeException("Char "+c+" is not valid hex"); } /* ąśźĄŚŹ */ private final static int WIN1250[] = { 185,156,159,165,140,143 }; private final static int ISO_8859_2[] = { 177,182,188,161,166,172 };
poniedziałek, 10 grudnia 2012
Wykrywanie kodowania znaków w requescie HTTP
Subskrybuj:
Komentarze do posta (Atom)
0 komentarzy:
Prześlij komentarz