Path p1 = Files.newDirectoryStream(Paths.get("/home/user/jdk/test"))
.iterator().next();
System.out.println("API 1.7: "+p1.toUri().getPath());
System.out.println("API 1.7: "+p1.toFile().getAbsolutePath());
String p2 = new java.io.File("/home/user/jdk/test").list()[0];
System.out.println("API 1.4: "+p2);
}
user@user-Aspire-4530:~/workspace/TestApp/bin$ LC_ALL="pl_PL.cp1250" java -Dfile.encoding=utf8 TestApp
API 1.7: /home/user/jdk/test/gżegżółka-ąęćśń
API 1.7: /home/user/jdk/test/gżegżółka-ąęćśń
API 1.4: g��eg������ka-����������
user@user-Aspire-4530:~/workspace/TestApp/bin$ LC_ALL="pl_PL.cp1250" java -Dfile.encoding=cp1250 TestApp
API 1.7: /home/user/jdk/test/g�eg���ka-����
API 1.7: /home/user/jdk/test/gżegżółka-ąęćśń
API 1.4: g??eg??????ka-??????????
user@user-Aspire-4530:~/workspace/TestApp/bin$ LC_ALL="pl_PL.utf-8" java -Dfile.encoding=cp1250 TestApp
API 1.7: /home/user/jdk/test/g�eg���ka-����
API 1.7: /home/user/jdk/test/gżegżółka-ąęćśń
API 1.4: g�eg���ka-����
user@user-Aspire-4530:~/workspace/TestApp/bin$ LC_ALL="en_US" java -Dfile.encoding=utf8 TestApp
API 1.7: /home/user/jdk/test/gżegżółka-ąęćśń
API 1.7: /home/user/jdk/test/gżegżółka-ąęćśń
API 1.4: g��eg������ka-����������
user@user-Aspire-4530:~/workspace/TestApp/bin$ LC_ALL="en_US" java TestApp
API 1.7: /home/user/jdk/test/g?eg???ka-?????
API 1.7: /home/user/jdk/test/g??eg??????ka-??????????
API 1.4: g??eg??????ka-??????????
user@user-Aspire-4530:~/workspace/TestApp/bin$ LC_ALL="pl_PL.utf8" java -Dfile.encoding=utf8 TestApp
API 1.7: /home/user/jdk/test/gżegżółka-ąęćśń
API 1.7: /home/user/jdk/test/gżegżółka-ąęćśń
API 1.4: gżegżółka-ąęćśń
Why are there differences?
sun.nio.fs.UnixDirectoryStreamIterator creates the String from raw bytes using the default JVM encoding:
private Path readNextEntry() {
assert Thread.holdsLock(this);
for (;;) {
byte[] nameAsBytes = null;
// prevent close while reading
readLock().lock();
try {
if (isOpen()) {
nameAsBytes = readdir(dp);
}
} catch (UnixException x) {
IOException ioe = x.asIOException(dir);
throw new DirectoryIteratorException( ioe);
} finally {
readLock().unlock();
}
...
File.list() is a native C/C++ method (surprise!):
JNIEXPORT jobjectArray JNICALL
JNIEXPORT jobjectArray JNICALL
Java_java_io_UnixFileSystem_ list(JNIEnv *env, jobject this,
jobject file)
{
DIR *dir = NULL;
struct dirent64 *ptr;
struct dirent64 *result;
int len, maxlen;
jobjectArray rv, old;
WITH_FIELD_PLATFORM_STRING( env, file, ids.path, path) {
dir = opendir(path);
} END_PLATFORM_STRING(env, path);
if (dir == NULL) return NULL;
ptr = malloc(sizeof(struct dirent64) + (PATH_MAX + 1));
if (ptr == NULL) {
JNU_ThrowOutOfMemoryError(env, "heap allocation failed");
closedir(dir);
return NULL;
}
/* Allocate an initial String array */
len = 0;
maxlen = 16;
rv = (*env)->NewObjectArray(env, maxlen, JNU_ClassString(env), NULL);
if (rv == NULL) goto error;
/* Scan the directory */
while ((readdir64_r(dir, ptr, &result) == 0) && (result != NULL)) {
jstring name;
if (!strcmp(ptr->d_name, ".") || !strcmp(ptr->d_name, ".."))
continue;
if (len == maxlen) {
old = rv;
rv = (*env)->NewObjectArray(env, maxlen <<= 1,
JNU_ClassString(env), NULL);
if (rv == NULL) goto error;
if (JNU_CopyObjectArray(env, rv, old, len) < 0) goto error;
(*env)->DeleteLocalRef(env, old);
}
name = JNU_NewStringPlatform(env, ptr->d_name);
...
/*
 * Builds a java.lang.String from the NUL-terminated C string `str`.
 *
 * Conversion order:
 *   1. nativeNewStringPlatform(); a non-NULL result is returned as-is.
 *   2. The dedicated converters for FAST_8859_1, FAST_646_US and FAST_CP1252
 *      (FAST_8859_1 is also used when the encoding is still undetermined
 *      after initializeEncoding()).
 *   3. Otherwise the bytes are copied into a Java byte[] and handed to a
 *      String constructor: the one identified by String_init_ID together
 *      with jnuEncoding when jnuEncodingSupported() says so, else
 *      String(byte[]).
 *
 * Returns the new string, or NULL when a JNI call fails (local capacity,
 * byte array allocation, constructor lookup or construction).
 */
JNIEXPORT jstring JNICALL
JNU_NewStringPlatform(JNIEnv *env, const char *str)
{
    jstring result;
    jbyteArray hab = 0;
    int len;

    result = nativeNewStringPlatform(env, str);
    if (result != NULL) {
        /* The native conversion succeeded; do not discard its result. */
        return result;
    }

    if (fastEncoding == NO_ENCODING_YET)
        initializeEncoding(env);

    if ((fastEncoding == FAST_8859_1) || (fastEncoding == NO_ENCODING_YET))
        return newString8859_1(env, str);
    if (fastEncoding == FAST_646_US)
        return newString646_US(env, str);
    if (fastEncoding == FAST_CP1252)
        return newStringCp1252(env, str);

    if ((*env)->EnsureLocalCapacity(env, 2) < 0)
        return NULL;

    len = (int)strlen(str);
    hab = (*env)->NewByteArray(env, len);
    if (hab == 0)
        return NULL;

    (*env)->SetByteArrayRegion(env, hab, 0, len, (jbyte *)str);
    if (jnuEncodingSupported(env)) {
        result = (*env)->NewObject(env, JNU_ClassString(env),
                                   String_init_ID, hab, jnuEncoding);
    } else {
        /* If the encoding specified in sun.jnu.encoding is not endorsed
           by "Charset.isSupported" we have to fall back to use String(byte[])
           explicitly here without specifying the encoding name, in which the
           StringCoding class will pickup the iso-8859-1 as the fallback
           converter for us.
         */
        /* Constructors are looked up under the special name "<init>". */
        jmethodID mid = (*env)->GetMethodID(env, JNU_ClassString(env),
                                            "<init>", "([B)V");
        /* On lookup failure an exception is pending; result stays NULL. */
        if (mid != NULL)
            result = (*env)->NewObject(env, JNU_ClassString(env), mid, hab);
    }
    (*env)->DeleteLocalRef(env, hab);
    return result;
}
Conclusions: When you get file names from anything other than the native filesystem, you may end up with damaged strings. If they are not damaged but merely use a different Unicode normalization form (for example, decomposed instead of precomposed characters), you can convert them with java.text.Normalizer.
/*
 * Builds a java.lang.String from the NUL-terminated C string `str`.
 *
 * Conversion order:
 *   1. nativeNewStringPlatform(); a non-NULL result is returned as-is.
 *   2. The dedicated converters for FAST_8859_1, FAST_646_US and FAST_CP1252
 *      (FAST_8859_1 is also used when the encoding is still undetermined
 *      after initializeEncoding()).
 *   3. Otherwise the bytes are copied into a Java byte[] and handed to a
 *      String constructor: the one identified by String_init_ID together
 *      with jnuEncoding when jnuEncodingSupported() says so, else
 *      String(byte[]).
 *
 * Returns the new string, or NULL when a JNI call fails (local capacity,
 * byte array allocation, constructor lookup or construction).
 */
JNIEXPORT jstring JNICALL
JNU_NewStringPlatform(JNIEnv *env, const char *str)
{
    jstring result;
    jbyteArray hab = 0;
    int len;

    result = nativeNewStringPlatform(env, str);
    if (result != NULL) {
        /* The native conversion succeeded; do not discard its result. */
        return result;
    }

    if (fastEncoding == NO_ENCODING_YET)
        initializeEncoding(env);

    if ((fastEncoding == FAST_8859_1) || (fastEncoding == NO_ENCODING_YET))
        return newString8859_1(env, str);
    if (fastEncoding == FAST_646_US)
        return newString646_US(env, str);
    if (fastEncoding == FAST_CP1252)
        return newStringCp1252(env, str);

    if ((*env)->EnsureLocalCapacity(env, 2) < 0)
        return NULL;

    len = (int)strlen(str);
    hab = (*env)->NewByteArray(env, len);
    if (hab == 0)
        return NULL;

    (*env)->SetByteArrayRegion(env, hab, 0, len, (jbyte *)str);
    if (jnuEncodingSupported(env)) {
        result = (*env)->NewObject(env, JNU_ClassString(env),
                                   String_init_ID, hab, jnuEncoding);
    } else {
        /* If the encoding specified in sun.jnu.encoding is not endorsed
           by "Charset.isSupported" we have to fall back to use String(byte[])
           explicitly here without specifying the encoding name, in which the
           StringCoding class will pickup the iso-8859-1 as the fallback
           converter for us.
         */
        /* Constructors are looked up under the special name "<init>". */
        jmethodID mid = (*env)->GetMethodID(env, JNU_ClassString(env),
                                            "<init>", "([B)V");
        /* On lookup failure an exception is pending; result stays NULL. */
        if (mid != NULL)
            result = (*env)->NewObject(env, JNU_ClassString(env), mid, hab);
    }
    (*env)->DeleteLocalRef(env, hab);
    return result;
}
Conclusions: When you get file names from anything other than the native filesystem, you may end up with damaged strings. If they are not damaged but merely use a different Unicode normalization form (for example, decomposed instead of precomposed characters), you can convert them with java.text.Normalizer.
0 komentarze:
Prześlij komentarz