| /* |
| * Copyright (C) 2009 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include <media/mediascanner.h> |
| |
| #include <utils/StringArray.h> |
| |
| #include "autodetect.h" |
| #include "unicode/ucnv.h" |
| #include "unicode/ustring.h" |
| |
| namespace android { |
| |
| MediaScannerClient::MediaScannerClient() |
| : mNames(NULL), |
| mValues(NULL), |
| mLocaleEncoding(kEncodingNone) |
| { |
| } |
| |
| MediaScannerClient::~MediaScannerClient() |
| { |
| delete mNames; |
| delete mValues; |
| } |
| |
| void MediaScannerClient::setLocale(const char* locale) |
| { |
| if (!locale) return; |
| |
| if (!strncmp(locale, "ja", 2)) |
| mLocaleEncoding = kEncodingShiftJIS; |
| else if (!strncmp(locale, "ko", 2)) |
| mLocaleEncoding = kEncodingEUCKR; |
| else if (!strncmp(locale, "zh", 2)) { |
| if (!strcmp(locale, "zh_CN")) { |
| // simplified chinese for mainland China |
| mLocaleEncoding = kEncodingGBK; |
| } else { |
| // assume traditional for non-mainland Chinese locales (Taiwan, Hong Kong, Singapore) |
| mLocaleEncoding = kEncodingBig5; |
| } |
| } |
| } |
| |
| void MediaScannerClient::beginFile() |
| { |
| mNames = new StringArray; |
| mValues = new StringArray; |
| } |
| |
| bool MediaScannerClient::addStringTag(const char* name, const char* value) |
| { |
| // don't bother caching strings that are all ASCII. |
| // call handleStringTag directly instead. |
| // check to see if value (which should be utf8) has any non-ASCII characters |
| bool nonAscii = false; |
| const char* chp = value; |
| char ch; |
| while ((ch = *chp++)) { |
| if (ch & 0x80) { |
| nonAscii = true; |
| break; |
| } |
| } |
| |
| if (nonAscii) { |
| // save the strings for later so they can be used for native encoding detection |
| mNames->push_back(name); |
| mValues->push_back(value); |
| return true; |
| } |
| // else fall through |
| |
| // autodetection is not necessary, so no need to cache the values |
| // pass directly to the client instead |
| return handleStringTag(name, value); |
| } |
| |
| static uint32_t possibleEncodings(const char* s) |
| { |
| uint32_t result = kEncodingAll; |
| // if s contains a native encoding, then it was mistakenly encoded in utf8 as if it were latin-1 |
| // so we need to reverse the latin-1 -> utf8 conversion to get the native chars back |
| uint8_t ch1, ch2; |
| uint8_t* chp = (uint8_t *)s; |
| |
| while ((ch1 = *chp++)) { |
| if (ch1 & 0x80) { |
| ch2 = *chp++; |
| ch1 = ((ch1 << 6) & 0xC0) | (ch2 & 0x3F); |
| // ch1 is now the first byte of the potential native char |
| |
| ch2 = *chp++; |
| if (ch2 & 0x80) |
| ch2 = ((ch2 << 6) & 0xC0) | (*chp++ & 0x3F); |
| // ch2 is now the second byte of the potential native char |
| int ch = (int)ch1 << 8 | (int)ch2; |
| result &= findPossibleEncodings(ch); |
| } |
| // else ASCII character, which could be anything |
| } |
| |
| return result; |
| } |
| |
| void MediaScannerClient::convertValues(uint32_t encoding) |
| { |
| const char* enc = NULL; |
| switch (encoding) { |
| case kEncodingShiftJIS: |
| enc = "shift-jis"; |
| break; |
| case kEncodingGBK: |
| enc = "gbk"; |
| break; |
| case kEncodingBig5: |
| enc = "Big5"; |
| break; |
| case kEncodingEUCKR: |
| enc = "EUC-KR"; |
| break; |
| } |
| |
| if (enc) { |
| UErrorCode status = U_ZERO_ERROR; |
| |
| UConverter *conv = ucnv_open(enc, &status); |
| if (U_FAILURE(status)) { |
| LOGE("could not create UConverter for %s\n", enc); |
| return; |
| } |
| UConverter *utf8Conv = ucnv_open("UTF-8", &status); |
| if (U_FAILURE(status)) { |
| LOGE("could not create UConverter for UTF-8\n"); |
| ucnv_close(conv); |
| return; |
| } |
| |
| // for each value string, convert from native encoding to UTF-8 |
| for (int i = 0; i < mNames->size(); i++) { |
| // first we need to untangle the utf8 and convert it back to the original bytes |
| // since we are reducing the length of the string, we can do this in place |
| uint8_t* src = (uint8_t *)mValues->getEntry(i); |
| int len = strlen((char *)src); |
| uint8_t* dest = src; |
| |
| uint8_t uch; |
| while ((uch = *src++)) { |
| if (uch & 0x80) |
| *dest++ = ((uch << 6) & 0xC0) | (*src++ & 0x3F); |
| else |
| *dest++ = uch; |
| } |
| *dest = 0; |
| |
| // now convert from native encoding to UTF-8 |
| const char* source = mValues->getEntry(i); |
| int targetLength = len * 3 + 1; |
| char* buffer = new char[targetLength]; |
| if (!buffer) |
| break; |
| char* target = buffer; |
| |
| ucnv_convertEx(utf8Conv, conv, &target, target + targetLength, |
| &source, (const char *)dest, NULL, NULL, NULL, NULL, TRUE, TRUE, &status); |
| if (U_FAILURE(status)) { |
| LOGE("ucnv_convertEx failed: %d\n", status); |
| mValues->setEntry(i, "???"); |
| } else { |
| // zero terminate |
| *target = 0; |
| mValues->setEntry(i, buffer); |
| } |
| |
| delete[] buffer; |
| } |
| |
| ucnv_close(conv); |
| ucnv_close(utf8Conv); |
| } |
| } |
| |
| void MediaScannerClient::endFile() |
| { |
| int size = mNames->size(); |
| uint32_t encoding = kEncodingAll; |
| |
| // compute a bit mask containing all possible encodings |
| for (int i = 0; i < mNames->size(); i++) |
| encoding &= possibleEncodings(mValues->getEntry(i)); |
| |
| // If one of the possible encodings matches the locale encoding, use that. |
| // Otherwise, if there is only one possible encoding, use that. |
| if (encoding & mLocaleEncoding) |
| convertValues(mLocaleEncoding); |
| else if ((encoding & (encoding - 1)) == 0) |
| convertValues(encoding); |
| else { |
| // TODO: try harder to disambiguate the encoding, perhaps by looking at |
| // other files by same artist, or even the user's entire collection. |
| // For now, fall through and insert the strings as they are. |
| } |
| |
| // finally, push all name/value pairs to the client |
| for (int i = 0; i < mNames->size(); i++) { |
| if (!handleStringTag(mNames->getEntry(i), mValues->getEntry(i))) |
| break; |
| } |
| // else addStringTag() has done all the work so we have nothing to do |
| |
| delete mNames; |
| delete mValues; |
| mNames = NULL; |
| mValues = NULL; |
| } |
| |
| } // namespace android |
| |