| /** |
| * \file unicode.c |
| * |
| * This file contains general Unicode string manipulation functions. |
| * It mainly consist of functions for converting between UCS-2 (used on |
| * the devices) and UTF-8 (used by several applications). |
| * |
| * For a deeper understanding of Unicode encoding formats see the |
| * Wikipedia entries for |
| * <a href="http://en.wikipedia.org/wiki/UTF-16/UCS-2">UTF-16/UCS-2</a> |
| * and <a href="http://en.wikipedia.org/wiki/UTF-8">UTF-8</a>. |
| * |
| * Copyright (C) 2005-2007 Linus Walleij <triad@df.lth.se> |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with this library; if not, write to the |
| * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| * Boston, MA 02111-1307, USA. |
| * |
| */ |
| |
| #include <stdlib.h> |
| #include <string.h> |
| #include <iconv.h> |
| #include "libmtp.h" |
| #include "unicode.h" |
| #include "util.h" |
| #include "ptp.h" |
| |
| /** |
| * The size of the buffer (in characters) used for creating string copies. |
| */ |
| #define STRING_BUFFER_LENGTH 1024 |
| |
| /** |
| * Gets the length (in characters, not bytes) of a unicode |
| * UCS-2 string, eg a string which physically is 0x00 0x41 0x00 0x00 |
| * will return a value of 1. |
| * |
| * @param unicstr a UCS-2 Unicode string |
| * @return the length of the string, in number of characters. If you |
| * want to know the length in bytes, multiply this by two and |
| * add two (for zero terminator). |
| */ |
| int ucs2_strlen(uint16_t const * const unicstr) |
| { |
| int length; |
| |
| /* Unicode strings are terminated with 2 * 0x00 */ |
| for(length = 0; unicstr[length] != 0x0000U; length ++); |
| return length; |
| } |
| |
| /** |
| * Converts a big-endian UTF-16 2-byte string |
| * to a UTF-8 string. Actually just a UCS-2 internal conversion |
| * routine that strips off the BOM if there is one. |
| * |
| * @param device a pointer to the current device. |
| * @param unicstr the UTF-16 unicode string to convert |
| * @return a UTF-8 string. |
| */ |
| char *utf16_to_utf8(LIBMTP_mtpdevice_t *device, const uint16_t *unicstr) |
| { |
| PTPParams *params = (PTPParams *) device->params; |
| char *stringp = (char *) unicstr; |
| char loclstr[STRING_BUFFER_LENGTH*3+1]; // UTF-8 encoding is max 3 bytes per UCS2 char. |
| char *locp = loclstr; |
| size_t nconv; |
| size_t convlen = (ucs2_strlen(unicstr)+1) * sizeof(uint16_t); // UCS-2 is 16 bit wide, include terminator |
| size_t convmax = STRING_BUFFER_LENGTH*3; |
| |
| loclstr[0]='\0'; |
| /* Do the conversion. */ |
| nconv = iconv(params->cd_ucs2_to_locale, &stringp, &convlen, &locp, &convmax); |
| if (nconv == (size_t) -1) { |
| // Return partial string anyway. |
| *locp = '\0'; |
| } |
| loclstr[STRING_BUFFER_LENGTH*3] = '\0'; |
| // Strip off any BOM, it's totally useless... |
| if ((uint8_t) loclstr[0] == 0xEFU && (uint8_t) loclstr[1] == 0xBBU && (uint8_t) loclstr[2] == 0xBFU) { |
| return strdup(loclstr+3); |
| } |
| return strdup(loclstr); |
| } |