blob: 92b6158576abf020abcffa5acbcfb33b8473768a [file] [log] [blame]
/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Sun designates this
* particular file as subject to the "Classpath" exception as provided
* by Sun in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
/*
*
* (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
*
* This file is a modification of the ICU file IndicReordering.h
* by Jens Herden and Javier Sola for Khmer language
*
*/
#ifndef __KHMERREORDERING_H
#define __KHMERREORDERING_H
#include "LETypes.h"
#include "OpenTypeTables.h"
class LEGlyphStorage;
// Vocabulary
// Base ->
// A consonant or an independent vowel in its full (not
// subscript) form. It is the center of the syllable; it can be
// surrounded by coeng (subscript) consonants, vowels, split
// vowels, signs... but there is only one base in a syllable, and
// it has to be coded as the first character of the syllable.
// split vowel ->
// vowel that has two parts placed separately (e.g. Before and
// after the consonant). Khmer language has five of them. Khmer
// split vowels either have one part before the base and one after
// the base or they have a part before the base and a part above
// the base. The first part of all Khmer split vowels is the same
// character, identical to the glyph of Khmer dependent vowel SRA
// EI
// coeng ->
// modifier used in Khmer to construct coeng (subscript)
// consonants. Unlike in Indian languages, the coeng
// modifies the consonant that follows it, not the one preceding
// it. Each consonant has two forms, the base form and the
// subscript form. The base form is the normal one (using the
// consonant's code point); the subscript form is displayed when
// the combination coeng + consonant is encountered.
// Consonant of type 1 ->
// A consonant whose subscript form only occupies space
// under a base consonant
// Consonant of type 2 ->
// Its subscript form occupies space under and before the base
// (only one, RO)
// Consonant of Type 3 ->
// Its subscript form occupies space under and after the base
// (KHO, CHHO, THHO, BA, YO, SA)
// Consonant shifter ->
// Khmer has two series of consonants. The same dependent vowel has
// different sounds if it is attached to a consonant of the first
// series or a consonant of the second series. Most consonants have
// an equivalent in the other series, but some of them exist only
// in one series (for example SA). If we want to use the consonant
// SA with a vowel sound that is only available with a consonant
// of the other series, then we need to use a consonant shifter:
// TRIISAP (x17CA) or MUSIKATOAN (x17C9). TRIISAP changes a first
// series consonant to second series sound and MUSIKATOAN a second
// series consonant to have a first series vowel sound. Consonant
// shifters are both normally superscript marks, but, when they are
// followed by a superscript, they change shape and take the form
// of subscript dependent vowel SRA U. If they are in the same
// syllable as a coeng consonant, Unicode 3.0 says that they
// should be typed before the coeng. Unicode 4.0 breaks the
// standard and says that it should be placed after the coeng
// consonant.
// Dependent vowel ->
// In Khmer, dependent vowels can be placed above, below, before or
// after the base. Each vowel has its own position. Only one vowel
// per syllable is allowed.
// Signs ->
// Khmer has above signs and post signs. Only one above sign
// and/or one post sign are allowed in a syllable.
//
// This list must include all types of components that can be used
// inside a syllable
// Describes how Khmer characters are classified: the character class
// numbers, the flag bits declared alongside them, the range of code
// points covered, and a pointer to the per-character class data.
struct KhmerClassTable
{
// Order is important here! The order of these values must be the same
// as the order found in each horizontal line of the state table for
// Khmer (file KhmerReordering.cpp). Do not renumber or reorder them
// without updating that table.
enum CharClassValues
{
CC_RESERVED = 0,
CC_CONSONANT = 1, // consonant of type 1 or independent vowel
CC_CONSONANT2 = 2, // consonant of type 2
CC_CONSONANT3 = 3, // consonant of type 3
CC_ZERO_WIDTH_NJ_MARK = 4, // zero width non-joiner character (0x200C)
CC_CONSONANT_SHIFTER = 5,
CC_ROBAT = 6, // Khmer special diacritic accent
// - treated differently in the state table
CC_COENG = 7, // subscript consonant combining character
CC_DEPENDENT_VOWEL = 8,
CC_SIGN_ABOVE = 9,
CC_SIGN_AFTER = 10,
CC_ZERO_WIDTH_J_MARK = 11, // zero width joiner character
CC_COUNT = 12 // the number of character classes (one past the last class value)
};
// Bit flags and masks. The three groups occupy disjoint bit ranges:
// the class mask covers bits 0-15, the position flags bits 16-19, and
// the remaining flags bits 24-29.
// NOTE(review): presumably these are OR-ed with a CharClassValues
// number to form a CharClass entry - confirm against the class table
// in KhmerReordering.cpp.
enum CharClassFlags
{
CF_CLASS_MASK = 0x0000FFFF, // mask for the low 16 bits
CF_CONSONANT = 0x01000000, // flag to speed up comparing
CF_SPLIT_VOWEL = 0x02000000, // flag for a split vowel -> the first part
// is added in front of the syllable
CF_DOTTED_CIRCLE = 0x04000000, // add a dotted circle if a character with
// this flag is the first in a syllable
CF_COENG = 0x08000000, // flag to speed up comparing
CF_SHIFTER = 0x10000000, // flag to speed up comparing
CF_ABOVE_VOWEL = 0x20000000, // flag to speed up comparing
// position flags (one bit each; CF_POS_MASK is the union of all four)
CF_POS_BEFORE = 0x00080000,
CF_POS_BELOW = 0x00040000,
CF_POS_ABOVE = 0x00020000,
CF_POS_AFTER = 0x00010000,
CF_POS_MASK = 0x000f0000
};
// Unsigned 32-bit value type used for entries of classTable and for
// the result of getCharClass().
typedef le_uint32 CharClass;
// Signed 32-bit type; not referenced elsewhere in this header.
typedef le_int32 ScriptFlags;
LEUnicode firstChar; // for Khmer this will become x1780
LEUnicode lastChar; // and this x17DF
// Pointer to the class data.
// NOTE(review): presumably one entry per code point from firstChar to
// lastChar - confirm against the definition in KhmerReordering.cpp.
const CharClass *classTable;
// Returns the CharClass for the character ch.
// NOTE(review): the implementation is not in this header; how
// characters outside [firstChar, lastChar] are classified should be
// confirmed in KhmerReordering.cpp.
CharClass getCharClass(LEUnicode ch) const;
// Returns a pointer to a KhmerClassTable.
// NOTE(review): presumably a shared, statically allocated instance that
// the caller must not free - confirm in KhmerReordering.cpp.
static const KhmerClassTable *getKhmerClassTable();
};
// Collection of static functions for Khmer reordering. Every member
// function is static and the constructor is private, so the class is
// never instantiated.
class KhmerReordering {
public:
// Reorders the charCount characters in theChars, using outChars and
// glyphStorage as outputs.
// NOTE(review): the implementation is not in this header. Presumably
// the return value is the number of characters written to outChars,
// and outChars must be large enough for characters added during
// reordering (e.g. dotted circles, split-vowel parts) - confirm in
// KhmerReordering.cpp.
static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount,
le_int32 scriptCode, LEUnicode *outChars, LEGlyphStorage &glyphStorage);
// Returns a pointer to FeatureMap data and reports a count through the
// reference parameter.
// NOTE(review): presumably count is set to the number of entries in
// the returned map - confirm in KhmerReordering.cpp.
static const FeatureMap *getFeatureMap(le_int32 &count);
private:
// do not instantiate
KhmerReordering();
// Helper that scans chars using the given class table.
// NOTE(review): presumably starts at index prev and returns the index
// just past the end of the syllable found, bounded by charCount -
// confirm in KhmerReordering.cpp.
static le_int32 findSyllable(const KhmerClassTable *classTable,
const LEUnicode *chars, le_int32 prev, le_int32 charCount);
};
#endif