jdk/src/share/native/sun/font/layout/KhmerReordering.h - platform/libcore - Gitiles

 /*
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Sun designates this
  * particular file as subject to the "Classpath" exception as provided
  * by Sun in the LICENSE file that accompanied this code.
  *
  * This code is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  * version 2 for more details (a copy is included in the LICENSE file that
  * accompanied this code).
  *
  * You should have received a copy of the GNU General Public License version
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  * CA 95054 USA or visit www.sun.com if you need additional information or
  * have any questions.
  *
  */

 /*
  *
  * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
  *
  * This file is a modification of the ICU file IndicReordering.h
  * by Jens Herden and Javier Sola for Khmer language
  *
  */

 #ifndef __KHMERREORDERING_H
 #define __KHMERREORDERING_H

 #include "LETypes.h"
 #include "OpenTypeTables.h"

 class LEGlyphStorage;

 // Vocabulary

 // Base ->
 //     A consonant or an independent vowel in its full (not
 //     subscript) form. It is the center of the syllable, it can be
 //     surrounded by coeng (subscript) consonants, vowels, split
 //     vowels, signs... but there is only one base in a syllable, it
 //     has to be coded as the first character of the syllable.
 // split vowel ->
 //     vowel that has two parts placed separately (e.g. before and
 //     after the consonant).  Khmer language has five of them. Khmer
 //     split vowels either have one part before the base and one after
 //     the base or they have a part before the base and a part above
 //     the base.  The first part of all Khmer split vowels is the same
 //     character, identical to the glyph of Khmer dependent vowel SRA
 //     EI
 // coeng ->
 //     modifier used in Khmer to construct coeng (subscript)
 //     consonants. Unlike in Indian languages, the coeng
 //     modifies the consonant that follows it, not the one preceding
 //     it. Each consonant has two forms, the base form and the
 //     subscript form: the base form is the normal one (using the
 //     consonant's code point), the subscript form is displayed when
 //     the combination coeng + consonant is encountered.
 // Consonant of type 1 ->
 //     A consonant which has a subscript form that only occupies space
 //     under a base consonant
 // Consonant of type 2 ->
 //     Its subscript form occupies space under and before the base
 //     (only one, RO)
 // Consonant of Type 3 ->
 //     Its subscript form occupies space under and after the base
 //     (KHO, CHHO, THHO, BA, YO, SA)
 // Consonant shifter ->
 //     Khmer has two series of consonants. The same dependent vowel has
 //     different sounds if it is attached to a consonant of the first
 //     series or a consonant of the second series. Most consonants have
 //     an equivalent in the other series, but some of them exist only
 //     in one series (for example SA). If we want to use the consonant
 //     SA with a vowel sound that corresponds to a vowel accompanying
 //     a consonant of the other series, then we need to use a
 //     consonant shifter: MUSIKATOAN (x17C9) or TRIISAP (x17CA).
 //     TRIISAP changes a first series consonant to a second series
 //     sound and MUSIKATOAN changes a second series consonant to have
 //     a first series vowel sound.  Consonant shifters are both
 //     normally superscript marks, but, when they are followed by a
 //     superscript, they change shape and take the form of the
 //     subscript dependent vowel SRA U.  If they are in the same
 //     syllable as a coeng consonant, Unicode 3.0 says that they
 //     should be typed before the coeng. Unicode 4.0 breaks the
 //     standard and says that they should be placed after the coeng
 //     consonant.
 // Dependent vowel ->
 //     In Khmer, dependent vowels can be placed above, below, before or
 //     after the base. Each vowel has its own position. Only one vowel
 //     per syllable is allowed.
 // Signs ->
 //     Khmer has above signs and post signs. Only one above sign
 //     and/or one post sign is allowed in a syllable.
 //

 // This list must include all types of components that can be used
 // inside a syllable
 /**
  * Character-class description for Khmer: the class and flag constants
  * used to classify characters, a code-point range (firstChar..lastChar)
  * and a pointer to CharClass data.
  *
  * Only declarations are visible in this header; getCharClass() and
  * getKhmerClassTable() are defined elsewhere (presumably
  * KhmerReordering.cpp -- confirm).
  */
 struct KhmerClassTable
 {
     // Character classes. The numeric order is significant: it must match
     // the column order of every row of the Khmer state table (file
     // KhmerReordering.cpp), so do not renumber or reorder these values
     // without updating that table.
     enum CharClassValues
     {
         CC_RESERVED             =  0,
         CC_CONSONANT            =  1, // consonant of type 1 or independent vowel
         CC_CONSONANT2           =  2, // Consonant of type 2
         CC_CONSONANT3           =  3, // Consonant of type 3
         CC_ZERO_WIDTH_NJ_MARK   =  4, // Zero Width non joiner character (0x200C)
         CC_CONSONANT_SHIFTER    =  5,
         CC_ROBAT                =  6, // Khmer special diacritic accent
                                       // -treated differently in state table
         CC_COENG                =  7, // Subscript consonant combining character
         CC_DEPENDENT_VOWEL      =  8,
         CC_SIGN_ABOVE           =  9,
         CC_SIGN_AFTER           = 10,
         CC_ZERO_WIDTH_J_MARK    = 11, // Zero width joiner character
         CC_COUNT                = 12  // This is the number of character classes
     };

     // Bit flags. The values below occupy three disjoint bit ranges:
     // bits 0-15 (CF_CLASS_MASK), bits 16-19 (CF_POS_*) and bits 24-29
     // (the remaining CF_* flags).
     enum CharClassFlags
     {
         // Mask covering the low 16 bits.
         // NOTE(review): presumably isolates the CharClassValues part of a
         // CharClass -- confirm against the class table in the .cpp file.
         CF_CLASS_MASK    = 0x0000FFFF,

         CF_CONSONANT     = 0x01000000,  // flag to speed up comparing
         CF_SPLIT_VOWEL   = 0x02000000,  // flag for a split vowel -> the first part
                                         // is added in front of the syllable
         CF_DOTTED_CIRCLE = 0x04000000,  // add a dotted circle if a character with
                                         // this flag is the first in a syllable
         CF_COENG         = 0x08000000,  // flag to speed up comparing
         CF_SHIFTER       = 0x10000000,  // flag to speed up comparing
         CF_ABOVE_VOWEL   = 0x20000000,  // flag to speed up comparing

         // position flags (one bit each; CF_POS_MASK is the union of all four)
         CF_POS_BEFORE    = 0x00080000,
         CF_POS_BELOW     = 0x00040000,
         CF_POS_ABOVE     = 0x00020000,
         CF_POS_AFTER     = 0x00010000,
         CF_POS_MASK      = 0x000f0000
     };

     // Unsigned 32-bit value type returned by getCharClass() and pointed
     // to by classTable.
     // NOTE(review): presumably a CharClassValues value combined with
     // CharClassFlags bits -- confirm.
     typedef le_uint32 CharClass;

     // Signed 32-bit typedef; not referenced anywhere else in this header.
     typedef le_int32 ScriptFlags;

     LEUnicode firstChar;   // for Khmer this will become x1780
     LEUnicode lastChar;    //  and this x17DF
     // NOTE(review): presumably one CharClass entry per code point in
     // firstChar..lastChar -- confirm against the table definition.
     const CharClass *classTable;

     // Returns the CharClass for ch.
     // NOTE(review): the result for a ch outside firstChar..lastChar is
     // not visible from this header -- check the definition.
     CharClass getCharClass(LEUnicode ch) const;

     // Returns a pointer to a KhmerClassTable.
     // NOTE(review): ownership and lifetime of the returned object are not
     // visible here; presumably a shared static instance -- confirm.
     static const KhmerClassTable *getKhmerClassTable();
 };


 /**
  * Entry points for Khmer reordering. All member functions are static and
  * the declared default constructor is private, so the class is not meant
  * to be instantiated.
  *
  * Only declarations are visible in this header; the definitions live
  * elsewhere (presumably KhmerReordering.cpp -- confirm).
  */
 class KhmerReordering {
 public:
     // NOTE(review): presumably reads charCount characters from theChars,
     // writes the reordered text to outChars, records per-character data
     // in glyphStorage and returns the number of characters written --
     // confirm against the definition, including the required capacity of
     // outChars and how scriptCode is used.
     static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount,
         le_int32 scriptCode, LEUnicode *outChars, LEGlyphStorage &glyphStorage);

     // Returns a pointer to FeatureMap data; count is passed by non-const
     // reference, so it serves as an output parameter.
     // NOTE(review): presumably count receives the number of FeatureMap
     // entries -- confirm.
     static const FeatureMap *getFeatureMap(le_int32 &count);

 private:
     // do not instantiate
     KhmerReordering();

     // NOTE(review): presumably scans chars starting at index prev, bounded
     // by charCount, using classTable to classify each character, and
     // returns the index at which the syllable ends -- confirm.
     static le_int32 findSyllable(const KhmerClassTable *classTable,
         const LEUnicode *chars, le_int32 prev, le_int32 charCount);
 };

 #endif
	/*
	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
	*
	* This code is free software; you can redistribute it and/or modify it
	* under the terms of the GNU General Public License version 2 only, as
	* published by the Free Software Foundation. Sun designates this
	* particular file as subject to the "Classpath" exception as provided
	* by Sun in the LICENSE file that accompanied this code.
	*
	* This code is distributed in the hope that it will be useful, but WITHOUT
	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	* version 2 for more details (a copy is included in the LICENSE file that
	* accompanied this code).
	*
	* You should have received a copy of the GNU General Public License version
	* 2 along with this work; if not, write to the Free Software Foundation,
	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
	*
	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
	* CA 95054 USA or visit www.sun.com if you need additional information or
	* have any questions.
	*
	*/

	/*
	*
	* (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
	*
	* This file is a modification of the ICU file IndicReordering.h
	* by Jens Herden and Javier Sola for Khmer language
	*
	*/

	#ifndef __KHMERREORDERING_H
	#define __KHMERREORDERING_H

	#include "LETypes.h"
	#include "OpenTypeTables.h"

	class LEGlyphStorage;

	// Vocabulary

	// Base ->
	// A consonant or an independent vowel in its full (not
	// subscript) form. It is the center of the syllable, it can be
	// surrounded by coeng (subscript) consonants, vowels, split
	// vowels, signs... but there is only one base in a syllable, it
	// has to be coded as the first character of the syllable.
	// split vowel ->
	// vowel that has two parts placed separately (e.g. before and
	// after the consonant). Khmer language has five of them. Khmer
	// split vowels either have one part before the base and one after
	// the base or they have a part before the base and a part above
	// the base. The first part of all Khmer split vowels is the same
	// character, identical to the glyph of Khmer dependent vowel SRA
	// EI
	// coeng ->
	// modifier used in Khmer to construct coeng (subscript)
	// consonants. Unlike in Indian languages, the coeng
	// modifies the consonant that follows it, not the one preceding
	// it. Each consonant has two forms, the base form and the
	// subscript form: the base form is the normal one (using the
	// consonant's code point), the subscript form is displayed when
	// the combination coeng + consonant is encountered.
	// Consonant of type 1 ->
	// A consonant which has a subscript form that only occupies space
	// under a base consonant
	// Consonant of type 2 ->
	// Its subscript form occupies space under and before the base
	// (only one, RO)
	// Consonant of Type 3 ->
	// Its subscript form occupies space under and after the base
	// (KHO, CHHO, THHO, BA, YO, SA)
	// Consonant shifter ->
	// Khmer has two series of consonants. The same dependent vowel has
	// different sounds if it is attached to a consonant of the first
	// series or a consonant of the second series. Most consonants have
	// an equivalent in the other series, but some of them exist only
	// in one series (for example SA). If we want to use the consonant
	// SA with a vowel sound that corresponds to a vowel accompanying
	// a consonant of the other series, then we need to use a
	// consonant shifter: MUSIKATOAN (x17C9) or TRIISAP (x17CA).
	// TRIISAP changes a first series consonant to a second series
	// sound and MUSIKATOAN changes a second series consonant to have
	// a first series vowel sound. Consonant shifters are both
	// normally superscript marks, but, when they are followed by a
	// superscript, they change shape and take the form of the
	// subscript dependent vowel SRA U. If they are in the same
	// syllable as a coeng consonant, Unicode 3.0 says that they
	// should be typed before the coeng. Unicode 4.0 breaks the
	// standard and says that they should be placed after the coeng
	// consonant.
	// Dependent vowel ->
	// In Khmer, dependent vowels can be placed above, below, before or
	// after the base. Each vowel has its own position. Only one vowel
	// per syllable is allowed.
	// Signs ->
	// Khmer has above signs and post signs. Only one above sign
	// and/or one post sign is allowed in a syllable.
	//

	// This list must include all types of components that can be used
	// inside a syllable
	// Character-class description for Khmer: the class and flag constants
	// used to classify characters, a code-point range (firstChar..lastChar)
	// and a pointer to CharClass data.
	//
	// Only declarations are visible in this header; getCharClass() and
	// getKhmerClassTable() are defined elsewhere (presumably
	// KhmerReordering.cpp -- confirm).
	//
	// NOTE(review): this declaration repeats an earlier one in the same
	// file and sits behind the same __KHMERREORDERING_H include guard.
	struct KhmerClassTable
	{
	// Character classes. The numeric order is significant: it must match
	// the column order of every row of the Khmer state table (file
	// KhmerReordering.cpp), so do not renumber or reorder these values
	// without updating that table.
	enum CharClassValues
	{
	CC_RESERVED = 0,
	CC_CONSONANT = 1, // consonant of type 1 or independent vowel
	CC_CONSONANT2 = 2, // Consonant of type 2
	CC_CONSONANT3 = 3, // Consonant of type 3
	CC_ZERO_WIDTH_NJ_MARK = 4, // Zero Width non joiner character (0x200C)
	CC_CONSONANT_SHIFTER = 5,
	CC_ROBAT = 6, // Khmer special diacritic accent
	// -treated differently in state table
	CC_COENG = 7, // Subscript consonant combining character
	CC_DEPENDENT_VOWEL = 8,
	CC_SIGN_ABOVE = 9,
	CC_SIGN_AFTER = 10,
	CC_ZERO_WIDTH_J_MARK = 11, // Zero width joiner character
	CC_COUNT = 12 // This is the number of character classes
	};

	// Bit flags. The values below occupy three disjoint bit ranges:
	// bits 0-15 (CF_CLASS_MASK), bits 16-19 (CF_POS_*) and bits 24-29
	// (the remaining CF_* flags).
	enum CharClassFlags
	{
	// Mask covering the low 16 bits.
	// NOTE(review): presumably isolates the CharClassValues part of a
	// CharClass -- confirm against the class table in the .cpp file.
	CF_CLASS_MASK = 0x0000FFFF,

	CF_CONSONANT = 0x01000000, // flag to speed up comparing
	CF_SPLIT_VOWEL = 0x02000000, // flag for a split vowel -> the first part
	// is added in front of the syllable
	CF_DOTTED_CIRCLE = 0x04000000, // add a dotted circle if a character with
	// this flag is the first in a syllable
	CF_COENG = 0x08000000, // flag to speed up comparing
	CF_SHIFTER = 0x10000000, // flag to speed up comparing
	CF_ABOVE_VOWEL = 0x20000000, // flag to speed up comparing

	// position flags (one bit each; CF_POS_MASK is the union of all four)
	CF_POS_BEFORE = 0x00080000,
	CF_POS_BELOW = 0x00040000,
	CF_POS_ABOVE = 0x00020000,
	CF_POS_AFTER = 0x00010000,
	CF_POS_MASK = 0x000f0000
	};

	// Unsigned 32-bit value type returned by getCharClass() and pointed
	// to by classTable.
	// NOTE(review): presumably a CharClassValues value combined with
	// CharClassFlags bits -- confirm.
	typedef le_uint32 CharClass;

	// Signed 32-bit typedef; not referenced anywhere else in this header.
	typedef le_int32 ScriptFlags;

	LEUnicode firstChar; // for Khmer this will become x1780
	LEUnicode lastChar; // and this x17DF
	// NOTE(review): presumably one CharClass entry per code point in
	// firstChar..lastChar -- confirm against the table definition.
	const CharClass *classTable;

	// Returns the CharClass for ch.
	// NOTE(review): the result for a ch outside firstChar..lastChar is
	// not visible from this header -- check the definition.
	CharClass getCharClass(LEUnicode ch) const;

	// Returns a pointer to a KhmerClassTable.
	// NOTE(review): ownership and lifetime of the returned object are not
	// visible here; presumably a shared static instance -- confirm.
	static const KhmerClassTable *getKhmerClassTable();
	};


	// Entry points for Khmer reordering. All member functions are static
	// and the declared default constructor is private, so the class is not
	// meant to be instantiated.
	//
	// Only declarations are visible in this header; the definitions live
	// elsewhere (presumably KhmerReordering.cpp -- confirm).
	//
	// NOTE(review): this declaration repeats an earlier one in the same
	// file and sits behind the same __KHMERREORDERING_H include guard.
	class KhmerReordering {
	public:
	// NOTE(review): presumably reads charCount characters from theChars,
	// writes the reordered text to outChars, records per-character data
	// in glyphStorage and returns the number of characters written --
	// confirm against the definition, including the required capacity of
	// outChars and how scriptCode is used.
	static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount,
	le_int32 scriptCode, LEUnicode *outChars, LEGlyphStorage &glyphStorage);

	// Returns a pointer to FeatureMap data; count is passed by non-const
	// reference, so it serves as an output parameter.
	// NOTE(review): presumably count receives the number of FeatureMap
	// entries -- confirm.
	static const FeatureMap *getFeatureMap(le_int32 &count);

	private:
	// do not instantiate
	KhmerReordering();

	// NOTE(review): presumably scans chars starting at index prev, bounded
	// by charCount, using classTable to classify each character, and
	// returns the index at which the syllable ends -- confirm.
	static le_int32 findSyllable(const KhmerClassTable *classTable,
	const LEUnicode *chars, le_int32 prev, le_int32 charCount);
	};

	#endif