jdk/src/share/classes/sun/io/CharToByteCp949C.java - platform/libcore - Gitiles

 /*
  * Copyright 1997-2003 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Sun designates this
  * particular file as subject to the "Classpath" exception as provided
  * by Sun in the LICENSE file that accompanied this code.
  *
  * This code is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  * version 2 for more details (a copy is included in the LICENSE file that
  * accompanied this code).
  *
  * You should have received a copy of the GNU General Public License version
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  * CA 95054 USA or visit www.sun.com if you need additional information or
  * have any questions.
  */

 package sun.io;

 import sun.nio.cs.ext.IBM949C;

 /**
  * Converts Unicode characters to bytes in the Cp949C code page.
  *
  * <p>Cp949C is a hand-modified version of Cp949 that maps Unicode U+005C
  * to and from 0x5C (local code page).  Runs of modern conjoining Hangul
  * jamo are composed into one precomposed syllable before the table lookup.
  * Surrogate pairs are never mappable: they are substituted or rejected.
  *
  * @author Malcolm Ayres
  */
 public class CharToByteCp949C extends CharToByteConverter
 {
     // Hangul syllable composition parameters.
     private static final char SBase = '\uAC00';  // first precomposed syllable
     private static final char LBase = '\u1100';  // first leading consonant
     private static final char VBase = '\u1161';  // first vowel
     private static final char TBase = '\u11A7';  // one below the first trailing consonant
     private static final int  LCount = 19;
     private static final int  VCount = 21;
     private static final int  TCount = 28;       // index 0 means "no trailing consonant"

     // Last jamo of each kind that composeHangul() can combine arithmetically.
     private static final char LLast = (char)(LBase + LCount - 1);  // U+1112
     private static final char VLast = (char)(VBase + VCount - 1);  // U+1175
     private static final char TLast = (char)(TBase + TCount - 1);  // U+11C2

     // Table positions at or above this value are read from index2a.
     private static final int  INDEX2A_OFFSET = 15000;

     // Composition states: which kind of jamo was consumed last.
     private static final byte G0 = 0;  // no syllable pending
     private static final byte G1 = 1;  // leading consonant pending
     private static final byte G2 = 2;  // vowel pending
     private static final byte G3 = 3;  // trailing consonant pending
     private byte   charState = G0;
     private char   l, v, t;            // jamo of the syllable being assembled

     // Scratch buffer: [0] is the lead byte (0x00 for single-byte output),
     // [1] is the trail or only byte.
     private final byte[] outputByte;

     // High surrogate seen at the very end of a convert() call, or 0.
     private char highHalfZoneCode;
     private final int  mask1;
     private final int  mask2;
     private final int  shift;
     private final short[] index1;
     private final String index2;
     private final String index2a;

     private final static IBM949C nioCoder = new IBM949C();

     /**
      * Creates a converter backed by the encoder tables of {@code IBM949C}.
      */
     public CharToByteCp949C() {
         super();
         index1 = nioCoder.getEncoderIndex1();
         index2 = nioCoder.getEncoderIndex2();
         index2a = nioCoder.getEncoderIndex2a();
         highHalfZoneCode = 0;
         outputByte = new byte[2];
         mask1 = 0xFFF8;
         mask2 = 0x0007;
         shift = 3;
     }

     /**
      * Flushes out any residual data and resets the converter state.
      *
      * @param output   destination buffer
      * @param outStart first position in {@code output} to write to
      * @param outEnd   position just past the last writable one
      * @return the number of bytes written
      * @throws MalformedInputException if a high surrogate is still waiting
      *         for its partner, or the pending syllable cannot be encoded
      * @throws ConversionBufferFullException if {@code output} is too small;
      *         the pending syllable is kept so the call can be repeated
      */
     public int flush(byte[] output, int outStart, int outEnd)
         throws MalformedInputException,
                ConversionBufferFullException
     {
         byteOff = outStart;

         if (highHalfZoneCode != 0) {
             reset();
             badInputLength = 0;
             throw new MalformedInputException();
         }

         if (charState != G0) {
             try {
                 unicodeToBuffer(composeHangul(), output, outEnd);
             } catch (UnknownCharacterException e) {
                 reset();
                 badInputLength = 0;
                 // keep the original failure reachable for diagnostics
                 MalformedInputException malformed = new MalformedInputException();
                 malformed.initCause(e);
                 throw malformed;
             }
             charState = G0;
         }

         int bytesOut = byteOff - outStart;

         reset();
         return bytesOut;
     }

     /**
      * Resets converter to its initial state.
      */
     public void reset() {
         highHalfZoneCode = 0;
         charState = G0;
         charOff = byteOff = 0;
     }

     /**
      * Returns true if the given character can be converted to the
      * target character encoding.  Only U+0000 may legitimately map to a
      * zero table entry; every other zero entry means "undefined".
      */
     public boolean canConvert(char ch) {
         return lookup(ch) != 0 || ch == '\u0000';
     }

     /**
      * Converts {@code input[inOff..inEnd)} into {@code output}, starting
      * at {@code outOff}.
      *
      * <p>Modern conjoining jamo are collected into a syllable that is
      * written once a character arrives that cannot extend it; call
      * {@link #flush} to write a syllable still pending at end of input.
      *
      * @return the number of bytes written to {@code output}
      * @throws UnknownCharacterException if a character has no mapping and
      *         substitution is disabled
      * @throws MalformedInputException on an unpaired surrogate
      * @throws ConversionBufferFullException if {@code output} fills up
      */
     public int convert(char[] input, int inOff, int inEnd,
                        byte[] output, int outOff, int outEnd)
         throws UnknownCharacterException, MalformedInputException,
                ConversionBufferFullException
     {
         char inputChar;
         int  inputSize;

         charOff = inOff;
         byteOff = outOff;

         while (charOff < inEnd) {

             if (highHalfZoneCode == 0) {
                 inputChar = input[charOff];
                 inputSize = 1;
             } else {
                 // resume with the high surrogate saved by the previous call;
                 // it consumes nothing from this input array
                 inputChar = highHalfZoneCode;
                 inputSize = 0;
                 highHalfZoneCode = 0;
             }

             switch (charState) {
             case G0:
                 beginSyllable(inputChar);
                 break;

             case G1:
                 if (isLeadingC(inputChar)) {
                     l = composeLL(l, inputChar);
                 } else if (isVowel(inputChar)) {
                     v = inputChar;
                     charState = G2;
                 } else if (isTrailingC(inputChar)) {
                     t = inputChar;
                     charState = G3;
                 } else {
                     unicodeToBuffer(composeHangul(), output, outEnd);
                     charState = G0;
                 }
                 break;

             case G2:
                 if (isLeadingC(inputChar)) {
                     // a new syllable begins: write the finished one first
                     unicodeToBuffer(composeHangul(), output, outEnd);
                     beginSyllable(inputChar);
                 } else if (isVowel(inputChar)) {
                     v = composeVV(v, inputChar);
                 } else if (isTrailingC(inputChar)) {
                     t = inputChar;
                     charState = G3;
                 } else {
                     unicodeToBuffer(composeHangul(), output, outEnd);
                     charState = G0;
                 }
                 break;

             case G3:
                 if (isTrailingC(inputChar)) {
                     t = composeTT(t, inputChar);
                 } else {
                     unicodeToBuffer(composeHangul(), output, outEnd);
                     // A leading consonant or vowel here opens the next
                     // syllable instead of being encoded as a lone jamo;
                     // anything else leaves the state at G0.
                     beginSyllable(inputChar);
                 }
                 break;
             }

             if (charState != G0) {
                 // the character was absorbed into the pending syllable
                 charOff++;
                 continue;
             }

             if (inputChar >= '\uD800' && inputChar <= '\uDBFF') {
                 // High surrogate.  Is it the last character of the input?
                 if (charOff + inputSize >= inEnd) {
                     highHalfZoneCode = inputChar;
                     charOff += inputSize;
                     break;
                 }

                 // Is there a low surrogate following?
                 inputChar = input[charOff + inputSize];
                 if (inputChar < '\uDC00' || inputChar > '\uDFFF') {
                     // malformed surrogate pair
                     badInputLength = 1;
                     throw new MalformedInputException();
                 }

                 // A valid pair, but this code page has no supplementary
                 // characters: substitute if allowed, otherwise reject.
                 if (!subMode) {
                     badInputLength = 2;
                     throw new UnknownCharacterException();
                 }
                 loadSubstitutionBytes();
                 bytesToBuffer(outputByte, output, outEnd);
                 inputSize++;
             } else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
                 // unaccompanied low surrogate
                 badInputLength = 1;
                 throw new MalformedInputException();
             } else {
                 unicodeToBuffer(inputChar, output, outEnd);
             }

             charOff += inputSize;
         }

         return byteOff - outOff;
     }

     /**
      * Starts assembling a new syllable from {@code c}.  The three jamo
      * slots are reset to their base values and, when {@code c} is a
      * composable jamo, it is stored and the matching state is entered;
      * otherwise the state becomes G0.
      */
     private void beginSyllable(char c) {
         l = LBase;
         v = VBase;
         t = TBase;

         if (isLeadingC(c)) {
             l = c;
             charState = G1;
         } else if (isVowel(c)) {
             v = c;
             charState = G2;
         } else if (isTrailingC(c)) {
             t = c;
             charState = G3;
         } else {
             charState = G0;
         }
     }

     /**
      * Combines the pending jamo into a precomposed Hangul syllable.
      */
     private char composeHangul() {
         int lIndex = l - LBase;
         int vIndex = v - VBase;
         int tIndex = t - TBase;

         return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase);
     }

     // The three compose helpers below currently let the later jamo
     // replace the earlier one.

     private char composeLL(char l1, char l2) {
         return l2;
     }

     private char composeVV(char v1, char v2) {
         return v2;
     }

     private char composeTT(char t1, char t2) {
         return t2;
     }

     // Only the modern jamo are accepted: composeHangul() would produce a
     // value outside the syllable block (or overflow char) for any other.

     private boolean isLeadingC(char c) {
         return (c >= LBase && c <= LLast);
     }

     private boolean isVowel(char c) {
         return (c >= VBase && c <= VLast);
     }

     private boolean isTrailingC(char c) {
         // TBase itself stands for "no trailing consonant"
         return (c > TBase && c <= TLast);
     }

     /**
      * returns the maximum number of bytes needed to convert a char
      */
     public int getMaxBytesPerChar() {
         return 2;
     }


     /**
      * Return the character set ID
      */
     public String getCharacterEncoding() {
         return "Cp949C";
     }

     /**
      * Returns the encoder table entry for {@code ch}: lead byte in bits
      * 8-15, trail byte in bits 0-7, or 0 when there is no entry.
      */
     private int lookup(char ch) {
         int index = index1[(ch & mask1) >> shift] + (ch & mask2);
         if (index < INDEX2A_OFFSET)
             return index2.charAt(index);
         return index2a.charAt(index - INDEX2A_OFFSET);
     }

     /**
      * Places the inherited substitution bytes in {@code outputByte},
      * using a zero lead byte when there is only one of them.
      */
     private void loadSubstitutionBytes() {
         if (subBytes.length == 1) {
             outputByte[0] = 0x00;
             outputByte[1] = subBytes[0];
         } else {
             outputByte[0] = subBytes[0];
             outputByte[1] = subBytes[1];
         }
     }

     /**
      * private function to add the bytes to the output buffer; a zero
      * lead byte means only the second byte is written
      */
     private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd)
         throws ConversionBufferFullException {

         int spaceNeeded = (theBytes[0] == 0x00) ? 1 : 2;

         // ensure sufficient space for the byte(s)
         if (byteOff + spaceNeeded > outEnd)
             throw new ConversionBufferFullException();

         // move the data into the buffer
         if (spaceNeeded == 2)
             output[byteOff++] = theBytes[0];
         output[byteOff++] = theBytes[1];
     }

     /**
      * private function to add a unicode character to the output buffer,
      * falling back to the substitution bytes for unmappable characters
      * when substitution is enabled
      */
     private void unicodeToBuffer(char unicode, byte[] output, int outEnd)
         throws ConversionBufferFullException,
                UnknownCharacterException {

         // first we convert the unicode to its byte representation
         int theBytes = lookup(unicode);
         outputByte[0] = (byte)((theBytes & 0x0000ff00) >> 8);
         outputByte[1] = (byte)(theBytes & 0x000000ff);

         // if the unicode was not mappable - look for the substitution bytes
         if (theBytes == 0 && unicode != '\u0000') {
             if (!subMode) {
                 badInputLength = 1;
                 throw new UnknownCharacterException();
             }
             loadSubstitutionBytes();
         }

         // now put the bytes in the buffer
         bytesToBuffer(outputByte, output, outEnd);
     }
 }
	/*
	* Copyright 1997-2003 Sun Microsystems, Inc. All Rights Reserved.
	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
	*
	* This code is free software; you can redistribute it and/or modify it
	* under the terms of the GNU General Public License version 2 only, as
	* published by the Free Software Foundation. Sun designates this
	* particular file as subject to the "Classpath" exception as provided
	* by Sun in the LICENSE file that accompanied this code.
	*
	* This code is distributed in the hope that it will be useful, but WITHOUT
	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	* version 2 for more details (a copy is included in the LICENSE file that
	* accompanied this code).
	*
	* You should have received a copy of the GNU General Public License version
	* 2 along with this work; if not, write to the Free Software Foundation,
	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
	*
	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
	* CA 95054 USA or visit www.sun.com if you need additional information or
	* have any questions.
	*/

	package sun.io;

	import sun.nio.cs.ext.IBM949C;

	/**
	 * Converts Unicode characters to bytes in the Cp949C code page.
	 *
	 * <p>Cp949C is a hand-modified version of Cp949 that maps Unicode U+005C
	 * to and from 0x5C (local code page).  Runs of modern conjoining Hangul
	 * jamo are composed into one precomposed syllable before the table lookup.
	 * Surrogate pairs are never mappable: they are substituted or rejected.
	 *
	 * @author Malcolm Ayres
	 */
	public class CharToByteCp949C extends CharToByteConverter
	{
	    // Hangul syllable composition parameters.
	    private static final char SBase = '\uAC00';  // first precomposed syllable
	    private static final char LBase = '\u1100';  // first leading consonant
	    private static final char VBase = '\u1161';  // first vowel
	    private static final char TBase = '\u11A7';  // one below the first trailing consonant
	    private static final int  LCount = 19;
	    private static final int  VCount = 21;
	    private static final int  TCount = 28;       // index 0 means "no trailing consonant"

	    // Last jamo of each kind that composeHangul() can combine arithmetically.
	    private static final char LLast = (char)(LBase + LCount - 1);  // U+1112
	    private static final char VLast = (char)(VBase + VCount - 1);  // U+1175
	    private static final char TLast = (char)(TBase + TCount - 1);  // U+11C2

	    // Table positions at or above this value are read from index2a.
	    private static final int  INDEX2A_OFFSET = 15000;

	    // Composition states: which kind of jamo was consumed last.
	    private static final byte G0 = 0;  // no syllable pending
	    private static final byte G1 = 1;  // leading consonant pending
	    private static final byte G2 = 2;  // vowel pending
	    private static final byte G3 = 3;  // trailing consonant pending
	    private byte   charState = G0;
	    private char   l, v, t;            // jamo of the syllable being assembled

	    // Scratch buffer: [0] is the lead byte (0x00 for single-byte output),
	    // [1] is the trail or only byte.
	    private final byte[] outputByte;

	    // High surrogate seen at the very end of a convert() call, or 0.
	    private char highHalfZoneCode;
	    private final int  mask1;
	    private final int  mask2;
	    private final int  shift;
	    private final short[] index1;
	    private final String index2;
	    private final String index2a;

	    private final static IBM949C nioCoder = new IBM949C();

	    /**
	     * Creates a converter backed by the encoder tables of {@code IBM949C}.
	     */
	    public CharToByteCp949C() {
	        super();
	        index1 = nioCoder.getEncoderIndex1();
	        index2 = nioCoder.getEncoderIndex2();
	        index2a = nioCoder.getEncoderIndex2a();
	        highHalfZoneCode = 0;
	        outputByte = new byte[2];
	        mask1 = 0xFFF8;
	        mask2 = 0x0007;
	        shift = 3;
	    }

	    /**
	     * Flushes out any residual data and resets the converter state.
	     *
	     * @param output   destination buffer
	     * @param outStart first position in {@code output} to write to
	     * @param outEnd   position just past the last writable one
	     * @return the number of bytes written
	     * @throws MalformedInputException if a high surrogate is still waiting
	     *         for its partner, or the pending syllable cannot be encoded
	     * @throws ConversionBufferFullException if {@code output} is too small;
	     *         the pending syllable is kept so the call can be repeated
	     */
	    public int flush(byte[] output, int outStart, int outEnd)
	        throws MalformedInputException,
	               ConversionBufferFullException
	    {
	        byteOff = outStart;

	        if (highHalfZoneCode != 0) {
	            reset();
	            badInputLength = 0;
	            throw new MalformedInputException();
	        }

	        if (charState != G0) {
	            try {
	                unicodeToBuffer(composeHangul(), output, outEnd);
	            } catch (UnknownCharacterException e) {
	                reset();
	                badInputLength = 0;
	                // keep the original failure reachable for diagnostics
	                MalformedInputException malformed = new MalformedInputException();
	                malformed.initCause(e);
	                throw malformed;
	            }
	            charState = G0;
	        }

	        int bytesOut = byteOff - outStart;

	        reset();
	        return bytesOut;
	    }

	    /**
	     * Resets converter to its initial state.
	     */
	    public void reset() {
	        highHalfZoneCode = 0;
	        charState = G0;
	        charOff = byteOff = 0;
	    }

	    /**
	     * Returns true if the given character can be converted to the
	     * target character encoding.  Only U+0000 may legitimately map to a
	     * zero table entry; every other zero entry means "undefined".
	     */
	    public boolean canConvert(char ch) {
	        return lookup(ch) != 0 || ch == '\u0000';
	    }

	    /**
	     * Converts {@code input[inOff..inEnd)} into {@code output}, starting
	     * at {@code outOff}.
	     *
	     * <p>Modern conjoining jamo are collected into a syllable that is
	     * written once a character arrives that cannot extend it; call
	     * {@link #flush} to write a syllable still pending at end of input.
	     *
	     * @return the number of bytes written to {@code output}
	     * @throws UnknownCharacterException if a character has no mapping and
	     *         substitution is disabled
	     * @throws MalformedInputException on an unpaired surrogate
	     * @throws ConversionBufferFullException if {@code output} fills up
	     */
	    public int convert(char[] input, int inOff, int inEnd,
	                       byte[] output, int outOff, int outEnd)
	        throws UnknownCharacterException, MalformedInputException,
	               ConversionBufferFullException
	    {
	        char inputChar;
	        int  inputSize;

	        charOff = inOff;
	        byteOff = outOff;

	        while (charOff < inEnd) {

	            if (highHalfZoneCode == 0) {
	                inputChar = input[charOff];
	                inputSize = 1;
	            } else {
	                // resume with the high surrogate saved by the previous call;
	                // it consumes nothing from this input array
	                inputChar = highHalfZoneCode;
	                inputSize = 0;
	                highHalfZoneCode = 0;
	            }

	            switch (charState) {
	            case G0:
	                beginSyllable(inputChar);
	                break;

	            case G1:
	                if (isLeadingC(inputChar)) {
	                    l = composeLL(l, inputChar);
	                } else if (isVowel(inputChar)) {
	                    v = inputChar;
	                    charState = G2;
	                } else if (isTrailingC(inputChar)) {
	                    t = inputChar;
	                    charState = G3;
	                } else {
	                    unicodeToBuffer(composeHangul(), output, outEnd);
	                    charState = G0;
	                }
	                break;

	            case G2:
	                if (isLeadingC(inputChar)) {
	                    // a new syllable begins: write the finished one first
	                    unicodeToBuffer(composeHangul(), output, outEnd);
	                    beginSyllable(inputChar);
	                } else if (isVowel(inputChar)) {
	                    v = composeVV(v, inputChar);
	                } else if (isTrailingC(inputChar)) {
	                    t = inputChar;
	                    charState = G3;
	                } else {
	                    unicodeToBuffer(composeHangul(), output, outEnd);
	                    charState = G0;
	                }
	                break;

	            case G3:
	                if (isTrailingC(inputChar)) {
	                    t = composeTT(t, inputChar);
	                } else {
	                    unicodeToBuffer(composeHangul(), output, outEnd);
	                    // A leading consonant or vowel here opens the next
	                    // syllable instead of being encoded as a lone jamo;
	                    // anything else leaves the state at G0.
	                    beginSyllable(inputChar);
	                }
	                break;
	            }

	            if (charState != G0) {
	                // the character was absorbed into the pending syllable
	                charOff++;
	                continue;
	            }

	            if (inputChar >= '\uD800' && inputChar <= '\uDBFF') {
	                // High surrogate.  Is it the last character of the input?
	                if (charOff + inputSize >= inEnd) {
	                    highHalfZoneCode = inputChar;
	                    charOff += inputSize;
	                    break;
	                }

	                // Is there a low surrogate following?
	                inputChar = input[charOff + inputSize];
	                if (inputChar < '\uDC00' || inputChar > '\uDFFF') {
	                    // malformed surrogate pair
	                    badInputLength = 1;
	                    throw new MalformedInputException();
	                }

	                // A valid pair, but this code page has no supplementary
	                // characters: substitute if allowed, otherwise reject.
	                if (!subMode) {
	                    badInputLength = 2;
	                    throw new UnknownCharacterException();
	                }
	                loadSubstitutionBytes();
	                bytesToBuffer(outputByte, output, outEnd);
	                inputSize++;
	            } else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
	                // unaccompanied low surrogate
	                badInputLength = 1;
	                throw new MalformedInputException();
	            } else {
	                unicodeToBuffer(inputChar, output, outEnd);
	            }

	            charOff += inputSize;
	        }

	        return byteOff - outOff;
	    }

	    /**
	     * Starts assembling a new syllable from {@code c}.  The three jamo
	     * slots are reset to their base values and, when {@code c} is a
	     * composable jamo, it is stored and the matching state is entered;
	     * otherwise the state becomes G0.
	     */
	    private void beginSyllable(char c) {
	        l = LBase;
	        v = VBase;
	        t = TBase;

	        if (isLeadingC(c)) {
	            l = c;
	            charState = G1;
	        } else if (isVowel(c)) {
	            v = c;
	            charState = G2;
	        } else if (isTrailingC(c)) {
	            t = c;
	            charState = G3;
	        } else {
	            charState = G0;
	        }
	    }

	    /**
	     * Combines the pending jamo into a precomposed Hangul syllable.
	     */
	    private char composeHangul() {
	        int lIndex = l - LBase;
	        int vIndex = v - VBase;
	        int tIndex = t - TBase;

	        return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase);
	    }

	    // The three compose helpers below currently let the later jamo
	    // replace the earlier one.

	    private char composeLL(char l1, char l2) {
	        return l2;
	    }

	    private char composeVV(char v1, char v2) {
	        return v2;
	    }

	    private char composeTT(char t1, char t2) {
	        return t2;
	    }

	    // Only the modern jamo are accepted: composeHangul() would produce a
	    // value outside the syllable block (or overflow char) for any other.

	    private boolean isLeadingC(char c) {
	        return (c >= LBase && c <= LLast);
	    }

	    private boolean isVowel(char c) {
	        return (c >= VBase && c <= VLast);
	    }

	    private boolean isTrailingC(char c) {
	        // TBase itself stands for "no trailing consonant"
	        return (c > TBase && c <= TLast);
	    }

	    /**
	     * returns the maximum number of bytes needed to convert a char
	     */
	    public int getMaxBytesPerChar() {
	        return 2;
	    }


	    /**
	     * Return the character set ID
	     */
	    public String getCharacterEncoding() {
	        return "Cp949C";
	    }

	    /**
	     * Returns the encoder table entry for {@code ch}: lead byte in bits
	     * 8-15, trail byte in bits 0-7, or 0 when there is no entry.
	     */
	    private int lookup(char ch) {
	        int index = index1[(ch & mask1) >> shift] + (ch & mask2);
	        if (index < INDEX2A_OFFSET)
	            return index2.charAt(index);
	        return index2a.charAt(index - INDEX2A_OFFSET);
	    }

	    /**
	     * Places the inherited substitution bytes in {@code outputByte},
	     * using a zero lead byte when there is only one of them.
	     */
	    private void loadSubstitutionBytes() {
	        if (subBytes.length == 1) {
	            outputByte[0] = 0x00;
	            outputByte[1] = subBytes[0];
	        } else {
	            outputByte[0] = subBytes[0];
	            outputByte[1] = subBytes[1];
	        }
	    }

	    /**
	     * private function to add the bytes to the output buffer; a zero
	     * lead byte means only the second byte is written
	     */
	    private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd)
	        throws ConversionBufferFullException {

	        int spaceNeeded = (theBytes[0] == 0x00) ? 1 : 2;

	        // ensure sufficient space for the byte(s)
	        if (byteOff + spaceNeeded > outEnd)
	            throw new ConversionBufferFullException();

	        // move the data into the buffer
	        if (spaceNeeded == 2)
	            output[byteOff++] = theBytes[0];
	        output[byteOff++] = theBytes[1];
	    }

	    /**
	     * private function to add a unicode character to the output buffer,
	     * falling back to the substitution bytes for unmappable characters
	     * when substitution is enabled
	     */
	    private void unicodeToBuffer(char unicode, byte[] output, int outEnd)
	        throws ConversionBufferFullException,
	               UnknownCharacterException {

	        // first we convert the unicode to its byte representation
	        int theBytes = lookup(unicode);
	        outputByte[0] = (byte)((theBytes & 0x0000ff00) >> 8);
	        outputByte[1] = (byte)(theBytes & 0x000000ff);

	        // if the unicode was not mappable - look for the substitution bytes
	        if (theBytes == 0 && unicode != '\u0000') {
	            if (!subMode) {
	                badInputLength = 1;
	                throw new UnknownCharacterException();
	            }
	            loadSubstitutionBytes();
	        }

	        // now put the bytes in the buffer
	        bytesToBuffer(outputByte, output, outEnd);
	    }
	}