Logo Search packages:      
Sourcecode: firebird1.5 version File versions

intl.cpp

/*************  history ************
*
*       COMPONENT: JRD  MODULE: INTL.C
*       generated by Marion V2.5     2/6/90
*       from dev              db        on 4-JAN-1995
*****************************************************************
*
*       PR  2002-06-02 Added ugly c hack in
*       intl_back_compat_alloc_func_lookup.
*       When someone has time we need to change the references to
*       return (void*) function to something more C++ like
*
*       42 4711 3 11 17  tamlin   2001
*       Added silly numbers before my name, and converted it to C++.
*
*       18850   daves   4-JAN-1995
*       Fix gds__alloc usage
*
*       18837   deej    31-DEC-1994
*       fixing up HARBOR_MERGE
*
*       18821   deej    27-DEC-1994
*       HARBOR MERGE
*
*       18789   jdavid  19-DEC-1994
*       Cast some functions
*
*       17508   jdavid  15-JUL-1994
*       Bring it up to date
*
*       17500   daves   13-JUL-1994
*       Bug 6645: Different calculation of partial keys
*
*       17202   katz    24-MAY-1994
*       PC_PLATFORM requires the .dll extension
*
*       17191   katz    23-MAY-1994
*       OS/2 requires the .dll extension
*
*       17180   katz    23-MAY-1994
*       Define location of DLL on OS/2
*
*       17149   katz    20-MAY-1994
*       In JRD, gds_arg_number arguments are SLONG's not int's
*
*       16633   daves   19-APR-1994
*       Bug 6202: International licensing uses INTERNATIONAL product code
*
*       16555   katz    17-APR-1994
*       The last argument of calls to ERR_post should be 0
*
*       16521   katz    14-APR-1994
*       Borland C needs a decorated symbol to lookup
*
*       16403   daves   8-APR-1994 
*       Bug 6441: Emit an error whenever transliteration from ttype_binary attempted
*
*       16141   katz    28-MAR-1994
*       Don't declare return value from ISC_lookup_entrypoint as API_ROUTINE
*
 * The contents of this file are subject to the Interbase Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy
 * of the License at http://www.Inprise.com/IPL.html
 *
 * Software distributed under the License is distributed on an
 * "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code was created by Inprise Corporation
 * and its predecessors. Portions created by Inprise Corporation are
 * Copyright (C) Inprise Corporation.
 *
 * All Rights Reserved.
 * Contributor(s): ______________________________________.
 *
 * 2002.10.29 Sean Leyne - Removed obsolete "Netware" port
 *
 * 2002.10.30 Sean Leyne - Removed support for obsolete "PC_PLATFORM" define
 *
*/


/*
 *      PROGRAM:        JRD Intl
 *      MODULE:         intl.c
 *      DESCRIPTION:    International text support routines
 *
 * copyright (c) 1992, 1993 by Borland International
 */

#include "firebird.h"
#include <string.h>
#include "../jrd/ib_stdio.h"
#include "../jrd/jrd.h"
#include "../jrd/req.h"
#include "../jrd/val.h"
#include "gen/codes.h"
#include "../jrd/intl.h"
#include "../jrd/intl_classes.h"
#include "../jrd/ods.h"
#include "../jrd/btr.h"
#include "../intl/charsets.h"
#include "../intl/country_codes.h"
#include "../jrd/gdsassert.h"
#include "../jrd/license.h"
#ifdef INTL_BUILTIN
#include "../intl/ld_proto.h"
#endif
#include "../jrd/all_proto.h"
#include "../jrd/cvt_proto.h"
#include "../jrd/err_proto.h"
#include "../jrd/evl_proto.h"
#include "../jrd/flu_proto.h"
#include "../jrd/fun_proto.h"
#include "../jrd/gds_proto.h"
#include "../jrd/iberr_proto.h"
#include "../jrd/intl_proto.h"
#include "../jrd/isc_proto.h"
#include "../jrd/thd_proto.h"

#include "../jrd/plugin_manager.h"

#ifdef DEV_BUILD

/* True when the low 7 bits of (x) are at or above the space character,
   i.e. the byte is not a control code once its high bit is masked off. */
#define isprintable(x)  ((((unsigned char)(x)) & 0x7F) >= ' ')

/* INTL_TRACE expands to a gds__log call only when both DEV_BUILD and
   DEBUG_INTL are defined; in every other configuration it expands to
   nothing.  The argument must carry its own parentheses, e.g.
   INTL_TRACE(("format", arg)). */
#ifdef DEBUG_INTL
#define INTL_TRACE(args)     gds__log args
#else
#define INTL_TRACE(args)
#endif

#else
#define INTL_TRACE(args)
#endif
/* 11 Sept 2002, Nickolay Samofatov. IS_TEXT is used only in asserts, so it
   was moved out of the DEV_BUILD section; the optimizer is expected to
   remove it when asserts are disabled.
   True when descriptor (x) has one of the three string dtypes. */
#define IS_TEXT(x)      (((x)->dsc_dtype == dtype_text)   ||\
                   ((x)->dsc_dtype == dtype_varying)||\
                   ((x)->dsc_dtype == dtype_cstring))

/* A text type packs two values: the low byte is the character set id,
   the bits above it are the collation id. */
#define TTYPE_TO_CHARSET(tt)    ((SSHORT)((tt) & 0x00FF))
#define TTYPE_TO_COLLATION(tt)  ((SSHORT)((tt) >> 8))


/* Byte type alias; not referenced in the portion of the file visible here. */
typedef unsigned char FILECHAR;

// extern declarations for the allocator functions for builtin charsets.
// Each returns an allocation function pointer for the given id(s); callers
// in this file treat a null return as "not built in" and fall back to
// other lookups.
extern CharSetAllocFunc INTL_charset_alloc_func(short);
extern TextTypeAllocFunc INTL_texttype_alloc_func(short);
extern CsConvertAllocFunc INTL_csconvert_alloc_func(short, short);

// File-local helper: callers pass (tdbb, charset, buffer, length, offset)
// and use the result to decide whether leftover source bytes are only
// padding.  NOTE(review): exact offset semantics are defined with the
// function body, which is outside this section.
static BOOLEAN all_spaces(TDBB, CHARSET_ID, BYTE *, USHORT, USHORT);
#ifdef NOT_USED_OR_REPLACED
#ifdef DEV_BUILD
static void dump_hex(UCHAR *, USHORT);
static void dump_latin(UCHAR *, USHORT);
#endif
#endif
// The commented-out prototypes below are retained from earlier revisions
// of this file; none of them is declared in the current build.
//static void finish_texttype_init(TextType*, FPTR_VOID, ISC_STATUS *);
//static SSHORT internal_str_to_upper(TextType*, USHORT, UCHAR *, USHORT,
//                                                    UCHAR *);
//static USHORT internal_string_to_key(TextType*, USHORT, UCHAR *, USHORT,
//                                                     UCHAR *, USHORT);
//static USHORT mb_to_wc(CsConvert*, UCS2_CHAR *, USHORT, MBCHAR *, USHORT, SSHORT *,
//                               USHORT *);
#ifdef NOT_USED_OR_REPLACED
static USHORT nc_to_wc(CsConvert*, UCS2_CHAR *, USHORT, UCHAR *, USHORT, SSHORT *,
                                 USHORT *);
#endif
// File-local helper: callers pass (tdbb, charset, destination, byte count)
// to fill the tail of a fixed-length text field.
static void pad_spaces(TDBB, CHARSET_ID, BYTE *, USHORT);
//static USHORT wc_to_mb(CsConvert*, MBCHAR *, USHORT, UCS2_CHAR *, USHORT, SSHORT *,
//                               USHORT *);
//static USHORT wc_to_nc(CsConvert*, NCHAR *, USHORT, UCS2_CHAR *, USHORT, SSHORT *,
//                               USHORT *);
//static USHORT wc_to_wc(CsConvert*, UCS2_CHAR *, USHORT, UCS2_CHAR *, USHORT, SSHORT *,
//                               USHORT *);

// Lookup helpers defined later in this file.  The three void*-returning
// functions hand back untyped function pointers that callers cast to the
// specific allocator type they asked for (see the history note about the
// "ugly c hack" at the top of the file).
static CharSetContainer *internal_charset_container_lookup(TDBB, SSHORT, ISC_STATUS *);
static void* search_out_alloc_func(const char *, CHARSET_ID, CHARSET_ID);
static void* intl_back_compat_alloc_func_lookup(USHORT, CHARSET_ID, CHARSET_ID);
static void* intl_back_compat_obj_init_lookup(USHORT, SSHORT, SSHORT);


/* Name of module that implements text-type (n).
 * Exactly one pair of INTL_MODULE1 / INTL_MODULE2 definitions is selected:
 * the VMS names, the Windows names, or the generic fallback below. */

#ifdef VMS
/* Note: MUST be only the file name.  The VMS lib$find_shared_image
 *       call insists on file name only, not any "path" components.
 */
#define INTL_MODULE1    "FBINTL"
#define INTL_MODULE2    "FBINTL2"
#endif

#ifdef WIN_NT
/* prefixed with $INTERBASE */
#define INTL_MODULE1 "fbintl.dll"
#define INTL_MODULE2 "fbintl2.dll"
#endif

/* Fallback for every platform that did not define the names above. */
#ifndef INTL_MODULE1
/* prefixed with $INTERBASE */
#define INTL_MODULE1 "fbintl"
#define INTL_MODULE2 "fbintl2"
#endif

/* Entry-point names looked up inside the modules above.
 * NOTE(review): INTL_USER_ENTRY looks like a printf-style pattern taking a
 * zero-padded three-digit number -- confirm against the code that formats it. */
#define INTL_LOOKUP_ENTRY1      "LD_lookup"
#define INTL_LOOKUP_ENTRY2      "LD2_lookup"
#define INTL_USER_ENTRY         "USER_TEXTTYPE_%03d"


// Classes and structures used internally to this file and intl implementation
class CharSetContainer
{
public:
      CharSetContainer(MemoryPool &p, CharSet *cs = 0) :
            charset_converters(p),
            charset_collations(p),
            impossible_conversions(p),
            cs(cs)
      {}
      
      CharSet *getCharSet() { return cs; }
      
      void setCollation(TextType *cs, unsigned short id)
      {
            if (id >= charset_collations.size())
                  charset_collations.resize(id + 10);
            charset_collations[id] = cs;
      }
      
      TextType *collation(unsigned short id)
      {
            if (id >= charset_collations.size())
                  return NULL;
            return charset_collations[id];
      }
      
      bool findConverter(CHARSET_ID id, CsConvert **cvt)
      {
            *cvt = NULL;
            for(Firebird::vector<CsConvert*>::iterator itr1 = charset_converters.begin();
                        itr1 != charset_converters.end(); ++itr1)
                  if ((*itr1)->getToCS() == id)
                  {
                        *cvt = *itr1;
                        return true;
                  }

            for(Firebird::vector<CHARSET_ID>::iterator itr2 = impossible_conversions.begin();
                        itr2 != impossible_conversions.end(); ++itr2)
                  if (*itr2 == id)
                        return true;
            return false;
      }
      
      void addConverter(CsConvert *conv)
      {
            charset_converters.push_back(conv);
      }
      
      void addNullConverter(CHARSET_ID nullId)
      {
            impossible_conversions.push_back(nullId);
      }
      
private:
      Firebird::vector<CsConvert*>  charset_converters;
      Firebird::vector<TextType*>   charset_collations;
      Firebird::vector<CHARSET_ID>  impossible_conversions;
      CharSet *cs;
};

CHARSET_ID DLL_EXPORT INTL_charset(TDBB tdbb, USHORT ttype, FPTR_VOID err)
{
/**************************************
 *
 *      I N T L _ c h a r s e t
 *
 **************************************
 *
 * Functional description
 *      Map a text type to its character set ID.
 *
 *      The well-known internal text types map to fixed
 *      character sets; ttype_dynamic resolves to the
 *      character set of the current attachment; any other
 *      value yields the low byte of the text type.
 *
 *      (err) is accepted for interface compatibility and
 *      is not used here.
 *
 **************************************/
      CHARSET_ID charset_id;

      switch (ttype)
      {
      case ttype_none:
            charset_id = CS_NONE;
            break;

      case ttype_ascii:
            charset_id = CS_ASCII;
            break;

      case ttype_unicode_fss:
            charset_id = CS_UNICODE_FSS;
            break;

      case ttype_binary:
            charset_id = CS_BINARY;
            break;

      case ttype_dynamic:
            /* Thread data is only needed to reach the attachment */
            SET_TDBB(tdbb);
            charset_id = tdbb->tdbb_attachment->att_charset;
            break;

      default:
            charset_id = TTYPE_TO_CHARSET(ttype);
            break;
      }

      return (charset_id);
}


int DLL_EXPORT INTL_compare(TDBB tdbb,
                            DSC * pText1,
                            DSC * pText2,
                            FPTR_VOID err)
{
/**************************************
 *
 *      I N T L _ c o m p a r e
 *
 **************************************
 *
 * Functional description
 *      Compare two pieces of international text.
 *
 *      Both operands are fetched as raw strings.  When their
 *      text types name different character sets, the operand
 *      whose text type is NOT the chosen comparison type is
 *      converted into the other's character set through a
 *      MAX_KEY sized scratch buffer.  The comparison itself is
 *      delegated to the text type object of the comparison type.
 *
 * RETURNS:
 *      Whatever the text type's compare() method returns.
 *
 **************************************/
      UCHAR converted[MAX_KEY];
      UCHAR *text1, *text2;
      USHORT ttype1, ttype2;

      SET_TDBB(tdbb);

      assert(pText1 != NULL);
      assert(pText2 != NULL);
      assert(IS_TEXT(pText1) && IS_TEXT(pText2));
      assert(INTL_data_or_binary(pText1) || INTL_data_or_binary(pText2));
      assert(err);

/* normal compare routine from CVT_compare */
/* trailing spaces in strings are ignored for comparison */

      USHORT text1_len = CVT_get_string_ptr(pText1, &ttype1, &text1, NULL, 0, err);
      USHORT text2_len = CVT_get_string_ptr(pText2, &ttype2, &text2, NULL, 0, err);

/* YYY - by SQL II compare_type must be explicit in the
   SQL statement if there is any doubt */

      const SSHORT compare_type = MAX(ttype1, ttype2);      /* YYY */

      if (ttype1 != ttype2) {
            const CHARSET_ID charset1 = INTL_charset(tdbb, ttype1, err);
            const CHARSET_ID charset2 = INTL_charset(tdbb, ttype2, err);

            if (charset1 != charset2) {
                  if (compare_type == ttype2) {
                        /* convert pText1 to pText2's type, if possible */

                        text1_len = INTL_convert_bytes(tdbb, charset2,
                                                       converted, sizeof(converted),
                                                       charset1, text1, text1_len, err);
                        text1 = converted;
                  }
                  else {
                        /* convert pText2 to pText1's type, if possible */
                        /* YYY - should failure to convert really return
                           an error here?
                           Support joining a 437 & Latin1 Column, and we
                           pick the compare_type as 437, still only want the
                           equal values....
                           But then, what about < operations, which make no
                           sense if the string cannot be expressed...
                         */

                        text2_len = INTL_convert_bytes(tdbb, charset1,
                                                       converted, sizeof(converted),
                                                       charset2, text2, text2_len, err);
                        text2 = converted;
                  }
            }
      }

      TextType* const texttype = INTL_texttype_lookup(tdbb, compare_type, err, NULL);

      return texttype->compare(text1_len, text1, text2_len, text2);
}


USHORT DLL_EXPORT INTL_convert_bytes(
                                                       TDBB tdbb,
                                                       CHARSET_ID dest_type,
                                                       BYTE * dest_ptr,
                                                       USHORT dest_len,
CHARSET_ID src_type, BYTE * src_ptr, USHORT src_len, FPTR_VOID err)
{
/**************************************
 *
 *      I N T L _ c o n v e r t _ b y t e s
 *
 **************************************
 *
 * Functional description
 *      Given a string of bytes in one character set, convert it to another 
 *      character set.
 *
 *      If (dest_ptr) is NULL, return the count of bytes needed to convert
 *      the string.  This does not guarantee the string can be converted,
 *      the purpose of this is to allocate a large enough buffer.
 *
 * RETURNS:
 *      Length of resulting string, in bytes.
 *      calls (err) if conversion error occurs.
 *
 **************************************/
      UCHAR *start_dest_ptr;
      USHORT len;
      USHORT len2;
      CsConvert* cs_obj;
      CharSet* from_cs, *to_cs;
      SSHORT err_code = 0;
      USHORT err_position;
      BYTE *tmp_buffer;

      SET_TDBB(tdbb);


      assert(src_ptr != NULL);
      assert(src_type != dest_type);
      assert(err != NULL);

      start_dest_ptr = dest_ptr;

      if ((dest_type == CS_BINARY) ||
            (dest_type == CS_NONE) ||
            (src_type == CS_NONE)) {

            /* See if we just need a length estimate */
            if (dest_ptr == NULL)
                  return (src_len);

            len = MIN(dest_len, src_len);
            if (len)
                  do
                        *dest_ptr++ = *src_ptr++;
                  while (--len);

            /* See if only space characters are remaining */
            len = src_len - MIN(dest_len, src_len);
            if (!len || all_spaces(tdbb, src_type, src_ptr, len, 0))
                  return (dest_ptr - start_dest_ptr);
            else
                  reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, 0);
      }
      else if (src_len == 0)
            return (0);
      else if (src_type == CS_BINARY)
            reinterpret_cast < void (*) (...) > (*err) (gds_arith_except,
                                                                              gds_arg_gds,
                                                                              gds_transliteration_failed,
                                                                              0);
      else
            /* character sets are known to be different */
      {
            /* Do we know an object from cs1 to cs2? */

            cs_obj = INTL_convert_lookup(tdbb, dest_type, src_type);
            if (cs_obj != NULL) {
                  len = cs_obj->convert(dest_ptr, dest_len, src_ptr,
                                                      src_len, &err_code, &err_position);
                  if (!err_code || ((err_code == CS_TRUNCATION_ERROR)
                                            && all_spaces(tdbb, src_type, src_ptr, src_len,
                                                                  err_position))) return (len);
                  else if (err_code == CS_TRUNCATION_ERROR)
                        reinterpret_cast < void (*) (...) > (*err) (gds_arith_except,
                                                                                          0);
                  else
                        reinterpret_cast < void (*) (...) > (*err) (gds_arith_except,
                                                                                          gds_arg_gds,
                                                                                          gds_transliteration_failed,
                                                                                          0);

            }

            /* Find a CS1 to UNICODE object */

            from_cs = INTL_charset_lookup(tdbb, src_type, NULL);
            if (from_cs == NULL)
                  reinterpret_cast < void (*) (...) > (*err) (gds_arith_except,
                                                                                    gds_arg_gds,
                                                                                    gds_text_subtype,
                                                                                    gds_arg_number,
                                                                                    (SLONG) src_type, 0);

            /* 
               ** allocate a temporary buffer that is large enough.
             */
            tmp_buffer = (BYTE *) FB_NEW(*getDefaultMemoryPool()) char[(SLONG) src_len * sizeof(UCS2_CHAR)];

            cs_obj = from_cs->getConvToUnicode();
            assert(cs_obj != NULL);
            len = cs_obj->convert(tmp_buffer, src_len * 2, src_ptr,
                                                src_len, &err_code, &err_position);
            if (err_code && !((err_code == CS_TRUNCATION_ERROR)
                                      && all_spaces(tdbb, src_type, src_ptr, src_len,
                                                            err_position))) {
                  delete [] tmp_buffer;
                  if (err_code == CS_TRUNCATION_ERROR)
                        reinterpret_cast < void (*) (...) > (*err) (gds_arith_except,
                                                                                          0);
                  else
                        reinterpret_cast < void (*) (...) > (*err) (gds_arith_except,
                                                                                          gds_arg_gds,
                                                                                          gds_transliteration_failed,
                                                                                          0);
            }

            /* Find a UNICODE to CS2 object */

            to_cs = INTL_charset_lookup(tdbb, dest_type, NULL);
            if (to_cs == NULL) {
                  delete [] tmp_buffer;
                  reinterpret_cast < void (*) (...) > (*err) (gds_arith_except,
                                                                                    gds_arg_gds,
                                                                                    gds_text_subtype,
                                                                                    gds_arg_number,
                                                                                    (SLONG) dest_type, 0);
            }
            cs_obj = to_cs->getConvFromUnicode();
            assert(cs_obj != NULL);
            len2 = cs_obj->convert(dest_ptr, dest_len, tmp_buffer,
                                          len, &err_code, &err_position);

            if (err_code &&
                  !((err_code == CS_TRUNCATION_ERROR) &&
                    all_spaces(tdbb, CS_UNICODE_UCS2, tmp_buffer, len, err_position))) {
                  delete [] tmp_buffer;
                  if (err_code == CS_TRUNCATION_ERROR)
                        reinterpret_cast < void (*) (...) > (*err) (gds_arith_except,
                                                                                          0);
                  else
                        reinterpret_cast < void (*) (...) > (*err) (gds_arith_except,
                                                                                          gds_arg_gds,
                                                                                          gds_transliteration_failed,
                                                                                          0);
            }

            delete [] tmp_buffer;
            return (len2);
      }
      return (0);                         /* to remove compiler errors.  This should never be executed */
}


CsConvert* DLL_EXPORT INTL_convert_lookup(TDBB tdbb,
                                          CHARSET_ID to_cs,
                                          CHARSET_ID from_cs)
{
/**************************************
 *
 *      I N T L _ c o n v e r t _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *      Find (or create and cache) the converter that turns
 *      text in character set (from_cs) into (to_cs).
 *
 *      CS_dynamic on either side is replaced by the character
 *      set of the current attachment.  The answer, including a
 *      negative one, is remembered in the container of the
 *      source character set.
 *
 * RETURNS:
 *      The converter, or NULL when the source character set is
 *      unknown or no converter can be obtained.
 *
 **************************************/
      SET_TDBB(tdbb);
      DBB dbb = tdbb->tdbb_database;
      CHECK_DBB(dbb);

      if (from_cs == CS_dynamic)
            from_cs = tdbb->tdbb_attachment->att_charset;

      if (to_cs == CS_dynamic)
            to_cs = tdbb->tdbb_attachment->att_charset;

/* Should from_cs == to_cs? be handled better? YYY */

      assert(from_cs != CS_dynamic);
      assert(to_cs != CS_dynamic);

      CharSetContainer* const container =
            internal_charset_container_lookup(tdbb, from_cs, NULL);
      if (container == NULL)
            return (NULL);

      /* A cached answer (possibly a cached NULL) wins */
      CsConvert* converter;
      if (container->findConverter(to_cs, &converter))
            return converter;

      if (to_cs == CS_UNICODE_UCS2) {
            converter = container->getCharSet()->getConvToUnicode();
      }
      else if (from_cs == CS_UNICODE_UCS2) {
            CharSet* const target = INTL_charset_lookup(tdbb, to_cs, NULL);
            if (target == NULL)
                  return (NULL);
            converter = target->getConvFromUnicode();
      }
      else {
            /* Try the builtin table, then the plugin entry point, then the
               backward compatible lookup.  Note that the last one is called
               with (to_cs, from_cs), the reverse of the other two. */
            CsConvertAllocFunc allocFunc = INTL_csconvert_alloc_func(from_cs, to_cs);
            if (!allocFunc)
                  allocFunc = (CsConvertAllocFunc)
                        search_out_alloc_func("FB_CsConvert_lookup", from_cs, to_cs);
            if (!allocFunc)
                  allocFunc = (CsConvertAllocFunc)
                        intl_back_compat_alloc_func_lookup(type_csconvert, to_cs, from_cs);

            converter = allocFunc ? (*allocFunc)(*dbb->dbb_permanent, from_cs, to_cs) : NULL;
            if (!converter) {
                  /* remember the failure so the search is not repeated */
                  container->addNullConverter(to_cs);
                  return NULL;
            }
      }

      container->addConverter(converter);

      assert(converter->getFromCS() == from_cs);
      assert(converter->getToCS() == to_cs);

      return (converter);
}


int DLL_EXPORT INTL_convert_string(DSC * to, DSC * from, FPTR_VOID err)
{
/**************************************
 *
 *      I N T L _ c o n v e r t _ s t r i n g
 *
 **************************************
 *
 * Functional description
 *      Convert a string from one type to another.
 *
 *      The source bytes are obtained with CVT_get_string_ptr and
 *      stored into (to) according to its dtype (text, cstring or
 *      varying).  When both character sets are real and different
 *      the bytes are transliterated with INTL_convert_bytes,
 *      otherwise they are copied.  Fixed-length text is padded
 *      with pad_spaces; cstrings are zero terminated; varying
 *      strings get their length word set.
 *
 *      Source bytes that do not fit must all be spaces, otherwise
 *      (err) is called with gds_arith_except.
 *
 * RETURNS:
 *      0 if no error in conversion
 *      non-zero otherwise (1 when called outside the engine,
 *      i.e. when no thread data is available).
 *
 **************************************/
      UCHAR *p, *q;
      UCHAR *from_ptr;
      CHARSET_ID to_cs, from_cs;
      USHORT from_type;
      TDBB tdbb;
      USHORT from_len;
      USHORT to_size, to_len;

      /* Both fill counters start at zero: they are assigned only inside
         the dtype cases below, and from_fill is read after the switch.
         Without this a non-text destination (possible when the IS_TEXT
         assert is compiled out) would read an indeterminate value. */
      USHORT from_fill = 0;
      USHORT to_fill = 0;

/* Note: This function is called from outside the engine as
   well as inside - we likely can't get rid of GET_THREAD_DATA here */
      tdbb = GET_THREAD_DATA;
      if (tdbb == NULL)             /* are we in the Engine? */
            return (1);                   /* no, then can't access intl gah */

      assert(to != NULL);
      assert(from != NULL);
      assert(IS_TEXT(to) && IS_TEXT(from));

      from_cs = INTL_charset(tdbb, INTL_TTYPE(from), err);
      to_cs = INTL_charset(tdbb, INTL_TTYPE(to), err);

      /* Transliteration is needed only between two different, real
         character sets; BINARY and NONE targets, and NONE sources, are
         handled by byte-copy. */
      const bool transliterate =
            (from_cs != to_cs) && (to_cs != CS_BINARY) &&
            (to_cs != CS_NONE) && (from_cs != CS_NONE);

      p = to->dsc_address;

/* Must convert dtype(cstring,text,vary) and ttype(ascii,binary,..intl..) */

      from_len = CVT_get_string_ptr(from, &from_type, &from_ptr, NULL, 0, err);

      to_size = to_len = TEXT_LEN(to);

      q = from_ptr;
      switch (to->dsc_dtype) {
      case dtype_text:
            if (transliterate) {

                  to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size,
                                              from_cs, from_ptr, from_len, err);
                  to_fill = to_size - to_len;
                  from_fill = 0;          /* Convert_bytes handles source truncation */
                  p += to_len;
            }
            else {
                  /* binary string can always be converted TO by byte-copy */

                  to_len = MIN(from_len, to_size);
                  from_fill = from_len - to_len;
                  to_fill = to_size - to_len;
                  if (to_len)
                        do
                              *p++ = *q++;
                        while (--to_len);
            }

            /* fixed-length text: blank-fill whatever was not written */
            if (to_fill > 0)
                  pad_spaces(tdbb, to_cs, p, to_fill);
            break;

      case dtype_cstring:
            if (transliterate) {
                  to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size,
                                              from_cs, from_ptr, from_len, err);
                  to->dsc_address[to_len] = 0;
                  from_fill = 0;          /* Convert_bytes handles source truncation */
            }
            else {
                  /* binary string can always be converted TO by byte-copy */

                  to_len = MIN(from_len, to_size);
                  from_fill = from_len - to_len;
                  if (to_len)
                        do
                              *p++ = *q++;
                        while (--to_len);
                  *p = 0;
            }
            break;

      case dtype_varying:
            if (transliterate) {

                  to_len =
                        INTL_convert_bytes(tdbb, to_cs,
                                           reinterpret_cast<UCHAR*>(((VARY *) to->dsc_address)->vary_string),
                                           to_size, from_cs, from_ptr, from_len, err);
                  ((VARY *) to->dsc_address)->vary_length = to_len;
                  from_fill = 0;          /* Convert_bytes handles source truncation */
            }
            else {
                  /* binary string can always be converted TO by byte-copy */
                  to_len = MIN(from_len, to_size);
                  from_fill = from_len - to_len;
                  ((VARY *) p)->vary_length = to_len;
                  p = reinterpret_cast<UCHAR*>(((VARY *) p)->vary_string);
                  if (to_len)
                        do
                              *p++ = *q++;
                        while (--to_len);
            }
            break;

      default:
            /* Not a text destination: nothing is written and nothing is
               left over to check (from_fill stays 0). */
            break;
      }

      if (from_fill)
            /* Make sure remaining characters on From string are spaces */
            if (!all_spaces(tdbb, from_cs, q, from_fill, 0))
                  reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, 0);

      return 0;
}


#ifdef DEV_BUILD
int DLL_EXPORT INTL_data(DSC * pText)
{
/**************************************
 *
 *      I N T L _ d a t a
 *
 **************************************
 *
 * Functional description
 *      Given an input text descriptor,
 *      return TRUE if the data pointed to represents
 *      international text (subject to user defined or non-binary
 *      collation or comparison), i.e. the descriptor is a
 *      string whose text type is not one of the internal ones.
 *      Return FALSE otherwise.
 *
 **************************************/

      assert(pText != NULL);

      return (IS_TEXT(pText) && !INTERNAL_TTYPE(pText)) ? TRUE : FALSE;
}
#endif

#ifdef DEV_BUILD
int DLL_EXPORT INTL_data_or_binary(DSC * pText)
{
/**************************************
 *
 *      I N T L _ d a t a _ o r _ b i n a r y
 *
 **************************************
 *
 * Functional description
 *      Return non-zero when the descriptor holds international
 *      text (see INTL_data) or its text type is ttype_binary.
 *
 **************************************/

      if (INTL_data(pText))
            return 1;

      return (pText->dsc_ttype == ttype_binary);
}
#else
// 11 Sept 2002, Nickolay Samofatov
// Used only in asserts, but let optimizer wipe it out.
// Outside DEV_BUILD the check is a stub that always succeeds.
int DLL_EXPORT INTL_data_or_binary(DSC * pText)
{
      return TRUE;
}
#endif


int DLL_EXPORT INTL_defined_type(TDBB tdbb, ISC_STATUS * status, SSHORT t_type)
{
/**************************************
 *
 *      I N T L _ d e f i n e d _ t y p e
 *
 **************************************
 *
 * Functional description
 *      Is (t_type) a known text type?
 *
 *      The status vector, when supplied, is reset to empty
 *      before the lookup so that only errors from this call
 *      are reported.
 *
 * Return:
 *      FALSE   type is not defined.
 *      TRUE    type is defined
 *      status  set to gds_status codes to describe any error.
 *
 * Note:
 *      Due to cleanup that must happen in DFW, this routine
 *      must return, and not call ERR directly.  That is why
 *      the lookup is given a NULL error routine.
 *
 **************************************/
      SET_TDBB(tdbb);

      if (status)
            status[0] = gds_arg_end;

      TextType* const texttype = INTL_texttype_lookup(tdbb, t_type, NULL, status);

      return (texttype != NULL) ? TRUE : FALSE;
}


UCS2_CHAR DLL_EXPORT INTL_getch(TDBB tdbb,
                                TextType* * obj,
                                SSHORT t_type,
                                UCHAR ** ptr,
                                USHORT * count)
{
/**************************************
 *
 *      I N T L _ g e t c h
 *
 **************************************
 *
 * Functional description
 *      Get next character from a buffer.
 *
 *      (*obj) caches the text type object between calls; it is
 *      looked up from (t_type) the first time, when it is NULL.
 *      On success (*ptr) is advanced and (*count) reduced by the
 *      number of bytes the character occupied.
 *
 * RETURNS:
 *      The character, or 0 when mbtowc reports -1; in that
 *      case (*ptr) and (*count) are left untouched.
 *
 **************************************/
      SET_TDBB(tdbb);

      assert(obj);
      assert(ptr);

      if (!*obj) {
            *obj = INTL_texttype_lookup(tdbb, t_type, (FPTR_VOID) ERR_post, NULL);
            assert(*obj);
      }

      UCS2_CHAR wide_char;
      const SSHORT consumed = (*obj)->mbtowc(&wide_char, *ptr, *count);
      if (consumed == -1)
            return 0;

      *ptr += consumed;
      *count -= consumed;

      return wide_char;
}


void DLL_EXPORT INTL_init(TDBB tdbb)
{
/**************************************
 *
 *      I N T L _ i n i t
 *
 **************************************
 *
 * Functional description
 *      Currently a no-op: the function body contains no
 *      executable statements and (tdbb) is not used.
 *
 **************************************/
      /* The disabled code below used to pre-allocate the
         dbb_text_objects and dbb_charsets vectors (25 entries each)
         from the database's permanent pool. */
      /*DBB dbb;
      VEC vector;

      SET_TDBB(tdbb);
      dbb = tdbb->tdbb_database;
      CHECK_DBB(dbb);

      if (!(vector = dbb->dbb_text_objects)) {
            vector = dbb->dbb_text_objects = vec::newVector(*dbb->dbb_permanent, 25);
      }
      if (!(vector = dbb->dbb_charsets)) {
            vector = dbb->dbb_charsets = vec::newVector(*dbb->dbb_permanent, 25);
      }*/
}


USHORT DLL_EXPORT INTL_key_length(TDBB tdbb, USHORT idxType, USHORT iLength)
{
/**************************************
 *
 *      I N T L _ k e y _ l e n g t h
 *
 **************************************
 *
 * Functional description
 *      Given an index type, and a maximum length (iLength)
 *      return the length of the byte string key descriptor to
 *      use when collating text of this type.
 *
 *      Internal text types use (iLength) unchanged; every other
 *      text type is asked through its text type object.  The
 *      result is then capped at MAX_KEY and, after that, raised
 *      to at least (iLength).
 *
 **************************************/
      SET_TDBB(tdbb);


      assert(idxType >= idx_first_intl_string);

      const SSHORT ttype = INTL_INDEX_TO_TEXT(idxType);

      /* Start from the caller's length; only non-internal text types
         compute their own key length. */
      USHORT key_length = iLength;
      if (ttype < 0 || ttype > ttype_last_internal) {
            TextType* const texttype =
                  INTL_texttype_lookup(tdbb, ttype, (FPTR_VOID) ERR_post, NULL);
            key_length = texttype->key_length(iLength);
      }

/* Validity checks on the computed key_length */

      if (key_length > MAX_KEY)
            key_length = MAX_KEY;

      if (key_length < iLength)
            key_length = iLength;

      return (key_length);
}

static CharSetContainer *internal_charset_container_lookup(TDBB tdbb, SSHORT parm1, ISC_STATUS * status)
{
/**************************************
 *
 *      i n t e r n a l _ c h a r s e t _ c o n t a i n e r _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *
 *      Find the container of the character set that text type
 *      parm1 belongs to (CS_dynamic resolves to the attachment's
 *      character set).
 *
 *      The dbb_charsets vector hanging off the dbb is consulted
 *      first and grown when the id is beyond its end.  On a miss
 *      an allocation function is searched for (built-in, then
 *      plugins, then old-format plugins), the character set is
 *      created from the permanent pool and the new container is
 *      cached in the vector.
 *
 *      The status argument is not used here.
 *
 * Returns:
 *      container       - found in the cache or newly created
 *      NULL            - no allocation function knows the
 *                        character set, or allocation failed
 *
 **************************************/
      SET_TDBB(tdbb);
      DBB dbb = tdbb->tdbb_database;

      USHORT charset_id = TTYPE_TO_CHARSET(parm1);
      if (charset_id == CS_dynamic)
            charset_id = tdbb->tdbb_attachment->att_charset;

      /* Fast path: the container is already cached */
      if (charset_id < dbb->dbb_charsets.size()) {
            CharSetContainer *cached = dbb->dbb_charsets[charset_id];
            if (cached)
                  return cached;
      }
      else {
            dbb->dbb_charsets.resize(charset_id + 10);
      }

      /* Slow path: locate something able to build the character set */
      CharSetAllocFunc allocFunc = INTL_charset_alloc_func(charset_id);
      if (!allocFunc) {
            allocFunc = (CharSetAllocFunc)
                  search_out_alloc_func("FB_CharSet_lookup", charset_id, 0);
      }
      if (!allocFunc) {
            allocFunc = (CharSetAllocFunc)
                  intl_back_compat_alloc_func_lookup(type_charset, charset_id, 0);
      }
      if (!allocFunc)
            return NULL;

      CharSet *newCs = (*allocFunc)(*dbb->dbb_permanent, charset_id, 0);
      if (!newCs)
            return NULL;

      CharSetContainer *container =
            FB_NEW(*dbb->dbb_permanent) CharSetContainer(*dbb->dbb_permanent, newCs);
      if (!container)
      {
            delete newCs;
            return NULL;
      }

      dbb->dbb_charsets[charset_id] = container;

      assert(container != NULL);
      return container;
}

CharSet *DLL_EXPORT INTL_charset_lookup(TDBB tdbb, SSHORT parm1, ISC_STATUS * status)
{
/**************************************
 *
 *      I N T L _ c h a r s e t _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *
 *      Lookup a character set descriptor.
 *
 *      All the work is done by
 *      internal_charset_container_lookup(); this routine only
 *      unwraps the character set held by the container.
 *
 * Returns:
 *      *charset        - if the container was found or created
 *      NULL            - if the container lookup returned NULL
 *
 **************************************/
      CharSetContainer *container =
            internal_charset_container_lookup(tdbb, parm1, status);

      if (container)
            return container->getCharSet();

      return NULL;
}


TextType *DLL_EXPORT INTL_texttype_lookup(TDBB tdbb,
                                          SSHORT parm1,
                                          FPTR_VOID err,
                                          ISC_STATUS * status)
{
/**************************************
 *
 *      I N T L _ t e x t t y p e _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *
 *      Lookup the texttype (collation) object for text type
 *      parm1; ttype_dynamic is mapped to the attachment's
 *      character set first.
 *
 *      The container of the owning character set is fetched,
 *      then asked for the collation.  When the collation is not
 *      there yet, an allocation function is searched for
 *      (built-in, then plugins, then old-format plugins), the
 *      object is created from the permanent pool and stored in
 *      the container.
 *
 *      The err argument is not used in this routine.
 *
 * Returns:
 *      *object         - found or newly created
 *      NULL            - character set or collation unknown, or
 *                        the allocation function returned NULL
 *
 **************************************/
      SET_TDBB(tdbb);
      DBB dbb = tdbb->tdbb_database;

      if (parm1 == ttype_dynamic)
            parm1 = MAP_CHARSET_TO_TTYPE(tdbb->tdbb_attachment->att_charset);

      CharSetContainer *container =
            internal_charset_container_lookup(tdbb, parm1, status);
      if (!container)
            return NULL;

      USHORT collation_id = TTYPE_TO_COLLATION(parm1);

      TextType *text_type = container->collation(collation_id);
      if (!text_type)
      {
            /* Not loaded yet: find a function able to create it */
            TextTypeAllocFunc allocFunc = INTL_texttype_alloc_func(parm1);
            if (!allocFunc) {
                  allocFunc = (TextTypeAllocFunc)
                        search_out_alloc_func("FB_texttype_lookup", parm1, 0);
            }
            if (!allocFunc) {
                  allocFunc = (TextTypeAllocFunc)
                        intl_back_compat_alloc_func_lookup(type_texttype, parm1, 0);
            }
            if (!allocFunc)
                  return NULL;

            text_type = (*allocFunc)(*dbb->dbb_permanent, parm1, 0);
            if (!text_type)
                  return NULL;

            container->setCollation(text_type, collation_id);
      }

      assert(text_type != NULL);
      return text_type;
}

void DLL_EXPORT INTL_pad_spaces(TDBB tdbb, DSC * type, UCHAR * string, USHORT length)
{
/**************************************
 *
 *      I N T L _ p a d _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *      Fill length bytes at string with the space character of
 *      the character set that the text descriptor type uses.
 *
 **************************************/
      SET_TDBB(tdbb);

      assert(type != NULL);
      assert(IS_TEXT(type));
      assert(string != NULL);

      /* Resolve the descriptor's text type to its character set,
         then let the static helper do the actual filling */
      const USHORT pad_charset = INTL_charset(tdbb, type->dsc_ttype, NULL);

      pad_spaces(tdbb, pad_charset, string, length);
}


USHORT DLL_EXPORT INTL_string_to_key(
                                                       TDBB tdbb,
                                                       USHORT idxType,
                                                       DSC * pString,
                                                       DSC * pByte, USHORT partial)
{
/**************************************
 *
 *      I N T L _ s t r i n g _ t o _ k e y
 *
 **************************************
 *
 * Functional description
 *      Given an input string, convert it to a byte string
 *      that will collate naturally (byte order).
 *
 *      idxType selects the text type and the pad character:
 *      idx_string, idx_byte_array and idx_metadata map to
 *      ttype_none, ttype_binary and ttype_metadata; any other
 *      value is translated with INTL_INDEX_TO_TEXT.
 *
 *      For ttype_metadata, ttype_binary, ttype_ascii and
 *      ttype_none the bytes are copied verbatim into
 *      pByte->dsc_address -- never more than pByte->dsc_length
 *      of them -- and trailing pad characters are stripped.
 *      Every other text type is handed to the string_to_key()
 *      method of its collation object, together with the
 *      partial flag.
 *
 *      Return the length of the resulting byte string.
 *
 **************************************/
      USHORT outlen;
      UCHAR buffer[MAX_KEY];
      UCHAR pad_char;
      TextType* obj;
      SSHORT ttype;

      SET_TDBB(tdbb);


      assert(idxType >= idx_first_intl_string || idxType == idx_string
               || idxType == idx_byte_array || idxType == idx_metadata);
      assert(pString != NULL);
      assert(pByte != NULL);
      assert(pString->dsc_address != NULL);
      assert(pByte->dsc_address != NULL);
      assert(pByte->dsc_dtype == dtype_text);

      switch (idxType) {
      case idx_string:
            pad_char = ' ';
            ttype = ttype_none;
            break;
      case idx_byte_array:
            pad_char = 0;
            ttype = ttype_binary;
            break;
      case idx_metadata:
            pad_char = ' ';
            ttype = ttype_metadata;
            break;
      default:
            pad_char = 0;
            ttype = INTL_INDEX_TO_TEXT(idxType);
            break;
      }

/* Make a string into the proper type of text */

      const char* src;
      USHORT len =
            CVT_make_string(pString, ttype, &src,
                                    reinterpret_cast<vary*>(buffer), sizeof(buffer),
                                    (FPTR_VOID) ERR_post);

      char* dest = reinterpret_cast<char*>(pByte->dsc_address);
      switch (ttype) {
      case ttype_metadata:
      case ttype_binary:
      case ttype_ascii:
      case ttype_none:
            /* The destination holds only pByte->dsc_length bytes; clamp
             * the copy so that an over-long source cannot write past the
             * end of the key buffer.
             */
            if (len > pByte->dsc_length)
                  len = pByte->dsc_length;
            while (len--)
                  *dest++ = *src++;
            /* strip off ending pad characters */
            while (dest > (const char*)pByte->dsc_address)
                  if (*(dest - 1) == pad_char)
                        dest--;
                  else
                        break;
            outlen = (dest - (const char*)pByte->dsc_address);
            break;
      default:
            /* The collation routine is told the output size explicitly */
            obj = INTL_texttype_lookup(tdbb, ttype, (FPTR_VOID) ERR_post, NULL);
            outlen = obj->string_to_key(len,
                                                      reinterpret_cast<unsigned char*>(const_cast<char*>(src)),
                                                      pByte->dsc_length,
                                                      reinterpret_cast<unsigned char*>(dest),
                                                      partial);
            break;
      }

      return (outlen);
}


int DLL_EXPORT INTL_str_to_upper(TDBB tdbb, DSC * pString)
{
/**************************************
 *
 *      I N T L _ s t r _ t o _ u p p e r
 *
 **************************************
 *
 * Functional description
 *      Uppercase, in place, the text returned by
 *      CVT_get_string_ptr() for pString.
 *
 *      Binary text is left alone; ttype_none, ttype_ascii and
 *      ttype_unicode_fss are converted with UPPER7; any other
 *      text type is converted by its collation object.
 *
 *      Always returns 0 -- callers do not check the result.
 *
 **************************************/
      SET_TDBB(tdbb);

      assert(pString != NULL);
      assert(pString->dsc_address != NULL);

      UCHAR buffer[MAX_KEY];
      USHORT ttype;
      UCHAR *text;

      USHORT len =
            CVT_get_string_ptr(pString, &ttype, &text,
                                       reinterpret_cast < vary * >(buffer),
                                       sizeof(buffer), (FPTR_VOID) ERR_post);

      if (ttype == ttype_binary) {
            /* cannot uppercase binary strings */
      }
      else if (ttype == ttype_none ||
                   ttype == ttype_ascii ||
                   ttype == ttype_unicode_fss)
      {
            UCHAR *const stop = text + len;
            for (UCHAR *p = text; p != stop; ++p)
                  *p = UPPER7(*p);
      }
      else {
            TextType* collation =
                  INTL_texttype_lookup(tdbb, ttype, (FPTR_VOID) ERR_post, NULL);
            collation->str_to_upper(len, text, len, text);
      }

      /* The return value exists only to keep compilers quiet */
      return 0;
}


UCHAR DLL_EXPORT INTL_upper(TDBB tdbb, USHORT ttype, UCHAR ch)
{
/**************************************
 *
 *      I N T L _ u p p e r
 *
 **************************************
 *
 * Functional description
 *      Uppercase one character of text type ttype.
 *
 *      Binary characters are returned untouched; ttype_none,
 *      ttype_ascii and ttype_unicode_fss use UPPER7; all other
 *      text types ask the collation object.
 *
 **************************************/
      SET_TDBB(tdbb);

      /* cannot uppercase binary strings */
      if (ttype == ttype_binary)
            return ch;

      if (ttype == ttype_none ||
            ttype == ttype_ascii ||
            ttype == ttype_unicode_fss)
      {
            return (UPPER7(ch));
      }

      TextType* collation =
            INTL_texttype_lookup(tdbb, ttype, (FPTR_VOID) ERR_post, NULL);
      return collation->to_upper(ch);
}


static BOOLEAN all_spaces(TDBB tdbb,
                          CHARSET_ID charset,
                          BYTE * ptr, USHORT len, USHORT offset)
{
/**************************************
 *
 *      a l l _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *      Tell whether the bytes ptr[offset] up to (not including)
 *      ptr[len] consist only of space characters, as defined by
 *      (charset).
 *      The binary representation of a space depends on the
 *      character set (0x20 for Ascii, 0x0020 for Unicode, 0x20
 *      for SJIS -- but 0x??20 is NOT a space).
 *
 *      offset is assumed to be the first byte not consumed by a
 *      conversion, and to sit on a character boundary.
 *
 **************************************/
      SET_TDBB(tdbb);

      assert(ptr != NULL);

      CharSet* cs = INTL_charset_lookup(tdbb, charset, NULL);

      assert(cs != NULL);

      BYTE *cursor = &ptr[offset];
      BYTE *const limit = &ptr[len];

      /* Single-octet character sets are optimized here */
      if (cs->getSpaceLength() == 1) {
            for (; cursor < limit; ++cursor) {
                  if (*cursor != *cs->getSpace())
                        return (FALSE);
            }
            return (TRUE);
      }

      /* Multi-octet space: match the pattern repeatedly */
      const unsigned char *space = cs->getSpace();
      const unsigned char *const space_end = &space[cs->getSpaceLength()];

      while (cursor < limit) {
            for (space = cs->getSpace();
                   cursor < limit && space < space_end;
                   ++cursor, ++space)
            {
                  if (*cursor != *space)
                        return (FALSE);
            }
      }
      return (TRUE);
}
#ifdef NOT_USED_OR_REPLACED
static USHORT internal_keylength(TextType* obj, USHORT iLength)
{
/**************************************
 *
 *      i n t e r n a l _ k e y l e n g t h
 *
 **************************************
 *
 * Functional description
 *      Key length routine that returns iLength unchanged.
 *      The obj argument is not used.
 *      Only compiled when NOT_USED_OR_REPLACED is defined.
 *
 **************************************/

      return (iLength);
}

static USHORT nc_to_wc(CsConvert* obj, UCS2_CHAR * pWide, USHORT nWide, /* byte count */
                                 UCHAR * pNarrow, USHORT nNarrow, /* byte count */
                                 SSHORT * err_code, USHORT * err_position)
{
/**************************************
 *
 *      n c _ t o _ w c
 *
 **************************************
 *
 * Functional description
 *   Copies narrow chars buffer into wide chars buffer for charset NONE
 *
 *   Each input byte becomes one UCS2_CHAR.  Copying stops when
 *   fewer than two bytes of output space remain or when the
 *   input is used up; leftover input is reported through
 *   *err_code as CS_TRUNCATION_ERROR.  *err_position receives
 *   the number of input bytes consumed.
 *
 *   With pWide == NULL nothing is copied and the size needed
 *   for the output (in bytes) is returned.
 *
 *   Returns the number of bytes written to pWide.
 *
 **************************************/
      assert(obj != NULL);
      assert((pNarrow != NULL) || (pWide == NULL));
      assert(err_code != NULL);
      assert(err_position != NULL);

      *err_code = 0;

      /* Length estimate requested */
      if (pWide == NULL)
            return (sizeof(UCS2_CHAR) * nNarrow);     /* all cases */

      USHORT converted = 0;               /* characters copied so far */
      for (; nWide > 1 && nNarrow; nWide -= 2, --nNarrow) {
            /* YYY - Byte order issues here */
            pWide[converted] = (UCS2_CHAR) pNarrow[converted];
            ++converted;
      }

      if (!*err_code && nNarrow) {
            *err_code = CS_TRUNCATION_ERROR;
      }
      *err_position = converted * sizeof(*pNarrow);

      return (converted * sizeof(*pWide));
}
#endif

static void pad_spaces(TDBB tdbb, CHARSET_ID charset, BYTE * ptr, USHORT len)
{                                               /* byte count */
/**************************************
 *
 *      p a d  _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *      Fill len bytes starting at ptr with the space character
 *      defined by the character set (charset).
 *
 **************************************/
      SET_TDBB(tdbb);

      assert(ptr != NULL);

      CharSet* cs = INTL_charset_lookup(tdbb, charset, NULL);

      assert(cs != NULL);

      BYTE *const limit = &ptr[len];

      /* Single-octet character sets are optimized here */
      if (cs->getSpaceLength() == 1) {
            for (; ptr < limit; ++ptr)
                  *ptr = *cs->getSpace();
            return;
      }

      /* Multi-octet space: lay the pattern down repeatedly */
      const unsigned char *space = cs->getSpace();
      const unsigned char *const space_end = &space[cs->getSpaceLength()];

      while (ptr < limit) {
            for (space = cs->getSpace();
                   ptr < limit && space < space_end;
                   ++ptr, ++space)
            {
                  *ptr = *space;
            }
            /* The buffer must not end in the middle of a
             * space character
             */
            assert(!(ptr == limit) || (space == space_end));
      }
}

#ifdef NOT_USED_OR_REPLACED
#ifdef DEV_BUILD

/*
 *      Utility routines designed to be called from the debugger to
 *      print buffers, pointers, etc. which may contain text that
 *      the debugger doesn't consider visible.
 */
static void dump_hex(UCHAR * p, USHORT len)
{
/**************************************
 *
 *      d u m p _ h e x
 *
 **************************************
 *
 * Functional description
 *      Print len bytes starting at p as two-digit uppercase
 *      hex values separated by blanks, then a newline.
 *
 *************************************/

      for (UCHAR *const stop = p + len; p != stop; ++p) {
            ib_printf("%02X ", *p);
      }
      ib_printf("\n");
}


static void dump_latin(UCHAR * p, USHORT len)
{
/**************************************
 *
 *      d u m p _ l a t i n
 *
 **************************************
 *
 * Functional description
 *      Print len bytes starting at p, then a newline.
 *      Bytes accepted by isprintable() are printed as
 *      characters; every other byte is printed as a backslash
 *      followed by "0x" and its two-digit uppercase hex value.
 *
 *************************************/

      while (len--) {
            if (isprintable(*p)) {
                  ib_printf("%c", *p++);
            }
            else {
                  /* The backslash has to be escaped: "\0x%02X" starts
                   * with the octal escape \0, i.e. a NUL, which ends the
                   * format string before anything gets printed.
                   */
                  ib_printf("\\0x%02X", *p++);
            }
      }
      ib_printf("\n");
}
#endif
#endif

unsigned short TextTypeNC::to_wc(UCS2_CHAR *pWideUC,
                                 unsigned short nWide,
                                 unsigned char *pNarrow,
                                 unsigned short nNarrow,
                                 short *err_code,
                                 unsigned short *err_position)
/**************************************
 *
 *      TextTypeNC::to_wc
 *
 **************************************
 *
 * Functional description
 *      Widen a narrow character buffer: each input byte becomes
 *      one UCS2_CHAR.  nWide and nNarrow are byte counts.
 *
 *      Copying stops when fewer than two bytes of output space
 *      remain or when the input is used up; leftover input is
 *      reported through *err_code as CS_TRUNCATION_ERROR.
 *      *err_position receives the number of input bytes consumed.
 *
 *      With pWideUC == NULL nothing is copied and the size
 *      needed for the output (in bytes) is returned.
 *
 *      Returns the number of bytes written to pWideUC.
 *
 **************************************/
{
      assert((pNarrow != NULL) || (pWideUC == NULL));
      assert(err_code != NULL);
      assert(err_position != NULL);

      *err_code = 0;

      /* Length estimate requested */
      if (pWideUC == NULL)
            return (sizeof(UCS2_CHAR) * nNarrow);     /* all cases */

      unsigned short converted = 0;       /* characters copied so far */
      for (; nWide > 1 && nNarrow; nWide -= 2, --nNarrow) {
            /* YYY - Byte order issues here */
            pWideUC[converted] = (UCS2_CHAR) pNarrow[converted];
            ++converted;
      }

      if (!*err_code && nNarrow) {
            *err_code = CS_TRUNCATION_ERROR;
      }
      *err_position = converted * sizeof(*pNarrow);

      return (converted * sizeof(*pWideUC));
}

/* Narrow-character text type: forwards its arguments unchanged to
 * EVL_nc_contains(), inserting this object as the second argument,
 * and returns that routine's result.
 */
unsigned short TextTypeNC::contains(TDBB a, unsigned char *b,
                                                      unsigned short c,
                                                      unsigned char *d,
                                                      unsigned short e)
{
            return EVL_nc_contains(a,this,b,c,d,e);
}

/* Narrow-character text type: forwards its arguments unchanged to
 * EVL_nc_like(), inserting this object as the second argument,
 * and returns that routine's result.
 */
unsigned short TextTypeNC::like(TDBB a, unsigned char *b,
                                                short c,
                                                unsigned char *d,
                                                short e,
                                                short f)
{
      return EVL_nc_like(a,this,b,c,d,e,f);
}

/* Narrow-character text type: forwards its arguments unchanged to
 * EVL_nc_matches(), inserting this object as the second argument,
 * and returns that routine's result.
 */
unsigned short TextTypeNC::matches(TDBB a, unsigned char *b, short c,
                                                   unsigned char *d, short e)
{
      return EVL_nc_matches(a,this,b,c,d,e);
}

/* Narrow-character text type: forwards its arguments unchanged to
 * EVL_nc_sleuth_check(), inserting this object as the second
 * argument, and returns that routine's result.
 */
unsigned short TextTypeNC::sleuth_check(TDBB a, unsigned short b,
                                                            unsigned char *c,
                                                            unsigned short d,
                                                            unsigned char *e,
                                                            unsigned short f)
{
      return EVL_nc_sleuth_check(a,this,b,c,d,e,f);
}

/* Narrow-character text type: forwards its arguments unchanged to
 * EVL_nc_sleuth_merge(), inserting this object as the second
 * argument, and returns that routine's result.
 */
unsigned short TextTypeNC::sleuth_merge(TDBB a, unsigned char *b,
                                                            unsigned short c,
                                                            unsigned char *d,
                                                            unsigned short e,
                                                            unsigned char *f,
                                                            unsigned short g)
{
      return EVL_nc_sleuth_merge(a,this,b,c,d,e,f,g);
}

unsigned short TextTypeNC::mbtowc(UCS2_CHAR *wc, unsigned char *ptr, unsigned short count)
/**************************************
 *
 *      TextTypeNC::mbtowc
 *
 **************************************
 *
 * Functional description
 *      Get the next character from the multibyte
 *      input stream.
 *      Narrow character version: one byte is one character.
 *      When wc is not NULL it receives the character, or 0 if
 *      the input is empty.
 *  Returns:
 *      Count of bytes consumed from the input stream (1), or
 *      (unsigned short)-1 when count is 0.
 *
 **************************************/
{
      assert(ptr);

      /* Input exhausted */
      if (count < 1) {
            if (wc)
                  *wc = 0;
            return (unsigned short)-1;                /* No more characters */
      }

      if (wc)
            *wc = *ptr;
      return 1;
}

/* Multibyte text type: forwards its arguments unchanged to
 * EVL_mb_contains(), inserting this object as the second argument,
 * and returns that routine's result.
 */
unsigned short TextTypeMB::contains(TDBB a, unsigned char *b,
                                                      unsigned short c,
                                                      unsigned char *d,
                                                      unsigned short e)
{
      return EVL_mb_contains(a,this,b,c,d,e);
}

/* Multibyte text type: forwards its arguments unchanged to
 * EVL_mb_like(), inserting this object as the second argument,
 * and returns that routine's result.
 */
unsigned short TextTypeMB::like(TDBB a, unsigned char *b,
                                                short c,
                                                unsigned char *d,
                                                short e,
                                                short f)
{
      return EVL_mb_like(a,this,b,c,d,e,f);
}

/* Multibyte text type: forwards its arguments unchanged to
 * EVL_mb_matches(), inserting this object as the second argument,
 * and returns that routine's result.
 */
unsigned short TextTypeMB::matches(TDBB a, unsigned char *b, short c,
                                                   unsigned char *d, short e)
{
      return EVL_mb_matches(a,this,b,c,d,e);
}

/* Multibyte text type: forwards its arguments unchanged to
 * EVL_mb_sleuth_check(), inserting this object as the second
 * argument, and returns that routine's result.
 */
unsigned short TextTypeMB::sleuth_check(TDBB a, unsigned short b,
                                                            unsigned char *c,
                                                            unsigned short d,
                                                            unsigned char *e,
                                                            unsigned short f)
{
      return EVL_mb_sleuth_check(a,this,b,c,d,e,f);
}

/* Multibyte text type: forwards its arguments unchanged to
 * EVL_mb_sleuth_merge(), inserting this object as the second
 * argument, and returns that routine's result.
 */
unsigned short TextTypeMB::sleuth_merge(TDBB a, unsigned char *b,
                                                            unsigned short c,
                                                            unsigned char *d,
                                                            unsigned short e,
                                                            unsigned char *f,
                                                            unsigned short g)
{
      return EVL_mb_sleuth_merge(a,this,b,c,d,e,f,g);
}

unsigned short TextTypeMB::mbtowc(UCS2_CHAR *wc, unsigned char *ptr, unsigned short count)
{
/**************************************
 *
 *      TextTypeMB::mbtowc
 *
 **************************************
 *
 * Functional description
 *      Get the next character from the multibyte
 *      input stream.
 *      Multibyte version: the next two bytes are read as one
 *      UCS2_CHAR.  When wc is not NULL it receives that value,
 *      or 0 if fewer than two bytes are available.
 *  Returns:
 *      Count of bytes consumed from the input stream (2), or
 *      (unsigned short)-1 when count is less than 2.
 *
 **************************************/

      assert(ptr);

      /* Not enough input left for a whole character */
      if (count < 2) {
            if (wc)
                  *wc = 0;
            return (unsigned short)-1;                /* No more characters */
      }

      if (wc)
            *wc = *(UCS2_CHAR *) ptr;
      return 2;
}

unsigned short TextTypeWC::to_wc(UCS2_CHAR *pDestUC,
                                 unsigned short nDest,
                                 unsigned char *pSrcUC,
                                 unsigned short nSrc,
                                 short *err_code,
                                 unsigned short *err_position)
{
/**************************************
 *
 *      TextTypeWC::to_wc
 *
 **************************************
 *
 * Functional description
 *      Copy a buffer that already holds UCS2_CHAR values.
 *      nDest and nSrc are byte counts.
 *
 *      Characters are copied while at least two bytes remain on
 *      both sides; leftover input is reported through *err_code
 *      as CS_TRUNCATION_ERROR.  *err_position receives the
 *      number of input bytes consumed.
 *
 *      With pDestUC == NULL nothing is copied and nSrc is
 *      returned as the length estimate.
 *
 *      Returns the number of bytes written to pDestUC.
 *
 *************************************/
      UCS2_CHAR *const wide_src = (UCS2_CHAR*) pSrcUC;

      assert((wide_src != NULL) || (pDestUC == NULL));
      assert(err_code != NULL);
      assert(err_position != NULL);

      *err_code = 0;

      /* length estimate needed? */
      if (pDestUC == NULL)
            return (nSrc);

      unsigned short copied = 0;          /* characters copied so far */
      for (; nDest > 1 && nSrc > 1; nDest -= 2, nSrc -= 2) {
            pDestUC[copied] = wide_src[copied];
            ++copied;
      }

      if (!*err_code && nSrc) {
            *err_code = CS_TRUNCATION_ERROR;
      }
      *err_position = copied * sizeof(*wide_src);

      return (copied * sizeof(*pDestUC));
}

/* Wide-character text type: casts both byte pointers to UCS2_CHAR*
 * and forwards to EVL_wc_contains(), inserting this object as the
 * second argument; returns that routine's result.
 */
unsigned short TextTypeWC::contains(TDBB a, unsigned char *b,
                                                      unsigned short c,
                                                      unsigned char *d,
                                                      unsigned short e)
{
      return EVL_wc_contains(a,this,(UCS2_CHAR*)b,c,(UCS2_CHAR*)d,e);
}

/* Wide-character text type: casts both byte pointers to UCS2_CHAR*
 * and forwards to EVL_wc_like(), inserting this object as the
 * second argument; returns that routine's result.
 */
unsigned short TextTypeWC::like(TDBB a, unsigned char *b,
                                                short c,
                                                unsigned char *d,
                                                short e,
                                                short f)
{
      return EVL_wc_like(a,this,(UCS2_CHAR*)b,c,(UCS2_CHAR*)d,e,f);
}

/* Wide-character text type: casts both byte pointers to UCS2_CHAR*
 * and forwards to EVL_wc_matches(), inserting this object as the
 * second argument; returns that routine's result.
 */
unsigned short TextTypeWC::matches(TDBB a, unsigned char *b, short c,
                                                   unsigned char *d, short e)
{
      return EVL_wc_matches(a,this,(UCS2_CHAR*)b,c,(UCS2_CHAR*)d,e);
}

/* Wide-character text type: casts both byte pointers to UCS2_CHAR*
 * and forwards to EVL_wc_sleuth_check(), inserting this object as
 * the second argument; returns that routine's result.
 */
unsigned short TextTypeWC::sleuth_check(TDBB a, unsigned short b,
                                                            unsigned char *c,
                                                            unsigned short d,
                                                            unsigned char *e,
                                                            unsigned short f)
{
      return EVL_wc_sleuth_check(a,this,b,(UCS2_CHAR*)c,d,(UCS2_CHAR*)e,f);
}

/* Wide-character text type: casts the three byte pointers to
 * UCS2_CHAR* and forwards to EVL_wc_sleuth_merge(), inserting this
 * object as the second argument; returns that routine's result.
 */
unsigned short TextTypeWC::sleuth_merge(TDBB a, unsigned char *b,
                                                            unsigned short c,
                                                            unsigned char *d,
                                                            unsigned short e,
                                                            unsigned char *f,
                                                            unsigned short g)
{
      return EVL_wc_sleuth_merge(a,this,(UCS2_CHAR*)b,c,(UCS2_CHAR*)d,e,(UCS2_CHAR*)f,g);
}

unsigned short TextTypeWC::mbtowc(UCS2_CHAR *wc, unsigned char *ptr, unsigned short count)
{
/**************************************
 *
 *      TextTypeWC::mbtowc
 *
 **************************************
 *
 * Functional description
 *      Get the next character from the multibyte
 *      input stream.
 *      Wide character version: the next two bytes are read as
 *      one UCS2_CHAR.  When wc is not NULL it receives that
 *      value, or 0 if fewer than two bytes are available.
 *  Returns:
 *      Count of bytes consumed from the input stream (2), or
 *      (unsigned short)-1 when count is less than 2.
 *
 **************************************/

      assert(ptr);

      /* Not enough input left for a whole character */
      if (count < 2) {
            if (wc)
                  *wc = 0;
            return (unsigned short)-1;                /* No more characters */
      }

      if (wc)
            *wc = *(UCS2_CHAR *) ptr;
      return 2;
}


//===============================================================================
//===============================================================================
//===============================================================================
// Code to handle loading international charset plugins
// in the new c++ OO format.

// Directory name handed to intlPlugins.addSearchPath() by search_out_alloc_func()
static const char *INTL_PLUGIN_DIR = "intl";
// Plugin manager that search_out_alloc_func() iterates over
static PluginManager intlPlugins;
// Set to true by search_out_alloc_func() once intlPlugins has been populated
static bool loaded = false;

// Ask each loaded plugin module, in iteration order, for the entry
// point named sym and call it with (p1, p2).  The first non-null
// answer is returned; 0 is returned when no module supplies one.
// The plugin modules are loaded on the first call only.
static void* search_out_alloc_func(const char *sym, CHARSET_ID p1, CHARSET_ID p2)
{
      typedef void* (*lookupFuncType)(CHARSET_ID,CHARSET_ID);

      Firebird::string entryPoint(sym);

      // One-time setup of the plugin manager
      if (!loaded)
      {
            intlPlugins.addSearchPath(INTL_PLUGIN_DIR);
            intlPlugins.addIgnoreModule(INTL_MODULE1);
            intlPlugins.addIgnoreModule(INTL_MODULE2);
            intlPlugins.loadAllPlugins();
            loaded = true;
      }

      void* found = 0;
      PluginManager::iterator itr = intlPlugins.begin();
      while (found == 0 && itr != intlPlugins.end())
      {
            lookupFuncType lookupFunc =
                  (lookupFuncType)(*itr).lookupSymbol(entryPoint);
            if (lookupFunc)
                  found = (*lookupFunc)(p1,p2);
            ++itr;
      }
      return found;
}

//===============================================================================
//===============================================================================
//===============================================================================
// This code handles backwards compatibility with the old international
// character set plugin format.

// We need all the structure definitions from the old interface
#define INTL_ENGINE_INTERNAL
#include "../jrd/intlobj.h"

// Storage for the loadable modules in the old plugin format.
// NOTE(review): neither variable is referenced in this part of the file;
// presumably intl_back_compat_alloc_func_lookup() fills intlBCPlugins once
// and records that in bcLoaded -- confirm against that function.
static PluginManager intlBCPlugins;
static bool bcLoaded = false;

class CsConvert_BC : public CsConvert
{
public:
      CsConvert_BC(struct csconvert *csv, bool deleteMemory) :
            CsConvert(
                  csv->csconvert_id,
                  (const char*)csv->csconvert_name,
                  csv->csconvert_from,
                  csv->csconvert_to),
            cnvt(csv),
            deleteOnDestruct(deleteMemory)
      {}
      virtual ~CsConvert_BC() { if (deleteOnDestruct) delete cnvt; }

      unsigned short convert(unsigned char *a,
                                          unsigned short b,
                                          unsigned char *c,
                                          unsigned short d,
                                          short *e,
                                          unsigned short *f)
      {
            assert(cnvt != NULL);
            return (*(reinterpret_cast<USHORT (*)(struct csconvert*, UCHAR*,USHORT,
                              UCHAR*,USHORT,short*,USHORT*)>(cnvt->csconvert_convert)))
                                    (cnvt,a,b,c,d,e,f);
      }

private:
      struct csconvert *cnvt;
      bool deleteOnDestruct;
};

// Adapter presenting an old-format struct charset as a CharSet.
class CharSet_BC : public CharSet
{
public:
      // Initializes the CharSet base from the fields of csStruct (id, name,
      // min/max bytes per character, space length and space character) and
      // keeps the pointer in cs.
      CharSet_BC(MemoryPool &p, struct charset *csStruct) :
            CharSet(
                  csStruct->charset_id,
                  (const char*)csStruct->charset_name,
                  csStruct->charset_min_bytes_per_char,
                  csStruct->charset_max_bytes_per_char,
                  csStruct->charset_space_length,
                  (char*)csStruct->charset_space_character),
            cs(csStruct)
      {
            // Wrap the two converter structs embedded in *cs.  deleteMemory is
            // false, so the wrappers never delete the structs they point to.
            // NOTE(review): charset_to_unicode / charset_from_unicode are
            // presumably members inherited from CharSet -- confirm.
            charset_to_unicode = FB_NEW(p) CsConvert_BC(&cs->charset_to_unicode, false);
            charset_from_unicode = FB_NEW(p) CsConvert_BC(&cs->charset_from_unicode, false);
      }
      
      // Deletes the wrapped struct.
      ~CharSet_BC() { delete cs; }
private:
      struct charset *cs;     // wrapped old-format character set, deleted by the destructor
};

/*
 * Text type implementation that wraps an old-style `struct texttype`.
 *
 * T is the text type base class to derive from (BC_TextTypeAllocFunc
 * instantiates it with TextTypeNC, TextTypeWC and TextTypeMB).  The
 * descriptive fields of the struct are copied into T at construction.
 *
 * Every method forwards to the matching texttype_fn_* entry point of the
 * wrapped struct, passing the struct itself as the TEXTTYPE argument.
 * Two groups of methods exist:
 *   - key_length, string_to_key, compare, to_upper, to_lower, str_to_upper
 *     and to_wc assert that the entry point is present;
 *   - mbtowc, contains, like, matches, sleuth_check and sleuth_merge fall
 *     back to the implementation inherited from T when the struct does not
 *     supply an entry point.
 *
 * The wrapper deletes the struct in its destructor, the same way CharSet_BC
 * deletes its charset struct.
 */
template <class T>
class TextType_BC : public T
{
public:
      TextType_BC(struct texttype *textt) :
                  T(
                        textt->texttype_type,
                        (char*)textt->texttype_name,
                        textt->texttype_character_set,
                        textt->texttype_country,
                        textt->texttype_bytes_per_char),
                  tt(textt)
            {}

      // The struct is handed over by BC_TextTypeAllocFunc and referenced
      // nowhere else once construction succeeded, so release it here.
      ~TextType_BC() { delete tt; }

      unsigned short key_length(unsigned short a)
      {
            assert(tt);
            assert(tt->texttype_fn_key_length);
            return (*(reinterpret_cast<USHORT (*)(TEXTTYPE,USHORT)>
                              (tt->texttype_fn_key_length)))(tt,a);
      }

      unsigned short string_to_key(unsigned short a,
                                                      unsigned char *b,
                                                      unsigned short c,
                                                      unsigned char *d,
                                                      unsigned short e)
      {
            assert(tt);
            assert(tt->texttype_fn_string_to_key);
            return (*(reinterpret_cast
                  <USHORT(*)(TEXTTYPE,USHORT,UCHAR*,USHORT,UCHAR*,USHORT)>
                        (tt->texttype_fn_string_to_key)))
                              (tt,a,b,c,d,e);
      }

      short compare(unsigned short a,
                                      unsigned char *b,
                                      unsigned short c,
                                      unsigned char *d)
      {
            assert(tt);
            assert(tt->texttype_fn_compare);
            return (*(reinterpret_cast
                  <short (*)(TEXTTYPE,USHORT,UCHAR*,USHORT,UCHAR*)>
                        (tt->texttype_fn_compare)))(tt,a,b,c,d);
      }

      unsigned short to_upper(unsigned short a)
      {
            assert(tt);
            assert(tt->texttype_fn_to_upper);
            // Invoked with an unsigned return type, matching both this
            // method's return type and the to_lower() counterpart.
            return (*(reinterpret_cast
                  <USHORT (*)(TEXTTYPE,USHORT)>
                        (tt->texttype_fn_to_upper)))(tt,a);
      }

      unsigned short to_lower(unsigned short a)
      {
            assert(tt);
            assert(tt->texttype_fn_to_lower);
            return (*(reinterpret_cast
                  <USHORT (*)(TEXTTYPE,USHORT)>
                        (tt->texttype_fn_to_lower)))(tt,a);
      }

      short str_to_upper(unsigned short a,
                                    unsigned char *b,
                                    unsigned short c,
                                    unsigned char *d)
      {
            assert(tt);
            assert(tt->texttype_fn_str_to_upper);
            return (*(reinterpret_cast
                              <short (*)(TEXTTYPE,USHORT,UCHAR*,USHORT,UCHAR*)>
                                    (tt->texttype_fn_str_to_upper)))
                                          (tt,a,b,c,d);
      }

      unsigned short to_wc(UCS2_CHAR *a,
                                     unsigned short b,
                                     unsigned char *c,
                                     unsigned short d,
                                     short *e,
                                     unsigned short *f)
      {
            assert(tt);
            assert(tt->texttype_fn_to_wc);
            return (*(reinterpret_cast
                              <USHORT (*)(TEXTTYPE,UCS2_CHAR*,USHORT,UCHAR*,USHORT,short*,USHORT*)>
                                    (tt->texttype_fn_to_wc)))
                                          (tt,a,b,c,d,e,f);
      }

      unsigned short mbtowc(UCS2_CHAR *a, unsigned char *b, unsigned short c)
      {
            assert(tt);
            if (!tt->texttype_fn_mbtowc)
                  return T::mbtowc(a,b,c);
            return (*(reinterpret_cast<
                              USHORT (*)(TEXTTYPE, UCS2_CHAR*, UCHAR*, USHORT)>
                                    (tt->texttype_fn_mbtowc)))(tt,a,b,c);
      }

      // The pattern-matching entry points below take the thread context
      // first and the TEXTTYPE second.

      unsigned short contains(TDBB a, unsigned char *b,
                                                      unsigned short c,
                                                      unsigned char *d,
                                                      unsigned short e)
      {
            assert(tt);
            if (!tt->texttype_fn_contains)
                  return T::contains(a,b,c,d,e);
            return (*(reinterpret_cast<
                              USHORT (*)(TDBB,TEXTTYPE,UCHAR*,USHORT,UCHAR*,USHORT)>
                                    (tt->texttype_fn_contains)))
                                          (a,tt,b,c,d,e);
      }

      unsigned short like(TDBB tdbb, unsigned char *a,
                                            short b,
                                            unsigned char *c,
                                            short d,
                                            short e)
      {
            assert(tt);
            if (!tt->texttype_fn_like)
                  return T::like(tdbb,a,b,c,d,e);
            return (*(reinterpret_cast<
                              USHORT(*)(TDBB,TEXTTYPE,UCHAR*,short,UCHAR*,short,short)>
                                    (tt->texttype_fn_like)))(tdbb,tt,a,b,c,d,e);
      }

      unsigned short matches(TDBB tdbb, unsigned char *a, short b,
                                                   unsigned char *c, short d)
      {
            assert(tt);
            if (!tt->texttype_fn_matches)
                  return T::matches(tdbb,a,b,c,d);
            return (*(reinterpret_cast<
                              USHORT (*)(TDBB,TEXTTYPE,UCHAR*,short,UCHAR*,short)>
                                    (tt->texttype_fn_matches)))
                                          (tdbb,tt,a,b,c,d);
      }

      unsigned short sleuth_check(TDBB tdbb, unsigned short a,
                                                            unsigned char *b,
                                                            unsigned short c,
                                                            unsigned char *d,
                                                            unsigned short e)
      {
            assert(tt);
            if (!tt->texttype_fn_sleuth_check)
                  return T::sleuth_check(tdbb,a,b,c,d,e);
            return (*(reinterpret_cast<
                              USHORT(*)(TDBB,TEXTTYPE,USHORT,UCHAR*,USHORT,UCHAR*,USHORT)>
                                    (tt->texttype_fn_sleuth_check)))
                                          (tdbb,tt,a,b,c,d,e);
      }

      unsigned short sleuth_merge(TDBB tdbb, unsigned char *a,
                                                            unsigned short b,
                                                            unsigned char *c,
                                                            unsigned short d,
                                                            unsigned char *e,
                                                            unsigned short f)
      {
            assert(tt);
            if (!tt->texttype_fn_sleuth_merge)
                  return T::sleuth_merge(tdbb,a,b,c,d,e,f);
            return (*(reinterpret_cast<
                              USHORT(*)(TDBB,TEXTTYPE,UCHAR*,USHORT,UCHAR*,USHORT,UCHAR*,USHORT)>
                                    (tt->texttype_fn_sleuth_merge)))
                                          (tdbb,tt,a,b,c,d,e,f);
      }

private:
      struct texttype *tt;    // wrapped old-style text type, deleted in the destructor
};

static void* intl_back_compat_obj_init_lookup(
                                    USHORT type,
                                    SSHORT parm1,
                                    SSHORT parm2)
{
/**************************************
 *
 *      intl_back_compat_obj_init_lookup
 *
 **************************************
 *
 * Functional description
 *      Find the init function for the requested international
 *      object (text type, character set or converter) using the
 *      obsolete c/IB/FB 6.0 interface.
 *      Search algorithm is:
 *              Look in INTL_MODULE1 (or LD_lookup when INTL_BUILTIN)
 *              Look in INTL_MODULE2 (or LD2_lookup when INTL_BUILTIN)
 *              Look for a normal UDF entry with the expected
 *                  name and argument layout
 *              Give up.
 *
 * Parameters:
 *      type    - type_texttype, type_charset or type_csconvert;
 *                any other value is a BUGCHECK
 *      parm1   - first id handed to the lookup functions and
 *                used to build the UDF entry name
 *      parm2   - second id; only used in the UDF entry name for
 *                type_csconvert
 *
 * Returns:
 *      the init function, cast to void*, when one was found
 *      NULL when none of the three sources provides one
 *
 ***************************************/
      USHORT (*function)() = NULL;

      // The plugin search path only has to be registered once per process.
      if (!bcLoaded)
      {
            intlBCPlugins.addSearchPath(INTL_PLUGIN_DIR);
            bcLoaded = true;
      }

      PluginManager::Plugin intlMod1 = intlBCPlugins.findPlugin(INTL_MODULE1);
      PluginManager::Plugin intlMod2 = intlBCPlugins.findPlugin(INTL_MODULE2);

      USHORT(*lookup_fn) (USHORT, FPTR_SHORT *, SSHORT, SSHORT);

      INTL_TRACE(("INTL: looking for obj %d ttype %d\n", type, parm1));

#ifdef INTL_BUILTIN
      if (LD_lookup(type, &function, parm1, parm2) != 0)
            function = NULL;
      else
            return (void*)function;
#else
      /* Look for an InterBase supplied object to implement the text type */
      /* The flu.c uses searchpath which expects a file name not a path */
      INTL_TRACE(("INTL: trying %s %s\n", INTL_MODULE1, INTL_LOOKUP_ENTRY1));
      Firebird::string tempStr(INTL_LOOKUP_ENTRY1);
      if ( intlMod1 && (lookup_fn = (USHORT(*)(USHORT, USHORT(**)(), short, short))
            (intlMod1.lookupSymbol(tempStr))) ) {
            INTL_TRACE(("INTL: calling lookup %s %s\n", INTL_MODULE1,
                              INTL_LOOKUP_ENTRY1));
            /* A zero result from the module's lookup means success */
            if ((*lookup_fn) (type, &function, parm1, parm2) != 0) {
                  function = NULL;
            }
            else
            {
                  return (void*) function;
            }
      }
#endif

/* Still not found, check the set of supplemental international objects */
#ifdef INTL_BUILTIN
      if (LD2_lookup(type, &function, parm1, parm2) != 0)
            function = NULL;
      else
            return (void*)function;
#else
      /* Look for an InterBase supplied object to implement the text type */
      /* The flu.c uses searchpath which expects a file name not a path */
      INTL_TRACE(("INTL: trying %s %s\n", INTL_MODULE2, INTL_LOOKUP_ENTRY2));
      tempStr = INTL_LOOKUP_ENTRY2;
      if ( intlMod2 && (lookup_fn = (USHORT(*)(USHORT, USHORT(**)(), short, short))
            (intlMod2.lookupSymbol(tempStr))) ) {
            INTL_TRACE(("INTL: calling lookup %s %s\n", INTL_MODULE2,
                              INTL_LOOKUP_ENTRY2));
            if ((*lookup_fn) (type, &function, parm1, parm2) != 0) {
                  function = NULL;
            }
            else
            {
                  return (void*) function;
            }
      }
#endif

/* Still not found, check if there is a UDF in the database defined the right way */
      FUN function_block;
      USHORT argcount = 0;
      char entry[48];

/* Build the UDF name to look for; argcount is the number of arguments
   such a UDF must declare for the given object type */
/* EKU: need a replacement for snprintf for systems like SINIX-Z!!! */
      switch (type) {
            case type_texttype:
#ifdef HAVE_SNPRINTF
                  snprintf(entry, sizeof(entry), INTL_USER_ENTRY, parm1);
#else
                  sprintf(entry, INTL_USER_ENTRY, parm1);
#endif
                  argcount = 2;
                  break;
            case type_charset:
#ifdef HAVE_SNPRINTF
                  snprintf(entry, sizeof(entry), "USER_CHARSET_%03d", parm1);
#else
                  sprintf(entry, "USER_CHARSET_%03d", parm1);
#endif
                  argcount = 2;
                  break;
            case type_csconvert:
#ifdef HAVE_SNPRINTF
                  snprintf(entry, sizeof(entry), "USER_TRANSLATE_%03d_%03d", parm1,
                              parm2);
#else
                  sprintf(entry, "USER_TRANSLATE_%03d_%03d", parm1,
                              parm2);
#endif
                  argcount = 3;
                  break;
            default:
                  BUGCHECK(1);
                  break;
      }
      INTL_TRACE(("INTL: trying user fn %s\n", entry));
      if ( (function_block = FUN_lookup_function((TEXT*)entry, false)) ) {
            INTL_TRACE(("INTL: found a user fn, validating\n"));
            /* Accept the UDF only when its declaration has the expected
               shape: argcount arguments, a by-value short result in slot 0,
               short descriptors in slots 1 and argcount - 1, and a
               by-reference text descriptor in slot argcount */
            if ((function_block->fun_count == argcount) &&
                  (function_block->fun_args == argcount) &&
                  (function_block->fun_return_arg == 0) &&
                  (function_block->fun_entrypoint != NULL) &&
                  (function_block->fun_rpt[0].fun_mechanism == FUN_value) &&
                  (function_block->fun_rpt[0].fun_desc.dsc_dtype == dtype_short)
                  && (function_block->fun_rpt[1].fun_desc.dsc_dtype ==
                        dtype_short)
                  && (function_block->fun_rpt[argcount - 1].
                        fun_desc.dsc_dtype == dtype_short)
                  && (function_block->fun_rpt[argcount].fun_mechanism ==
                        FUN_reference)
                  && (function_block->fun_rpt[argcount].fun_desc.dsc_dtype ==
                        dtype_text))
            {
                  function = (FPTR_SHORT) function_block->fun_entrypoint;
                  return (void*) function;
            }
      }
      return NULL;
}

/*
 * Allocate a CharSet backed by an old-style charset init function.
 *
 * Looks up the init function for cs_id, allocates a zeroed `charset`
 * struct from pool p, lets the init function fill it in and wraps the
 * result in a CharSet_BC.
 *
 * Returns the new CharSet, or 0 when no init function is found or the
 * init function reports failure (non-zero result).  If constructing the
 * wrapper throws a std::exception, the struct is deleted and the
 * exception is rethrown.
 */
static CharSet *BC_CharSetAllocFunc(MemoryPool &p, SSHORT cs_id, SSHORT unused)
{
      typedef USHORT (*CSInitFunc)(CHARSET, SSHORT, SSHORT);
      CSInitFunc csInitFunc;

      csInitFunc = (CSInitFunc) intl_back_compat_obj_init_lookup(type_charset, cs_id, unused);
      assert(csInitFunc != 0);
      // assert() may be compiled out; never call through a null pointer.
      if (csInitFunc == 0)
            return 0;

      CHARSET cs = FB_NEW(p) charset;
      memset(cs, 0, sizeof(charset));

      if (0 != (*csInitFunc)(cs, cs_id, unused))
      {
            delete cs;
            return 0;
      }

      CharSet *result = 0;
      try
      {
            // On success CharSet_BC takes over the struct and deletes it
            // in its destructor.
            result = FB_NEW(p) CharSet_BC(p, cs);
      }
      catch(std::exception&)
      {
            delete cs;
            throw;
      }

      return result;
}

/*
 * Allocate a CsConvert backed by an old-style csconvert init function.
 *
 * Looks up the init function for the character set pair, allocates a
 * zeroed `csconvert` struct from pool p, lets the init function fill it
 * in and wraps the result in a CsConvert_BC that owns the struct.
 *
 * Returns the new CsConvert, or 0 when no init function is found or the
 * init function reports failure (non-zero result).  If constructing the
 * wrapper throws a std::exception, the struct is deleted and the
 * exception is rethrown.
 */
static CsConvert *BC_CsConvertAllocFunc(MemoryPool &p, SSHORT from_id, SSHORT to_id)
{
      typedef USHORT (*CVTInitFunc)(CSCONVERT, SSHORT, SSHORT);
      CVTInitFunc cvtInitFunc;

      // NOTE(review): both the lookup and the init call receive the ids as
      // (to_id, from_id), i.e. swapped relative to this function's parameter
      // order.  An earlier (from_id, to_id) form had been disabled in favour
      // of this one - confirm against the entry point convention of the
      // INTL modules before changing it.
      cvtInitFunc = (CVTInitFunc) intl_back_compat_obj_init_lookup(type_csconvert, to_id, from_id);
      assert(cvtInitFunc != 0);
      // assert() may be compiled out; never call through a null pointer.
      if (cvtInitFunc == 0)
            return 0;

      CSCONVERT cvt = FB_NEW(p) csconvert;
      memset(cvt, 0, sizeof(csconvert));

      if (0 != (*cvtInitFunc)(cvt, to_id, from_id))
      {
            delete cvt;
            return 0;
      }

      CsConvert *result = 0;
      try
      {
            // deleteMemory == true: the wrapper deletes cvt in its destructor.
            result = FB_NEW(p) CsConvert_BC(cvt, true);
      }
      catch(std::exception&)
      {
            delete cvt;
            throw;
      }

      return result;
}

/*
 * Allocate a TextType backed by an old-style texttype init function.
 *
 * Looks up the init function for tt_id, allocates a zeroed `texttype`
 * struct from pool p, lets the init function fill it in and wraps the
 * result in a TextType_BC whose base class depends on the struct:
 *      texttype_fn_to_wc set                  -> TextTypeMB
 *      1 byte per char, no texttype_fn_to_wc  -> TextTypeNC
 *      2 bytes per char, no texttype_fn_to_wc -> TextTypeWC
 *      anything else                          -> BUGCHECK
 *
 * Returns the new TextType, or 0 when no init function is found or the
 * init function reports failure (non-zero result).  If the try block
 * throws a std::exception, the struct is deleted and the exception is
 * rethrown.
 */
static TextType *BC_TextTypeAllocFunc(MemoryPool &p, SSHORT tt_id, SSHORT unused)
{
      typedef USHORT (*TTInitFunc)(TEXTTYPE, SSHORT, SSHORT);
      TTInitFunc ttInitFunc;

      ttInitFunc = (TTInitFunc) intl_back_compat_obj_init_lookup(type_texttype, tt_id, unused);
      assert(ttInitFunc != 0);
      // assert() may be compiled out; never call through a null pointer.
      if (ttInitFunc == 0)
            return 0;

      TEXTTYPE tt = FB_NEW(p) texttype;
      memset(tt, 0, sizeof(texttype));

      if (0 != (*ttInitFunc)(tt, tt_id, unused))
      {
            delete tt;
            return 0;
      }

      TextType *result = 0;
      try
      {
            if (tt->texttype_bytes_per_char == 1 && tt->texttype_fn_to_wc == NULL)
                  result = FB_NEW(p) TextType_BC<TextTypeNC>(tt);
            else if (tt->texttype_bytes_per_char == 2 && tt->texttype_fn_to_wc == NULL)
                  result = FB_NEW(p) TextType_BC<TextTypeWC>(tt);
            else if (tt->texttype_fn_to_wc != NULL)
                  result = FB_NEW(p) TextType_BC<TextTypeMB>(tt);
            else
                  BUGCHECK(1);
      }
      catch(std::exception&)
      {
            delete tt;
            throw;
      }

      return result;
}

/*
 * Return the back-compatibility allocator for the requested object.
 *
 * When intl_back_compat_obj_init_lookup() finds an init function for
 * (type, parm1, parm2), the BC_*AllocFunc matching `type` is returned,
 * cast to void*.  NULL is returned when no init function exists.  An
 * unknown `type` is a BUGCHECK.
 */
static void* intl_back_compat_alloc_func_lookup(
                                    USHORT type,
                                    CHARSET_ID parm1,
                                    CHARSET_ID parm2)
{
      // Nothing to wrap unless an old-style init function is available.
      if (intl_back_compat_obj_init_lookup(type, parm1, parm2) == NULL)
            return NULL;

      void* allocFunc = NULL;

      switch (type)
      {
            case type_charset:
                  allocFunc = (void*)BC_CharSetAllocFunc;
                  break;

            case type_texttype:
                  allocFunc = (void*)BC_TextTypeAllocFunc;
                  break;

            case type_csconvert:
                  allocFunc = (void*)BC_CsConvertAllocFunc;
                  break;

            default:
                  BUGCHECK(1);
                  break;
      }

      return allocFunc;
}

Generated by  Doxygen 1.6.0   Back to index