From tobias at fresco.org Fri Aug 1 16:24:50 2003 From: tobias at fresco.org (Tobias Hunger) Date: Fri Feb 25 22:16:26 2005 Subject: [Fresco-changes] Fresco/Babylon/include/Babylon/internal - New directory Message-ID: Update of /cvs/fresco/Fresco/Babylon/include/Babylon/internal In directory purcel:/tmp/cvs-serv23587/internal Log Message: Directory /cvs/fresco/Fresco/Babylon/include/Babylon/internal added to the repository From tobias at fresco.org Fri Aug 1 16:47:14 2003 From: tobias at fresco.org (Tobias Hunger) Date: Fri Feb 25 22:16:26 2005 Subject: [Fresco-changes] Fresco/Babylon/include/Babylon/internal Blocks.hh,NONE,1.1 Boundaries.hh,NONE,1.1 Dictionary.hh,NONE,1.1 traits.hh,NONE,1.1 utfstrings.hh,NONE,1.1 utils.hh,NONE,1.1 Message-ID: Update of /cvs/fresco/Fresco/Babylon/include/Babylon/internal In directory purcel:/tmp/cvs-serv24256/Babylon/include/Babylon/internal Added Files: Blocks.hh Boundaries.hh Dictionary.hh traits.hh utfstrings.hh utils.hh Log Message: Babylon work over: * Use iconv to convert characters to/from Babylon * Update to Unicode Version 4.0 * Added unit tests (some fail for now, still need to work on that) * Removed the VisualTextBuffer from the CommandKit: I still like the idea, unfortunately it won't work out in the real world:-| * Updated code to the changed Babylon interfaces wherever needed. --- NEW FILE: Blocks.hh --- /*$Id: Blocks.hh,v 1.1 2003/08/01 16:47:11 tobias Exp $ * * This source file is a part of the Fresco Project. * Copyright (C) 1999-2003 Tobias Hunger * http://www.fresco.org * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
 * MA 02139, USA.
 */
#ifndef _Babylon_internal_Blocks_hh_
#define _Babylon_internal_Blocks_hh_

// NOTE(review): the header names of the two #include directives below are
// missing from this copy (presumably stripped together with their angle
// brackets when the mail was archived) -- restore them from the repository.
#include
#include

namespace Babylon {

// Abstract interface for one block of character data.
//
// Every query below is pure virtual; only the constructor, the destructor
// and clean() have (empty) inline bodies.
class Block {
    // This class is subclassed by plugin libraries.
    // No method bodies here!!

    // These Blocks are built for speed, not safety!
    // The Dictionary has to make sure that it is
    // asking the right block about defined properties.
    // It can do so by calling the appropriate is_*
public:
    Block() { }
    virtual ~Block() { }

    // Hook with an empty default implementation; subclasses may override it.
    virtual void clean() { }

    // Returns 1 if this block is meant to handle all undefined characters.
    // The first undef_block found will be used!
    virtual bool is_undef_block() const = 0;

    // Query Functions:
    // Each takes the character in question as a UCS4 value. Per the class
    // comment above, the caller is responsible for asking the right block.
    virtual bool is_defined(const UCS4) const = 0;
    virtual UCS4 uppercase(const UCS4) const = 0;
    virtual UCS4 lowercase(const UCS4) const = 0;
    virtual UCS4 titlecase(const UCS4) const = 0;

    virtual float numeric_value(const UCS4) const = 0;
    virtual bool is_Numeric(const UCS4) const = 0;
    virtual int dec_digit_value(const UCS4) const = 0;
    virtual bool is_Decimal_Digit(const UCS4) const = 0;
    virtual int digit_value(const UCS4) const = 0;
    virtual bool is_Digit(const UCS4) const = 0;

    virtual std::string blockname(const UCS4) const = 0;

    virtual Gen_Cat category(const UCS4) const = 0;
    virtual Can_Comb_Class comb_class(const UCS4) const = 0;
    virtual Bidir_Props bidir_props(const UCS4) const = 0;
    virtual Char_Decomp decomp_type(const UCS4) const = 0;
    virtual UTF32_string decompose(const UCS4) const = 0;
    virtual bool exclude_from_composition(const UCS4 uc) const = 0;
    // NOTE(review): compose() is the only query that is not const, while
    // Dictionary::compose() is declared const -- confirm this is intended.
    virtual UCS4 compose(const UCS4, const UCS4) = 0;
    virtual bool must_mirror(const UCS4) const = 0;
    virtual EA_Width EA_width(const UCS4) const = 0;
    virtual Line_Break linebreak(const UCS4) const = 0;

    // Properties:
    virtual bool is_White_Space(const UCS4) const = 0;
    virtual bool is_Bidi_Control(const UCS4) const = 0;
    virtual bool is_Join_Control(const UCS4) const = 0;
    virtual bool is_Dash(const UCS4) const = 0;
    virtual bool is_Hyphen(const UCS4) const = 0;
    virtual bool is_Quotation_Mark(const UCS4) const = 0;
    virtual bool is_Terminal_Punctuation(const UCS4) const = 0;
    virtual bool is_Other_Math(const UCS4) const = 0;
    virtual bool is_Hex_Digit(const UCS4) const = 0;
    virtual bool is_ASCII_Hex_Digit(const UCS4) const = 0;
    virtual bool is_Other_Alphabetic(const UCS4) const = 0;
    virtual bool is_Ideographic(const UCS4) const = 0;
    virtual bool is_Diacritic(const UCS4) const = 0;
    virtual bool is_Extender(const UCS4) const = 0;
    virtual bool is_Other_Uppercase(const UCS4) const = 0;
    virtual bool is_Other_Lowercase(const UCS4) const = 0;
    virtual bool is_Noncharacter_Code_Point(const UCS4) const = 0;
    virtual bool is_Other_Grapheme_Extend(const UCS4) const = 0;
    virtual bool is_Grapheme_Link(const UCS4) const = 0;
    virtual bool is_IDS_Binary_Operator(const UCS4) const = 0;
    virtual bool is_IDS_Trinary_Operator(const UCS4) const = 0;
    virtual bool is_Radical(const UCS4) const = 0;
    virtual bool is_Unified_Ideograph(const UCS4) const = 0;
    virtual bool is_Other_Default_Ignorable_Code_Point(const UCS4) const = 0;
    virtual bool is_Deprecated(const UCS4) const = 0;
    virtual bool is_Soft_Dotted(const UCS4) const = 0;
    virtual bool is_Logical_Order_Exception(const UCS4) const = 0;
    virtual bool is_Other_ID_Start(const UCS4) const = 0;

    // Presumably the first and last character handled by this block
    // (compare Dictionary::first_letter_of_block / last_letter_of_block)
    // -- TODO confirm against a concrete block plugin.
    virtual UCS4 first_letter() const = 0;
    virtual UCS4 last_letter() const = 0;

protected:

private:
}; // class Block

}; // namespace Babylon

#endif
--- NEW FILE: Boundaries.hh ---
/*$Id: Boundaries.hh,v 1.1 2003/08/01 16:47:11 tobias Exp $
 *
 * This source file is a part of the Fresco Project.
* Copyright (C) 1999-2003 Tobias Hunger
 * http://www.fresco.org
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
 * MA 02139, USA.
 */
#ifndef _Babylon_internal_Boundaries_hh_
#define _Babylon_internal_Boundaries_hh_

namespace Babylon {

// Only references to Char are used below, so a forward declaration suffices.
class Char;

// Text boundary predicates. Each takes two characters and returns a bool;
// presumably "true" means a boundary of the named kind lies between the
// first and the second character -- TODO confirm against the implementation,
// which is not part of this header.
//
// NOTE(review): "graphem" and "sentense" are misspelled ("grapheme",
// "sentence"). The names are part of the declared interface, so they are
// left as they are here.
bool is_graphem_cluster_boundary(const Char &, const Char &);
bool is_word_boundary(const Char &, const Char &);
bool is_sentense_boundary(const Char &, const Char &);

}; // namespace Babylon

#endif
--- NEW FILE: Dictionary.hh ---
/*$Id: Dictionary.hh,v 1.1 2003/08/01 16:47:11 tobias Exp $
 *
 * This source file is a part of the Fresco Project.
 * Copyright (C) 1999-2003 Tobias Hunger
 * http://www.fresco.org
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
* * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, * MA 02139, USA. */ #ifndef _Babylon_internal_Dictionary_hh_ #define _Babylon_internal_Dictionary_hh_ #include #include #include #include #include namespace Babylon { class Block; //. Stores character data. class Dictionary { struct Dict_Guard { ~Dict_Guard() { delete Dictionary::my_dictionary; } }; friend struct Dict_Guard; public: //. Scans a directory for modules. void update(const std::string &) throw (std::runtime_error, Block_Error); //. Finds the current dictionary. //. If no dictionary exists it will create one. static Dictionary * instance(); // Queries for the datastructures stored in the Dictionary: //. Returns the first letter of the block (aka script) //. the given character belongs to. It returns //. UC_MAX_DEFINED if the character does not belong //. to a block. UCS4 first_letter_of_block(const UCS4) const throw (); //. Returns the last letter of the block (aka script) //. the given character belongs to. It returns //. UC_MAX_DEFINED if the character does not belong //. to a block UCS4 last_letter_of_block(const UCS4) const throw (); //. Returns the first letter of the next block (aka script) //. defined block after the given character. //. It returns UC_MAX_DEFINED if there is no block after //. the character. 
UCS4 start_of_next_block(const UCS4) const throw(); // Query functions: bool is_defined(const UCS4 uc) const throw (Block_Error); UCS4 uppercase(const UCS4 uc) const throw (Block_Error); UCS4 lowercase(const UCS4 uc) const throw (Block_Error); UCS4 titlecase(const UCS4 uc) const throw (Block_Error); float numeric_value(const UCS4 uc) const throw (Undefined_Property, Block_Error); bool is_Numeric(const UCS4 uc) const throw (Block_Error); int dec_digit_value(const UCS4 uc) const throw (Undefined_Property, Block_Error); bool is_Decimal_Digit(const UCS4 uc) const throw (Block_Error); int digit_value(const UCS4 uc) const throw (Undefined_Property, Block_Error); bool is_Digit(const UCS4 uc) const throw (Block_Error); std::string blockname(const UCS4 uc) const throw (Block_Error); Gen_Cat category(const UCS4) const throw (Undefined_Property, Block_Error); Can_Comb_Class comb_class(const UCS4) const throw (Undefined_Property, Block_Error); Bidir_Props bidir_props(const UCS4) const throw (Undefined_Property, Block_Error); Char_Decomp decomp_type(const UCS4) const throw (Undefined_Property, Block_Error); UTF32_string decompose(const UCS4) const throw (Block_Error); UTF32_string recursive_decompose(const bool compat, const UCS4 uc) const throw (Block_Error); bool exclude_from_composition(const UCS4) const throw (Block_Error); UCS4 compose(const UCS4 starter, const UCS4 last) const throw (Block_Error); bool must_mirror(const UCS4 uc) const throw (Block_Error); EA_Width EA_width(const UCS4 uc) const throw (Block_Error); Line_Break linebreak(const UCS4 uc) const throw (Block_Error); // Properties: bool is_White_Space(const UCS4) const throw (Block_Error); bool is_Bidi_Control(const UCS4) const throw (Block_Error); bool is_Join_Control(const UCS4) const throw (Block_Error); bool is_Dash(const UCS4) const throw (Block_Error); bool is_Hyphen(const UCS4) const throw (Block_Error); bool is_Quotation_Mark(const UCS4) const throw (Block_Error); bool is_Terminal_Punctuation(const UCS4) 
const throw (Block_Error); bool is_Other_Math(const UCS4) const throw (Block_Error); bool is_Hex_Digit(const UCS4) const throw (Block_Error); bool is_ASCII_Hex_Digit(const UCS4) const throw (Block_Error); bool is_Other_Alphabetic(const UCS4) const throw (Block_Error); bool is_Ideographic(const UCS4) const throw (Block_Error); bool is_Diacritic(const UCS4) const throw (Block_Error); bool is_Extender(const UCS4) const throw (Block_Error); bool is_Other_Lowercase(const UCS4) const throw (Block_Error); bool is_Other_Uppercase(const UCS4) const throw (Block_Error); bool is_Noncharacter_Code_Point(const UCS4) const throw (Block_Error); bool is_Other_Grapheme_Extend(const UCS4) const throw (Block_Error); bool is_Grapheme_Link(const UCS4) const throw (Block_Error); bool is_IDS_Binary_Operator(const UCS4) const throw (Block_Error); bool is_IDS_Trinary_Operator(const UCS4) const throw (Block_Error); bool is_Radical(const UCS4) const throw (Block_Error); bool is_Unified_Ideograph(const UCS4) const throw (Block_Error); bool is_Other_Default_Ignorable_Code_Point(const UCS4) const throw (Block_Error); bool is_Deprecated(const UCS4) const throw (Block_Error); bool is_Soft_Dotted(const UCS4) const throw (Block_Error); bool is_Logical_Order_Exception(const UCS4) const throw (Block_Error); bool is_Other_ID_Start(const UCS4) const throw (Block_Error); // Derived Properties: bool is_Math(const UCS4) const throw (Block_Error); bool is_Alphabetic(const UCS4) const throw (Block_Error); bool is_Lowercase(const UCS4) const throw (Block_Error); bool is_Uppercase(const UCS4) const throw (Block_Error); bool is_ID_Start(const UCS4) const throw (Block_Error); bool is_ID_Continue(const UCS4) const throw (Block_Error); bool is_XID_Start(const UCS4) const throw (Block_Error); bool is_XID_Continue(const UCS4) const throw (Block_Error); bool is_Default_Ignorable_Code_Point(const UCS4) const throw (Block_Error); bool is_Grapheme_Extend(const UCS4) const throw (Block_Error); bool 
is_Grapheme_Base(const UCS4) const throw (Block_Error); bool is_FC_NFKC_Closure(const UCS4) const throw (Block_Error); bool is_Full_Composition_Exclusion(const UCS4) const throw (Block_Error); bool is_NFD_QuickCheck(const UCS4) const throw (Block_Error); bool is_NFC_QuickCheck(const UCS4) const throw (Block_Error); bool is_NFKD_QuickCheck(const UCS4) const throw (Block_Error); bool is_NFKC_QuickCheck(const UCS4) const throw (Block_Error); bool is_Expands_On_NFD(const UCS4) const throw (Block_Error); bool is_Expands_On_NFC(const UCS4) const throw (Block_Error); bool is_Expands_On_NFKD(const UCS4) const throw (Block_Error); bool is_Expands_On_NFKC(const UCS4) const throw (Block_Error); // Further Properties: bool is_Space(const UCS4) const throw (Block_Error); bool is_Punctuation(const UCS4) const throw (Block_Error); bool is_Line_Separator(const UCS4) const throw (Block_Error); bool is_Paragraph_Separator(const UCS4) const throw (Block_Error); bool is_Currency_Symbol(const UCS4) const throw (Block_Error); bool is_Bidi_Left_to_Right(const UCS4) const throw (Block_Error); bool is_Bidi_European_Digit(const UCS4) const throw (Block_Error); bool is_Bidi_Eur_Num_Separator(const UCS4) const throw (Block_Error); bool is_Bidi_Eur_Num_Terminator(const UCS4) const throw (Block_Error); bool is_Bidi_Arabic_Digit(const UCS4) const throw (Block_Error); bool is_Bidi_Common_Separator(const UCS4) const throw (Block_Error); bool is_Bidi_Block_Separator(const UCS4) const throw (Block_Error); bool is_Bidi_Segment_Separator(const UCS4) const throw (Block_Error); bool is_Bidi_Whitespace(const UCS4) const throw (Block_Error); bool is_Bidi_Non_spacing_Mark(const UCS4) const throw (Block_Error); bool is_Bidi_Boundary_Neutral(const UCS4) const throw (Block_Error); bool is_Bidi_PDF(const UCS4) const throw (Block_Error); bool is_Bidi_Embedding_or_Override(const UCS4) const throw (Block_Error); bool is_Bidi_Other_Neutral(const UCS4) const throw (Block_Error); bool is_Virama(const UCS4) const 
throw (Block_Error); bool is_Printable(const UCS4) const throw (Block_Error); bool is_Titlecase(const UCS4) const throw (Block_Error); bool is_Private_Use(const UCS4) const throw (Block_Error); private: struct Data { UCS4 my_start; UCS4 my_end; std::string my_file; int operator < (const Data & data) const { return my_start < data.my_start; } bool my_can_remove; Prague::Plugin * my_block; Data(UCS4 start, UCS4 end) { my_start = start; my_end = end; my_file = ""; my_can_remove = 0; my_block = 0; } }; // struct Data class DataLess { public: bool operator() (const Data & d1, const Data & d2) { return d1.my_end < d2.my_start; } }; // class DataLess Prague::Plugin * my_undef_block; Babylon::Block * find_char(const UCS4) const throw (Block_Error); Dictionary(); Dictionary(const Dictionary &) {} ~Dictionary(); void clean(); static Dictionary * my_dictionary; static Dict_Guard my_guard; static Prague::Mutex my_singleton_mutex; mutable std::vector my_data; mutable Prague::RWLock my_rw_lock; }; // class Dictionary }; // namespace Babylon #endif --- NEW FILE: traits.hh --- /*$Id: traits.hh,v 1.1 2003/08/01 16:47:11 tobias Exp $ * * This source file is a part of the Fresco Project. * Copyright (C) 1999-2003 Tobias Hunger * http://www.fresco.org * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, * MA 02139, USA. 
*/ #ifndef _Babylon_internal_traits_hh #define _Babylon_internal_traits_hh #if __GNUC__ >= 3 #include namespace std { // char_traits template<> struct char_traits< ::Babylon::Char > { typedef ::Babylon::Char char_type; static void assign(char_type& c1, const char_type& c2) { c1 = c2; } // integer repressentation of characters typedef ::Babylon::UCS4 int_type; static char_type to_char_type(const int_type& i) { return ::Babylon::Char(i); } static int_type to_int_type(const char_type& c) { return(c.value()); } static bool eq_int_type(const int_type& i1, const int_type& i2) { return(i1 == i2); } // char_type comparison static bool eq(const char_type& c1, const char_type& c2) { return(c1 == c2); } static bool lt(const char_type& c1, const char_type& c2) { return(c1 < c2); } // operations on s[n] arrays static char_type* move(char_type* s, const char_type * s2, size_t n); static char_type* copy(char_type* s, const char_type * s2, size_t n); static char_type* assign(char_type* s, size_t n, char_type a); static int compare(const char_type * s, const char_type * s2, size_t n); static size_t length(const char_type * s); static const char_type * find(const char_type * s, size_t n, const char_type& c); // I/O related: typedef size_t pos_type; typedef size_t off_type; typedef mbstate_t state_type; static int_type eof() { return(::Babylon::UC_NULL); } static int_type not_eof(const int_type &); static state_type get_state(pos_type p) {} // FIXME: What should go here? }; }; // namespace std #endif // __GNUC__ >= 3 #endif --- NEW FILE: utfstrings.hh --- /*$Id: utfstrings.hh,v 1.1 2003/08/01 16:47:11 tobias Exp $ * * This source file is a part of the Fresco Project. * Copyright (C) 1999-2003 Tobias Hunger * http://www.fresco.org * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, * MA 02139, USA. */ #ifndef _Babylon_internal_utfstrings_hh #define _Babylon_internal_utfstrings_hh /* String class * This class stores and manipulates strings of characters defined * according to ISO10646. */ #include #include #include namespace Babylon { class String; typedef std::basic_string UTF16_string; typedef std::basic_string UTF32_string; typedef std::string UTF8_string; // This function is necessary to hide UTF32_string, // which is used internally, from the user and the API he // sees in the Char and String classes. inline void get_from_UTF32(UTF32_string in, String & out) { out.resize(in.length()); Babylon::UTF32_string::const_iterator j = in.begin(); for (Babylon::String::iterator i = out.begin(); i != out.end(); ++i, ++j) i->operator=(*j); } /* class Char_Mapping : public std::basic_string { public: Char_Mapping(size_t start, size_t length) { resize(length); for (size_t i = 0; i < length; ++i) (*this)[i] = i + start; } Char_Mapping() {} }; */ }; #endif --- NEW FILE: utils.hh --- /*$Id: utils.hh,v 1.1 2003/08/01 16:47:11 tobias Exp $ * * This source file is a part of the Fresco Project. * Copyright (C) 1999-2003 Tobias Hunger * http://www.fresco.org * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, * MA 02139, USA. */ #ifndef _Babylon_internal_utils_hh #define _Babylon_internal_utils_hh // --------------------------------------------------------------------------- // Helperfunctions for Bidir: // --------------------------------------------------------------------------- namespace Babylon { inline bool bidir_is_strong(const Babylon::Bidir_Props & p) { return (p & Babylon::BIDIR_MASK_STRONG); } inline bool bidir_is_neutral(const Babylon::Bidir_Props & p) { return (p & Babylon::BIDIR_MASK_NEUTRAL); } inline bool bidir_is_explicit_or_bn(const Babylon::Bidir_Props & p) { return (p & (BIDIR_MASK_EXPLICIT | BIDIR_MASK_BN)); } inline bool bidir_is_letter(const Babylon::Bidir_Props & p) { return (p & Babylon::BIDIR_MASK_LETTER); } inline bool bidir_is_number(const Babylon::Bidir_Props & p) { return (p & Babylon::BIDIR_MASK_NUMBER); } inline bool bidir_is_override(const Babylon::Bidir_Props & p) { return (p & Babylon::BIDIR_MASK_OVERRIDE); } inline bool bidir_is_ES_or_CS(const Babylon::Bidir_Props & p) { return (p & (Babylon::BIDIR_MASK_ES | Babylon::BIDIR_MASK_CS)); } inline bool bidir_is_separator(const Babylon::Bidir_Props & p) { return (p & Babylon::BIDIR_MASK_SEPARATOR); } inline Babylon::Bidir_Props bidir_change_number_to_RTL(const Babylon::Bidir_Props &p) { return (bidir_is_number(p) ? 
Babylon::BIDIR_R : p); } inline bool bidir_is_explicit_or_separator_or_BN_or_WS(const Babylon::Bidir_Props & p) { return (p & (Babylon::BIDIR_MASK_EXPLICIT | Babylon::BIDIR_MASK_SEPARATOR | Babylon::BIDIR_MASK_BN | Babylon::BIDIR_MASK_WS)); } inline bool bidir_is_number_separator_or_terminator(const Babylon::Bidir_Props & p) { return (p & Babylon::BIDIR_MASK_NUMSEPTER); } inline bool bidir_is_PDF(const Babylon::Bidir_Props & p) { return (p == BIDIR_PDF); } inline unsigned char bidir_to_level(const Babylon::Bidir_Props& p) { return (p & 1); } inline Babylon::Bidir_Props level_to_bidir(const unsigned char lev) { return (Babylon::Bidir_Props(Babylon::BIDIR_L | (lev & 1))); } inline Babylon::Bidir_Props bidir_explicit_to_override_dir(const Babylon::Bidir_Props & p) { return (bidir_is_override(p) ? level_to_bidir(bidir_to_level(p)) : BIDIR_ON); } inline bool compact(const Babylon::Embedding_Level & a, const Babylon::Embedding_Level & b) { return (a.bidir_type == b.bidir_type && a.level == b.level); } inline bool compact_neutrals(const Babylon::Embedding_Level & a, const Babylon::Embedding_Level & b) { return (a.level == b.level && (a.bidir_type == b.bidir_type || bidir_is_neutral(a.bidir_type) && bidir_is_neutral(b.bidir_type))); } inline Babylon::Bidir_Props change_number_to_rtl(Babylon::Bidir_Props p) { return (bidir_is_number(p) ? 
Babylon::BIDIR_R : p); } Embedding_Levels::iterator compact(const Embedding_Levels::iterator & start, const Embedding_Levels::iterator & end) { Prague::Trace trace("Babylon::compact(...)"); if (start == end) return(end); Embedding_Levels::iterator last_used = start; Embedding_Levels::iterator current = start; ++current; while(current != end) { if (last_used->level == current->level && last_used->bidir_type == current->bidir_type) last_used->increment_length(current->length()); else { ++last_used; std::iter_swap(last_used, current); } ++current; } return (++last_used); } Embedding_Levels::iterator compact_neutrals(const Embedding_Levels::iterator & start, const Embedding_Levels::iterator & end) { Prague::Trace trace("Babylon::compact_neutrals(...)"); if (start == end) return (end); Embedding_Levels::iterator last_used = start; Embedding_Levels::iterator current = start; ++current; while(current != end) { if (last_used->level == current->level && (last_used->bidir_type == current->bidir_type || bidir_is_neutral(last_used->bidir_type) && bidir_is_neutral(current->bidir_type))) last_used->increment_length(current->length()); else { ++last_used; std::iter_swap(last_used, current); } ++current; } return(++last_used); } Embedding_Levels override_lists(const Embedding_Levels & base, const Embedding_Levels & over) { Prague::Trace trace("Babylon::override_lists(...)"); if (base.empty()) return (over); if (over.empty()) return (base); Embedding_Levels::const_iterator over_it = over.begin(); Embedding_Levels::const_iterator base_it = base.begin(); Embedding_Levels result; while(!(over_it == over.end() && base_it == base.end())) { // One list is empty, copy the other over: if (over_it == over.end()) { // copy base std::copy(base_it, base.end(), std::back_inserter(result)); base_it = base.end(); continue; } if (base_it == base.end()) { // copy over std::copy(over_it, over.end(), std::back_inserter(result)); over_it = over.end(); continue; } // skip invalid entries if 
(over_it->length() == 0) { ++over_it; continue; } if (base_it->length() == 0) { ++base_it; continue; } size_t max_current = base_it->ends_at(); // copying base: if (max_current < over_it->starts_at()) { result.push_back(*base_it); ++base_it; continue; } // inserting over_it into base_it if (max_current >= over_it->starts_at()) { Babylon::Embedding_Level current = *base_it; // Insert first part of base if not empty: current.length(over_it->starts_at() - current.starts_at()); if (current.length() != 0) // current.length can't become < 0 as // max_current >= over_it.start result.push_back(current); // Insert over_it (we allways need to do this!) result.push_back(*over_it); // over_it reaches into the next base_it: while (base_it != base.end() && base_it->ends_at() < over_it->ends_at()) ++base_it; if (base_it == base.end()) continue; current = *base_it; current.length(current.ends_at() - over_it->ends_at()); current.starts_at(over_it->ends_at() + 1); ++base_it; ++over_it; continue; } } // while return (result); } }; // namespace Babylon #endif From tobias at fresco.org Fri Aug 1 16:47:23 2003 From: tobias at fresco.org (Tobias Hunger) Date: Fri Feb 25 22:16:26 2005 Subject: [Fresco-changes] Fresco/Fresco-C++-demos/src EditTextDemo.cc,1.13,1.14 TextDemo.cc,1.10,1.11 demo.cc,1.23,1.24 pinyin_demo.cc,1.17,1.18 Message-ID: Update of /cvs/fresco/Fresco/Fresco-C++-demos/src In directory purcel:/tmp/cvs-serv24256/Fresco-C++-demos/src Modified Files: EditTextDemo.cc TextDemo.cc demo.cc pinyin_demo.cc Log Message: Babylon work over: * Use iconv to convert characters to/from Babylon * Updtae to Unicode Version 4.0 * Added unit tests (some fail for now, still need to work on that) * Removed the VisualTextBuffer from the CommandKit: I still like the idea, unfortunately it won't work out in the real world:-| * Updated code to the changed Babylon interfaces whereever needed. 
Index: EditTextDemo.cc =================================================================== RCS file: /cvs/fresco/Fresco/Fresco-C++-demos/src/EditTextDemo.cc,v retrieving revision 1.13 retrieving revision 1.14 diff -u -d -r1.13 -r1.14 --- EditTextDemo.cc 8 Oct 2002 18:21:23 -0000 1.13 +++ EditTextDemo.cc 1 Aug 2003 16:47:20 -0000 1.14 @@ -1,4 +1,4 @@ - /*$Id$ +/*$Id$ * * This source file is a part of the Fresco Project. * Copyright (C) 1999 Stefan Seefeld @@ -44,14 +44,13 @@ 0xad6d, 0xc5b4 }; - Babylon::String str(34, chars); + Babylon::String str(chars, 34); TextBuffer_var buf = commands->text(); - TextBuffer_var vis_buf = buf->get_visual_buffer(); - Graphic_var txt = text->simple_viewer(vis_buf); + Graphic_var txt = text->simple_viewer(buf); ToolKit::FrameSpec spec; spec.brightness(0.5); spec._d(ToolKit::inset); Graphic_var frame = tools->frame(Graphic_var(layout->margin(Graphic_var(layout->hfixed(Graphic_var(tools->rgb(txt, 0., 0., 0.)), 4000)), 50.)), 20., spec, true); buf->insert_string(Unicode::to_CORBA(str)); - application->append(Controller_var(tools->text_input(frame, vis_buf)), Babylon::String("editable text")); + application->append(Controller_var(tools->text_input(frame, buf)), Babylon::String("editable text")); }; Index: TextDemo.cc =================================================================== RCS file: /cvs/fresco/Fresco/Fresco-C++-demos/src/TextDemo.cc,v retrieving revision 1.10 retrieving revision 1.11 diff -u -d -r1.10 -r1.11 --- TextDemo.cc 17 Nov 2002 13:14:24 -0000 1.10 +++ TextDemo.cc 1 Aug 2003 16:47:20 -0000 1.11 @@ -39,7 +39,7 @@ 0xad6d, 0xc5b4 }; - Babylon::String str(34, chars); + Babylon::String str(chars, 34); Graphic_var txt = text->chunk(Unicode::to_CORBA(str)); Controller_var group = tools->group(Graphic_var(tools->rgb(txt, 0.2, 0.3, 0.5))); application->append(group, Babylon::String("text")); Index: demo.cc =================================================================== RCS file: 
/cvs/fresco/Fresco/Fresco-C++-demos/src/demo.cc,v retrieving revision 1.23 retrieving revision 1.24 diff -u -d -r1.23 -r1.24 --- demo.cc 21 Dec 2002 04:18:38 -0000 1.23 +++ demo.cc 1 Aug 2003 16:47:20 -0000 1.24 @@ -95,7 +95,7 @@ std::auto_ptr focus(create_demo(application)); std::auto_ptr viewport(create_demo(application)); std::auto_ptr document(create_demo(application)); - std::auto_ptr terminal(create_demo(application)); + // std::auto_ptr terminal(create_demo(application)); application->run(); delete application; Index: pinyin_demo.cc =================================================================== RCS file: /cvs/fresco/Fresco/Fresco-C++-demos/src/pinyin_demo.cc,v retrieving revision 1.17 retrieving revision 1.18 diff -u -d -r1.17 -r1.18 --- pinyin_demo.cc 14 Apr 2003 00:25:06 -0000 1.17 +++ pinyin_demo.cc 1 Aug 2003 16:47:21 -0000 1.18 @@ -47,6 +47,7 @@ #include #include +#include #ifdef DATADIR const std::string data_dir = DATADIR; @@ -94,7 +95,7 @@ // I use insert_string() since the chinese chars might have // a codepoint above 0xFFFF which insert_char() is not // able to handle. 
- output->insert_string(Unicode::to_CORBA(Babylon::String(select_from[last.value() - 'A']))); + output->insert_string(Unicode::to_CORBA(Babylon::String(1, select_from[last.value() - 'A']))); //XXX input->clear(); // this clears select too: // it results in a call to this function From tobias at fresco.org Fri Aug 1 16:47:24 2003 From: tobias at fresco.org (Tobias Hunger) Date: Fri Feb 25 22:16:26 2005 Subject: [Fresco-changes] Fresco/Fresco-IDL/share/idl/Fresco TextBuffer.idl,1.16,1.17 Message-ID: Update of /cvs/fresco/Fresco/Fresco-IDL/share/idl/Fresco In directory purcel:/tmp/cvs-serv24256/Fresco-IDL/share/idl/Fresco Modified Files: TextBuffer.idl Log Message: Babylon work over: * Use iconv to convert characters to/from Babylon * Update to Unicode Version 4.0 * Added unit tests (some fail for now, still need to work on that) * Removed the VisualTextBuffer from the CommandKit: I still like the idea, unfortunately it won't work out in the real world:-| * Updated code to the changed Babylon interfaces wherever needed. Index: TextBuffer.idl =================================================================== RCS file: /cvs/fresco/Fresco/Fresco-IDL/share/idl/Fresco/TextBuffer.idl,v retrieving revision 1.16 retrieving revision 1.17 diff -u -d -r1.16 -r1.17 --- TextBuffer.idl 29 May 2002 06:57:01 -0000 1.16 +++ TextBuffer.idl 1 Aug 2003 16:47:21 -0000 1.17 @@ -109,21 +109,6 @@ void remove_forward(in unsigned long d); //. Clear the buffer. It will be empty afterwards. void clear(); - - //. The order in which the String in this TextBuffer is stored. - readonly attribute StringOrder order; - - //. Get the accociated TextBuffer that contains the String stored - //. in here in memory order. This operation returns a reference to - //. _this() if order() returns StringOrder::memory_order or a - //. reference to the associated buffer. - TextBuffer get_memory_buffer(); - - //. Get the accociated TextBuffer that contains the String stored - //. in here in visual order. 
This operation returns a reference to - //. _this() if order() returns StringOrder::visual_order or a - //. reference to the associated buffer. - TextBuffer get_visual_buffer(); }; }; From tobias at fresco.org Fri Aug 1 16:47:24 2003 From: tobias at fresco.org (Tobias Hunger) Date: Fri Feb 25 22:16:26 2005 Subject: [Fresco-changes] Fresco/config compiler.m4,1.5,1.6 Message-ID: Update of /cvs/fresco/Fresco/config In directory purcel:/tmp/cvs-serv24256/config Modified Files: compiler.m4 Log Message: Babylon work over: * Use iconv to convert characters to/from Babylon * Updtae to Unicode Version 4.0 * Added unit tests (some fail for now, still need to work on that) * Removed the VisualTextBuffer from the CommandKit: I still like the idea, unfortunately it won't work out in the real world:-| * Updated code to the changed Babylon interfaces whereever needed. Index: compiler.m4 =================================================================== RCS file: /cvs/fresco/Fresco/config/compiler.m4,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- compiler.m4 4 Jun 2003 22:38:59 -0000 1.5 +++ compiler.m4 1 Aug 2003 16:47:22 -0000 1.6 @@ -182,6 +182,15 @@ CFLAGS="$CFLAGS $OPT_CFLAGS") ;; esac + case $host_cpu in + i686*|686*) + OPT_CFLAGS='-march=i686 -msse -mfpmath=sse' + FRESCO_COMPILER_OPTION(coptimize_686, + [-m for 686 CPU], + $OPT_CFLAGS, + CFLAGS="$CFLAGS $OPT_CFLAGS") + ;; + esac AC_LANG_RESTORE else CFLAGS="$CFLAGS -O" @@ -209,6 +218,14 @@ CXXFLAGS="$CXXFLAGS $OPT_CXXFLAGS") ;; esac + case $host_cpu in + i686*|686*) OPT_CFLAGS='-march=i686 -msse -mfpmath=sse' + FRESCO_COMPILER_OPTION(cxxoptimize_686, + [-m for 686 CPU], + $OPT_CFLAGS, + CFLAGS="$CFLAGS $OPT_CFLAGS") + ;; + esac AC_LANG_RESTORE else CXXFLAGS="$CXXFLAGS -O" From tobias at fresco.org Fri Aug 1 16:47:25 2003 From: tobias at fresco.org (Tobias Hunger) Date: Fri Feb 25 22:16:26 2005 Subject: [Fresco-changes] Fresco/contrib/daVinci/src main.cc,1.9,1.10 Message-ID: Update of 
/cvs/fresco/Fresco/contrib/daVinci/src In directory purcel:/tmp/cvs-serv24256/contrib/daVinci/src Modified Files: main.cc Log Message: Babylon work over: * Use iconv to convert characters to/from Babylon * Update to Unicode Version 4.0 * Added unit tests (some fail for now, still need to work on that) * Removed the VisualTextBuffer from the CommandKit: I still like the idea, unfortunately it won't work out in the real world:-| * Updated code to the changed Babylon interfaces wherever needed. Index: main.cc =================================================================== RCS file: /cvs/fresco/Fresco/contrib/daVinci/src/main.cc,v retrieving revision 1.9 retrieving revision 1.10 diff -u -d -r1.9 -r1.10 --- main.cc 11 Apr 2003 00:09:35 -0000 1.9 +++ main.cc 1 Aug 2003 16:47:22 -0000 1.10 @@ -76,7 +76,8 @@ Server_var s = resolve_server(getopt, orb); - ClientContextImpl *client_impl = new ClientContextImpl("daVinci"); + ClientContextImpl *client_impl = + new ClientContextImpl(Babylon::String("daVinci")); ClientContext_var client = client_impl->_this(); ServerContext_var server = s->create_server_context(client); From tobias at fresco.org Fri Aug 1 16:47:41 2003 From: tobias at fresco.org (Tobias Hunger) Date: Fri Feb 25 22:16:26 2005 Subject: [Fresco-changes] Fresco Makefile.in,1.43,1.44 Message-ID: Update of /cvs/fresco/Fresco In directory purcel:/tmp/cvs-serv24256 Modified Files: Makefile.in Log Message: Babylon work over: * Use iconv to convert characters to/from Babylon * Update to Unicode Version 4.0 * Added unit tests (some fail for now, still need to work on that) * Removed the VisualTextBuffer from the CommandKit: I still like the idea, unfortunately it won't work out in the real world:-| * Updated code to the changed Babylon interfaces wherever needed. 
Index: Makefile.in =================================================================== RCS file: /cvs/fresco/Fresco/Makefile.in,v retrieving revision 1.43 retrieving revision 1.44 diff -u -d -r1.43 -r1.44 --- Makefile.in 14 May 2003 22:42:09 -0000 1.43 +++ Makefile.in 1 Aug 2003 16:47:08 -0000 1.44 @@ -175,6 +175,12 @@ Fresco-C++: Babylon Clients-C++: Babylon endif +ifneq ($(findstring Fresco-IDL, $(subdirs)),) +Fresco-C++: Fresco-IDL +Fresco-Java: Fresco-IDL +Fresco-Python: Fresco-IDL +Fresco-Perl: Fresco-IDL +endif ifneq ($(findstring Fresco-C++, $(subdirs)),) Berlin: Fresco-C++ Fresco-C++-demos: Fresco-C++ @@ -184,5 +190,8 @@ GGI: Berlin SDL: Berlin Fresco-C++-demos: Berlin #FIXME !!! +endif +ifneq ($(findstring Fresco-Python, $(subdirs)),) +Fresco-Python-demos: Fresco-Python endif From tobias at fresco.org Fri Aug 1 16:47:42 2003 From: tobias at fresco.org (Tobias Hunger) Date: Fri Feb 25 22:16:26 2005 Subject: [Fresco-changes] Fresco/Babylon Makefile.in,1.15,1.16 configure.ac,1.24,1.25 Message-ID: Update of /cvs/fresco/Fresco/Babylon In directory purcel:/tmp/cvs-serv24256/Babylon Modified Files: Makefile.in configure.ac Log Message: Babylon work over: * Use iconv to convert characters to/from Babylon * Update to Unicode Version 4.0 * Added unit tests (some fail for now, still need to work on that) * Removed the VisualTextBuffer from the CommandKit: I still like the idea, unfortunately it won't work out in the real world:-| * Updated code to the changed Babylon interfaces wherever needed. 
Index: Makefile.in =================================================================== RCS file: /cvs/fresco/Fresco/Babylon/Makefile.in,v retrieving revision 1.15 retrieving revision 1.16 diff -u -d -r1.15 -r1.16 --- Makefile.in 6 Apr 2003 16:15:44 -0000 1.15 +++ Makefile.in 1 Aug 2003 16:47:09 -0000 1.16 @@ -26,7 +26,7 @@ distdir := @PACKAGE_NAME@-@PACKAGE_VERSION@ -subdirs := src modules test +subdirs := src modules demo test # insert inter-directory dependencies below # Overridden by the clean-targets, allowing the same subdirs-rule to be used @@ -41,6 +41,8 @@ # insert inter-directory dependencies here modules: src +demo: src +test: demo $(subdirs): @echo making $(action) in $@ Index: configure.ac =================================================================== RCS file: /cvs/fresco/Fresco/Babylon/configure.ac,v retrieving revision 1.24 retrieving revision 1.25 diff -u -d -r1.24 -r1.25 --- configure.ac 14 May 2003 22:42:09 -0000 1.24 +++ configure.ac 1 Aug 2003 16:47:09 -0000 1.25 @@ -89,7 +89,7 @@ AC_CONFIG_FILES([config/Babylon-config], [chmod +x config/Babylon-config]) AC_CONFIG_FILES([bin/Babylon-config:config/Babylon-build-config.in], [chmod +x bin/Babylon-config]) AC_CONFIG_FILES([Makefile src/Makefile modules/Makefile]) -AC_CONFIG_FILES([test/Makefile]) +AC_CONFIG_FILES([test/Makefile demo/Makefile]) mkdir -p lib mkdir -p modules From tobias at fresco.org Fri Aug 1 16:47:43 2003 From: tobias at fresco.org (Tobias Hunger) Date: Fri Feb 25 22:16:26 2005 Subject: [Fresco-changes] Fresco/Babylon/include/Babylon exceptions.hh,NONE,1.1 Babylon.hh,1.4,1.5 Char.hh,1.13,1.14 String.hh,1.11,1.12 defs.hh,1.17,1.18 Dictionary.hh,1.11,NONE traits.hh,1.1,NONE utils.hh,1.11,NONE vis_iterator.hh,1.4,NONE Message-ID: Update of /cvs/fresco/Fresco/Babylon/include/Babylon In directory purcel:/tmp/cvs-serv24256/Babylon/include/Babylon Modified Files: Babylon.hh Char.hh String.hh defs.hh Added Files: exceptions.hh Removed Files: Dictionary.hh traits.hh utils.hh 
vis_iterator.hh Log Message: Babylon work over: * Use iconv to convert characters to/from Babylon * Update to Unicode Version 4.0 * Added unit tests (some fail for now, still need to work on that) * Removed the VisualTextBuffer from the CommandKit: I still like the idea, unfortunately it won't work out in the real world:-| * Updated code to the changed Babylon interfaces wherever needed. --- NEW FILE: exceptions.hh --- /*$Id: exceptions.hh,v 1.1 2003/08/01 16:47:10 tobias Exp $ * * This source file is a part of the Fresco Project. * Copyright (C) 1999-2003 Tobias Hunger * http://www.fresco.org * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, * MA 02139, USA. 
*/ /* This file supports Unicode version 3.1 beta */ #ifndef _Babylon_exceptions_hh #define _Babylon_exceptions_hh #include #include #include namespace Babylon { // Classes to throw around as exceptions: class Undefined_Property : std::exception { public: Undefined_Property(UCS4 uc, const Char_Props prop) : my_error_uc(uc), my_error_prop(prop) { } const char * what() const throw(); public: UCS4 my_error_uc; Char_Props my_error_prop; }; // class Undefined_Property class Transfer_Error : std::exception { public: Transfer_Error(Trans_Error transError) : my_error(transError) { } ~Transfer_Error() throw() { } const char * what() const throw(); private: Trans_Error my_error; }; // Transfer_Error class Block_Error : std::exception { public: Block_Error(UCS4 startUC, UCS4 endUC, const std::string &em) : my_block_start(startUC), my_block_end(endUC), my_error_message(em) { } ~Block_Error() throw() {} const char * what() const throw(); private: UCS4 my_block_start; UCS4 my_block_end; std::string my_error_message; }; // class Block_Error }; // namespace #endif Index: Babylon.hh =================================================================== RCS file: /cvs/fresco/Fresco/Babylon/include/Babylon/Babylon.hh,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- Babylon.hh 30 Dec 2001 01:47:10 -0000 1.4 +++ Babylon.hh 1 Aug 2003 16:47:10 -0000 1.5 @@ -1,8 +1,8 @@ /*$Id$ * - * This source file is a part of the Berlin Project. - * Copyright (C) 1999,2000 Tobias Hunger - * http://www.berlin-consortium.org + * This source file is a part of the Fresco Project. 
+ * Copyright (C) 1999-2003 Tobias Hunger + * http://www.fresco.org * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -22,9 +22,6 @@ #ifndef _Babylon_hh #define _Babylon_hh -#include -#include #include -#include #endif // _Babylon_hh Index: Char.hh =================================================================== RCS file: /cvs/fresco/Fresco/Babylon/include/Babylon/Char.hh,v retrieving revision 1.13 retrieving revision 1.14 diff -u -d -r1.13 -r1.14 --- Char.hh 5 Mar 2003 23:28:16 -0000 1.13 +++ Char.hh 1 Aug 2003 16:47:10 -0000 1.14 @@ -1,8 +1,8 @@ /*$Id$ * - * This source file is a part of the Berlin Project. - * Copyright (C) 1999,2000 Tobias Hunger - * http://www.berlin-consortium.org + * This source file is a part of the Fresco Project. + * Copyright (C) 1999-2003 Tobias Hunger + * http://www.fresco.org * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public [...1095 lines suppressed...] 
+ Char &operator * (char c) { my_value *= c; return *this; } + Char &operator / (Char uc) { my_value /= uc.my_value; return *this; } + Char &operator / (UCS4 uc) { my_value /= uc; return *this; } + Char &operator / (int i) { my_value /= i; return *this; } + Char &operator / (char c) { my_value /= c; return *this; } + private: - UCS4 m_value; - - }; // class Char + void iconv_wrapper(const std::string &, const char *, size_t &, + const std::string &, char *, size_t &) const; + + UCS4 my_value; + }; // class Char -} // namespace Babylon +}; // namespace Babylon #endif // _Babylon_Char_hh Index: String.hh =================================================================== RCS file: /cvs/fresco/Fresco/Babylon/include/Babylon/String.hh,v retrieving revision 1.11 retrieving revision 1.12 diff -u -d -r1.11 -r1.12 --- String.hh 19 Feb 2002 15:58:54 -0000 1.11 +++ String.hh 1 Aug 2003 16:47:10 -0000 1.12 @@ -1,8 +1,8 @@ /*$Id$ * - * This source file is a part of the Berlin Project. - * Copyright (C) 1999,2000 Tobias Hunger - * http://www.berlin-consortium.org + * This source file is a part of the Fresco Project. + * Copyright (C) 1999-2003 Tobias Hunger + * http://www.fresco.org * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -25,148 +25,377 @@ /* String class * This class stores and manipulates strings of characters defined - * acordding to ISO10646. + * according to ISO10646. */ -#include -#include #include -namespace Babylon { - typedef std::basic_string Char_Mapping; - struct Paragraph; - typedef std::vector Paragraphs; +namespace Babylon +{ - // g++ does not yet support char_traits :-( - class String : public std::basic_string { - public: - // CONSTRUCTORS: - /// Creates a string of the length 0. 
- String(); + class Bidir; - /// Creates a string of the length 1 containing - String(const Char, const Norm norm = NORM_NONE); - String(const UCS4, const Norm norm = NORM_NONE); + class String + { + // Do not inherit from std::basic_string. It's not meant to be derived + // from:-( - // Creates a string out of other types of strings - String(const UTF8_string & s, const Norm norm = NORM_NONE); - String(const char * s, const Norm norm = NORM_NONE); - String(const UTF32_string &, const Norm norm = NORM_NONE); - String(size_t len, Char * data, const Norm norm = NORM_NONE); - String(const String &); + public: + typedef std::basic_string string_type; + typedef Babylon::Char char_type; + // import types from string_type: + typedef string_type::traits_type traits_type; + typedef string_type::value_type value_type; + typedef string_type::size_type size_type; + typedef string_type::difference_type difference_type; + typedef string_type::reference reference; + typedef string_type::const_reference const_reference; + typedef string_type::pointer pointer; + typedef string_type::const_pointer const_pointer; + typedef string_type::iterator iterator; + typedef string_type::const_iterator const_iterator; + typedef string_type::reverse_iterator reverse_iterator; + typedef string_type::const_reverse_iterator const_reverse_iterator; - // Transformators: - void utf8(const UTF8_string &, const Norm norm = NORM_NONE) - throw (Trans_Error); - void utf8(const char * s, const Norm norm = NORM_NONE) - throw (Trans_Error) { utf8(UTF8_string(s), norm); } - void utf16(const UTF16_string &, const Norm norm = NORM_NONE) - throw (Trans_Error); - void utf16(const UCS2 * s, const Norm norm = NORM_NONE) - throw (Trans_Error) { utf16(UTF16_string(s), norm); } - void utf32(const UTF32_string &, const Norm norm = NORM_NONE); - UTF8_string utf8() const throw (Trans_Error); - UTF16_string utf16() const throw(Trans_Error); - UTF32_string utf32() const throw(Trans_Error); + static const size_type npos = 
string_type::npos; - void swap(String &); - - // normalizes a String. - void normalize(const Norm); - - // returns the norm the string is in - Norm norm() const { return m_current_norm; } - - // sets the norm the String is in. Does NOT change the - // string itself, so USE WITH CAUTION! - void override_norm(const Norm norm) { m_current_norm = norm; } - - // returns the normalized form of a string without changing it. - String norm(const Norm norm) const; + // Constructors: + String(); + String(const String &, + const size_type = 0, const size_type = npos); + String(const char_type *, const Norm = NORM_NONE); + String(const char_type *, const size_type, const Norm = NORM_NONE); + String(const size_type, const Char, const Norm = NORM_NONE); + template + String(InputIterator, InputIterator, const Norm = NORM_NONE); + // additional constructors: + String(const string_type &); + + // Destructor: + ~String(); + + // Size Operations: - std::vector get_defined(); - std::vector get_Spaces(); - std::vector get_ISO_Controls(); - std::vector get_Punctuations(); - std::vector get_Line_Separators(); - std::vector get_Paragraph_Separators(); - std::vector get_Currency_Symbols(); - std::vector get_Bidi_Left_to_Rights(); - std::vector get_Bidi_European_Digits(); - std::vector get_Bidi_Eur_Num_Separators(); - std::vector get_Bidi_Eur_Num_Terminators(); - std::vector get_Bidi_Arabic_Digits(); - std::vector get_Bidi_Common_Separator(); - std::vector get_Bidi_Block_Separator(); - std::vector get_Bidi_Segment_Separator(); - std::vector get_Bidi_Whitespaces(); - std::vector get_Bidi_Non_spacing_Marks(); - std::vector get_Bidi_Boundary_Neutrals(); - std::vector get_Bidi_PDFs(); - std::vector get_Bidi_Embedding_or_Overrides(); - std::vector get_Bidi_Other_Neutrals(); - std::vector get_Viramas(); - std::vector get_Printables(); - std::vector get_Not_a_Characters(); - std::vector get_Maths(); - std::vector get_Alphabetics(); - std::vector get_Lowercases(); - std::vector get_Uppercases(); - 
std::vector get_Titlecases(); - std::vector get_ID_Starts(); - std::vector get_ID_Continues(); - std::vector get_XID_Starts(); - std::vector get_XID_Continues(); - std::vector get_Decimals(); - std::vector get_Digits(); - std::vector get_Numerics(); - std::vector get_Private_Uses(); + size_type size() const { return my_data.size(); } + size_type length() const { return my_data.length(); } + bool empty() const { return my_data.empty(); } + size_type max_size() const { return my_data.max_size(); } + + // Capacity Operations: + + size_type capacity() const { return my_data.capacity(); } + void reserve() { my_data.reserve(); } + void reserve(const size_type l) { my_data.reserve(l); } + + // Comparisons: + int compare(const String & s) const { return compare(0, npos, s); } + int compare(const size_type, const size_type, + const String &) const; + int compare(const size_type, const size_type, const String &, + const size_type, const size_type) const; + int compare(const char_type *) const; + int compare(const size_type, const size_type, const char_type *) const; + int compare(const size_type, const size_type, + const char_type *, const size_type) const; + + // Character Access + char_type & operator[](const size_type p); + char_type operator[](const size_type p) const { return my_data[p]; } + char_type & at(const size_type p); + char_type at(const size_type p) const { return my_data.at(p); } + + // Generating C-Strings and Character Arrays + + // Modify Operations: + String & assign(const String & s) + { return assign(s, 0, npos); } + String & assign(const String &, + const size_type, const size_type); + String & assign(const char_type *, const Norm = NORM_NONE); + String & assign(const char_type *, const size_type, + const Norm = NORM_NONE); + String & assign(const size_type, const char_type); + + String & operator=(const String s) { return assign(s); } + String & operator=(const char_type * c) { return assign(c); } + String & operator=(char_type c) { return assign(1, 
c); } + + void swap(String &) throw (); + + String & append(const String & s) + { return append(s, 0, npos); } + String & append(const String &, + const size_type, const size_type); + String & append(const char_type *, const size_type, + const Norm = NORM_NONE); + String & append(const char_type *, const Norm = NORM_NONE); + String & append(const size_type, const char_type); + template + String & append(InputIterator, InputIterator); + + void push_back(char_type c) { append(1, c); } + + String & operator+=(const String & s) { return append(s); } + String & operator+=(const char_type * c) { return append(c); } + String & operator+=(const char_type c) { return append(1, c); } + + String & insert(const size_type p, const String & s) + { return insert(p, s, 0, npos); } + String & insert(const size_type, const String &, + const size_type, const size_type); + String & insert(const size_type, + const char_type *, const size_type); + String & insert(const size_type, const char_type *); + String & insert(const size_type, const size_type, const char_type); + void insert(iterator, const size_type, const char_type); + iterator insert(iterator, const char_type); + void insert(iterator, iterator, iterator); + + void clear(); + String & erase(); + String & erase(const size_type); + String & erase(const size_type, const size_type); + String & erase(iterator, iterator); + + void resize(const size_type); + void resize(const size_type, char_type); + + String & replace(const size_type p1, const size_type p2, + const String & s) + { return replace(p1, p2, s, 0, npos); } + String & replace(iterator, iterator, const String &); + String & replace(const size_type, const size_type, + const String &, + const size_type, const size_type); + String & replace(const size_type, const size_type, + const char_type *, const size_type); + String & replace(iterator, iterator, + const char_type *, const size_type); + String & replace(const size_type, const size_type, + const char_type *); + String & 
replace(iterator, iterator, const char_type *); + String & replace(const size_type, const size_type, + const size_type, char_type); + String & replace(iterator, iterator, + const size_type, char_type); + template + String & replace(iterator, iterator, + InputIterator, InputIterator); + + size_type find(const char_type c) const { return my_data.find(c); } + size_type find(const char_type c, const size_type i) + { return my_data.find(c, i); } + size_type rfind(const char_type c) const { return my_data.rfind(c); } + size_type rfind(const char_type c, const size_type i) + { return my_data.rfind(c, i); } + + size_type find(const String & s) const + { return my_data.find(s.my_data); } + size_type find(const String & s, const size_type i) + { return my_data.find(s.my_data, i); } + size_type rfind(const String & s) const + { return my_data.rfind(s.my_data); } + size_type rfind(const String & s, const size_type i) + { return my_data.rfind(s.my_data, i); } + + size_type find(const char_type * c) const { return my_data.find(c); } + size_type find(const char_type * c, const size_type i) + { return my_data.find(c, i); } + size_type rfind(const char_type * c) const { return my_data.rfind(c); } + size_type rfind(const char_type * c, const size_type i) + { return my_data.rfind(c, i); } + + size_type find(const char_type * c, + const size_type i, const size_type l) const + { return my_data.find(c, i, l); } + size_type rfind(const char_type * c, + const size_type i, const size_type l) + { return my_data.rfind(c, i, l); } + + size_type find_first_of(const String & s) const + { return my_data.find_first_of(s.my_data); } + size_type find_first_of(const String & s, const size_type i) + { return my_data.find_first_of(s.my_data, i); } + size_type find_first_not_of(const String & s) const + { return my_data.find_first_not_of(s.my_data); } + size_type find_first_not_of(const String & s, const size_type i) + { return my_data.find_first_not_of(s.my_data, i); } + + size_type 
find_first_of(const char_type * c) const + { return my_data.find_first_of(c); } + size_type find_first_of(const char_type * c, const size_type i) + { return my_data.find_first_of(c, i); } + size_type find_first_not_of(const char_type * c) const + { return my_data.find_first_not_of(c); } + size_type find_first_not_of(const char_type * c, const size_type i) + { return my_data.find_first_not_of(c, i); } + + size_type find_first_of(const char_type * c, const size_type i, + const size_type l) const + { return my_data.find_first_of(c, i, l); } + size_type find_first_not_of(const char_type * c, const size_type i, + const size_type l) const + { return my_data.find_first_not_of(c, i, l); } + + size_type find_first_of(const char_type c) const + { return my_data.find_first_of(c); } + size_type find_first_of(const char_type c, const size_type i) + { return my_data.find_first_of(c, i); } + size_type find_first_not_of(const char_type c) const + { return my_data.find_first_not_of(c); } + size_type find_first_not_of(const char_type c, const size_type i) + { return my_data.find_first_not_of(c, i); } + + size_type find_last_of(const String & s) const + { return my_data.find_last_of(s.my_data); } + size_type find_last_of(const String & s, const size_type i) + { return my_data.find_last_of(s.my_data, i); } + size_type find_flast_not_of(const String & s) const + { return my_data.find_last_not_of(s.my_data); } + size_type find_last_not_of(const String & s, const size_type i) + { return my_data.find_last_not_of(s.my_data, i); } + + size_type find_last_of(const char_type * c) const + { return my_data.find_last_of(c); } + size_type find_last_of(const char_type * c, const size_type i) + { return my_data.find_last_of(c, i); } + size_type find_last_not_of(const char_type * c) const + { return my_data.find_last_not_of(c); } + size_type find_last_not_of(const char_type * c, const size_type i) + { return my_data.find_last_not_of(c, i); } + + size_type find_last_of(const char_type * c, const 
size_type i, + const size_type l) const + { return my_data.find_last_of(c, i, l); } + size_type find_last_not_of(const char_type * c, const size_type i, + const size_type l) const + { return my_data.find_last_not_of(c, i, l); } + + size_type find_last_of(const char_type c) const + { return my_data.find_last_of(c); } + size_type find_last_of(const char_type c, const size_type i) + { return my_data.find_last_of(c, i); } + size_type find_last_not_of(const char_type c) const + { return my_data.find_last_not_of(c); } + size_type find_last_not_of(const char_type c, const size_type i) + { return my_data.find_last_not_of(c, i); } + + // Substrings and String Concatenation: + + String substr() const { return String(my_data.substr()); } + String substr(const size_type i) const + { return String(my_data.substr(i)); } + String substr(const size_type i, const size_type l) const + { return String(my_data.substr(i, l)); } + + // Generating Iterators: - // OPERATORS: - - // UTILITIES: - // void erase(); + iterator begin() { return my_data.begin(); } + const_iterator begin() const { return my_data.begin(); } + iterator end() { return my_data.end(); } + const_iterator end() const { return my_data.end(); } + reverse_iterator rbegin() { return my_data.rbegin(); } + const_reverse_iterator rbegin() const { return my_data.rbegin(); } + reverse_iterator rend() { return my_data.rend(); } + const_reverse_iterator rend() const { return my_data.rend(); } - //. Get a list of paragraphs, their beginnings and ends etc. - Paragraphs get_paragraphs(); + // ------------------------------------------------------------ + // methods *NOT* in basic_string: + // ------------------------------------------------------------ - // DESTRUCTOR: - ~String(); // nothing special needed... 
+ // Constructors: + String(const std::string &, const size_t pos = 0, + const std::string format = Babylon::UTF8_format, + const Babylon::Norm norm = NORM_NONE); + String(const char *, + const std::string format = Babylon::UTF8_format, + const Babylon::Norm norm = NORM_NONE); + String(const char *, const size_t len, + const std::string format = Babylon::UTF8_format, + const Babylon::Norm norm = NORM_NONE); + - protected: - private: - Babylon::Norm m_current_norm; - Prague::Mutex _mutex; - }; // class String + // Conversion: - struct Paragraph { - size_t begin; - size_t end; - Embedding_Levels levels; + std::string + convert(const std::string & format = Babylon::UTF8_format) const + throw (Transfer_Error); + + size_t convert(const std::string &, + const size_t = 0, + const std::string& = Babylon::UTF8_format, + const Babylon::Norm norm = NORM_NONE) + throw (Transfer_Error, std::length_error); + + //. returns the norm the string is in + Norm norm() const { return my_norm; } + //. normalizes the string to the given norm. + void norm(const Norm); + //. sets the norm the String is in. Does NOT change the + //. string itself, so USE WITH CARE! + void override_norm(const Norm n) { my_norm = n; } + //. returns the normalized form of a string without changing it. 
+ String get_normalized(const Norm) const; - Paragraph() : begin(0), end(0), levels() {} - Paragraph(size_t b, size_t e) : begin(b), end(e), levels() {} - }; + void debug_dump() const; - class Paragraph_lt { - public: - Paragraph_lt() {} - bool operator() (const Paragraph &, const Paragraph &); - }; + protected: + private: + Babylon::Norm my_norm; + string_type my_data; + }; // class String + + inline bool operator==(const String & s1, const String & s2) + { return 0 == s1.compare(s2); } + inline bool operator!=(const String & s1, const String & s2) + { return 0 != s1.compare(s2); } + inline bool operator<(const String & s1, const String & s2) + { return 0 > s1.compare(s2); } + inline bool operator>(const String & s1, const String & s2) + { return 0 < s1.compare(s2); } + inline bool operator<=(const String & s1, const String & s2) + { return 0 >= s1.compare(s2); } + inline bool operator>=(const String & s1, const String & s2) + { return 0 <=s1.compare(s2); } - class Paragraph_eq { - public: - Paragraph_eq() {} - bool operator() (const Paragraph &, const Paragraph &); - }; + inline String operator+(const String & s1, const String & s2) + { + String r(s1); + return r.append(s2); + } + inline String operator+(const String & s1, String::char_type * c) + { + String r(s1); + return r.append(c); + } + inline String operator+(String::char_type * c, const String & s1) + { + String r(c); + return r.append(s1); + } + inline String operator+(const String & s1, String::char_type c) + { + String r(s1); + r.push_back(c); + return r; + } + inline String operator+(String::char_type c, const String & s1) + { + String r(1, c); + return r.append(s1); + } + +}; // namespace Babylon -} // namespace Babylon +namespace std +{ -namespace std { - template<> - inline void swap(Babylon::String & a, Babylon::String & b) { - a.swap(b); - } -} + template<> + inline void swap(Babylon::String & a, Babylon::String & b) + { + a.swap(b); + } + +}; // namespace std #endif // _Babylon_String_hh Index: 
defs.hh =================================================================== RCS file: /cvs/fresco/Fresco/Babylon/include/Babylon/defs.hh,v retrieving revision 1.17 retrieving revision 1.18 diff -u -d -r1.17 -r1.18 --- defs.hh 20 Jan 2003 22:18:49 -0000 1.17 +++ defs.hh 1 Aug 2003 16:47:10 -0000 1.18 @@ -1,8 +1,8 @@ /*$Id$ * - * This source file is a part of the Berlin Project. - * Copyright (C) 1999,2000 Tobias Hunger - * http://www.berlin-consortium.org + * This source file is a part of the Fresco Project. + * Copyright (C) 1999-2003 Tobias Hunger + * http://www.fresco.org * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public [...1000 lines suppressed...] - ~Block_Error() throw() {} - const char * what() const throw() - { - std::ostringstream res; - res << std::hex << std::setw(4) << std::setfill('0'); - res << "(" << m_block_start << "-" << m_block_end << "): " - << m_error_message; - return res.str().c_str(); - } - }; // class Block_Error - + MEMORY_ORDER = 0, + VISUAL_ORDER = 1 + }; + typedef enum string_order_enum String_Order; + } // namespace Babylon; -#endif // _Babylon_Defs_hh +#endif --- Dictionary.hh DELETED --- --- traits.hh DELETED --- --- utils.hh DELETED --- --- vis_iterator.hh DELETED --- From tobias at fresco.org Fri Aug 1 16:47:48 2003 From: tobias at fresco.org (Tobias Hunger) Date: Fri Feb 25 22:16:26 2005 Subject: [Fresco-changes] Fresco/Babylon/test ucd.pl,NONE,1.1 Makefile.in,1.5,1.6 Message-ID: Update of /cvs/fresco/Fresco/Babylon/test In directory purcel:/tmp/cvs-serv24256/Babylon/test Modified Files: Makefile.in Added Files: ucd.pl Log Message: Babylon work over: * Use iconv to convert characters to/from Babylon * Updtae to Unicode Version 4.0 * Added unit tests (some fail for now, still need to work on that) * Removed the VisualTextBuffer from the CommandKit: I still like the idea, unfortunately it won't work out in the real world:-| * Updated code to the changed 
Babylon interfaces whereever needed. --- NEW FILE: ucd.pl --- #!/usr/bin/perl -w # use strict; use FileHandle; use IPC::Open2; my $ftp_path = "ftp://ftp.unicode.org/Public/UNIDATA/"; my @registered_tests = ("test_ucd", "test_blocks", "test_linebreak", "test_eawidth", "test_compexclude", "test_properties", "test_coreproperties", "test_normalize"); $MAKE_TOP_BUILDDIR = "@MAKE_TOP_BUILDDIR@"; $dump_ucd = "$MAKE_TOP_BUILDDIR/bin/dump_ucd $MAKE_TOP_BUILDDIR/modules"; $dump_blocks = "$MAKE_TOP_BUILDDIR/bin/dump_blocks $MAKE_TOP_BUILDDIR/modules"; $dump_lb = "$MAKE_TOP_BUILDDIR/bin/dump_lb $MAKE_TOP_BUILDDIR/modules"; $dump_ea = "$MAKE_TOP_BUILDDIR/bin/dump_ea $MAKE_TOP_BUILDDIR/modules"; $dump_compexclude = "$MAKE_TOP_BUILDDIR/bin/dump_compexclude $MAKE_TOP_BUILDDIR/modules"; $dump_props = "$MAKE_TOP_BUILDDIR/bin/dump_props $MAKE_TOP_BUILDDIR/modules"; $normalize = "$MAKE_TOP_BUILDDIR/bin/normalize interactive $MAKE_TOP_BUILDDIR/modules"; # -------------------- my $running_test = ""; sub fail { my $cause = shift; my $descr = shift; print "Test: $running_test\n"; print "Result: FAIL\n"; print "Cause: $cause\n"; print "Description: $descr\n\n"; } sub pass { print "Test: $running_test\n"; print "Result: PASS\n\n"; } sub abort { my $cause = shift; my $descr = shift; my $testname = "INVALID"; $testname = $running_test if ($running_test); print "Test: $testname\n"; print "Result: ERROR\n"; print "Cause: $cause\n"; print "Description: $descr\n\n"; } sub strip_WS { my $line = shift; $line =~ s/\n*$// if ($line); $line =~ s/\s*$// if ($line); return $line; } sub numerically { $a <=> $b; } # -------------------- # This test makes sure the data in Babylon is consistent with # the datafiles found at ftp://ftp.unicode.org/ sub test_ucd { my $unicode_ucd = "UnicodeData.txt"; system("if [ ! 
-f $unicode_ucd ] ; then wget $ftp_path$unicode_ucd ; fi"); unless (open UCD, "./$unicode_ucd") { abort("Can't open '$unicode_ucd'", "Error opening file '$unicode_ucd'."); return; } unless (open BABYLON, "$dump_ucd |") { abort("Can't find dump_ucd demo.", "Error opening pipe."); return; } my $diff = " NOTHING COMPARED."; while() { # remove comments and names from UCD file, eval digit values: my @parts = split ';', strip_WS($_), 15; $parts[1] = ""; if ($parts[6] ne "") { $parts[6] = sprintf "%.6f", eval "$parts[6]"; } if ($parts[7] ne "") { $parts[7] = sprintf "%.6f", eval "$parts[7]"; } if ($parts[8] ne "") { $parts[8] = sprintf "%.6f", eval "$parts[8]"; } $parts[10] = ""; $parts[11] = ""; if ($parts[12] eq $parts[0]) { $parts[12] = ""; } if ($parts[13] eq $parts[0]) { $parts[13] = ""; } if ($parts[14] eq $parts[0]) { $parts[14] = ""; } my $ucd_line = strip_WS(join ";", @parts); next unless ($ucd_line); # get line from dump: my $babylon_line = ; $babylon_line = strip_WS($babylon_line); # compare if ($ucd_line ne $babylon_line) { $diff = " UCD : '$ucd_line'\n BABYLON: '$babylon_line'"; last; } $diff = ""; } close BABYLON; close UCD; pass() unless ($diff); fail("dump_ucd output differs from the file downloaded.", "First difference:\n$diff") if ($diff); } sub test_blocks { my $unicode_blocks = "Blocks.txt"; system ("if [ ! 
-f $unicode_blocks ] ; then wget $ftp_path$unicode_blocks ; fi"); unless (open UCD, $unicode_blocks) { abort("Can't open $unicode_blocks", "Error opening file $unicode_blocks"); return; } unless (open BABYLON, "$dump_blocks |") { abort("Can't find dump_blocks demo.", "Error opening pipe."); return; } my $diff = " NOTHING COMPARED."; while() { # remove comments my $block_line = strip_WS($_); $block_line =~ s/#.*$//; next unless ($block_line); # get line from dump: my $babylon_line = ; $babylon_line = strip_WS($babylon_line); # compare if ($block_line ne $babylon_line) { $diff = " UCD : '$block_line'\n BABYLON: '$babylon_line'"; last; } $diff = ""; } close BABYLON; close UCD; pass() unless ($diff); fail("dump_blocks output differs from the file downloaded.", "First difference:\n$diff") if ($diff); } sub test_linebreak { my $unicode_lb = "LineBreak.txt"; system ("if [ ! -f $unicode_lb ] ; then wget $ftp_path$unicode_lb ; fi"); unless (open UCD, $unicode_lb) { abort("Can't open $unicode_lb", "Error opening file $unicode_lb"); return; } unless (open BABYLON, "$dump_lb |") { abort("Can't find dump_lb demo.", "Error opening pipe."); return; } my $diff = " NOTHING COMPARED."; while() { # remove comments my $lb_line = strip_WS($_); $lb_line =~ s/#.*$//; $lb_line = strip_WS($lb_line); next unless ($lb_line); # get line from dump: my $babylon_line = ; $babylon_line = strip_WS($babylon_line); # compare if ($lb_line ne $babylon_line) { $diff = " UCD : '$lb_line'\n BABYLON: '$babylon_line'"; last; } $diff = ""; } close BABYLON; close UCD; pass() unless ($diff); fail("dump_lb output differs from the file downloaded.", "First difference:\n$diff") if ($diff); } sub test_eawidth { my $unicode_ea = "EastAsianWidth.txt"; system ("if [ ! 
-f $unicode_ea ] ; then wget $ftp_path$unicode_ea ; fi");

    # Body of test_eawidth: compare the East Asian Width class of every
    # code point listed in EastAsianWidth.txt with the class printed by
    # the dump_ea tool.
    unless (open UCD, $unicode_ea)
    {
        abort("Can't open $unicode_ea",
              "Error opening file $unicode_ea");
        return;
    }
    unless (open BABYLON, "$dump_ea |")
    {
        abort("Can't find dump_ea demo.",
              "Error opening pipe.");
        return;
    }

    # code point => "UCDCLASS", later extended to "UCDCLASS:BABYLONCLASS"
    my %eawidth;

    while(<UCD>)
    {
        # range entry: FIRST..LAST;CLASS
        if (/([A-F0-9]+)\.\.([A-F0-9]+);([A-Za-z]+)/)
        {
            for (my $i = hex($1); $i <= hex($2); $i++)
            {
                $eawidth{$i} = $3;
            }
            next;
        }
        # single entry: CODE;CLASS
        if (/^([A-F0-9]+);([A-Za-z]+)/)
        {
            # This pattern has only two groups, the class is $2 (the
            # original stored $3, which is never set here).
            $eawidth{hex($1)} = $2;
            next;
        }
    }

    while(<BABYLON>)
    {
        chop;
        if (/^([A-F0-9]+);([A-Za-z]+)/)
        {
            $eawidth{hex($1)} .= ":$2";
        }
    }

    close BABYLON;
    close UCD;

    my $diff_str = " UCD BABYLON\n --------------------\n";
    my $diff = 0;

    # compute diff:
    # NOTE(review): only code points reported by both sources match the
    # pattern below, so the two sprintf lines can never fire and entries
    # present on one side only are silently ignored -- confirm whether
    # that is intended.
    foreach my $item (sort numerically keys %eawidth)
    {
        if ($eawidth{$item} =~ /([A-Za-z]+):([A-Za-z]+)/)
        {
            $diff = 1 if ($1 ne $2);
            $diff_str .= sprintf " %8X\n", $item if ($1 eq "");
            $diff_str .= sprintf " %8X\n", $item if ($2 eq "");
        }
    }

    pass() unless ($diff);
    fail("dump_ea output differs from the file downloaded.",
         "First difference:\n$diff_str") if ($diff);
}

sub test_compexclude { my $unicode_compexclude = "CompositionExclusions.txt"; system ("if [ !
-f $unicode_compexclude ] ; then wget $ftp_path$unicode_compexclude ; fi"); unless (open UCD, $unicode_compexclude) { abort("Can't open $unicode_compexclude", "Error opening file $unicode_compexclude"); return; } unless (open BABYLON, "$dump_compexclude |") { abort("Can't find dump_compexclude demo.", "Error opening pipe."); return; } my %exclude; while() { if (/^(\# )?([A-F0-9]+)\.\.([A-F0-9]+)\s/) { for (my $i = hex($2); $i <= hex($3); $i++) { $exclude{$i} = "UCD" unless ($1); $exclude{$i} = "*UCD" if ($1); } next; } if (/^(\# )?([A-F0-9]+)\s/) { $exclude{hex($2)} = "UCD" unless ($1); $exclude{hex($2)} = "*UCD" if ($1); next; } } while() { chop; if (/^(\# )?([A-F0-9]+)/) { $exclude{hex($2)} .= "BABYLON" unless ($1); $exclude{hex($2)} .= "BABYLON*" if ($1); } } close BABYLON; close UCD; my $diff_str = " UCD BABYLON\n --------------------\n"; my $diff = 0; # compute diff: foreach my $item (sort numerically keys %exclude) { if ($exclude{$item} eq "UCD") { $diff_str .= sprintf " %8X\n", $item; $diff = 1; next; } if ($exclude{$item} eq "*UCD") { $diff_str .= sprintf " #%8X\n", $item; $diff = 1; next; } if ($exclude{$item} eq "BABYLON") { $diff_str .= sprintf " %8X\n", $item; $diff = 1; next; } if ($exclude{$item} eq "BABYLON*") { $diff_str .= sprintf " #%8X\n", $item; $diff = 1; next; } } pass() unless ($diff); fail("dump_compexclude output differs from the file downloaded.", "Difference:\n$diff_str") if ($diff); } sub properties_compare { my $prop = shift; my $data = shift; my $results = shift; unless (open BABYLON, "$dump_props $prop |") { abort("Can't find dump_props demo.", "Error opening pipe."); return; } while () { chop; $data->{hex($_)} .= "BABYLON" if (/^([A-F0-9]+)$/); } close BABYLON; # compute diff: my $diff_str = " $prop:\n"; foreach my $item (sort numerically keys %$data) { if ($data->{$item} eq $prop) { $diff_str .= sprintf " %8X\n", $item; next; } if ($data->{$item} eq "BABYLON") { $diff_str .= sprintf " %8X\n", $item; next; } # $diff_str .= sprintf " 
%8X--%8X\n", $item, $item; } $results->{$prop} = $diff_str; }

# Reads a UCD property file (lines of the form "CODE ; Property" or
# "FIRST..LAST ; Property"), collects the code points of each property
# and hands every property to properties_compare(), which checks it
# against the dump_props tool.  Reports PASS when no property differs,
# FAIL with the per-property differences otherwise.
#
# Argument: name of the property file; it is fetched with wget when it
# is not present in the current directory.
sub properties_tester
{
    my $unicode_props = shift;

    system ("if [ ! -f $unicode_props ] ; then wget $ftp_path$unicode_props ; fi");
    unless (open UCD, $unicode_props)
    {
        abort("Can't open $unicode_props",
              "Error opening file $unicode_props");
        return;
    }

    my %prop;              # code point => property currently collected
    my %results;           # property   => difference report
    my $current_prop = "";

    while(<UCD>)
    {
        # The original read "chop:", which is a label and trimmed nothing.
        chomp;

        if (/^([A-F0-9]+)\.\.([A-F0-9]+)\s*; ([A-Za-z_]+)/)
        {
            my ($first, $last, $name) = (hex($1), hex($2), $3);

            # A new property starts: evaluate the one collected so far.
            if ($current_prop && $name ne $current_prop)
            {
                properties_compare($current_prop, \%prop, \%results);
                %prop = ();
            }
            $current_prop = $name;

            for (my $i = $first; $i <= $last; $i++)
            {
                $prop{$i} = $current_prop;
            }
            next;
        }

        if (/^([A-F0-9]+)\s*; ([A-Za-z_]+)/)
        {
            my ($code, $name) = (hex($1), $2);

            if ($current_prop && $name ne $current_prop)
            {
                properties_compare($current_prop, \%prop, \%results);
                %prop = ();
            }
            $current_prop = $name;

            $prop{$code} = $current_prop;
            next;
        }
    }
    close UCD;

    # The loop above only evaluates a property when the next one begins,
    # so the last property of the file has to be evaluated here.
    properties_compare($current_prop, \%prop, \%results) if ($current_prop);

    my $diff = 0;
    my $diff_str = " UCD BABYLON\n --------------------\n";

    # calculate results:
    foreach my $item (sort keys %results)
    {
        # A report consisting of the heading only means "no difference".
        if ($results{$item} eq " $item:\n")
        {
            $diff_str .= " $item: no difference.\n";
            next;
        }
        $diff = 1;
        $diff_str .= $results{$item};
        # Two further comparisons of the report against the bare property
        # name and against "BABYLON" were dropped: every report starts
        # with the " $item:\n" heading, so they could never match.
    }

    pass() unless ($diff);
    fail("dump_props output differs from the file downloaded.",
         "Difference:\n$diff_str") if ($diff);
}

# Property test for PropList.txt.
sub test_properties
{
    properties_tester("PropList.txt");
}

# Property test for DerivedCoreProperties.txt.
sub test_coreproperties
{
    properties_tester("DerivedCoreProperties.txt");
}

sub test_normalize { my $unicode_normalize = "NormalizationTest.txt"; system ("if [ !
-f $unicode_normalize ] ; then wget $ftp_path$unicode_normalize ; fi");

    # Body of test_normalize: feed every test line of
    # NormalizationTest.txt to the interactive normalize tool and compare
    # the returned columns with the expected ones.
    unless (open UCD, $unicode_normalize)
    {
        # The original interpolated $unicode_ea here, which is not set
        # in this sub, so the message named no file at all.
        abort("Can't open $unicode_normalize",
              "Error opening file $unicode_normalize");
        return;
    }

    my $pid;
    unless ($pid = open2( \*Reader, \*Writer, "$normalize"))
    {
        abort("Can't open pipes to $normalize",
              "Error opening pipes.");
        return;
    }

    my $diff = "";
    my $nok = 0;
    my $total = 0;

    while(<UCD>)
    {
        # keep the human readable description for the report
        my $orig = $_;
        $orig =~ s/# \(.*\) (.*)\n$/# $1/;

        # strip comments, skip everything that is not a test line
        s/\s*#.*$//;
        next unless (/^[A-F0-9 ]+;[A-F0-9 ]+;[A-F0-9 ]+;[A-F0-9 ]+;[A-F0-9 ]+;/);
        $total++;

        # @c holds the expected columns c1..c5 (index 0..4); @c1..@c5
        # hold the answers of the tool for each column sent to it.
        my @c = split /;/, $_, 6;
        print Writer "n$c[0]\n";
        my @c1 = split /;/, <Reader>, 6;
        print Writer "c$c[1]\n";
        my @c2 = split /;/, <Reader>, 6;
        print Writer "d$c[2]\n";
        my @c3 = split /;/, <Reader>, 6;
        print Writer "kc$c[3]\n";
        my @c4 = split /;/, <Reader>, 6;
        print Writer "kd$c[4]\n";
        my @c5 = split /;/, <Reader>, 6;

        if ( # NFD
             ($c[2] ne $c1[2]) || ($c[2] ne $c2[2]) || ($c[2] ne $c3[2]) ||
             ($c[4] ne $c4[2]) || ($c[4] ne $c5[2]) )
        {
            $nok++;
            $diff .= " $orig (NFD wrong)\n";
            $diff .= " c3>$c[2]< == D(c1)>$c1[2]< == D(c2)>$c2[2]< == D(c3)>$c3[2]<\n";
            $diff .= " c5>$c[4]< == D(c4)>$c4[2]< == D(c5)>$c5[2]<\n";
        }
        elsif ( # NFKC
                ($c[3] ne $c1[3]) || ($c[3] ne $c2[3]) || ($c[3] ne $c3[3]) ||
                ($c[3] ne $c4[3]) || ($c[3] ne $c5[3]) )
        {
            $nok++;
            $diff .= " $orig (NFKC wrong)\n";
            $diff .= " c4>$c[3]< == KC(c1)>$c1[3]< == KC(c2)>$c2[3]< == KC(c3)>$c3[3]< == KC(c4)>$c4[3]< == KC(c5)>$c5[3]<\n";
        }
        elsif ( # NFC
                ($c[1] ne $c1[1]) || ($c[1] ne $c2[1]) || ($c[1] ne $c3[1]) ||
                ($c[3] ne $c4[1]) || ($c[3] ne $c5[1]) )
        {
            $nok++;
            $diff .= " $orig (NFC wrong)\n";
            $diff .= " c2>$c[1]< == C(c1)>$c1[1]< == C(c2)>$c2[1]< == C(c3)>$c3[1]<\n";
            $diff .= " c4>$c[3]< == C(c4)>$c4[1]< == C(c5)>$c5[1]<\n";
        }
        # NOTE(review): this branch repeats the NFKC condition from above
        # and therefore can never be taken; it presumably was meant to
        # check NFKD -- confirm against the output format of normalize.
        elsif ( # NFKC
                ($c[3] ne $c1[3]) || ($c[3] ne $c2[3]) || ($c[3] ne $c3[3]) ||
                ($c[3] ne $c4[3]) || ($c[3] ne $c5[3]) )
        {
            $nok++;
            $diff .= " $orig (NFKC wrong)\n";
            $diff .= " c4>$c[3]< == KC(c1)>$c1[3]< == KC(c2)>$c2[3]< == KC(c3)>$c3[3]< == KC(c4)>$c4[3]< == KC(c5)>$c5[3]<\n";
        }

        if ($diff)
        {
            print "-- $orig\n";
print $diff; exit 11; } }

    close *Reader;
    close *Writer;
    close UCD;

    pass() unless ($diff);
    # This is the normalization test; the message used to name dump_ea.
    fail("normalize output differs from the file downloaded.",
         "First difference:\n$diff\n Total: $total, not OK: $nok\n") if ($diff);
}

# --------------------

# Runs the registered test called $run_this.  The name is looked up in
# @registered_tests and the sub of the same name is called; names that
# are not registered are reported through abort().
sub run_test
{
    my $run_this = shift;

    foreach my $valid (@registered_tests)
    {
        next if ($valid ne $run_this);
        $running_test = $run_this;
        # call the test sub by its name
        &$running_test();
    }

    # $running_test is still empty when no registered name matched
    abort("Invalid testname given!",
          "$run_this is not a valid test.") unless ($running_test);
    $running_test = "";
}

# Command line handling:
#   list              print the names of all registered tests
#   run [TEST ...]    run the given tests, or all tests if none is given
my $command = shift @ARGV;

die("No command given.\n") if (not $command);

if ($command eq "list")
{
    print join "\n", @registered_tests;
    print "\n";
}

if ($command eq "run")
{
    if (not scalar(@ARGV))
    {
        foreach my $test (@registered_tests)
        {
            run_test($test);
        }
    }
    else
    {
        while(my $param = shift @ARGV)
        {
            run_test($param);
        }
    }
}

exit 0;

Index: Makefile.in =================================================================== RCS file: /cvs/fresco/Fresco/Babylon/test/Makefile.in,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- Makefile.in 6 Apr 2003 16:15:45 -0000 1.5 +++ Makefile.in 1 Aug 2003 16:47:16 -0000 1.6 @@ -34,7 +34,7 @@ # All tests ending in .cc will be build, the rest will be run on # "make run-tests".
-TESTS := +TESTS := ucd.pl CXX_TESTS := $(filter %.cc, $(TESTS)) HDR := @@ -46,6 +46,7 @@ TARGETS := $(patsubst %.cc, ./%, $(CXX_TESTS)) SCRIPTS := $(filter-out %.cc, $(TESTS)) SCRIPT_SOURCES := $(patsubst %, $(srcdir)/%, $(SCRIPTS)) +SCRIPT_TARGETS := $(patsubst %, ./%, $(SCRIPTS)) MANUAL := $(docdir)/html vpath %.hh $(hdir) @@ -58,15 +59,16 @@ all: ifdef FRESCO_TEST_CONFIG -build-tests: $(TARGETS) +build-tests: $(TARGETS) $(SCRIPTS) else build-tests: @echo "you need 'Fresco-Test' in order to build the tests" endif ifdef FRESCO_TEST_CONFIG -run-tests: build-tests $(SCRIPT_SOURCES) - LD_LIBRARY_PATH=`$(FRESCO_TEST_CONFIG) --lib-dir`:$(LD_LIBRARY_PATH) `$(FRESCO_TEST_CONFIG) --test-runner` $(TARGETS) $(SCRIPT_SOURCES) +run-tests: build-tests + @echo Running tests, please wait... + LD_LIBRARY_PATH=`$(FRESCO_TEST_CONFIG) --lib-dir`:$(top_builddir)/lib:$(DESTDIR)$(libdir):$(LD_LIBRARY_PATH) `$(FRESCO_TEST_CONFIG) --test-runner` $(TARGETS) $(SCRIPT_TARGETS) else run-tests: @echo "you need 'Fresco-Test' in order to build the tests" @@ -76,6 +78,17 @@ @echo linking $(@F) $(CXX) `$(FRESCO_TEST_CONFIG) --libs` -o $@ $< +$(SCRIPTS): + @echo Running sed on scripts... 
+ for THIS_SCRIPT in $(SCRIPTS) ; do \ + cat $(srcdir)/$$THIS_SCRIPT | \ + sed -e "s#@MAKE_BINDIR@#$(bindir)#g" | \ + sed -e "s#@MAKE_SRCDIR@#$(srcdir)#g" | \ + sed -e "s#@MAKE_TOP_BUILDDIR@#$(abs_top_builddir)#g" \ + > $$THIS_SCRIPT ; \ + chmod 755 $$THIS_SCRIPT ; \ + done + depend: $(DEP) doc: # not yet @@ -87,8 +100,8 @@ uninstall: #not yet clean: - rm -f $(TARGETS) - rm -f $(OBJ) $(DEP) $(SYN) $(HARNESS) + rm -f $(TARGETS) $(SCRIPT_TARGETS) + rm -f $(OBJ) $(DEP) $(SYN) $(HARNESS) test.log distclean: clean From tobias at fresco.org Fri Aug 1 16:47:49 2003 From: tobias at fresco.org (Tobias Hunger) Date: Fri Feb 25 22:16:26 2005 Subject: [Fresco-changes] Fresco/Babylon/utils UnicodePluginGenerator.pl,1.6,1.7 Message-ID: Update of /cvs/fresco/Fresco/Babylon/utils In directory purcel:/tmp/cvs-serv24256/Babylon/utils Modified Files: UnicodePluginGenerator.pl Log Message: Babylon work over: * Use iconv to convert characters to/from Babylon * Updtae to Unicode Version 4.0 * Added unit tests (some fail for now, still need to work on that) * Removed the VisualTextBuffer from the CommandKit: I still like the idea, unfortunately it won't work out in the real world:-| * Updated code to the changed Babylon interfaces whereever needed. 
Index: UnicodePluginGenerator.pl =================================================================== RCS file: /cvs/fresco/Fresco/Babylon/utils/UnicodePluginGenerator.pl,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- UnicodePluginGenerator.pl 31 May 2002 23:42:14 -0000 1.6 +++ UnicodePluginGenerator.pl 1 Aug 2003 16:47:16 -0000 1.7 @@ -1,9 +1,10 @@ #!/usr/bin/perl -w use Carp; use strict; -use UnicodePluginGenerator qw( Defined Category CombClass Bidir DecompClass DecompString DecDigitVal - DigitVal NumericVal Mirror Upper Lower Title Linebreak EAWidth Compositions - Block Prop); +use UnicodePluginGenerator qw( Defined Category CombClass Bidir DecompClass + DecompString DecDigitVal DigitVal NumericVal + Mirror Upper Lower Title Linebreak EAWidth + Compositions CompExclude Block Prop); my $UCD_File = "UnicodeData.txt"; my $Block_File = "Blocks.txt"; @@ -11,11 +12,20 @@ my $LB_File = "LineBreak.txt"; my $Exclude_File = "CompositionExclusions.txt"; my $Prop_File = "PropList.txt"; -my $Prefix = "./blocks/"; +my $Prefix = "./modules/"; # make directory if it doesnt exist system ("if [ ! -d $Prefix ] ; then mkdir $Prefix ; fi"); +my $ftp_path = "ftp://ftp.unicode.org/Public/UNIDATA/"; + +# Get files if necessary from ftp.unicode.org +foreach my $file ( $Block_File, $UCD_File, $EA_File, $LB_File, + $Exclude_File, $Prop_File) +{ + system ("if [ ! -f $file ] ; then wget $ftp_path$file ; fi"); +} + ############################################################################ print "Reading data...\n"; @@ -41,7 +51,8 @@ # reading data from the files... 
print " ...compositions\n"; -my $COMP = UnicodePluginGenerator::Compositions->new($UCD_File, $Exclude_File); print " ...props\n"; +my $COMP = UnicodePluginGenerator::Compositions->new($UCD_File); print " ...composition excludes\n"; +my $EXCL = UnicodePluginGenerator::CompExclude->new($Exclude_File); print " ...props\n"; my $PROPS = UnicodePluginGenerator::Props->new($Prop_File); print " ...categories\n"; my $CAT = UnicodePluginGenerator::Category->new($UCD_File); print " ...defines\n"; my $DEF = UnicodePluginGenerator::Defined->new($UCD_File); print " ...combining classes\n"; @@ -59,6 +70,8 @@ my $LB = UnicodePluginGenerator::Linebreak->new($LB_File); print " ...EA width properties\n"; my $EA = UnicodePluginGenerator::EAWidth->new($EA_File); +my @MODULES = ( $DEF, $UPPER, $LOWER, $TITLE, $DDVAL, $DVAL, $NVAL, $CAT, $CCLASS, $BIDIR, $DCLASS, $DSTR, $MIRROR, $LB, $EA, $COMP, $EXCL, $PROPS ); + print "Creating plugins...\n"; # ########################################################################## @@ -84,8 +97,8 @@ /*\$Id$filename * * This source file is a part of the Berlin Project - * Copyright (C) 1999 Tobias Hunger - * http://www.berlin-consortium.org + * Copyright (C) 1999-2003 Tobias Hunger + * http://www.fresco.org * * It was automatically created from the files available at * ftp.unicode.org on $date. 
@@ -107,39 +120,42 @@ */ #include -#include +#include #include #include END - foreach my $obj ( $DEF, $UPPER, $LOWER, $TITLE, $DDVAL, $DVAL, $NVAL, $CAT, $CCLASS, $BIDIR, $DCLASS, $DSTR, $MIRROR, $LB, $EA, $COMP, $PROPS ) { + foreach my $obj ( @MODULES ) { print PLUGIN $obj->include($start, $end); } print PLUGIN <init($start, $end); } print PLUGIN <function($start, $end, $classname); } @@ -176,16 +196,17 @@ # ######################################################################## print PLUGIN <var_def($start, $end); } @@ -194,11 +215,11 @@ # ######################################################################## print PLUGIN <var($start, $end, $classname); } @@ -207,9 +228,10 @@ # ######################################################################## print PLUGIN < Update of /cvs/fresco/Fresco/Babylon/src In directory purcel:/tmp/cvs-serv24256/Babylon/src Modified Files: Char.cc Dictionary.cc Makefile.in String.cc traits.cc Added Files: Boundaries.cc exceptions.cc Removed Files: vis_iterator.cc Log Message: Babylon work over: * Use iconv to convert characters to/from Babylon * Updtae to Unicode Version 4.0 * Added unit tests (some fail for now, still need to work on that) * Removed the VisualTextBuffer from the CommandKit: I still like the idea, unfortunately it won't work out in the real world:-| * Updated code to the changed Babylon interfaces whereever needed. --- NEW FILE: Boundaries.cc --- /*$Id: Boundaries.cc,v 1.1 2003/08/01 16:47:15 tobias Exp $ * * This source file is a part of the Berlin Project. * Copyright (C) 1999-2003 Tobias Hunger * http://www.fresco.org * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, * MA 02139, USA. */ #include #include bool Babylon::is_graphem_cluster_boundary(const Babylon::Char & f, const Babylon::Char & s) { // Break after Start_Of_Text and before End_Of_Text if (UC_START_OF_TEXT == f.value() || UC_END_OF_TEXT == s.value()) return 1; // Don't break between CR and LF if (UC_CARRIAGE_RETURN == f.value() && UC_LINE_FEED == s.value()) return 0; // Break before/after (Control | CR | LF) if (f.is_Control() || s.is_Control()) return 1; // Do not break Hangul syllable sequences. if ((f.is_hangul_L() && (s.is_hangul_L() || s.is_hangul_V() || s.is_hangul_LV() || s.is_hangul_LVT())) || ((f.is_hangul_LV() || f.is_hangul_V()) && (s.is_hangul_V() || s.is_hangul_T())) || ((f.is_hangul_LVT() || f.is_hangul_T()) && s.is_hangul_T())) return 0; // Don't break before extending characters. if (s.is_Grapheme_Extend()) return 0; // Otherwise break everywhere. return 1; } bool Babylon::is_word_boundary(const Babylon::Char & f, const Babylon::Char & s) { if (UC_START_OF_TEXT == f.value() || UC_END_OF_TEXT == s.value()) return 1; return 0; } bool Babylon::is_sentense_boundary(const Babylon::Char & f, const Babylon::Char & s) { if (UC_START_OF_TEXT == f.value() || UC_END_OF_TEXT == s.value()) return 1; return 0; } --- NEW FILE: exceptions.cc --- /*$Id: exceptions.cc,v 1.1 2003/08/01 16:47:15 tobias Exp $ * * This source file is a part of the Fresco Project. 
* Copyright (C) 1999-2003 Tobias Hunger * http://www.fresco.org * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, * MA 02139, USA. */ #include #include #include const char * Babylon::Undefined_Property::what() const throw() { std::ostringstream res; res << std::setw(4) << std::setfill('0') << std::hex; switch (my_error_prop) { case PROP_CHARACTER: res << "(" << my_error_uc << " Character is undefined"; break; case PROP_UNICODE_VALUE: res << "(" << my_error_uc << ") Character has no unicode value.. how did this happen?"; break; case PROP_GEN_CAT: res << "(" << my_error_uc << ") Character has no general category... how did this happen?"; break; case PROP_CHAR_DECOMP: res << "(" << my_error_uc << ") Character has no decomposition"; break; case PROP_COMB_CLASS : res << "(" << my_error_uc << ") Character has no canonical combining class."; break; case PROP_BIDIR_PROPS: res << "(" << my_error_uc << ") Character has no bidir property."; break; case PROP_DEC_DIGIT_VALUE: res << "(" << my_error_uc << ") Character has no decimal digit value."; break; case PROP_DIGIT_VALUE: res << "(" << my_error_uc << ") Character has no digit value."; break; case PROP_NUMERIC_VALUE: res << "(" << my_error_uc << ") Character has no numeric value."; break; case PROP_IS_MIRRORED: res << "(" << my_error_uc << ") Mirroring property missing... 
how did this happen?"; break; case PROP_UPPER_EQUIV: res << "(" << my_error_uc << ") Uppercase equivalent missing... how did this happen?"; break; case PROP_LOWER_EQUIV: res << "(" << my_error_uc << ") Lowercase equivalent missing... how did this happen?"; break; case PROP_TITLE_EQUIV: res << "(" << my_error_uc << ") Titlecase equivalent missing... how did this happen?"; break; case PROP_SCRIPT: res << "(" << my_error_uc << ") Character belongs to no script... how did this happen?"; break; case PROP_EA_WIDTH: res << "(" << my_error_uc << ") EA width property missing... how did this happen?"; break; case PROP_LINE_BREAKING: res << "(" << my_error_uc << ") linebreak property missing... how did this happen?"; break; case PROP_MAX: res << "(" << my_error_uc << ") PROP_MAX throw... how did this happen?"; break; } return res.str().c_str(); } const char * Babylon::Transfer_Error::what() const throw() { switch (my_error) { case TRANS_ICONV_FAILED: return("iconv failed to transform some characters."); case TRANS_CAN_NOT_ENCODE: return("Can not encode from Babylon to foreign format."); default: return("Can not decode from foreign format to Babylon."); } } const char * Babylon::Block_Error::what() const throw() { std::ostringstream res; res << std::hex << std::setw(4) << std::setfill('0'); res << "(" << my_block_start << "-" << my_block_end << "): " << my_error_message; return res.str().c_str(); } Index: Char.cc =================================================================== RCS file: /cvs/fresco/Fresco/Babylon/src/Char.cc,v retrieving revision 1.8 retrieving revision 1.9 diff -u -d -r1.8 -r1.9 --- Char.cc 18 Nov 2002 16:19:27 -0000 1.8 +++ Char.cc 1 Aug 2003 16:47:15 -0000 1.9 @@ -1,9 +1,9 @@ /* *$Id$ * - * This source file is a part of the Berlin Project. - * Copyright (C) 1999,2000 Tobias Hunger - * http://www.berlin-consortium.org + * This source file is a part of the Fresco Project. 
+ * Copyright (C) 1999-2003 Tobias Hunger + * http://www.fresco.org * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -21,228 +21,646 @@ * MA 02139, USA. */ -#include #include -#include - -// Conversion: -Babylon::UTF8_string Babylon::Char::utf8() const throw (Trans_Error) { - Prague::Trace trace("Babylon::Char::utf8()"); - unsigned int chars_needed; - - UCS4 c = m_value; - UTF8_string res; +#include +#include +#include - if (c <= 0x0000007F) chars_needed = 0; - else if (c <= 0x000007FF) chars_needed = 1; - else if (c <= 0x0000FFFF) chars_needed = 2; - else if (c <= 0x001FFFFF) chars_needed = 3; - else if (c <= 0x03FFFFFF) chars_needed = 4; - else if (c <= 0x7FFFFFFF) chars_needed = 5; - else throw Trans_Error(TRANS_CAN_NOT_ENCODE); +#include - for (unsigned int i = chars_needed; i > 0; --i) { - UCS1 t = 0x80; - t |= UCS1(c & 0x3f); - c = c >> 6; - res = t + res; - } +#include - if ( !chars_needed ) { - res = UCS1(c & 0x7F); - } - else { - UCS1 t = 0xFE << (6 - chars_needed); - t |= UCS1(c & 0xFF); - res = t + res; - } - return res; +extern "C" +{ +#include } -Babylon::UTF16_string Babylon::Char::utf16() const throw (Trans_Error) { - Prague::Trace trace("Babylon::Char::utf16()"); - UTF16_string res; - UCS4 c = m_value; - if (c > 0x0010FFFF) - throw Trans_Error(TRANS_CAN_NOT_ENCODE); - if (c < 0x00010000) - res += UCS2(c); - else { - c -= 0x00010000; - UCS2 h = 0xD800; - UCS2 l = 0xDC00; - res += (h | (c >> 10)); - res += (l | (c & 0x3FF)); - } +// Conversion: - return res; -} +std::string Babylon::Char::convert(const std::string format) const + throw (Transfer_Error) +{ + Prague::Trace trace("Babylon::Char::convert(OUT OF Babylon)"); -Babylon::UTF32_string Babylon::Char::utf32() const throw (Trans_Error) { - Prague::Trace trace("Babylon::Char::utf32()"); - if (m_value > 0x10FFFF) - throw Trans_Error(TRANS_CAN_NOT_ENCODE); - UTF32_string res;//(m_value, Babylon::NORM_NONE); - return res 
+= m_value; -} + // prepare input buffer + size_t in_bound(sizeof(UCS4)); -Babylon::UTF8_string::const_iterator -Babylon::Char::utf8(const Babylon::UTF8_string & s, - Babylon::UTF8_string::const_iterator it) - throw (Trans_Error) { - Prague::Trace trace("Babylon::Char::utf8(...)"); - - // rfc2279.txt: The trasfromation of UCS2 to UCS1 should be: - // UCS2 ---> UCS4 ---> UCS1, so surrogates of UCS2 are removed + // prepare output buffer + // 16Byte should be more then enough to encode two characters;-) + std::auto_ptr out_buf(new char[16]); + size_t out_bound(16); - UCS4 c = 0; - unsigned int chars_needed; + iconv_wrapper(Babylon::internal_format, + reinterpret_cast(&my_value), in_bound, + format, out_buf.get(), out_bound); - if ((*it & 0x80) == 0) chars_needed = 0; // *s_it == 0xxx xxxx - else if ((*it & 0x40) == 0) // *s_it == 10xx xxxx, should only - // happen after a character - // starting with 11xx xxxx - throw Trans_Error(TRANS_CAN_NOT_DECODE); - else if ((*it & 0x20) == 0) chars_needed = 1; // *s_it == 110x xxxx - else if ((*it & 0x10) == 0) chars_needed = 2; // *s_it == 1110 xxxx - else if ((*it & 0x08) == 0) chars_needed = 3; // *s_it == 1111 0xxx - else if ((*it & 0x04) == 0) chars_needed = 4; // *s_it == 1111 10xx - else if ((*it & 0x02) == 0) chars_needed = 5; // *s_it == 1111 110x - else throw Trans_Error(TRANS_CAN_NOT_DECODE); // *s_it == 1111 111x, - // should not happen in - // a sequence of UTF8-Characters + // put result into a string + std::string result(out_buf.get(), 16 - out_bound); + + return result; +} - if ( !chars_needed ) { - c = UCS4(*it); +size_t Babylon::Char::convert(const std::string & s, + const size_t pos, + const std::string format) + throw (Transfer_Error) +{ + Prague::Trace trace("Babylon::Char::convert(INTO babylon)"); + + // check wether start position is valid. 
+ if (pos >= s.length()) + { + my_value = Babylon::UC_NULL; + return(std::string::npos); } - else { - c = (*it) & (0x3F >> chars_needed); - for (int i = 1; i <= chars_needed; ++i) { - if ( (++it == s.end()) || ((*it & 0xc0) != 0x80) ) - // either we are at the end of the UTF8-sequence or the current - // character is not 10xx xxxx. - throw Trans_Error(TRANS_CAN_NOT_DECODE); - c = c << 6; - c |= UCS4(*it & 0x3F); - } + // prepare input buffer + size_t in_bound(s.length() - pos); + + // prepare output buffer + UCS4 c; + size_t out_bound(sizeof(UCS4)); - // Now we check the range of the value decodifed, to avoid problems - // of seccurity (Ex: C0 80 is the NULL char). - switch (chars_needed) { - case 1: - if (c < 0x80) throw Trans_Error(TRANS_CAN_NOT_DECODE); - break; - case 2: - if (c < 0x800) throw Trans_Error(TRANS_CAN_NOT_DECODE); - break; - case 3: - if (c < 0x10000) throw Trans_Error(TRANS_CAN_NOT_DECODE); - break; - case 4: - if (c < 0x200000) throw Trans_Error(TRANS_CAN_NOT_DECODE); - break; - case 5: - if (c < 0x4000000) throw Trans_Error(TRANS_CAN_NOT_DECODE); - break; - default: - throw Trans_Error(TRANS_CAN_NOT_DECODE); - } - } - m_value = UCS4(c); - return ++it; -} + iconv_wrapper(format, s.data() + pos, in_bound, + Babylon::internal_format, + reinterpret_cast(&c), out_bound); -Babylon::UTF16_string::const_iterator -Babylon::Char::utf16(const Babylon::UTF16_string & s, - Babylon::UTF16_string::const_iterator it) - throw (Trans_Error) { - Prague::Trace trace("Babylon::Char::utf16(...)"); - UCS4 c = *it; - if (c >= 0xD800 && c <= 0xDFFF) { - // we found part of a surrogate pair... - if (c >= 0xDC00) - // it was a low surrogate... - throw Trans_Error(TRANS_CAN_NOT_DECODE); - ++it; - if (it == s.end() || *it < 0xDC00 || *it > 0xDFFF) - // didn't find a corresponding low surrogate... - throw Trans_Error(TRANS_CAN_NOT_DECODE); - c = (((c & 0x3FF) << 10) | (*it & 0x3FF)) + 0x10000; - } - m_value = c; - return ++it; + my_value = c; + return (0 == in_bound) ? 
std::string::npos : s.length() - in_bound; } -Babylon::UTF32_string::const_iterator -Babylon::Char::utf32(const Babylon::UTF32_string & s, - Babylon::UTF32_string::const_iterator it) - throw (Trans_Error) { - Prague::Trace trace("Babylon::Char::utf32(...)"); - if (*it > 0x10FFFF) - throw Trans_Error(TRANS_CAN_NOT_ENCODE); - m_value = *it; - return ++it; -} +void Babylon::Char::iconv_wrapper(const std::string & in_format, + const char * in_buf, + size_t & in_buf_size, + const std::string & out_format, + char * out_buf, + size_t & out_buf_size) const +{ + // Setup iconv + iconv_t cd = iconv_open(out_format.c_str(), in_format.c_str()); -bool Babylon::Char::is_Alphabetic() const throw (Block_Error) { - Prague::Trace trace("Babylon::Char::is_Alphabetic()"); - Gen_Cat cat = Dictionary::instance()->category(m_value); - return (cat == CAT_Ll || - cat == CAT_Lu || - cat == CAT_Lt || - cat == CAT_Lm || - cat == CAT_Lo || - Dictionary::instance()->is_Other_Alphabetic(m_value)); -} + if (iconv_t(-1) == cd) + throw Trans_Error(Babylon::TRANS_ICONV_FAILED); -bool Babylon::Char::is_ID_Start() const throw (Block_Error) { - Prague::Trace trace("Babylon::Char::is_ID_Start()"); - Gen_Cat cat = Dictionary::instance()->category(m_value); - return (cat == CAT_Ll || - cat == CAT_Lu || - cat == CAT_Lt || - cat == CAT_Lm || - cat == CAT_Lo || - cat == CAT_Nl); -} + // convert + size_t retval = iconv(cd, + const_cast(&in_buf), &in_buf_size, + &out_buf, &out_buf_size); -bool Babylon::Char::is_ID_Continue() const throw (Block_Error) { - Prague::Trace trace("Babylon::Char::is_ID_Continue()"); - Gen_Cat cat = Dictionary::instance()->category(m_value); - return (cat == CAT_Ll || - cat == CAT_Lu || - cat == CAT_Lt || - cat == CAT_Lm || - cat == CAT_Lo || - cat == CAT_Nl || - cat == CAT_Mn || - cat == CAT_Mc || - cat == CAT_Nd || - cat == CAT_Pc); + if (-1 == retval) + { + if (E2BIG != errno) throw Trans_Error(Babylon::TRANS_ICONV_FAILED); + } + + // clean up iconv + iconv_close(cd); } // 
TRANSFORMATIONS: void Babylon::Char::to_lower() - throw (Block_Error) { + throw (Block_Error) +{ Prague::Trace trace("Babylon::Char::to_lower()"); *this = this->lowercase(); } // to_lowercase void Babylon::Char::to_upper() - throw (Block_Error) { + throw (Block_Error) +{ Prague::Trace trace("Babylon::Char::to_upper()"); + *this = this->uppercase(); } // to_uppercase void Babylon::Char::to_title() - throw (Block_Error) { + throw (Block_Error) +{ Prague::Trace trace("Babylon::Char::to_title()"); + *this = this->titlecase(); } // to_titlecase Babylon::String Babylon::Char::decompose() const - throw (Undefined_Property, Block_Error) { - Prague::Trace trace("Babylon::Char::decompose()"); - return String(Dictionary::instance()->decompose(m_value)); + throw (Undefined_Property, Block_Error) +{ + String r; + get_from_UTF32(Dictionary::instance()->decompose(my_value), r); + return r; } // decompose + +bool Babylon::Char::exclude_from_composition() const + throw (Block_Error) +{ + return Dictionary::instance()->exclude_from_composition(my_value); +} + +Babylon::Char Babylon::Char::uppercase() const throw (Block_Error) +{ + return Dictionary::instance()->uppercase(my_value); +} + +Babylon::Char Babylon::Char::lowercase() const throw (Block_Error) +{ + return Dictionary::instance()->lowercase(my_value); +} + +Babylon::Char Babylon::Char::titlecase() const throw (Block_Error) +{ + return Dictionary::instance()->titlecase(my_value); +} + +float Babylon::Char::numeric_value() const + throw (Undefined_Property, Block_Error) +{ + return Dictionary::instance()->numeric_value(my_value); +} + +int Babylon::Char::digit_value() const + throw (Undefined_Property, Block_Error) +{ + return Dictionary::instance()->digit_value(my_value); +} + +int Babylon::Char::dec_digit_value() const + throw (Undefined_Property, Block_Error) +{ + return Dictionary::instance()->dec_digit_value(my_value); +} + +bool Babylon::Char::is_Digit() const throw (Block_Error) +{ + return 
Dictionary::instance()->is_Digit(my_value); +} + +bool Babylon::Char::is_Numeric() const throw (Block_Error) +{ + return Dictionary::instance()->is_Numeric(my_value); +} + +bool Babylon::Char::is_Decimal_Digit() const throw (Block_Error) +{ + return Dictionary::instance()->is_Decimal_Digit(my_value); +} + +std::string Babylon::Char::blockname() const throw (Block_Error) +{ + return Dictionary::instance()->blockname(my_value); +} + +Babylon::Gen_Cat Babylon::Char::category() const + throw (Undefined_Property, Block_Error) +{ + return Dictionary::instance()->category(my_value); +} + +Babylon::Bidir_Props Babylon::Char::direction() const + throw (Undefined_Property, Block_Error) +{ + return Dictionary::instance()->bidir_props(my_value); +} + +Babylon::Can_Comb_Class Babylon::Char::comb_class() const + throw (Undefined_Property, Block_Error) +{ + return Dictionary::instance()->comb_class(my_value); +} + +Babylon::Char_Decomp Babylon::Char::decomp_type() const + throw (Undefined_Property, Block_Error) +{ + return Dictionary::instance()->decomp_type(my_value); +} + +bool Babylon::Char::must_mirror() const + throw (Undefined_Property, Block_Error) +{ + return Dictionary::instance()->must_mirror(my_value); +} + +Babylon::EA_Width Babylon::Char::EA_width() const + throw (Undefined_Property, Block_Error) +{ + return Dictionary::instance()->EA_width(my_value); +} + +Babylon::Line_Break Babylon::Char::linebreak() const + throw (Undefined_Property, Block_Error) +{ + return Dictionary::instance()->linebreak(my_value); +} + +bool Babylon::Char::is_defined() const + throw (Block_Error) +{ + return Dictionary::instance()->is_defined(my_value); +} + +// Properties +bool Babylon::Char::is_White_Space() const throw (Block_Error) +{ + return Dictionary::instance()->is_White_Space(my_value); +} + +bool Babylon::Char::is_Bidi_Control() const throw (Block_Error) +{ + return Dictionary::instance()->is_Bidi_Control(my_value); +} + +bool Babylon::Char::is_Join_Control() const throw 
(Block_Error) +{ + return Dictionary::instance()->is_Join_Control(my_value); +} + +bool Babylon::Char::is_Dash() const throw (Block_Error) +{ + return Dictionary::instance()->is_Dash(my_value); +} + +bool Babylon::Char::is_Hyphen() const throw (Block_Error) +{ + return Dictionary::instance()->is_Hyphen(my_value); +} + +bool Babylon::Char::is_Quotation_Mark() const throw (Block_Error) +{ + return Dictionary::instance()->is_Quotation_Mark(my_value); +} + +bool Babylon::Char::is_Terminal_Punctuation() const throw (Block_Error) +{ + return Dictionary::instance()->is_Terminal_Punctuation(my_value); +} + +bool Babylon::Char::is_Other_Math() const throw (Block_Error) +{ + return Dictionary::instance()->is_Other_Math(my_value); +} + +bool Babylon::Char::is_Hex_Digit() const throw (Block_Error) +{ + return Dictionary::instance()->is_Hex_Digit(my_value); +} + +bool Babylon::Char::is_ASCII_Hex_Digit() const throw (Block_Error) +{ + return Dictionary::instance()->is_ASCII_Hex_Digit(my_value); +} + +bool Babylon::Char::is_Other_Alphabetic() const throw (Block_Error) +{ + return Dictionary::instance()->is_Other_Alphabetic(my_value); +} + +bool Babylon::Char::is_Ideographic() const throw (Block_Error) +{ + return Dictionary::instance()->is_Ideographic(my_value); +} + +bool Babylon::Char::is_Diacritic() const throw (Block_Error) +{ + return Dictionary::instance()->is_Diacritic(my_value); +} + +bool Babylon::Char::is_Extender() const throw (Block_Error) +{ + return Dictionary::instance()->is_Extender(my_value); +} + +bool Babylon::Char::is_Other_Lowercase() const throw (Block_Error) +{ + return Dictionary::instance()->is_Other_Lowercase(my_value); +} + +bool Babylon::Char::is_Other_Uppercase() const throw (Block_Error) +{ + return Dictionary::instance()->is_Other_Uppercase(my_value); +} + +bool Babylon::Char::is_Noncharacter_Code_Point() const throw (Block_Error) +{ + return Dictionary::instance()->is_Noncharacter_Code_Point(my_value); +} + +bool 
Babylon::Char::is_Other_Grapheme_Extend() const throw (Block_Error) +{ + return Dictionary::instance()->is_Other_Grapheme_Extend(my_value); +} + +bool Babylon::Char::is_Grapheme_Link() const throw (Block_Error) +{ + return Dictionary::instance()->is_Grapheme_Link(my_value); +} + +bool Babylon::Char::is_IDS_Binary_Operator() const throw (Block_Error) +{ + return Dictionary::instance()->is_IDS_Binary_Operator(my_value); +} + +bool Babylon::Char::is_IDS_Trinary_Operator() const throw (Block_Error) +{ + return Dictionary::instance()->is_IDS_Trinary_Operator(my_value); +} + +bool Babylon::Char::is_Radical() const throw (Block_Error) +{ + return Dictionary::instance()->is_Radical(my_value); +} + +bool Babylon::Char::is_Unified_Ideograph() const throw (Block_Error) +{ + return Dictionary::instance()->is_Unified_Ideograph(my_value); +} + +bool Babylon::Char::is_Other_Default_Ignorable_Code_Point() const + throw (Block_Error) +{ + return Dictionary::instance()-> + is_Other_Default_Ignorable_Code_Point(my_value); +} + +bool Babylon::Char::is_Deprecated() const throw (Block_Error) +{ + return Dictionary::instance()->is_Deprecated(my_value); +} + +bool Babylon::Char::is_Soft_Dotted() const throw (Block_Error) +{ + return Dictionary::instance()->is_Soft_Dotted(my_value); +} + +bool Babylon::Char::is_Logical_Order_Exception() const throw (Block_Error) +{ + return Dictionary::instance()->is_Logical_Order_Exception(my_value); +} + +bool Babylon::Char::is_Other_ID_Start() const throw (Block_Error) +{ + return Dictionary::instance()->is_Other_ID_Start(my_value); +} + +// Derived Properties +bool Babylon::Char::is_Math() const throw (Block_Error) +{ + return Dictionary::instance()->is_Math(my_value); +} + +bool Babylon::Char::is_Alphabetic() const throw (Block_Error) +{ + return Dictionary::instance()->is_Alphabetic(my_value); +} + +bool Babylon::Char::is_Lowercase() const throw (Block_Error) +{ + return Dictionary::instance()->is_Lowercase(my_value); +} + +bool 
Babylon::Char::is_Uppercase() const throw (Block_Error) +{ + return Dictionary::instance()->is_Uppercase(my_value); +} + +bool Babylon::Char::is_ID_Start() const throw (Block_Error) +{ + return Dictionary::instance()->is_ID_Start(my_value); +} + +bool Babylon::Char::is_ID_Continue() const throw (Block_Error) +{ + return Dictionary::instance()->is_ID_Continue(my_value); +} + +bool Babylon::Char::is_XID_Start() const throw (Block_Error) +{ + return Dictionary::instance()->is_XID_Start(my_value); +} + +bool Babylon::Char::is_XID_Continue() const throw (Block_Error) +{ + return Dictionary::instance()->is_XID_Continue(my_value); +} + +bool Babylon::Char::is_Default_Ignorable_Code_Point() const + throw (Block_Error) +{ + return Dictionary::instance()->is_Default_Ignorable_Code_Point(my_value); +} + +bool Babylon::Char::is_Grapheme_Extend() const throw (Block_Error) +{ + return Dictionary::instance()->is_Grapheme_Extend(my_value); +} + +bool Babylon::Char::is_Grapheme_Base() const throw (Block_Error) +{ + return Dictionary::instance()->is_Grapheme_Base(my_value); +} + +bool Babylon::Char::is_FC_NFKC_Closure() const throw (Block_Error) +{ + return Dictionary::instance()->is_FC_NFKC_Closure(my_value); +} + +bool Babylon::Char::is_Full_Composition_Exclusion() const throw (Block_Error) +{ + return Dictionary::instance()->is_Full_Composition_Exclusion(my_value); +} + +bool Babylon::Char::is_NFD_QuickCheck() const throw (Block_Error) +{ + return Dictionary::instance()->is_NFD_QuickCheck(my_value); +} + +bool Babylon::Char::is_NFC_QuickCheck() const throw (Block_Error) +{ + return Dictionary::instance()->is_NFC_QuickCheck(my_value); +} + +bool Babylon::Char::is_NFKD_QuickCheck() const throw (Block_Error) +{ + return Dictionary::instance()->is_NFKD_QuickCheck(my_value); +} + +bool Babylon::Char::is_NFKC_QuickCheck() const throw (Block_Error) +{ + return Dictionary::instance()->is_NFKC_QuickCheck(my_value); +} + +bool Babylon::Char::is_Expands_On_NFD() const throw (Block_Error) +{ 
+ return Dictionary::instance()->is_Expands_On_NFD(my_value); +} + +bool Babylon::Char::is_Expands_On_NFC() const throw (Block_Error) +{ + return Dictionary::instance()->is_Expands_On_NFC(my_value); +} + +bool Babylon::Char::is_Expands_On_NFKD() const throw (Block_Error) +{ + return Dictionary::instance()->is_Expands_On_NFKD(my_value); +} + +bool Babylon::Char::is_Expands_On_NFKC() const throw (Block_Error) +{ + return Dictionary::instance()->is_Expands_On_NFKC(my_value); +} + +// Further Properties +bool Babylon::Char::is_Space() const throw (Block_Error) +{ + return Dictionary::instance()->is_Space(my_value); +} + +bool Babylon::Char::is_Punctuation() const throw (Block_Error) +{ + return Dictionary::instance()->is_Punctuation(my_value); +} + +bool Babylon::Char::is_Line_Separator() const throw (Block_Error) +{ + return Dictionary::instance()->is_Line_Separator(my_value); +} + +bool Babylon::Char::is_Paragraph_Separator() const throw (Block_Error) +{ + return Dictionary::instance()->is_Paragraph_Separator(my_value); +} + +bool Babylon::Char::is_Currency_Symbol() const throw (Block_Error) +{ + return Dictionary::instance()->is_Currency_Symbol(my_value); +} + +bool Babylon::Char::is_Bidi_Left_to_Right() const throw (Block_Error) +{ + return Dictionary::instance()->is_Bidi_Left_to_Right(my_value); +} + +bool Babylon::Char::is_Bidi_European_Digit() const throw (Block_Error) +{ + return Dictionary::instance()->is_Bidi_European_Digit(my_value); +} + +bool Babylon::Char::is_Bidi_Eur_Num_Separator() const throw (Block_Error) +{ + return Dictionary::instance()->is_Bidi_Eur_Num_Separator(my_value); +} + +bool Babylon::Char::is_Bidi_Eur_Num_Terminator() const throw (Block_Error) +{ + return Dictionary::instance()->is_Bidi_Eur_Num_Terminator(my_value); +} + +bool Babylon::Char::is_Bidi_Arabic_Digit() const throw (Block_Error) +{ + return Dictionary::instance()->is_Bidi_Arabic_Digit(my_value); +} + +bool Babylon::Char::is_Bidi_Common_Separator() const throw (Block_Error) +{ + 
return Dictionary::instance()->is_Bidi_Common_Separator(my_value); +} + +bool Babylon::Char::is_Bidi_Block_Separator() const throw (Block_Error) +{ + return Dictionary::instance()->is_Bidi_Block_Separator(my_value); +} + +bool Babylon::Char::is_Bidi_Segment_Separator() const throw (Block_Error) +{ + return Dictionary::instance()->is_Bidi_Segment_Separator(my_value); +} + +bool Babylon::Char::is_Bidi_Whitespace() const throw (Block_Error) +{ + return Dictionary::instance()->is_Bidi_Whitespace(my_value); +} + +bool Babylon::Char::is_Bidi_Non_spacing_Mark() const throw (Block_Error) +{ + return Dictionary::instance()->is_Bidi_Non_spacing_Mark(my_value); +} + +bool Babylon::Char::is_Bidi_Boundary_Neutral() const throw (Block_Error) +{ + return Dictionary::instance()->is_Bidi_Boundary_Neutral(my_value); +} + +bool Babylon::Char::is_Bidi_PDF() const throw (Block_Error) +{ + return Dictionary::instance()->is_Bidi_PDF(my_value); +} + +bool Babylon::Char::is_Bidi_Embedding_or_Override() const throw (Block_Error) +{ + return Dictionary::instance()->is_Bidi_Embedding_or_Override(my_value); +} + +bool Babylon::Char::is_Bidi_Other_Neutral() const throw (Block_Error) +{ + return Dictionary::instance()->is_Bidi_Other_Neutral(my_value); +} + +bool Babylon::Char::is_Virama() const throw (Block_Error) +{ + return Dictionary::instance()->is_Virama(my_value); +} + +bool Babylon::Char::is_Printable() const throw (Block_Error) +{ + return Dictionary::instance()->is_Printable(my_value); +} + +bool Babylon::Char::is_Titlecase() const throw (Block_Error) +{ + return Dictionary::instance()->is_Titlecase(my_value); +} + +bool Babylon::Char::is_Private_Use() const throw (Block_Error) +{ + return Dictionary::instance()->is_Private_Use(my_value); +} + +bool Babylon::Char::is_Control() const throw (Block_Error) +{ + Gen_Cat c = Dictionary::instance()->category(my_value); + return (CAT_Zl == c || CAT_Zp == c || CAT_Cc == c || CAT_Cf == c); +} + + +bool Babylon::Char::is_hangul_L() const throw 
(Block_Error) +{ + if ((0x1100 <= my_value && 0x1159 >= my_value) || + 0x115f == my_value) return 1; + return 0; +} + +bool Babylon::Char::is_hangul_V() const throw (Block_Error) +{ + if (0x1160 <= my_value && 0x11A2 >= my_value) return 1; + return 0; +} + +bool Babylon::Char::is_hangul_T() const throw (Block_Error) +{ + if (0x11A8 <= my_value && 0x11F9 >= my_value) return 1; + return 0; +} + +bool Babylon::Char::is_hangul_LV() const throw (Block_Error) +{ + if (!(0xAC00 <= my_value && 0xD788 >= my_value)) return 0; + return (Dictionary::instance()->decompose(my_value).length() == 2); +} + +bool Babylon::Char::is_hangul_LVT() const throw (Block_Error) +{ + if (!(0xAC01 <= my_value && 0xD7A3 >= my_value)) return 0; + return (Dictionary::instance()->decompose(my_value).length() == 3); +} + +Babylon::Char::Char(const std::string & s, size_t & pos, + const std::string format) +{ + pos = convert(s, pos, format); +} Index: Dictionary.cc =================================================================== RCS file: /cvs/fresco/Fresco/Babylon/src/Dictionary.cc,v retrieving revision 1.15 retrieving revision 1.16 diff -u -d -r1.15 -r1.16 --- Dictionary.cc 29 Mar 2003 01:51:43 -0000 1.15 +++ Dictionary.cc 1 Aug 2003 16:47:15 -0000 1.16 @@ -1,8 +1,8 @@ /*$Id$ * * This source file is a part of the Berlin Project. - * Copyright (C) 1999,2000 Tobias Hunger - * http://www.berlin-consortium.org + * Copyright (C) 1999-2003 Tobias Hunger + * http://www.fresco.org * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -20,438 +20,722 @@ [...1345 lines suppressed...] + i != my_data.end(); + ++i) + if (i->my_block) + delete i->my_block;