You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
concurrentqueue/benchmarks/dlib/unicode/unicode_abstract.h

234 lines
6.9 KiB
C++

// Copyright (C) 2007 Davis E. King (davis@dlib.net), and Nils Labugt
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_UNICODe_ABSTRACT_H_
#ifdef DLIB_UNICODe_ABSTRACT_H_
#include "../uintn.h"
#include "../error.h"
#include <string>
#include <fstream>
namespace dlib
{
// ----------------------------------------------------------------------------------------
// unichar: an unsigned 32bit integer type used to hold a single UNICODE character
typedef uint32 unichar;
// ustring: a std::basic_string of unichar used to hold a UNICODE string
typedef std::basic_string<unichar> ustring;
// ----------------------------------------------------------------------------------------
// Tests whether the character ch_ (of any character type T) is a unicode
// combining character.
template <typename T>
bool is_combining_char(
const T ch_
);
/*!
    ensures
        - if (ch_ is a unicode combining character) then
            - returns true
        - else
            - returns false
!*/
// Tests whether ch is a unicode surrogate character.
bool is_surrogate(
unichar ch
);
/*!
    ensures
        - if (ch is a unicode surrogate character) then
            - returns true
        - else
            - returns false
!*/
// Combines a pair of surrogate characters into the single unicode character
// they encode.
unichar surrogate_pair_to_unichar(
unichar first,
unichar second
);
/*!
    requires
        - 0xD800 <= first < 0xDC00
        - 0xDC00 <= second < 0xE000
        - is_surrogate(first) == true
        - is_surrogate(second) == true
    ensures
        - returns the one unicode character obtained by converting the two
          surrogates first and second
!*/
// Splits the unicode character ch into a pair of surrogate characters.
// first and second are output arguments.
void unichar_to_surrogate_pair(
unichar ch,
unichar& first,
unichar& second
);
/*!
    requires
        - ch >= 0x10000 (i.e. is not in Basic Multilingual Plane)
    ensures
        - surrogate_pair_to_unichar(#first,#second) == ch
          (i.e. converts ch into two surrogate characters, which are
          stored into first and second)
!*/
// ----------------------------------------------------------------------------------------
class invalid_utf8_error : public error
{
/*!
    WHAT THIS OBJECT REPRESENTS
        This is the exception thrown by convert_utf8_to_utf32() when it is
        given a string that is not valid UTF-8.  It derives from error and
        is always constructed with the EUTF8_TO_UTF32 error type.
!*/
public:
invalid_utf8_error():error(EUTF8_TO_UTF32) {}
};
// Converts a UTF-8 encoded std::string into a ustring (a string of unichar).
const ustring convert_utf8_to_utf32 (
const std::string& str
);
/*!
    ensures
        - if (str is a valid UTF-8 encoded string) then
            - returns a copy of str that has been converted into a
              unichar string
        - else
            - throws invalid_utf8_error
!*/
// ----------------------------------------------------------------------------------------
// Converts a wide character string into a UTF-32 ustring.  The encoding
// assumed for wstr depends on sizeof(wchar_t), as stated below.
const ustring convert_wstring_to_utf32 (
const std::wstring &wstr
);
/*!
    requires
        - wstr is a valid UTF-16 string when sizeof(wchar_t) == 2
        - wstr is a valid UTF-32 string when sizeof(wchar_t) == 4
    ensures
        - returns wstr converted into a UTF-32 string
!*/
// ----------------------------------------------------------------------------------------
// Converts a UTF-32 ustring into a wide character string.  The encoding of
// the result depends on sizeof(wchar_t), as stated below.
const std::wstring convert_utf32_to_wstring (
const ustring &str
);
/*!
    requires
        - str is a valid UTF-32 encoded string
    ensures
        - returns str converted into a wstring whose encoding is:
            - UTF-16 when sizeof(wchar_t) == 2
            - UTF-32 when sizeof(wchar_t) == 4
!*/
// ----------------------------------------------------------------------------------------
// Converts a multibyte string, encoded according to the current locale,
// into a wide character string.
const std::wstring convert_mbstring_to_wstring (
const std::string &str
);
/*!
    requires
        - str is a valid multibyte string whose encoding is the same as that
          of the current locale setting
    ensures
        - returns str converted into a wstring whose encoding is:
            - UTF-16 when sizeof(wchar_t) == 2
            - UTF-32 when sizeof(wchar_t) == 4
!*/
// ----------------------------------------------------------------------------------------
// Converts a wide character string into a multibyte encoded string.
const std::string convert_wstring_to_mbstring (
const std::wstring &src
);
/*!
    requires
        - src is a valid wide character string whose encoding is the same as
          that of the current locale setting
    ensures
        - returns a multibyte encoded version of src
!*/
// ----------------------------------------------------------------------------------------
template <
typename charT
>
class basic_utf8_ifstream : public std::basic_istream<charT>
{
/*!
    WHAT THIS OBJECT REPRESENTS
        This object represents an input file stream much like the
        normal std::ifstream except that it knows how to read UTF-8
        data.  So when you read characters out of this stream it will
        automatically convert them from the UTF-8 multibyte encoding
        into a fixed width wide character encoding.

        charT is the character type produced by the stream (it is the
        character type of the std::basic_istream base class).
!*/
public:
// Default constructor: creates a stream with no file attached.
basic_utf8_ifstream (
);
/*!
    ensures
        - constructs an input stream that isn't yet associated with
          a file.
!*/
// Constructs the stream and tries to open the file named by a C string.
basic_utf8_ifstream (
const char* file_name,
std::ios_base::openmode mode = std::ios::in
);
/*!
    ensures
        - tries to open the given file for reading by this stream
        - mode is interpreted exactly the same way as the open mode
          argument used by std::ifstream.
!*/
// Constructs the stream and tries to open the file named by a std::string.
basic_utf8_ifstream (
const std::string& file_name,
std::ios_base::openmode mode = std::ios::in
);
/*!
    ensures
        - tries to open the given file for reading by this stream
        - mode is interpreted exactly the same way as the open mode
          argument used by std::ifstream.
!*/
// Tries to open the file named by a std::string.
void open(
const std::string& file_name,
std::ios_base::openmode mode = std::ios::in
);
/*!
    ensures
        - tries to open the given file for reading by this stream
        - mode is interpreted exactly the same way as the open mode
          argument used by std::ifstream.
!*/
// Tries to open the file named by a C string.
void open (
const char* file_name,
std::ios_base::openmode mode = std::ios::in
);
/*!
    ensures
        - tries to open the given file for reading by this stream
        - mode is interpreted exactly the same way as the open mode
          argument used by std::ifstream.
!*/
// Closes whatever file this stream currently has open.
void close (
);
/*!
    ensures
        - any file opened by this stream has been closed
!*/
};
// utf8_uifstream: a basic_utf8_ifstream whose character type is unichar
typedef basic_utf8_ifstream<unichar> utf8_uifstream;
// utf8_wifstream: a basic_utf8_ifstream whose character type is wchar_t
typedef basic_utf8_ifstream<wchar_t> utf8_wifstream;
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_UNICODe_ABSTRACT_H_