首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >如何在unicode16中将字符串从十进制代码转换为西里尔字母/ C++?

如何在unicode16中将字符串从十进制代码转换为西里尔字母/ C++?
EN

Stack Overflow用户
提问于 2021-02-03 21:54:33
回答 1查看 184关注 0票数 0

在我的c++程序中,我有一个“十进制代码字符串”,我想用unicode字符在一个文件中打印它。

  • 这是十进制代码字符串:&#1082;&#1074;&#1072;&#1088;&#1090;&#1080;&#1088;&#1072;
  • 这是西里尔解码的字符串:Квартира
  • 这是我的代码:
代码语言:cpp
复制
#include "stdafx.h"
#include<algorithm>
#include<cmath>
#include<cstdio>
#include<iostream>
#include<string>

using namespace std;

int _tmain(int argc, _TCHAR* argv[])
{
// string I have:
string decimal_code_string = "&#1082;&#1074;&#1072;&#1088;&#1090;&#1080;&#1088;&#1072;";

FILE *stream;
fopen_s( &stream, "C:\\Users\\Luze\\Desktop\\myFile.txt", "w+, ccs=UTF-8" );
//printf("--> %s", decimal_code_string.c_str()); 
// string I would like to write: "Квартира"
fwprintf(stream, L"%hs", decimal_code_string.c_str());
fclose(stream);

return 0;
}

编辑

我尝试过@MarkRansom解决方案,但它似乎不起作用:

代码语言:cpp
复制
#include "stdafx.h"
#include<iostream>
#include<algorithm>
#include<cmath>
#include<cstdio>

using namespace std;

// Appends the UTF-8 encoding of a single code point (expected to be at most
// 0x10FFFF) to `out`, using one to four bytes depending on its magnitude.
static void append_utf8_codepoint(std::string& out, unsigned int codepoint)
{
    if (codepoint <= 0x7f)
    {
        out.append(1, static_cast<char>(codepoint));
    }
    else if (codepoint <= 0x7ff)
    {
        out.append(1, static_cast<char>(0xc0 | ((codepoint >> 6) & 0x1f)));
        out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
    }
    else if (codepoint <= 0xffff)
    {
        out.append(1, static_cast<char>(0xe0 | ((codepoint >> 12) & 0x0f)));
        out.append(1, static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
        out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
    }
    else
    {
        out.append(1, static_cast<char>(0xf0 | ((codepoint >> 18) & 0x07)));
        out.append(1, static_cast<char>(0x80 | ((codepoint >> 12) & 0x3f)));
        out.append(1, static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
        out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
    }
}

// Converts a null-terminated wide string to UTF-8.
//
// `in` may hold UTF-16 code units (surrogate pairs are combined into a single
// code point) or, where wchar_t is wider, whole code points. Unpaired
// surrogates and values above 0x10FFFF cannot be represented in UTF-8 and are
// replaced by U+FFFD. A null `in` yields an empty string.
std::string wchar_to_UTF8(const wchar_t * in)
{
    const unsigned int replacement_char = 0xfffd;

    std::string out;
    if (in == 0)
        return out;

    for (; *in != 0; ++in)
    {
        const unsigned int unit = static_cast<unsigned int>(*in);
        unsigned int codepoint = unit;

        if (unit >= 0xd800 && unit <= 0xdbff)
        {
            // High surrogate: only meaningful when a low surrogate follows.
            // Reading in[1] is safe because *in is not the terminator.
            const unsigned int next = static_cast<unsigned int>(in[1]);
            if (next >= 0xdc00 && next <= 0xdfff)
            {
                codepoint = 0x10000 + ((unit - 0xd800) << 10) + (next - 0xdc00);
                ++in;  // the low surrogate has been consumed as well
            }
            else
            {
                codepoint = replacement_char;
            }
        }
        else if ((unit >= 0xdc00 && unit <= 0xdfff) || unit > 0x10ffff)
        {
            // Low surrogate without a preceding high one, or not a Unicode value.
            codepoint = replacement_char;
        }

        append_utf8_codepoint(out, codepoint);
    }
    return out;
}

// Converts a null-terminated UTF-8 string to a wide string.
//
// Where wchar_t is wider than 16 bits each decoded code point is stored
// directly; otherwise code points above 0xFFFF are written as a UTF-16
// surrogate pair and code points inside the surrogate range are dropped.
// Decoding is lenient: the number of continuation bytes is not validated, so
// malformed input produces unspecified (but deterministic) characters rather
// than an error. A null `in` yields an empty string.
std::wstring UTF8_to_wchar(const char * in)
{
    std::wstring out;
    if (in == 0)
        return out;

    // Starts at zero so that input beginning with a continuation byte does not
    // read an indeterminate value.
    unsigned int codepoint = 0;
    while (*in != 0)
    {
        const unsigned char ch = static_cast<unsigned char>(*in);
        if (ch <= 0x7f)
            codepoint = ch;                               // single-byte (ASCII)
        else if (ch <= 0xbf)
            codepoint = (codepoint << 6) | (ch & 0x3f);   // continuation byte
        else if (ch <= 0xdf)
            codepoint = ch & 0x1f;                        // lead byte of 2
        else if (ch <= 0xef)
            codepoint = ch & 0x0f;                        // lead byte of 3
        else
            codepoint = ch & 0x07;                        // lead byte of 4
        ++in;

        // The character is complete once the next byte is not a continuation.
        if (((*in & 0xc0) != 0x80) && (codepoint <= 0x10ffff))
        {
            if (sizeof(wchar_t) > 2)
                out.append(1, static_cast<wchar_t>(codepoint));
            else if (codepoint > 0xffff)
            {
                // UTF-16 encodes the offset from 0x10000, not the code point
                // itself, split into two 10-bit halves.
                const unsigned int offset = codepoint - 0x10000;
                out.append(1, static_cast<wchar_t>(0xd800 + (offset >> 10)));
                out.append(1, static_cast<wchar_t>(0xdc00 + (offset & 0x03ff)));
            }
            else if (codepoint < 0xd800 || codepoint >= 0xe000)
                out.append(1, static_cast<wchar_t>(codepoint));
        }
    }
    return out;
}

// Decodes decimal numeric character references ("&#1082;") found in `text`
// into wide characters.
//
// Every well-formed "&#<decimal digits>;" sequence naming a Unicode scalar
// value (1..0x10FFFF, excluding the surrogate range) is replaced by that
// character; when wchar_t is 16 bits wide, code points above 0xFFFF are
// written as a UTF-16 surrogate pair. Anything else, including malformed
// references, is copied through one byte at a time, so non-reference text is
// expected to be plain ASCII.
static std::wstring numeric_entities_to_wide(const std::string& text)
{
    std::wstring out;
    const std::string::size_type size = text.size();
    std::string::size_type pos = 0;

    while (pos < size)
    {
        if (text[pos] == '&' && pos + 1 < size && text[pos + 1] == '#')
        {
            std::string::size_type end = pos + 2;
            unsigned long codepoint = 0;

            // Stop accumulating as soon as the value leaves the Unicode range
            // so that an absurdly long digit run cannot overflow.
            while (end < size && text[end] >= '0' && text[end] <= '9' && codepoint <= 0x10ffff)
            {
                codepoint = codepoint * 10 + static_cast<unsigned long>(text[end] - '0');
                ++end;
            }

            const bool has_digits = end > pos + 2;
            const bool terminated = end < size && text[end] == ';';
            const bool is_scalar = codepoint != 0 && codepoint <= 0x10ffff
                                && (codepoint < 0xd800 || codepoint > 0xdfff);

            if (has_digits && terminated && is_scalar)
            {
                if (sizeof(wchar_t) > 2 || codepoint <= 0xffff)
                {
                    out.append(1, static_cast<wchar_t>(codepoint));
                }
                else
                {
                    const unsigned long offset = codepoint - 0x10000;
                    out.append(1, static_cast<wchar_t>(0xd800 + (offset >> 10)));
                    out.append(1, static_cast<wchar_t>(0xdc00 + (offset & 0x3ff)));
                }
                pos = end + 1;  // skip past the terminating ';'
                continue;
            }
        }

        // Not a valid reference: keep the byte as it is.
        out.append(1, static_cast<wchar_t>(static_cast<unsigned char>(text[pos])));
        ++pos;
    }
    return out;
}

// Demonstrates the conversions: decodes the character references to a wide
// string, converts that to UTF-8, and converts the UTF-8 back to wide text.
int _tmain(int argc, _TCHAR* argv[])
{
    string decimal_code_string = "&#1082;&#1074;&#1072;&#1088;&#1090;&#1080;&#1088;&#1072;";
    printf("decimal_code: %s\n", decimal_code_string.c_str());

    // string I would like to write: "Квартира"
    // The input is ASCII text made of "&#NNNN;" references, so it has to be
    // decoded first; widening it byte by byte would only reproduce the same
    // ASCII characters.
    wstring temp_string = numeric_entities_to_wide(decimal_code_string);
    string utf8_string = wchar_to_UTF8( temp_string.c_str() );
    // printf is variadic: it needs the character pointer, not the string object.
    printf("wchar_to_UTF8: %s\n", utf8_string.c_str());

    // Round trip back to wide characters; %ls is the wide-string conversion.
    wstring wide_string = UTF8_to_wchar( utf8_string.c_str() );
    printf("UTF8_to_wchar: %ls\n", wide_string.c_str());

    cout << "End\n";
    return 0;
}
EN

回答 1

Stack Overflow用户

发布于 2022-01-09 15:44:00

只需提取每个数字字符实体(例如 &#1082;)中的数字:它正是该字符的 Unicode(UTF-32)码点。

提取出这些码点之后,您就可以使用任何 UTF-32 到 UTF-8 的字符串转换算法,例如实验性的 StringSuite C++20 库。

票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/66036280

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档