首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >编码问题...windows-1255到utf 8?

编码问题...windows-1255到utf 8?
EN

Stack Overflow用户
提问于 2013-03-24 07:46:38
回答 4查看 3.6K关注 0票数 1

从windows-1255到utf-8的编码转换在我知道之前就被问过了,但我仍然得到了不同的结果,我无法解决它。

第一个问题是“php iconv()或mb_convert_encoding()是否支持windows-1255?”在测试时,它返回几个输出(使用//ignore & //translate),但它根本不能很好地工作。

我正在查看mb_list_encodings()输出,它不包括window-1255...使用windows-1255输入(从网上抓取)播放和测试mb_detect_encoding()不会返回正确的字符集...

EN

回答 4

Stack Overflow用户

回答已采纳

发布于 2013-03-24 12:59:19

您应该能够使用带有要转换的字符的关联数组的strtr (数据为available from MSDN,并在下面将其转换为PHP数组)。请注意,在此代码中,保留字节值被替换为U+FFFD替换字符("\xef\xbf\xbd")。

代码语言:javascript
复制
function win1255ToUtf8($str) {
    static $tbl = null;
    if (!$tbl) {
        $tbl = array_combine(range("\x80", "\xff"), array(
            "\xe2\x82\xac", "\xef\xbf\xbd", "\xe2\x80\x9a", "\xc6\x92",
            "\xe2\x80\x9e", "\xe2\x80\xa6", "\xe2\x80\xa0", "\xe2\x80\xa1",
            "\xcb\x86", "\xe2\x80\xb0", "\xef\xbf\xbd", "\xe2\x80\xb9",
            "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd",
            "\xef\xbf\xbd", "\xe2\x80\x98", "\xe2\x80\x99", "\xe2\x80\x9c",
            "\xe2\x80\x9d", "\xe2\x80\xa2", "\xe2\x80\x93", "\xe2\x80\x94",
            "\xcb\x9c", "\xe2\x84\xa2", "\xef\xbf\xbd", "\xe2\x80\xba",
            "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd",
            "\xc2\xa0", "\xc2\xa1", "\xc2\xa2", "\xc2\xa3", "\xe2\x82\xaa",
            "\xc2\xa5", "\xc2\xa6", "\xc2\xa7", "\xc2\xa8", "\xc2\xa9",
            "\xc3\x97", "\xc2\xab", "\xc2\xac", "\xc2\xad", "\xc2\xae",
            "\xc2\xaf", "\xc2\xb0", "\xc2\xb1", "\xc2\xb2", "\xc2\xb3",
            "\xc2\xb4", "\xc2\xb5", "\xc2\xb6", "\xc2\xb7", "\xc2\xb8",
            "\xc2\xb9", "\xc3\xb7", "\xc2\xbb", "\xc2\xbc", "\xc2\xbd",
            "\xc2\xbe", "\xc2\xbf", "\xd6\xb0", "\xd6\xb1", "\xd6\xb2",
            "\xd6\xb3", "\xd6\xb4", "\xd6\xb5", "\xd6\xb6", "\xd6\xb7",
            "\xd6\xb8", "\xd6\xb9", "\xef\xbf\xbd", "\xd6\xbb", "\xd6\xbc",
            "\xd6\xbd", "\xd6\xbe", "\xd6\xbf", "\xd7\x80", "\xd7\x81",
            "\xd7\x82", "\xd7\x83", "\xd7\xb0", "\xd7\xb1", "\xd7\xb2",
            "\xd7\xb3", "\xd7\xb4", "\xef\xbf\xbd", "\xef\xbf\xbd",
            "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd",
            "\xef\xbf\xbd", "\xd7\x90", "\xd7\x91", "\xd7\x92", "\xd7\x93",
            "\xd7\x94", "\xd7\x95", "\xd7\x96", "\xd7\x97", "\xd7\x98",
            "\xd7\x99", "\xd7\x9a", "\xd7\x9b", "\xd7\x9c", "\xd7\x9d",
            "\xd7\x9e", "\xd7\x9f", "\xd7\xa0", "\xd7\xa1", "\xd7\xa2",
            "\xd7\xa3", "\xd7\xa4", "\xd7\xa5", "\xd7\xa6", "\xd7\xa7",
            "\xd7\xa8", "\xd7\xa9", "\xd7\xaa", "\xef\xbf\xbd", "\xef\xbf\xbd",
            "\xe2\x80\x8e", "\xe2\x80\x8f", "\xef\xbf\xbd",
        ));
    }
    return strtr($str, $tbl);
}

我用这个PHP脚本生成了上面的代码:

代码语言:javascript
复制
function win1255ToUtf8($str) {
    static $tbl = null;
    if (!$tbl) {
        $tbl = array_combine(range("\x80", "\xff"), array(
            <?php

        function encodeString($str) {
            return '"' . preg_replace('/../', '\x$0', bin2hex($str)) . '"';
        }

        function codepointToUtf8($n) {
            return mb_convert_encoding(pack('V', $n), 'UTF-8', 'UTF-32LE');
        }

        $text = strip_tags( file_get_contents( 'http://msdn.microsoft.com/en-us/goglobal/cc305148.aspx') );
        preg_match_all('/([0-9A-F]{2}) = U\+([0-9A-F]{4})/', $text, $matches, PREG_SET_ORDER);

        $table = array_fill(0, 128, "\xef\xbf\xbd");
        foreach ($matches as $match) {
            $input = hexdec($match[1]) - 128;
            if ($input >= 0) {
                $table[$input] = codepointToUtf8(hexdec($match[2]));
            }
        }

        $buf = '';
        foreach ($table as $from => $to) {
            $buf .= encodeString($to) . ', ';
        }
        echo wordwrap(substr($buf, 0, -1), 68, "\n            "), "\n";

?>
        ));
    }
    return strtr($str, $tbl);
}
票数 3
EN

Stack Overflow用户

发布于 2015-05-19 16:43:01

找到了这个,它对我很有效

代码语言:javascript
复制
$content = iconv("CP1255", "UTF-8", $content);

来源:Expert Exchange

票数 3
EN

Stack Overflow用户

发布于 2013-03-24 08:51:25

代码语言:javascript
复制
function cp1251_to_utf8 ($txt)  {
    $in_arr = array (
        chr(208), chr(192), chr(193), chr(194),
        chr(195), chr(196), chr(197), chr(168),
        chr(198), chr(199), chr(200), chr(201),
        chr(202), chr(203), chr(204), chr(205),
        chr(206), chr(207), chr(209), chr(210),
        chr(211), chr(212), chr(213), chr(214),
        chr(215), chr(216), chr(217), chr(218),
        chr(219), chr(220), chr(221), chr(222),
        chr(223), chr(224), chr(225), chr(226),
        chr(227), chr(228), chr(229), chr(184),
        chr(230), chr(231), chr(232), chr(233),
        chr(234), chr(235), chr(236), chr(237),
        chr(238), chr(239), chr(240), chr(241),
        chr(242), chr(243), chr(244), chr(245),
        chr(246), chr(247), chr(248), chr(249),
        chr(250), chr(251), chr(252), chr(253),
        chr(254), chr(255), chr(170), chr(186), chr(434), chr(435), chr(431), chr(447)
    );   

    $out_arr = array (
        chr(208).chr(160), chr(208).chr(144), chr(208).chr(145),
        chr(208).chr(146), chr(208).chr(147), chr(208).chr(148),
        chr(208).chr(149), chr(208).chr(129), chr(208).chr(150),
        chr(208).chr(151), chr(208).chr(152), chr(208).chr(153),
        chr(208).chr(154), chr(208).chr(155), chr(208).chr(156),
        chr(208).chr(157), chr(208).chr(158), chr(208).chr(159),
        chr(208).chr(161), chr(208).chr(162), chr(208).chr(163),
        chr(208).chr(164), chr(208).chr(165), chr(208).chr(166),
        chr(208).chr(167), chr(208).chr(168), chr(208).chr(169),
        chr(208).chr(170), chr(208).chr(171), chr(208).chr(172),
        chr(208).chr(173), chr(208).chr(174), chr(208).chr(175),
        chr(208).chr(176), chr(208).chr(177), chr(208).chr(178),
        chr(208).chr(179), chr(208).chr(180), chr(208).chr(181),
        chr(209).chr(145), chr(208).chr(182), chr(208).chr(183),
        chr(208).chr(184), chr(208).chr(185), chr(208).chr(186),
        chr(208).chr(187), chr(208).chr(188), chr(208).chr(189),
        chr(208).chr(190), chr(208).chr(191), chr(209).chr(128),
        chr(209).chr(129), chr(209).chr(130), chr(209).chr(131),
        chr(209).chr(132), chr(209).chr(133), chr(209).chr(134),
        chr(209).chr(135), chr(209).chr(136), chr(209).chr(137),
        chr(209).chr(138), chr(209).chr(139), chr(209).chr(140),
        chr(209).chr(141), chr(209).chr(142), chr(209).chr(143),
        chr(209).chr(148), chr(209).chr(404), chr(209).chr(150),
        chr(209).chr(406), chr(209).chr(151), chr(209).chr(407)
    );   

    //$txt = str_replace($in_arr,$out_arr,$txt);
    $txtNew='';
    for($C=0;$C<StrLen($txt);$C++){
    If(In_Array($txt[$C],$in_arr)){
    $txtNew.=$out_arr[Array_Search($txt[$C],$in_arr)];
    }Else{
    $txtNew.=Utf8_Encode($txt[$C]);
    }


    }
    //return $txt;
    return $txtNew;
 }

摘自http://forums.adelavida.com/?Idx=php&Status=1291168368

票数 1
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/15593394

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档