我知道从 windows-1255 到 utf-8 的编码转换问题以前已经有人问过,但我仍然得到了不同的结果,一直无法解决。
第一个问题是“php iconv()或mb_convert_encoding()是否支持windows-1255?”在测试时,它返回几个输出(使用//ignore & //translate),但它根本不能很好地工作。
我查看了 mb_list_encodings() 的输出,其中不包括 windows-1255……用 windows-1255 输入(从网上抓取)反复试验 mb_detect_encoding(),也无法返回正确的字符集……
发布于 2013-03-24 12:59:19
您应该能够使用 strtr,并传入一个包含待转换字符的关联数组(数据可从 MSDN 获取,下面已将其转换为 PHP 数组)。请注意,在此代码中,保留字节值被替换为 U+FFFD 替换字符("\xef\xbf\xbd")。
/**
 * Converts a windows-1255 byte string to UTF-8.
 *
 * Bytes 0x00-0x7F are left untouched. Each byte 0x80-0xFF is replaced by
 * the UTF-8 sequence from the table below; byte values with no entry in
 * the code page become U+FFFD (the Unicode replacement character).
 *
 * @param string $str windows-1255 encoded input
 * @return string UTF-8 encoded output
 */
function win1255ToUtf8($str) {
    // The lookup map is built on the first call and reused afterwards.
    static $map = null;
    if ($map === null) {
        $undef = "\xef\xbf\xbd"; // U+FFFD, used for reserved byte values

        // UTF-8 sequences for bytes 0x80..0xFF, eight entries per row.
        $sequences = array(
            /* 0x80 */ "\xe2\x82\xac", $undef, "\xe2\x80\x9a", "\xc6\x92", "\xe2\x80\x9e", "\xe2\x80\xa6", "\xe2\x80\xa0", "\xe2\x80\xa1",
            /* 0x88 */ "\xcb\x86", "\xe2\x80\xb0", $undef, "\xe2\x80\xb9", $undef, $undef, $undef, $undef,
            /* 0x90 */ $undef, "\xe2\x80\x98", "\xe2\x80\x99", "\xe2\x80\x9c", "\xe2\x80\x9d", "\xe2\x80\xa2", "\xe2\x80\x93", "\xe2\x80\x94",
            /* 0x98 */ "\xcb\x9c", "\xe2\x84\xa2", $undef, "\xe2\x80\xba", $undef, $undef, $undef, $undef,
            /* 0xA0 */ "\xc2\xa0", "\xc2\xa1", "\xc2\xa2", "\xc2\xa3", "\xe2\x82\xaa", "\xc2\xa5", "\xc2\xa6", "\xc2\xa7",
            /* 0xA8 */ "\xc2\xa8", "\xc2\xa9", "\xc3\x97", "\xc2\xab", "\xc2\xac", "\xc2\xad", "\xc2\xae", "\xc2\xaf",
            /* 0xB0 */ "\xc2\xb0", "\xc2\xb1", "\xc2\xb2", "\xc2\xb3", "\xc2\xb4", "\xc2\xb5", "\xc2\xb6", "\xc2\xb7",
            /* 0xB8 */ "\xc2\xb8", "\xc2\xb9", "\xc3\xb7", "\xc2\xbb", "\xc2\xbc", "\xc2\xbd", "\xc2\xbe", "\xc2\xbf",
            /* 0xC0 */ "\xd6\xb0", "\xd6\xb1", "\xd6\xb2", "\xd6\xb3", "\xd6\xb4", "\xd6\xb5", "\xd6\xb6", "\xd6\xb7",
            /* 0xC8 */ "\xd6\xb8", "\xd6\xb9", $undef, "\xd6\xbb", "\xd6\xbc", "\xd6\xbd", "\xd6\xbe", "\xd6\xbf",
            /* 0xD0 */ "\xd7\x80", "\xd7\x81", "\xd7\x82", "\xd7\x83", "\xd7\xb0", "\xd7\xb1", "\xd7\xb2", "\xd7\xb3",
            /* 0xD8 */ "\xd7\xb4", $undef, $undef, $undef, $undef, $undef, $undef, $undef,
            /* 0xE0 */ "\xd7\x90", "\xd7\x91", "\xd7\x92", "\xd7\x93", "\xd7\x94", "\xd7\x95", "\xd7\x96", "\xd7\x97",
            /* 0xE8 */ "\xd7\x98", "\xd7\x99", "\xd7\x9a", "\xd7\x9b", "\xd7\x9c", "\xd7\x9d", "\xd7\x9e", "\xd7\x9f",
            /* 0xF0 */ "\xd7\xa0", "\xd7\xa1", "\xd7\xa2", "\xd7\xa3", "\xd7\xa4", "\xd7\xa5", "\xd7\xa6", "\xd7\xa7",
            /* 0xF8 */ "\xd7\xa8", "\xd7\xa9", "\xd7\xaa", $undef, $undef, "\xe2\x80\x8e", "\xe2\x80\x8f", $undef,
        );

        // Key each sequence by the single source byte it replaces.
        $map = array();
        foreach ($sequences as $offset => $utf8) {
            $map[chr(0x80 + $offset)] = $utf8;
        }
    }

    return strtr($str, $map);
}
我用这个PHP脚本生成了上面的代码:
function win1255ToUtf8($str) {
static $tbl = null;
if (!$tbl) {
$tbl = array_combine(range("\x80", "\xff"), array(
<?php
/**
 * Renders a byte string as PHP source: a double-quoted literal in which
 * every byte is written as a lowercase \xNN escape.
 *
 * @param string $str raw bytes
 * @return string e.g. the three bytes of U+20AC become "\xe2\x82\xac"
 */
function encodeString($str) {
    $escaped = '';
    $length = strlen($str);
    for ($i = 0; $i < $length; $i++) {
        // Single-quoted format string: the backslash and x are emitted literally.
        $escaped .= sprintf('\x%02x', ord($str[$i]));
    }
    return '"' . $escaped . '"';
}
/**
 * Returns the UTF-8 encoding of a single Unicode code point.
 *
 * The code point is packed as a 32-bit little-endian integer (i.e. one
 * UTF-32LE code unit) and transcoded with mbstring.
 *
 * @param int $n Unicode code point
 * @return string UTF-8 byte sequence
 */
function codepointToUtf8($n) {
    $utf32le = pack('V', $n);
    return mb_convert_encoding($utf32le, 'UTF-8', 'UTF-32LE');
}
// Download the code-page chart and strip the markup so only text remains.
$html = file_get_contents('http://msdn.microsoft.com/en-us/goglobal/cc305148.aspx');
if ($html === false) {
    // Without this check a failed download would silently produce a table
    // consisting of 128 replacement characters.
    throw new RuntimeException('Could not download the windows-1255 chart from MSDN');
}
$text = strip_tags($html);

// Pick out every "XX = U+XXXX" entry: a two-digit hex byte value followed
// by a four-digit hex code point.
$found = preg_match_all('/([0-9A-F]{2}) = U\+([0-9A-F]{4})/', $text, $matches, PREG_SET_ORDER);
if (!$found) {
    // Zero matches (or a regex failure) means the page no longer has the
    // expected layout; refuse to emit a bogus table.
    throw new RuntimeException('No "XX = U+XXXX" entries found in the downloaded chart');
}

// Slot i holds the UTF-8 sequence for byte 0x80 + i. Bytes the chart does
// not mention keep the default, U+FFFD.
$table = array_fill(0, 128, "\xef\xbf\xbd");
foreach ($matches as $match) {
    $input = hexdec($match[1]) - 128;
    if ($input >= 0) { // ignore the ASCII half (0x00-0x7F)
        $table[$input] = codepointToUtf8(hexdec($match[2]));
    }
}

// Emit the entries as PHP string literals, comma-separated with a trailing
// comma, wrapped at 68 columns.
$buf = implode(', ', array_map('encodeString', $table)) . ',';
echo wordwrap($buf, 68, "\n "), "\n";
?>
));
}
return strtr($str, $tbl);
}
发布于 2015-05-19 16:43:01
找到了这个,它对我很有效
$content = iconv("CP1255", "UTF-8", $content);
来源:Expert Exchange
发布于 2013-03-24 08:51:25
/**
 * Converts Cyrillic text from Windows-1251 (CP1251) to UTF-8.
 *
 * Handled explicitly:
 *   - 0xC0-0xFF, which CP1251 maps contiguously to U+0410..U+044F;
 *   - the letters that sit outside that run: 0xA8/0xB8 (U+0401/U+0451)
 *     and the Ukrainian 0xAA/0xBA (U+0404/U+0454), 0xB2/0xB3
 *     (U+0406/U+0456), 0xAF/0xBF (U+0407/U+0457).
 *
 * Bytes 0x00-0x7F pass through unchanged. Any other high byte is encoded
 * as if it were ISO-8859-1, which is what the previous utf8_encode()
 * fallback did; that is not a correct CP1251 mapping for every such byte,
 * so use iconv('CP1251', 'UTF-8', ...) when full coverage is needed.
 *
 * Changes from the earlier implementation: the uppercase Ukrainian letters
 * 0xAA, 0xB2 and 0xAF used to be emitted as their lowercase forms, chr()
 * was called with values above 255, and every input byte triggered two
 * linear array scans.
 *
 * @param string $txt CP1251 encoded input
 * @return string UTF-8 encoded output
 */
function cp1251_to_utf8 ($txt) {
    // Byte => UTF-8 sequence; built once, reused on later calls.
    static $map = null;
    if ($map === null) {
        $map = array();

        // Default for every high byte: the two-byte UTF-8 form of the code
        // point equal to the byte value (ISO-8859-1 semantics).
        for ($byte = 0x80; $byte <= 0xFF; $byte++) {
            $map[chr($byte)] = chr(0xC0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3F));
        }

        // 0xC0..0xFF -> U+0410..U+044F, encoded as two-byte UTF-8.
        for ($byte = 0xC0; $byte <= 0xFF; $byte++) {
            $codepoint = 0x0410 + ($byte - 0xC0);
            $map[chr($byte)] = chr(0xC0 | ($codepoint >> 6)) . chr(0x80 | ($codepoint & 0x3F));
        }

        // Letters outside the contiguous run.
        $map["\xA8"] = "\xD0\x81"; // U+0401
        $map["\xB8"] = "\xD1\x91"; // U+0451
        $map["\xAA"] = "\xD0\x84"; // U+0404
        $map["\xBA"] = "\xD1\x94"; // U+0454
        $map["\xB2"] = "\xD0\x86"; // U+0406
        $map["\xB3"] = "\xD1\x96"; // U+0456
        $map["\xAF"] = "\xD0\x87"; // U+0407
        $map["\xBF"] = "\xD1\x97"; // U+0457
    }

    // Every key is exactly one byte, so strtr() performs a plain per-byte
    // substitution in a single pass.
    return strtr($txt, $map);
}
摘自http://forums.adelavida.com/?Idx=php&Status=1291168368
https://stackoverflow.com/questions/15593394
复制相似问题