- 帖子
- 212
- 主题
- 189
- 精华
- 0
- 积分
- 1452
- 贡献
- 1134
- 激情
- 1349
- 阅读权限
- 100
- 最后登录
- 2018-6-13
|
[PHP 语言] 分享PHP自建函数 UTF-8和Unicode编码互转(多语言)
分享PHP自建函数 UTF-8和Unicode编码互转(多语言):
- <?/**
- * //将内容进行UNICODE编码
- * utf-8 转unicode
- *
- * @param string $name
- * @return string
- */
/**
 * Encode a UTF-8 string as a run of upper-case \UXXXX escapes.
 *
 * One escape is produced per UTF-16 code unit, so characters outside the
 * Basic Multilingual Plane come out as a surrogate pair (two escapes).
 * Every character is escaped, including plain ASCII.
 *
 * @param string $name UTF-8 encoded text.
 * @return string Escaped text such as \U6211; an empty string when the input
 *                is empty or iconv cannot convert it.
 */
function utf8_unicode($name){
    // Ask for big-endian explicitly: the byte order of an unsuffixed 'UCS-2'
    // depends on the iconv implementation, which would swap the two bytes of
    // every escape on some platforms.
    $utf16 = iconv('UTF-8', 'UTF-16BE', $name);
    if ($utf16 === false || $utf16 === '') {
        return '';
    }

    $str = '';
    // 'n*' reads the buffer as unsigned 16-bit big-endian integers.
    foreach (unpack('n*', $utf16) as $unit) {
        // %04X zero-pads to four digits, so U+0410 becomes \U0410 rather than \U410.
        $str .= sprintf('\U%04X', $unit);
    }
    return $str;
}
-
- /**
- * unicode 转 utf-8
- *
- * @param string $name
- * @return string
- */
/**
 * Decode \uXXXX escapes (case-insensitive, so \UXXXX too) into UTF-8.
 *
 * Text that is not part of an escape is passed through untouched: its case is
 * preserved and spaces, punctuation and non-ASCII bytes are kept. Consecutive
 * escapes are decoded together as UTF-16, so a surrogate pair yields a single
 * character. A run that iconv cannot convert (for example a lone surrogate)
 * is left in the output as written.
 *
 * @param string $name Text containing \uXXXX escapes.
 * @return string The text with every valid escape replaced by its UTF-8 form.
 */
function unicode_decodessss($name)
{
    $decoded = preg_replace_callback(
        // A literal backslash, 'u' or 'U', then exactly four hex digits; one or more in a row.
        '/(?:\\\\u[0-9a-f]{4})+/i',
        function ($match) {
            // Strip the "\u" markers, leaving only the hex digits of the UTF-16 code units.
            $hex = str_ireplace('\\u', '', $match[0]);
            // Big-endian is requested explicitly so the result does not depend on the platform.
            $utf8 = iconv('UTF-16BE', 'UTF-8', pack('H*', $hex));
            return $utf8 === false ? $match[0] : $utf8;
        },
        $name
    );

    // preg_replace_callback returns null on a PCRE error; fall back to the input.
    return $decoded === null ? $name : $decoded;
}
复制代码 % Q! S/ }% _& j9 y& ]8 u
7 g/ j5 f n: Q+ B) K; P6 E5 [" w& [
4 g8 N/ Q- A' H% T4 Q$ c! P
调用及结果:
// Usage example for the two converters defined above.
$utf8_str = '我';
// Unicode escape of the Chinese character "你" (U+4F60).
$unicode_str = '\u4f60';
// Prints the escape sequence for "我" (U+6211).
echo utf8_unicode($utf8_str) . "<br/>";
// Prints the Chinese character "你".
echo unicode_decodessss($unicode_str);
复制代码 / z3 j/ _$ _9 T. A0 Z1 ~$ k) Z
5 ?) z; s% o, r f u- k
; X6 B& u3 k4 e
注: 由于浏览器默认会解读,所以要看源代码
& N: W) N( i5 _( T0 g+ g3 B
" g% M5 _- f, }
3 F3 R: y2 h) d+ {5 t3 ~0 x% k0 X3 ?# {
其它功能类似的函数, 经测试OK的:
: e8 I7 n2 Q, H, l$ z* L) @' l3 S; h4 S6 |$ R! C9 J9 W
- <?
- // utf8 - unicode this OK
/**
 * Return the code point of a single UTF-8 encoded character.
 *
 * The byte length of the argument selects the decoding: one byte is returned
 * as-is, two to four bytes are unpacked from their lead and continuation bits.
 *
 * @param string $c The bytes of exactly one character (1 to 4 bytes).
 * @return int|null The code point, or null when the length is not 1 to 4.
 */
function utf8_unicode($c) {
    $size = strlen($c);
    if ($size < 1 || $size > 4) {
        return null;
    }

    $lead = ord($c[0]);
    if ($size === 1) {
        return $lead;
    }

    // Payload mask applied to the lead byte, keyed by sequence length.
    $leadMasks = [2 => 0x3f, 3 => 0x1f, 4 => 0x0f];
    $codePoint = $lead & $leadMasks[$size];

    // Each following byte contributes its low six bits.
    for ($i = 1; $i < $size; $i++) {
        $codePoint = ($codePoint << 6) + (ord($c[$i]) & 0x3f);
    }
    return $codePoint;
}
// Demo: print the label followed by the code point computed for the character.
echo "utf8_unicode正常结果为中 实际为:", utf8_unicode('中'), "\r\n";
- ?>
复制代码 . U" P& A! `' t9 K# m% o
0 U1 V4 G' {% S( X( W; g4 H5 O2 _: Y/ A4 C7 N
输出ASCII形式.
- <?
/**
 * Convert a UTF-8 string into decimal HTML numeric character references.
 *
 * Every character, ASCII included, becomes "&#N;" where N is its code point.
 * Sequences of one to four bytes are decoded. Any byte that cannot start a
 * sequence (a stray continuation byte, or 0xF8-0xFF), and any sequence that is
 * truncated or has a bad continuation byte, is emitted as "&#65533;" (U+FFFD)
 * and skipped one byte at a time, so the loop always terminates.
 *
 * @param string $utf8_str UTF-8 encoded text.
 * @return string The concatenated references; an empty string for empty input.
 */
function enode($utf8_str){
    $utf8_str = (string) $utf8_str;
    $len = strlen($utf8_str);
    $scill = '';
    $a = 0;

    while ($a < $len) {
        $lead = ord($utf8_str[$a]);

        // Single-byte (ASCII) character.
        if ($lead <= 0x7F) {
            $scill .= "&#$lead;";
            $a += 1;
            continue;
        }

        // Work out how many continuation bytes follow and the lead byte's payload bits.
        if ($lead >= 0xC0 && $lead <= 0xDF) {
            $extra = 1;
            $ud = $lead & 0x1F;
        } elseif ($lead >= 0xE0 && $lead <= 0xEF) {
            $extra = 2;
            $ud = $lead & 0x0F;
        } elseif ($lead >= 0xF0 && $lead <= 0xF7) {
            $extra = 3;
            $ud = $lead & 0x07;
        } else {
            // Stray continuation byte or 0xF8-0xFF: not a valid lead byte.
            $extra = 0;
            $ud = 0;
        }

        // The whole sequence must lie inside the string.
        $valid = $extra > 0 && $a + $extra < $len;
        for ($k = 1; $valid && $k <= $extra; $k++) {
            $byte = ord($utf8_str[$a + $k]);
            if (($byte & 0xC0) !== 0x80) {
                $valid = false;
            } else {
                $ud = ($ud << 6) | ($byte & 0x3F);
            }
        }

        if ($valid) {
            $scill .= "&#$ud;";
            $a += $extra + 1;
        } else {
            // Replace the offending byte and resynchronise on the next one.
            $scill .= '&#65533;';
            $a += 1;
        }
    }

    return $scill;
}
// Demo: the value after the label is a numeric character reference, which a
// browser renders as the glyph itself, so inspect the page source to see it.
echo "正常结果为台 实际为:".enode('台')."<br>\r\n";
echo "正常结果为风 实际为:".enode('风')."<br>\r\n";
复制代码 : R1 Z3 Y$ d7 U5 T( y
) z: t: Q+ \) P( \" K, O |+ {* ^
结果为:
- 正常结果为台 实际为:&#21488;<br> 正常结果为风 实际为:&#39118;<br>
复制代码
0 q8 ]( V" W C$ f4 b2 m5 ?9 s4 x# \" p$ ~' | N4 s
1 T7 r! Q1 d4 s" f2 R; q2 [0 f; A. _9 T
- /**
- * utf8字符转换成Unicode字符
- * @param [type] $utf8_str Utf-8字符
- * @return [type] Unicode字符
- */
/**
 * Return the code point of the first UTF-8 character of a string, in hex.
 *
 * Handles sequences of one to four bytes. Only the first character is
 * examined; any bytes after it are ignored.
 *
 * @param string $utf8_str UTF-8 encoded text.
 * @return string Lower-case hexadecimal code point without a prefix
 *                (for example "4f60"), or an empty string when the input is
 *                empty or does not begin with a well-formed sequence.
 */
function utf8_str_to_unicode($utf8_str) {
    $utf8_str = (string) $utf8_str;
    $length = strlen($utf8_str);
    if ($length === 0) {
        return '';
    }

    $lead = ord($utf8_str[0]);
    if ($lead < 0x80) {
        // Single-byte (ASCII) character.
        return dechex($lead);
    }

    // The lead byte's high bits give the sequence length; the rest is payload.
    if (($lead & 0xE0) === 0xC0) {
        $extra = 1;
        $unicode = $lead & 0x1F;
    } elseif (($lead & 0xF0) === 0xE0) {
        $extra = 2;
        $unicode = $lead & 0x0F;
    } elseif (($lead & 0xF8) === 0xF0) {
        $extra = 3;
        $unicode = $lead & 0x07;
    } else {
        return '';
    }

    // Reject a sequence that runs past the end of the string.
    if ($length <= $extra) {
        return '';
    }

    for ($i = 1; $i <= $extra; $i++) {
        $byte = ord($utf8_str[$i]);
        if (($byte & 0xC0) !== 0x80) {
            return '';
        }
        $unicode = ($unicode << 6) | ($byte & 0x3F);
    }

    return dechex($unicode);
}
// Demo: output the hexadecimal code point computed for the character.
print utf8_str_to_unicode("你");
复制代码
. x* }( u& o+ ]" k: j; I% z: v9 j
7 v4 b, u/ U5 c* E2 s. ]; D" X# \- { I9 x" g8 v$ p
这个为只输入Unicode的数字编码
) M2 Y1 Q8 W! o( n6 ^/ I k/ Q& e5 G4 u$ G) w
201979 o2 _7 W; K( b. V( K; _2 K4 E. Q
' `/ ?+ Y/ \! o3 ~" b& y, }; \5 v8 L" L
参考:
http://bbs.tianya.cn/post-itinfo-13681-1.shtml
http://www.thinksaas.cn/group/topic/115335/
http://www.jb51.net/article/14831.htm |
|