PHP切割汉字的常用方法实例总结
发布:smiling 来源: PHP粉丝网 添加日期:2021-11-20 11:47:59 浏览: 评论:0
这篇文章主要介绍了PHP切割汉字的常用方法,结合实例形式总结分析了php针对汉字的编码转换、遍历、截取等相关操作技巧,需要的朋友可以参考下。
本文实例讲述了PHP切割汉字的常用方法,分享给大家供大家参考,具体如下:
- <?php
- /*
- @UTF-8编码的字符可能由1~4个字节组成(常用汉字占3个字节)。
- */
- /*--------------------------方法一截取中文字符串方法------------------------------*/
/**
 * Cuts a slice out of a double-byte (GB2312/GBK style) string without
 * splitting a two-byte character in half.
 *
 * Any byte above 0xA0 is treated as the lead byte of a two-byte character;
 * every other byte is a single-byte character. This routine does not
 * understand UTF-8.
 *
 * @param string $str   Source string.
 * @param int    $start Byte offset where the slice begins. A character that
 *                      straddles this offset is skipped rather than halved.
 * @param int    $len   Byte budget. The slice ends with the last character
 *                      that starts before offset $start + $len.
 * @return string The extracted slice ("" when the range is empty).
 */
function msubstr($str, $start, $len)
{
    $tmpstr = "";
    // Never scan past the real end of the string.
    $end = min($start + $len, strlen($str));
    // The scan always begins at byte 0 so that character boundaries are known
    // by the time $start is reached; output only begins at $start.
    for ($i = 0; $i < $end; $i++) {
        $width = ord($str[$i]) > 0xa0 ? 2 : 1;
        if ($i >= $start) {
            $tmpstr .= substr($str, $i, $width);
        }
        // Skip the trail byte of a two-byte character.
        $i += $width - 1;
    }
    return $tmpstr;
}
- /*----------------------------第二种方法-----------------------------------*/
- //截取的是UTF-8字符串
/**
 * Returns the leading part of a UTF-8 string that fits a display-width budget.
 *
 * An ASCII byte costs one unit and any multibyte character costs two units.
 * A multibyte character that would no longer fit is left out entirely, so the
 * result never ends in a partial character.
 *
 * @param string $str UTF-8 encoded source string.
 * @param int    $len Width budget in units (ASCII = 1, multibyte = 2).
 * @return string The prefix of $str that fits the budget.
 */
function utf_substr($str, $len)
{
    $pieces = [];
    $total = strlen($str);
    $offset = 0;
    for ($i = 0; $i < $len && $offset < $total; $i++) {
        $lead = ord($str[$offset]);
        if ($lead > 127) {
            // The lead byte tells how many bytes the character occupies; a
            // stray continuation byte is consumed on its own.
            $bytes = $lead >= 0xf0 ? 4 : ($lead >= 0xe0 ? 3 : ($lead >= 0xc0 ? 2 : 1));
            // A multibyte character consumes a second unit of the budget.
            $i++;
            if ($i < $len) {
                $pieces[] = substr($str, $offset, $bytes);
                $offset += $bytes;
            }
        } else {
            $pieces[] = $str[$offset];
            $offset++;
        }
    }
    return join($pieces);
}
- /*-------------------------------------第三种方法(UTF-8)--------------------------------*/
/**
 * Shortens a UTF-8 string to a display-width budget, appending "..." when
 * (and only when) something was actually cut off.
 *
 * Width is counted as 1 per ASCII character and 2 per multibyte character.
 * Bytes that do not form a recognised UTF-8 sequence (and NUL) are dropped
 * from the result.
 *
 * @param string $string UTF-8 encoded source string.
 * @param int    $length Width budget; three units are reserved for the "...".
 * @return string The original characters if they fit, otherwise a truncated
 *                prefix followed by "...".
 */
function cutstr($string, $length)
{
    // Split into whole UTF-8 characters. The two-byte alternative must be
    // "[\xc2-\xdf][\x80-\xbf]" as one unit, otherwise such characters are
    // torn into two separate matches.
    preg_match_all(
        '/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/',
        $string,
        $info
    );
    $chars = $info[0];

    // If the whole string fits the budget there is nothing to cut.
    $total = 0;
    foreach ($chars as $char) {
        $total += ord($char) > 127 ? 2 : 1;
    }
    if ($total <= $length) {
        return join('', $chars);
    }

    $wordscut = "";
    $j = 0;
    foreach ($chars as $char) {
        $wordscut .= $char;
        $j += ord($char) > 127 ? 2 : 1;
        // Stop once the room reserved for the ellipsis is reached.
        if ($j > $length - 3) {
            return $wordscut . "...";
        }
    }
    return join('', $chars);
}
// Demo: shorten a mixed ASCII/Chinese sample to a width budget of 10 and print it.
$string = '312哈哈,这个组合很难切割哦';
echo cutstr($string, 10);
- /*---------------------------------下面是曾经用过的、截取字符串中第三个字符的代码------------------------------*/
- // $name1 = mysql_result($my_rst,0,"name");
- // $name = preg_match("/([1-9][0-9]+)/",$name1,$r);
- // $name = $r[0];
- // if($name == ""){
- // $name=preg_replace('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,2}'.
- // '((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,1}).*#s',
- // '$1',$name1);
- // }
- /*--------------------------------------------第四种方法(UTF-8)---------------------------------------------*/
/**
 * Truncates a UTF-8 string to a visual length, appending "..." only when
 * part of the string was left out.
 *
 * Visual length is counted as: multibyte character = 1, upper-case ASCII
 * letter = 1, any other single byte (lower-case letters, digits, half-width
 * punctuation) = 0.5.
 *
 * @param string    $sourcestr UTF-8 encoded source string.
 * @param int|float $cutlength Visual length budget.
 * @return string The truncated string, with "..." appended if it was cut.
 */
function cut_str($sourcestr, $cutlength)
{
    $returnstr = '';
    $i = 0; // byte offset into $sourcestr
    $n = 0; // visual length accumulated so far
    $str_length = strlen($sourcestr);
    while ($n < $cutlength && $i < $str_length) {
        $ascnum = ord($sourcestr[$i]);
        if ($ascnum >= 240) {
            // Four-byte sequence (e.g. emoji); keep all four bytes together.
            $bytes = 4;
            $n++;
        } elseif ($ascnum >= 224) {
            // Three-byte sequence (most CJK characters).
            $bytes = 3;
            $n++;
        } elseif ($ascnum >= 192) {
            // Two-byte sequence.
            $bytes = 2;
            $n++;
        } elseif ($ascnum >= 65 && $ascnum <= 90) {
            // Upper-case letters are counted as a full-width character.
            $bytes = 1;
            $n++;
        } else {
            // Other single bytes are counted as half a character.
            $bytes = 1;
            $n = $n + 0.5;
        }
        $returnstr .= substr($sourcestr, $i, $bytes);
        $i += $bytes;
    }
    // Bytes left unread mean the string really was truncated.
    if ($i < $str_length) {
        $returnstr .= "...";
    }
    return $returnstr;
}
- /*--------------------第五种方法(UTF-8)---------------------------------------------*/
/**
 * Classifies the token that starts at byte offset $pos of $text.
 *
 * @param string $text Source text (UTF-8, possibly containing HTML entities).
 * @param int    $pos  Byte offset of the token's first byte.
 * @return array Two elements: the token's byte length and its kind, one of
 *               'narrow' (half-width), 'upper' (upper-case ASCII letter),
 *               'wide' (multibyte character or numeric entity) or
 *               'skip' (stray UTF-8 continuation byte, not counted).
 */
function _fsubstr_token($text, $pos)
{
    $cur = substr($text, $pos, 1);
    if ($cur == "&") {
        // Named entities count as one half-width character.
        foreach (["&lt;", "&gt;", "&amp;", "&quot;"] as $entity) {
            if (substr($text, $pos, strlen($entity)) == $entity) {
                return [strlen($entity), 'narrow'];
            }
        }
        // Numeric entity: \G pins the match to $pos so that an entity
        // further along the string is not mistaken for this token.
        if (preg_match('/\G&#\d+;?/', $text, $match, 0, $pos)) {
            return [strlen($match[0]), 'wide'];
        }
        // A bare ampersand is an ordinary half-width character.
        return [1, 'narrow'];
    }

    $byte = ord($cur);
    if ($byte >= 252) {
        return [6, 'wide'];
    }
    if ($byte >= 248) {
        return [5, 'wide'];
    }
    if ($byte >= 240) {
        return [4, 'wide'];
    }
    if ($byte >= 224) {
        return [3, 'wide'];
    }
    if ($byte >= 192) {
        return [2, 'wide'];
    }
    if ($byte >= 128) {
        return [1, 'skip'];
    }
    if ($byte >= 65 && $byte <= 90) {
        return [1, 'upper'];
    }
    return [1, 'narrow'];
}

/**
 * Extracts part of a UTF-8 title without breaking multibyte characters or
 * HTML entities (&lt; &gt; &amp; &quot; and numeric &#NNN; entities).
 *
 * With $magic enabled, $len is a count of full-width characters: multibyte
 * characters, numeric entities and upper-case ASCII letters count as one,
 * everything else counts as half. With $magic disabled, $len is a plain
 * count of characters/entities.
 *
 * If strlen($title) does not exceed $len, the result is simply
 * substr($title, $start, $len).
 *
 * @param string     $title Source text.
 * @param int        $start Byte offset to start from; if it points into the
 *                          middle of a multibyte character it is moved back
 *                          to that character's lead byte.
 * @param int|string $len   Length budget; "" means "to the end of the string".
 * @param bool       $magic Whether to use width-weighted counting.
 * @return string The extracted substring.
 */
function FSubstr($title, $start, $len = "", $magic = true)
{
    $total = strlen($title);
    if ($len == "") $len = $total;

    if ($start != 0) {
        $startv = ord(substr($title, $start, 1));
        if ($startv >= 128 && $startv < 192) {
            // $start is a continuation byte: walk back to the lead byte.
            for ($i = $start - 1; $i > 0; $i--) {
                if (ord(substr($title, $i, 1)) >= 192) break;
            }
            $start = $i;
        }
    }

    if ($total <= $len) return substr($title, $start, $len);

    $alen = 0;    // half-width characters seen ($magic only)
    $blen = 0;    // full-width characters seen ($magic only)
    $realnum = 0; // characters/entities seen
    $length = 0;  // number of bytes to return

    $i = $start;
    while ($i < $total) {
        list($bytes, $kind) = _fsubstr_token($title, $i);
        $length += $bytes;
        $i += $bytes;
        if ($kind !== 'skip') {
            $realnum++;
        }

        if ($magic) {
            if ($kind === 'narrow') {
                $alen++;
            } elseif ($kind === 'wide' || $kind === 'upper') {
                $blen++;
            }
            $width = $blen * 2 + $alen;
            if ($width == ($len * 2)) break;
            if ($width == ($len * 2) + 1) {
                // Overshot by half a character: drop the wide token that
                // caused it. An upper-case letter is kept, as it is only
                // one byte wide.
                if ($kind === 'wide') {
                    $length -= $bytes;
                }
                break;
            }
        } else {
            if ($realnum == $len) break;
        }
    }

    return substr($title, $start, $length);
}
/**
 * Character-based substring for UTF-8 text, implemented with one regex.
 *
 * Skips up to $from characters, then returns up to the next $len characters.
 * A "character" is either a single ASCII byte or a lead byte (0xC0-0xFF)
 * followed by one or more continuation bytes.
 *
 * @param string $str  UTF-8 encoded source string.
 * @param int    $from Number of characters to skip; negative values are
 *                     treated as 0.
 * @param int    $len  Maximum number of characters to return; negative
 *                     values are treated as 0.
 * @return string|null The extracted substring, or null if preg_replace()
 *                     fails (for example when a count exceeds PCRE's
 *                     quantifier limit).
 */
function utf8Substr($str, $from, $len)
{
    // Both counts are spliced into the pattern, so force them to
    // non-negative integers; anything else would turn the quantifier into
    // literal text and the whole string would come back unchanged.
    $from = max(0, (int) $from);
    $len = max(0, (int) $len);

    $char = '(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+)';
    $pattern = '#^' . $char . '{0,' . $from . '}' .
        '(' . $char . '{0,' . $len . '}).*#s';

    return preg_replace($pattern, '$1', $str);
}
// Demo: print the first 15 characters of a mixed Chinese/ASCII sample.
$title = utf8Substr('你哈珀niad1纳斯达wop asdni你爱谁都没阿斯顿撒旦12ccs- sd', 0, 15);
echo $title;
- ?>
Tags: PHP切割汉字
- 上一篇:PHP实现字符串的全排列详解
- 下一篇:PHP操作路由器实现方法示例
推荐文章
热门文章
最新评论文章
- 写给考虑创业的年轻程序员(10)
- PHP新手上路(一)(7)
- 惹恼程序员的十件事(5)
- PHP邮件发送例子,已测试成功(5)
- 致初学者:PHP比ASP优秀的七个理由(4)
- PHP会被淘汰吗?(4)
- PHP新手上路(四)(4)
- 如何去学习PHP?(2)
- 简单入门级php分页代码(2)
- php中邮箱email 电话等格式的验证(2)