最新下载
热门教程
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
PHP实现的中文分词类完整实例
时间:2022-06-24 22:42:43 编辑:袖梨 来源:一聚教程网
代码如下 | 复制代码 |
/**
 * Dictionary-based word segmenter for double-byte encoded text.
 *
 * The input is processed byte-wise: every byte >= 129 is treated as the first
 * byte of a two-byte character, and every run of bytes < 129 is treated as one
 * single-byte chunk. Two-byte characters are grouped into words by matching
 * backwards from the end of the line against the loaded dictionary, trying
 * the longest candidate (at most four characters) first.
 *
 * NOTE(review): the two-byte assumption fits GBK/GB2312-style encodings;
 * UTF-8 input would be paired incorrectly - confirm the encoding of callers.
 */
class Segmentation
{
    /**
     * Behaviour switches.
     *  - lowercase:       lowercase single-byte chunks in the output.
     *  - segment_english: put spaces around punctuation inside single-byte chunks.
     */
    public $options = array('lowercase' => true, 'segment_english' => false);

    /** Name read from the dictionary header, 'Unknown' when none was given. */
    public $dict_name = 'Unknown';

    /** Dictionary used as a set: word (raw bytes) => 1. */
    public $dict_words = array();

    /**
     * Turn lowercasing of single-byte chunks on or off.
     *
     * @param mixed $value Any truthy value enables the option.
     * @return bool Always true.
     */
    public function setLowercase($value)
    {
        $this->options['lowercase'] = (bool) $value;

        return true;
    }

    /**
     * Turn punctuation splitting inside single-byte chunks on or off.
     *
     * @param mixed $value Any truthy value enables the option.
     * @return bool Always true.
     */
    public function setSegmentEnglish($value)
    {
        $this->options['segment_english'] = (bool) $value;

        return true;
    }

    /**
     * Load a word dictionary and add its entries to the current word set.
     *
     * The first line is a header of the form "TYPE" or "TYPE<TAB>NAME"; only
     * the type 'DICT_WORD_W' is accepted. Every following non-empty line is
     * one word. The dictionary name is recorded even when the type is rejected.
     *
     * @param string $dict_file Path of the dictionary file.
     * @return bool True on success; false when the file is missing, cannot be
     *              opened, is empty, or has a different dictionary type.
     */
    public function load($dict_file)
    {
        if (!file_exists($dict_file)) {
            return false;
        }

        $fp = fopen($dict_file, 'r');
        if ($fp === false) {
            return false;
        }

        $header = fgets($fp, 1024);
        if ($header === false) {
            fclose($fp); // do not leak the handle on an empty file
            return false;
        }

        // Trimming first means a header ending in a bare tab yields one field,
        // so the name must be looked up defensively.
        $fields = explode("\t", trim($header));
        $dict_type = $fields[0];
        $this->dict_name = isset($fields[1]) ? $fields[1] : 'Unknown';

        if ($dict_type !== 'DICT_WORD_W') {
            fclose($fp);
            return false;
        }

        // Read whole lines and stop on the first failed read, so neither the
        // end-of-file read nor a blank line adds an empty-string entry.
        while (($line = fgets($fp)) !== false) {
            $word = rtrim($line);
            if ($word !== '') {
                $this->dict_words[$word] = 1;
            }
        }

        fclose($fp);

        return true;
    }

    /**
     * @return string Name taken from the most recently read dictionary header.
     */
    public function getDictName()
    {
        return $this->dict_name;
    }

    /**
     * Segment a string, handling each "\n"-separated line independently.
     *
     * @param string $str Text to segment.
     * @return string|false Segmented text, or false when no dictionary is loaded.
     */
    public function segmentString($str)
    {
        if (count($this->dict_words) === 0) {
            return false;
        }

        return $this->_segmentLines(explode("\n", $str));
    }

    /**
     * Segment the contents of a file line by line.
     *
     * @param string $filename Path of the file to read.
     * @return string|false Segmented text, or false when no dictionary is
     *                      loaded or the file cannot be read.
     */
    public function segmentFile($filename)
    {
        if (count($this->dict_words) === 0) {
            return false;
        }

        $lines = file($filename);
        if ($lines === false) {
            return false;
        }

        return $this->_segmentLines($lines);
    }

    /**
     * Segment each line, end it with " \n", and collapse repeated spaces.
     *
     * @param array $lines Lines of text; trailing whitespace is stripped.
     * @return string The segmented lines joined into one string.
     */
    public function _segmentLines($lines)
    {
        $contents_segmented = '';
        foreach ($lines as $line) {
            $contents_segmented .= $this->_segmentLine(rtrim($line)) . " \n";
        }

        // Every token is emitted with a leading space, so runs of spaces can
        // build up; squeeze them until only single spaces remain.
        while (strpos($contents_segmented, '  ') !== false) {
            $contents_segmented = str_replace('  ', ' ', $contents_segmented);
        }

        return $contents_segmented;
    }

    /**
     * Segment one line into space-prefixed tokens.
     *
     * @param string $str One line of text without trailing whitespace.
     * @return string The tokens, each preceded by a single space ('' for an
     *                empty line).
     */
    public function _segmentLine($str)
    {
        $str_final = '';
        $str_array = array();
        $str_length = strlen($str);

        // Pad so that a lead byte in the last position still has a partner
        // byte to be paired with below.
        if ($str_length > 0 && ord($str[$str_length - 1]) >= 129) {
            $str .= ' ';
        }

        // Pass 1: split into two-byte characters (strings) and runs of
        // single-byte characters (wrapped in an array to tell them apart).
        for ($i = 0; $i < $str_length; $i++) {
            if (ord($str[$i]) >= 129) {
                $str_array[] = $str[$i] . $str[$i + 1];
                $i++;
                continue;
            }

            $run = $str[$i];
            for ($j = $i + 1; $j < $str_length && ord($str[$j]) < 129; $j++) {
                $run .= $str[$j];
            }
            $str_array[] = array($run);
            $i = $j - 1;
        }

        // ASCII punctuation plus tab and form feed, as a PCRE character class.
        $punct = '[!"#$%&\'()*+,\-.\/:;<=>?@\[\\\\\]^_`{|}~\t\f]';

        // Pass 2: walk backwards, emitting the longest dictionary word that
        // ends at the current position.
        $pos = count($str_array);
        while ($pos > 0) {
            $char = $str_array[$pos - 1];

            if (is_array($char)) {
                $chunk = $char[0];
                if ($this->options['segment_english']) {
                    // Isolate each punctuation run, then split adjacent pairs.
                    $chunk = preg_replace('/(' . $punct . '+)/', ' $1 ', $chunk);
                    $chunk = preg_replace('/(' . $punct . ')(' . $punct . ')/', ' $1 $2 ', $chunk);
                }
                if ($this->options['lowercase']) {
                    $chunk = strtolower($chunk);
                }
                $str_final = ' ' . $chunk . $str_final;
                $pos--;
                continue;
            }

            // Build candidates of 1..4 characters ending at $pos. Stop at a
            // single-byte run: it is an array and cannot be part of a word.
            $candidates = array(0 => '');
            $longest = 0;
            $limit = min($pos, 4);
            for ($i = 1; $i <= $limit; $i++) {
                $piece = $str_array[$pos - $i];
                if (is_array($piece)) {
                    break;
                }
                $candidates[$i] = $piece . $candidates[$i - 1];
                $longest = $i;
            }

            // Only multi-character candidates are looked up; a lone character
            // is emitted as-is when nothing longer matches.
            $word_found = 0;
            for ($i = $longest; $i > 1; $i--) {
                if (array_key_exists($candidates[$i], $this->dict_words)) {
                    $word_found = $i;
                    break;
                }
            }

            if ($word_found) {
                $str_final = ' ' . $candidates[$word_found] . $str_final;
                $pos -= $word_found;
            } else {
                $str_final = ' ' . $char . $str_final;
                $pos--;
            }
        }

        return $str_final;
    }
}
相关文章
- 王者荣耀侦探能力大测试攻略 王者荣耀侦探能力大测试怎么过 11-22
- 无期迷途主线前瞻兑换码是什么 11-22
- 原神欧洛伦怎么培养 11-22
- 炉石传说网易云音乐联动怎么玩 11-22
- 永劫无间手游确幸转盘怎么样 11-22
- 无期迷途主线前瞻兑换码是什么 无期迷途主线前瞻直播兑换码介绍 11-22