这两段文字有哪些不同之处,能否详细说明一下?
- 内容介绍
- 文章标签
- 相关推荐
本文共计1613个文字,预计阅读时间需要7分钟。
代码片段改写如下:
php
当$value大于127时,根据$value的取值范围将$split设为2、3或4;否则设为1。随后将$key设为NULL,并通过循环依次拼接$split个字节。
127){
if($value >= 192 && $value <= 223)
$split = 2;
elseif($value >= 224 && $value <= 239)
$split = 3;
elseif($value >= 240 && $value <= 247)
$split = 4;
} else {
$split = 1;
}
$key = NULL;
for ( $j = 0; $j < $split; ++$j, ++$i ) {
$key .= $str[$i];
}
$array[] = $key;
//array_push( $array, $key );
}
return $array;
}
/**
 * Reports whether $str contains at least one byte in the range 0x7F-0xFF.
 *
 * The pattern carries no `u` modifier, so the subject is examined byte by
 * byte; every byte of a multi-byte UTF-8 sequence (such as a Chinese
 * character) falls inside this range. Note that the range starts at 0x7F,
 * so the ASCII DEL character matches as well.
 *
 * @param string $str Text to inspect.
 * @return int|false The preg_match() result: 1 if such a byte is present,
 *                   0 if not, false if the regex engine reports an error.
 */
function isHanz($str) {
    $highBytePattern = "/[\\x{7F}-\\x{FF}]/";
    $matched = preg_match($highBytePattern, $str);
    return $matched;
}
/**
 * Returns the elements of $arr from position $start_offset up to, but not
 * including, position $end_offset (a half-open range).
 *
 * @param array $arr           Source array; taken by reference only to avoid
 *                             a copy, it is never modified here.
 * @param int   $start_offset  Position of the first element to return.
 * @param int   $end_offset    Position one past the last element to return.
 * @param bool  $preserve_keys When true, integer keys of $arr are kept in the
 *                             result; when false (default) the result is
 *                             re-indexed from 0.
 * @return array The selected slice; empty when $end_offset <= $start_offset.
 */
function array_slice_byoffset(&$arr,$start_offset,$end_offset,$preserve_keys = false) {
    // array_slice() interprets a negative length as "stop that many elements
    // from the end", which would contradict the half-open range documented
    // above, so an inverted range is clamped to an empty slice.
    $length = max(0, $end_offset - $start_offset);

    // The original passed the bare word `preserve_keys` (no `$`), which is an
    // undefined constant: a fatal Error on PHP 8 and an always-truthy string
    // on older versions. Pass the actual parameter instead.
    return array_slice($arr, $start_offset, $length, (bool) $preserve_keys);
}
/**
 * Tests $subject against every regular-expression fragment in $pattern_array.
 *
 * Each fragment is wrapped in `/` delimiters exactly as given (no escaping is
 * applied), and every fragment is evaluated even after one has matched.
 *
 * @param iterable $pattern_array Regex bodies without delimiters.
 * @param string   $subject       Text to test.
 * @return int 1 if at least one fragment matched, otherwise 0.
 */
function preg_match_array($pattern_array,$subject) {
    $anyMatched = 0;
    foreach ($pattern_array as $fragment) {
        $regex = '/'.$fragment.'/';
        // Bitwise OR accumulates the 0/1 results of the individual matches.
        $anyMatched = $anyMatched | preg_match($regex, $subject);
    }
    return $anyMatched;
}
// Scanner states used by html_diff::ConvertHtmlToListOfWords while it walks
// the input character by character: inside plain text, inside a tag, or
// inside a run of whitespace.
define('MODE_CHARACTER',1);
define('MODE_TAG',2);
define('MODE_WHITESPACE',3);
// Operation codes that html_diff::PerformOperation switches on to decide how
// a diff operation is rendered (ACTION_NONE produces no output).
define('ACTION_EQUAL',1);
define('ACTION_DELETE',2);
define('ACTION_INSERT',3);
define('ACTION_NONE',4);
define('ACTION_REPLACE',5);
class html_diff {
private $specialCaseOpeningTags,$specialCaseClosingTags;
private $content,$wordIndices;
private $oldWords,$newWords;
public function html_diff() {
$this->specialCaseOpeningTags = array( "
\\s]+", "
\\s]+", "
\\s]+", "
\\s]+", "
\\s]+", "
\\s]+", "
\\s]+", "
\\s]+", "
a ab c' * diff result: ' ab c
\\s]+", "
\\s]+" ); $this->specialCaseClosingTags = array( "", "", "", "", "", "", "", "", "", "" ); } public function &instance() { static $_instance; if (!$_instance) { $_instance = new self(); } return $_instance; } public function compare($oldText,$newText) { $this->content = array(); $this->wordIndices = array(); $this->oldWords = $this->ConvertHtmlToListOfWords(str_split_utf8($oldText)); $this->newWords = $this->ConvertHtmlToListOfWords(str_split_utf8($newText)); $this->wordIndices = $this->IndexNewWords($this->newWords); $operations = $this->Operations(); //print_r($this->wordIndices); //print_r($this->newWords); foreach ($operations as $item) { $this->PerformOperation($item); } return implode('',$this->content); } private function IndexNewWords(&$newWords) { $wordIndices = array(); for ($i = 0; $i < count($newWords); $i++) { $word = $newWords[$i]; if (array_key_exists($word,$wordIndices)) { $wordIndices[$word][] = $i; } else { $wordIndices[$word] = array($i); } } return $wordIndices; } private function ConvertHtmlToListOfWords($characterString) { $mode = MODE_CHARACTER; $current_word = ''; $words = array(); foreach($characterString as $character) { switch($mode) { case MODE_CHARACTER: if ($this->IsStartOfTag($character)) { if (!empty($current_word)) $words[] = $current_word; $current_word = '<'; $mode = MODE_TAG; } else if ($this->IsWhiteSpace($character)) { if (!empty($current_word)) $words[] = $current_word; $current_word = $character; $mode = MODE_WHITESPACE; } else { //$current_word .= $character; //src english if (isHanz($current_word.$character)) { //hanz if (!empty($current_word)) $words[] = $current_word; $current_word = $character; } else { $current_word .= $character; } } break; case MODE_TAG: if ($this->isEndOfTag($character)) { $current_word .= '>'; $words[] = $current_word; $current_word = ''; if ($this->IsWhiteSpace($character)) { $mode = MODE_WHITESPACE; } else { $mode = MODE_CHARACTER; } } else { $current_word .= $character; } break; case 
MODE_WHITESPACE: if ($this->IsStartOfTag($character)) { if (!empty($current_word)) $words[] = $current_word; $current_word = '<'; $mode = MODE_TAG; } else if ($this->IsWhiteSpace($character)) { $current_word .= $character; } else { if (!empty($current_word)) $words[] = $current_word; $current_word = $character; $mode = MODE_CHARACTER; } break; default: break; } } if (!empty($current_word)) $words[] = $current_word; return $words; } private function IsStartOfTag($val) { return $val == '<'; } private function IsEndOfTag($val) { return $val == '>'; } private function IsWhiteSpace($value) { $result = preg_match('/\s/',$value); return $result; } private function PerformOperation(&$operation){ switch ($operation->Action) { case ACTION_EQUAL: $this->ProcessEqualOperation($operation); break; case ACTION_DELETE: $this->ProcessDeleteOperation($operation, 'diffdel'); break; case ACTION_INSERT: $this->ProcessInsertOperation($operation, 'diffins'); break; case ACTION_NONE: break; case ACTION_REPLACE: $this->ProcessReplaceOperation($operation); break; default: break; } } private function ProcessReplaceOperation(&$operation) { $this->ProcessDeleteOperation($operation, 'diffmod'); $this->ProcessInsertOperation($operation, 'diffmod'); } private function ProcessInsertOperation(&$operation, $cssClass) { $text = array_slice_byoffset($this->newWords,$operation->StartInNew,$operation->EndInNew); $this->InsertTag("ins", $cssClass, $text); } private function ProcessDeleteOperation(&$operation, $cssClass) { $text = array_slice_byoffset($this->oldWords,$operation->StartInOld,$operation->EndInOld); $this->InsertTag("del", $cssClass, $text); } private function ProcessEqualOperation(&$operation) { $result = array_slice_byoffset($this->newWords,$operation->StartInNew,$operation->EndInNew); $this->content[] = implode('', $result); } /** * This method encloses words within a specified tag (ins or del), and adds this into "content", * with a twist: if there are words contain tags, it actually 
creates multiple ins or del, * so that they don't include any ins or del. This handles cases like * old: '
本文共计1613个文字,预计阅读时间需要7分钟。
代码片段改写如下:
php
当$value大于127时,根据$value的取值范围将$split设为2、3或4;否则设为1。随后将$key设为NULL,并通过循环依次拼接$split个字节。
127){
if($value >= 192 && $value <= 223)
$split = 2;
elseif($value >= 224 && $value <= 239)
$split = 3;
elseif($value >= 240 && $value <= 247)
$split = 4;
} else {
$split = 1;
}
$key = NULL;
for ( $j = 0; $j < $split; ++$j, ++$i ) {
$key .= $str[$i];
}
$array[] = $key;
//array_push( $array, $key );
}
return $array;
}
/**
 * Reports whether $str contains at least one byte in the range 0x7F-0xFF.
 *
 * The pattern carries no `u` modifier, so the subject is examined byte by
 * byte; every byte of a multi-byte UTF-8 sequence (such as a Chinese
 * character) falls inside this range. Note that the range starts at 0x7F,
 * so the ASCII DEL character matches as well.
 *
 * @param string $str Text to inspect.
 * @return int|false The preg_match() result: 1 if such a byte is present,
 *                   0 if not, false if the regex engine reports an error.
 */
function isHanz($str) {
    $highBytePattern = "/[\\x{7F}-\\x{FF}]/";
    $matched = preg_match($highBytePattern, $str);
    return $matched;
}
/**
 * Returns the elements of $arr from position $start_offset up to, but not
 * including, position $end_offset (a half-open range).
 *
 * @param array $arr           Source array; taken by reference only to avoid
 *                             a copy, it is never modified here.
 * @param int   $start_offset  Position of the first element to return.
 * @param int   $end_offset    Position one past the last element to return.
 * @param bool  $preserve_keys When true, integer keys of $arr are kept in the
 *                             result; when false (default) the result is
 *                             re-indexed from 0.
 * @return array The selected slice; empty when $end_offset <= $start_offset.
 */
function array_slice_byoffset(&$arr,$start_offset,$end_offset,$preserve_keys = false) {
    // array_slice() interprets a negative length as "stop that many elements
    // from the end", which would contradict the half-open range documented
    // above, so an inverted range is clamped to an empty slice.
    $length = max(0, $end_offset - $start_offset);

    // The original passed the bare word `preserve_keys` (no `$`), which is an
    // undefined constant: a fatal Error on PHP 8 and an always-truthy string
    // on older versions. Pass the actual parameter instead.
    return array_slice($arr, $start_offset, $length, (bool) $preserve_keys);
}
/**
 * Tests $subject against every regular-expression fragment in $pattern_array.
 *
 * Each fragment is wrapped in `/` delimiters exactly as given (no escaping is
 * applied), and every fragment is evaluated even after one has matched.
 *
 * @param iterable $pattern_array Regex bodies without delimiters.
 * @param string   $subject       Text to test.
 * @return int 1 if at least one fragment matched, otherwise 0.
 */
function preg_match_array($pattern_array,$subject) {
    $anyMatched = 0;
    foreach ($pattern_array as $fragment) {
        $regex = '/'.$fragment.'/';
        // Bitwise OR accumulates the 0/1 results of the individual matches.
        $anyMatched = $anyMatched | preg_match($regex, $subject);
    }
    return $anyMatched;
}
// Scanner states used by html_diff::ConvertHtmlToListOfWords while it walks
// the input character by character: inside plain text, inside a tag, or
// inside a run of whitespace.
define('MODE_CHARACTER',1);
define('MODE_TAG',2);
define('MODE_WHITESPACE',3);
// Operation codes that html_diff::PerformOperation switches on to decide how
// a diff operation is rendered (ACTION_NONE produces no output).
define('ACTION_EQUAL',1);
define('ACTION_DELETE',2);
define('ACTION_INSERT',3);
define('ACTION_NONE',4);
define('ACTION_REPLACE',5);
class html_diff {
private $specialCaseOpeningTags,$specialCaseClosingTags;
private $content,$wordIndices;
private $oldWords,$newWords;
public function html_diff() {
$this->specialCaseOpeningTags = array( "
\\s]+", "
\\s]+", "
\\s]+", "
\\s]+", "
\\s]+", "
\\s]+", "
\\s]+", "
\\s]+", "
a ab c' * diff result: ' ab c
\\s]+", "
\\s]+" ); $this->specialCaseClosingTags = array( "", "", "", "", "", "", "", "", "", "" ); } public function &instance() { static $_instance; if (!$_instance) { $_instance = new self(); } return $_instance; } public function compare($oldText,$newText) { $this->content = array(); $this->wordIndices = array(); $this->oldWords = $this->ConvertHtmlToListOfWords(str_split_utf8($oldText)); $this->newWords = $this->ConvertHtmlToListOfWords(str_split_utf8($newText)); $this->wordIndices = $this->IndexNewWords($this->newWords); $operations = $this->Operations(); //print_r($this->wordIndices); //print_r($this->newWords); foreach ($operations as $item) { $this->PerformOperation($item); } return implode('',$this->content); } private function IndexNewWords(&$newWords) { $wordIndices = array(); for ($i = 0; $i < count($newWords); $i++) { $word = $newWords[$i]; if (array_key_exists($word,$wordIndices)) { $wordIndices[$word][] = $i; } else { $wordIndices[$word] = array($i); } } return $wordIndices; } private function ConvertHtmlToListOfWords($characterString) { $mode = MODE_CHARACTER; $current_word = ''; $words = array(); foreach($characterString as $character) { switch($mode) { case MODE_CHARACTER: if ($this->IsStartOfTag($character)) { if (!empty($current_word)) $words[] = $current_word; $current_word = '<'; $mode = MODE_TAG; } else if ($this->IsWhiteSpace($character)) { if (!empty($current_word)) $words[] = $current_word; $current_word = $character; $mode = MODE_WHITESPACE; } else { //$current_word .= $character; //src english if (isHanz($current_word.$character)) { //hanz if (!empty($current_word)) $words[] = $current_word; $current_word = $character; } else { $current_word .= $character; } } break; case MODE_TAG: if ($this->isEndOfTag($character)) { $current_word .= '>'; $words[] = $current_word; $current_word = ''; if ($this->IsWhiteSpace($character)) { $mode = MODE_WHITESPACE; } else { $mode = MODE_CHARACTER; } } else { $current_word .= $character; } break; case 
MODE_WHITESPACE: if ($this->IsStartOfTag($character)) { if (!empty($current_word)) $words[] = $current_word; $current_word = '<'; $mode = MODE_TAG; } else if ($this->IsWhiteSpace($character)) { $current_word .= $character; } else { if (!empty($current_word)) $words[] = $current_word; $current_word = $character; $mode = MODE_CHARACTER; } break; default: break; } } if (!empty($current_word)) $words[] = $current_word; return $words; } private function IsStartOfTag($val) { return $val == '<'; } private function IsEndOfTag($val) { return $val == '>'; } private function IsWhiteSpace($value) { $result = preg_match('/\s/',$value); return $result; } private function PerformOperation(&$operation){ switch ($operation->Action) { case ACTION_EQUAL: $this->ProcessEqualOperation($operation); break; case ACTION_DELETE: $this->ProcessDeleteOperation($operation, 'diffdel'); break; case ACTION_INSERT: $this->ProcessInsertOperation($operation, 'diffins'); break; case ACTION_NONE: break; case ACTION_REPLACE: $this->ProcessReplaceOperation($operation); break; default: break; } } private function ProcessReplaceOperation(&$operation) { $this->ProcessDeleteOperation($operation, 'diffmod'); $this->ProcessInsertOperation($operation, 'diffmod'); } private function ProcessInsertOperation(&$operation, $cssClass) { $text = array_slice_byoffset($this->newWords,$operation->StartInNew,$operation->EndInNew); $this->InsertTag("ins", $cssClass, $text); } private function ProcessDeleteOperation(&$operation, $cssClass) { $text = array_slice_byoffset($this->oldWords,$operation->StartInOld,$operation->EndInOld); $this->InsertTag("del", $cssClass, $text); } private function ProcessEqualOperation(&$operation) { $result = array_slice_byoffset($this->newWords,$operation->StartInNew,$operation->EndInNew); $this->content[] = implode('', $result); } /** * This method encloses words within a specified tag (ins or del), and adds this into "content", * with a twist: if there are words contain tags, it actually 
creates multiple ins or del, * so that they don't include any ins or del. This handles cases like * old: '

