html;
}
/**
* Validate html.
*
* @param string $html HTML to process.
*
* @return int Number of errors.
*/
public function validate( $html ) {
	// Mask everything that must not be parsed as an ordinary tag.
	// The order of these steps is significant and is kept as-is.
	$hide_steps = array(
		'hide_wp_bugs',
		'hide_cdata',
		'hide_comments',
		'hide_self_closing_tags',
		'hide_scripts',
		'hide_styles',
	);
	foreach ( $hide_steps as $hide_step ) {
		$html = $this->{$hide_step}( $html );
	}

	// Walk through the markup one tag at a time, accumulating validated output.
	$remaining = $html;
	$validated = '';
	while ( '' !== $remaining ) {
		$step = $this->validate_next( $remaining );
		if ( $step ) {
			$validated .= $step['processed'];
		}
		$remaining = $step['next'];
	}

	// Unmask in the opposite order. CDATA is not restored here;
	// restore_html() takes care of that.
	$restore_steps = array(
		'restore_styles',
		'restore_scripts',
		'restore_self_closing_tags',
		'restore_comments',
		'restore_wp_bugs',
	);
	$html = $validated;
	foreach ( $restore_steps as $restore_step ) {
		$html = $this->{$restore_step}( $html );
	}

	// Keep the processed markup on the instance and report the error total.
	$this->html = $html;

	return $this->error_count;
}
/**
* Validate first tag in html flow and return processed html and rest.
* In processed part broken html is replaced by wpml comment.
*
* @param string $html HTML to process.
*
* @return array Array with 'processed' and 'next' keys.
*/
private function validate_next( $html ) {
// Receives the capture groups of the first tag match, if any.
$regs = array();
// Get first opening or closing tag.
// Group 1 is the tag name (prefixed with "/" for a closing tag), group 2 is everything up to ">".
$pattern = '<\s*?([a-z]+|/[a-z]+)((?:.|\s)*?)>';
mb_eregi( $pattern, $html, $regs );
if ( $regs ) {
$full_tag = $regs[0];
// Find where the matched tag sits so the input can be split around it.
$pos = mb_strpos( $html, $full_tag );
// Everything after the matched tag is handed back for the next call.
$next_html = mb_substr( $html, $pos + mb_strlen( $full_tag ) );
$tag = $regs[1];
// Opening tags always succeed; only a closing tag can fail.
$result = true;
if ( '/' === mb_substr( $tag, 0, 1 ) ) {
// Closing tag: strip the leading "/" and try to match it against the open-tag stack.
$result = $this->close_tag( mb_substr( $tag, 1 ) );
} else {
$this->open_tag( $tag );
}
if ( $result ) {
// Keep the leading text together with the tag itself.
$processed_html = mb_substr( $html, 0, $pos + mb_strlen( $full_tag ) );
} else {
// Closing tag did not match: keep only the text before it.
// NOTE(review): the appended literal is empty in this view, while the docblock says broken html
// is replaced by a wpml comment - confirm the literal against the original file.
$processed_html = mb_substr( $html, 0, $pos ) . '';
}
return array(
'processed' => $processed_html,
'next' => $next_html,
);
}
// No tag found: the whole input is passed through and nothing remains to process.
return array(
'processed' => $html,
'next' => '',
);
}
/**
* Convert WP bugs into wpml commented bugs.
*
* @param string $html HTML to process.
*
* @return false|string
*/
private function hide_wp_bugs( $html ) {
// WP bug fix for comments - in case you REALLY meant to type '< !--'
// NOTE(review): search and replacement literals are identical in this view, which makes this call
// a no-op as displayed; whitespace inside the literals may have been lost - confirm.
$html = str_replace( '< !--', '< !--', $html );
// WP bug fix for LOVE <3 (and other situations with '<' before a number)
// Every "<" directly followed by one digit is passed to hide_wp_bug_callback(); group 1 is the digit.
$pattern = '<([0-9]{1})';
$html = mb_ereg_replace_callback( $pattern, array( $this, 'hide_wp_bug_callback' ), $html, 'msri' );
return $html;
}
/**
* Callback to convert WP bugs into wpml commented bugs.
*
* @param array $matches
*
* @return string
*/
public function hide_wp_bug_callback( $matches ) {
// Replacement used by hide_wp_bugs() for each "<digit" match; $matches is not used in the visible body.
// NOTE(review): the returned literal is empty in this view, so the match would simply be removed;
// the docblock describes a wpml commented form - confirm the literal against the original file.
return '';
}
/**
* Convert wpml commented bugs to WP bugs.
*
* @param $html
*
* @return false|string
*/
private function restore_wp_bugs( $html ) {
// NOTE(review): the pattern literal is empty in this view; the marker it is meant to match is not
// visible here - confirm against the original file.
$pattern = '';
// Each match is replaced with the value returned by restore_bug_callback().
$html = mb_ereg_replace_callback( $pattern, array( $this, 'restore_bug_callback' ), $html, 'msri' );
// NOTE(review): search and replacement literals are identical in this view (no-op as displayed);
// whitespace inside the literals may have been lost - confirm.
$html = str_replace( '< !--', '< !--', $html );
return $html;
}
/**
* Callback to convert wpml commented bugs to WP bugs.
*
* @param array $matches
*
* @return mixed
*/
public function restore_bug_callback( $matches ) {
	// The first capture group carries the text to put back in place of the match.
	$original_text = $matches[1];

	return $original_text;
}
/**
* Convert HTML comments into wpml comments.
*
* @param string $html HTML to process.
*
* @return false|string
*/
private function hide_comments( $html ) {
// First pass: matches are rewritten by hide_comment_callback().
// NOTE(review): the pattern literal is empty in this view; the comment pattern is not visible
// here - confirm against the original file.
$pattern = '';
$html = mb_ereg_replace_callback( $pattern, array( $this, 'hide_comment_callback' ), $html, 'msri' );
// Second pass: matches are rewritten by hide_declaration_callback().
// NOTE(review): this pattern literal is empty in this view as well - confirm.
$pattern = '';
$html = mb_ereg_replace_callback( $pattern, array( $this, 'hide_declaration_callback' ), $html, 'msri' );
return $html;
}
/**
* Callback to convert HTML comment to wpml comment.
*
* @param array $matches
*
* @return string
*/
public function hide_comment_callback( $matches ) {
// Replacement used by the first pass of hide_comments(); $matches is not used in the visible body.
// NOTE(review): the returned literal is empty in this view - confirm against the original file.
return '';
}
/**
* Callback to convert HTML declaration to wpml declaration.
*
* @param array $matches
*
* @return string
*/
public function hide_declaration_callback( $matches ) {
// Replacement used by the second pass of hide_comments(); $matches is not used in the visible body.
// NOTE(review): the returned literal is empty in this view - confirm against the original file.
return '';
}
/**
* Convert wpml comments to HTML comments.
*
* @param string $html
*
* @return string
*/
private function restore_comments( $html ) {
// Two passes, both decoding group 1 of each match through restore_encoded_content_callback().
// NOTE(review): both pattern literals are empty in this view; the markers they are meant to match
// are not visible here - confirm against the original file.
$pattern = '';
$html = mb_ereg_replace_callback( $pattern, array( $this, 'restore_encoded_content_callback' ), $html, 'msri' );
$pattern = '';
$html = mb_ereg_replace_callback( $pattern, array( $this, 'restore_encoded_content_callback' ), $html, 'msri' );
return $html;
}
/**
* Callback to convert wpml base64 encoded content.
*
* @param array $matches
*
* @return string
*/
public function restore_encoded_content_callback( $matches ) {
	// The first capture group holds the base64 payload; decode it back to the original content.
	$encoded_content = $matches[1];
	$decoded_content = base64_decode( $encoded_content );

	return $decoded_content;
}
/**
* Convert self-closing tags to wpml self-closing tags.
*
* @param string $html HTML to process.
*
* @return false|string
*/
private function hide_self_closing_tags( $html ) {
	// Element names that are hidden whether or not they are written with "/>".
	// Each name appears once, so no redundant regex pass is made over the document.
	$self_closing_tags = array(
		'area',
		'base',
		'basefont',
		'br',
		'col',
		'command',
		'embed',
		'frame',
		'hr',
		'img',
		'input',
		'isindex',
		'link',
		'meta',
		'param',
		'source',
		'track',
		'wbr',
		'keygen',
		'menuitem',
		'path',
		'polyline',
	);

	foreach ( $self_closing_tags as $self_closing_tag ) {
		// The negative lookahead keeps a name from matching as a mere prefix of a
		// longer tag name. Without it "col" also hides "<colgroup>" and "frame"
		// hides "<frameset>", so their closing tags are later reported as errors.
		// The lookahead is non-capturing: group numbering seen by the callback is unchanged.
		$pattern = '<\s*?' . $self_closing_tag . '(?![a-z0-9_:\-])((?:.|\s)*?)(>|/>)';
		$html    = mb_ereg_replace_callback( $pattern, array( $this, 'hide_sct_callback' ), $html, 'msri' );
	}

	// Finally hide any remaining tag written in explicit self-closing form ("... />").
	$pattern = '<\s*?[^>]*/>';
	$html    = mb_ereg_replace_callback( $pattern, array( $this, 'hide_sct_callback' ), $html, 'msri' );

	return $html;
}
/**
* Callback to convert self-closing tags to wpml self-closing tags.
*
* @param $matches
*
* @return string
*/
public function hide_sct_callback( $matches ) {
// Replacement used by hide_self_closing_tags() for every matched tag; $matches is not used in the visible body.
// NOTE(review): the returned literal is empty in this view - confirm against the original file.
return '';
}
/**
* Convert wpml self-closing tags to HTML self-closing tags.
*
* @param $html
*
* @return false|string
*/
private function restore_self_closing_tags( $html ) {
// NOTE(review): the pattern literal is empty in this view; the marker it is meant to match is not
// visible here - confirm against the original file.
$pattern = '';
// Each match is replaced with the tag rebuilt by restore_sct_callback().
$html = mb_ereg_replace_callback( $pattern, array( $this, 'restore_sct_callback' ), $html, 'msri' );
return $html;
}
/**
* Callback to convert wpml self-closing tags to self-closing tags.
*
* @param $matches
*
* @return string
*/
public function restore_sct_callback( $matches ) {
	// Re-wrap the first capture group in angle brackets to rebuild the tag.
	$tag_body = $matches[1];

	return "<{$tag_body}>";
}
/**
* Convert wpml comments to initial HTML.
*
* @param $html
*
* @return false|string
*/
public function restore_html( $html ) {
	// CDATA sections are restored first, then the wpml html fragments.
	return $this->restore_html_fragments(
		$this->restore_cdata( $html )
	);
}
/**
* Convert wpml fragments to HTML fragments.
*
* @param $html
*
* @return false|string
*/
private function restore_html_fragments( $html ) {
// NOTE(review): the pattern literal is empty in this view; the fragment marker it is meant to match
// is not visible here - confirm against the original file.
$pattern = '';
// Each match is replaced with the value returned by restore_html_fragment_callback().
$html = mb_ereg_replace_callback( $pattern, array( $this, 'restore_html_fragment_callback' ), $html, 'msri' );
return $html;
}
/**
* Callback to convert wpml fragment to HTML fragment.
*
* @param $matches
*
* @return string
*/
public function restore_html_fragment_callback( $matches ) {
	// The first capture group is the original fragment; hand it back unchanged.
	$fragment = $matches[1];

	return $fragment;
}
/**
* Convert scripts to wpml scripts.
*
* @param string $html HTML to process.
*
* @return false|string
*/
private function hide_scripts( $html ) {
// Matches an opening script tag written without attributes, followed by a lazy capture group.
// NOTE(review): the pattern ends right after the lazy group in this view, so the group can only
// match an empty string; a closing-tag part may have been lost - confirm against the original file.
$pattern = '<\s*?script\s*?>((?:.|\s)*?)';
// Each match is replaced with the value returned by hide_script_callback().
$html = mb_ereg_replace_callback( $pattern, array( $this, 'hide_script_callback' ), $html, 'msri' );
return $html;
}
/**
* Callback to convert script to wpml script.
*
* @param array $matches
*
* @return string
*/
public function hide_script_callback( $matches ) {
// Replacement used by hide_scripts(); $matches is not used in the visible body.
// NOTE(review): the returned literal is empty in this view - confirm against the original file.
return '';
}
/**
* Convert wpml scripts to scripts.
*
* @param $html
*
* @return false|string
*/
private function restore_scripts( $html ) {
// NOTE(review): the pattern literal is empty in this view; the script marker it is meant to match
// is not visible here - confirm against the original file.
$pattern = '';
// Group 1 of each match is base64-decoded by restore_encoded_content_callback().
$html = mb_ereg_replace_callback( $pattern, array( $this, 'restore_encoded_content_callback' ), $html, 'msri' );
return $html;
}
/**
* Convert CDATA to wpml cdata.
*
* @param string $html HTML to process.
*
* @return false|string
*/
private function hide_cdata( $html ) {
// NOTE(review): the pattern literal is empty in this view; the CDATA pattern is not visible
// here - confirm against the original file.
$pattern = '';
// Each match is replaced with the value returned by hide_cdata_callback().
$html = mb_ereg_replace_callback( $pattern, array( $this, 'hide_cdata_callback' ), $html, 'msri' );
return $html;
}
/**
* Callback to convert CDATA to wpml cdata.
*
* @param array $matches
*
* @return string
*/
public function hide_cdata_callback( $matches ) {
// Replacement used by hide_cdata(); $matches is not used in the visible body.
// NOTE(review): the returned literal is empty in this view - confirm against the original file.
return '';
}
/**
* Convert wpml cdata to CDATA.
*
* @param $html
*
* @return false|string
*/
private function restore_cdata( $html ) {
// NOTE(review): the pattern literal is empty in this view; the cdata marker it is meant to match
// is not visible here - confirm against the original file.
$pattern = '';
// Group 1 of each match is base64-decoded by restore_encoded_content_callback().
$html = mb_ereg_replace_callback( $pattern, array( $this, 'restore_encoded_content_callback' ), $html, 'msri' );
return $html;
}
/**
* Convert styles to wpml styles.
*
* @param string $html HTML to process.
*
* @return false|string
*/
private function hide_styles( $html ) {
// Matches an opening style tag written without attributes, followed by a lazy capture group.
// NOTE(review): the pattern ends right after the lazy group in this view, so the group can only
// match an empty string; a closing-tag part may have been lost - confirm against the original file.
$pattern = '<\s*?style\s*?>((?:.|\s)*?)';
// Each match is replaced with the value returned by hide_style_callback().
$html = mb_ereg_replace_callback( $pattern, array( $this, 'hide_style_callback' ), $html, 'msri' );
return $html;
}
/**
* Callback to convert style to wpml style.
*
* @param array $matches
*
* @return string
*/
public function hide_style_callback( $matches ) {
// Replacement used by hide_styles(); $matches is not used in the visible body.
// NOTE(review): the returned literal is empty in this view - confirm against the original file.
return '';
}
/**
* Convert wpml styles to styles.
*
* @param $html
*
* @return false|string
*/
private function restore_styles( $html ) {
// NOTE(review): the pattern literal is empty in this view; the style marker it is meant to match
// is not visible here - confirm against the original file.
$pattern = '';
// Group 1 of each match is base64-decoded by restore_encoded_content_callback().
$html = mb_ereg_replace_callback( $pattern, array( $this, 'restore_encoded_content_callback' ), $html, 'msri' );
return $html;
}
/**
* Open tag encountered in html.
*
* @param string $tag Tag name.
*/
private function open_tag( $tag ) {
	// Names are stored in lower case, the same normalisation close_tag() applies before comparing.
	$normalized_tag = mb_strtolower( $tag );

	// Push onto the stack of currently open tags.
	array_push( $this->tags, $normalized_tag );
}
/**
* Close tag encountered in html.
*
* @param string $tag Tag name.
*
* @return bool Closed successfully.
*/
private function close_tag( $tag ) {
	$tag = mb_strtolower( $tag );

	// Happy path: the closing tag matches the most recently opened tag.
	if ( end( $this->tags ) === $tag ) {
		array_pop( $this->tags );

		return true;
	}

	// Anything else is a nesting error.
	$this->error_count ++;

	// If the tag was opened further down the stack, unwind every tag opened
	// after it and then drop the tag itself. The top of the stack is already
	// known to differ from $tag here, so the first iteration always pops.
	if ( in_array( $tag, $this->tags, true ) ) {
		while ( $this->tags && end( $this->tags ) !== $tag ) {
			array_pop( $this->tags );
		}
		array_pop( $this->tags );
	}

	return false;
}
}