html; }

	/**
	 * Validate html.
	 *
	 * Masks the constructs the tag scanner must not see, walks the remaining
	 * tags one at a time through validate_next(), then unmasks everything
	 * except CDATA (restore_html() takes care of CDATA and html fragments).
	 * The processed markup is stored in $this->html.
	 *
	 * NOTE(review): several marker patterns and callback return values in this
	 * class are empty string literals as seen from here, which does not match
	 * the hide/restore round trip the doc comments describe. Confirm those
	 * literals against the canonical file before relying on them.
	 *
	 * @param string $html HTML to process.
	 *
	 * @return int Number of errors (the current value of $this->error_count).
	 */
	public function validate( $html ) {
		$html = $this->hide_wp_bugs( $html );
		$html = $this->hide_cdata( $html );
		$html = $this->hide_comments( $html );
		$html = $this->hide_self_closing_tags( $html );
		$html = $this->hide_scripts( $html );
		$html = $this->hide_styles( $html );

		// validate_next() always returns both keys, so the scan can simply
		// consume the input until nothing is left.
		$processed_html = '';
		$remaining      = $html;
		while ( '' !== $remaining ) {
			$step            = $this->validate_next( $remaining );
			$processed_html .= $step['processed'];
			$remaining       = $step['next'];
		}

		// Unmask in the reverse order of masking.
		$html = $processed_html;
		$html = $this->restore_styles( $html );
		$html = $this->restore_scripts( $html );
		$html = $this->restore_self_closing_tags( $html );
		$html = $this->restore_comments( $html );
		$html = $this->restore_wp_bugs( $html );

		$this->html = $html;

		return $this->error_count;
	}

	/**
	 * Validate first tag in html flow and return processed html and rest.
	 * In processed part broken html is replaced by wpml comment.
	 *
	 * @param string $html HTML to process.
	 *
	 * @return array Array with keys 'processed' (text up to and including the
	 *               first tag, or up to the tag when it failed to close) and
	 *               'next' (the text still to be scanned, '' when done).
	 */
	private function validate_next( $html ) {
		$regs = array();

		// Get first opening or closing tag.
		$pattern = '<\s*?([a-z]+|/[a-z]+)((?:.|\s)*?)>';
		mb_eregi( $pattern, $html, $regs );

		// No tag left: everything that remains is plain content.
		if ( ! $regs ) {
			return array(
				'processed' => $html,
				'next'      => '',
			);
		}

		$full_tag  = $regs[0];
		$pos       = mb_strpos( $html, $full_tag );
		$tag_end   = $pos + mb_strlen( $full_tag );
		$next_html = mb_substr( $html, $tag_end );
		$tag       = $regs[1];

		// A leading slash marks a closing tag; anything else opens a tag.
		if ( '/' === mb_substr( $tag, 0, 1 ) ) {
			$result = $this->close_tag( mb_substr( $tag, 1 ) );
		} else {
			$this->open_tag( $tag );
			$result = true;
		}

		if ( $result ) {
			$processed_html = mb_substr( $html, 0, $tag_end );
		} else {
			$processed_html = mb_substr( $html, 0, $pos ) . '';
		}

		return array(
			'processed' => $processed_html,
			'next'      => $next_html,
		);
	}

	/**
	 * Convert WP bugs into wpml commented bugs.
	 *
	 * @param string $html HTML to process.
	 *
	 * @return false|string
	 */
	private function hide_wp_bugs( $html ) {
		// WP bug fix for comments - in case you REALLY meant to type '< !--'
		$html = str_replace( '< !--', '< !--', $html );

		// WP bug fix for LOVE <3 (and other situations with '<' before a number)
		$pattern = '<([0-9]{1})';

		return mb_ereg_replace_callback( $pattern, array( $this, 'hide_wp_bug_callback' ), $html, 'msri' );
	}

	/**
	 * Callback to convert WP bugs into wpml commented bugs.
	 *
	 * @param array $matches Regex match groups.
	 *
	 * @return string
	 */
	public function hide_wp_bug_callback( $matches ) {
		return '';
	}

	/**
	 * Convert wpml commented bugs to WP bugs.
	 *
	 * @param string $html HTML to process.
	 *
	 * @return false|string
	 */
	private function restore_wp_bugs( $html ) {
		$pattern = '';
		$html    = mb_ereg_replace_callback( $pattern, array( $this, 'restore_bug_callback' ), $html, 'msri' );

		return str_replace( '< !--', '< !--', $html );
	}

	/**
	 * Callback to convert wpml commented bugs to WP bugs.
	 *
	 * @param array $matches Regex match groups; group 1 is returned as is.
	 *
	 * @return mixed
	 */
	public function restore_bug_callback( $matches ) {
		return $matches[1];
	}

	/**
	 * Convert HTML comments into wpml comments.
	 *
	 * Runs two passes: one handled by hide_comment_callback(), one handled by
	 * hide_declaration_callback().
	 *
	 * @param string $html HTML to process.
	 *
	 * @return false|string
	 */
	private function hide_comments( $html ) {
		$pattern = '';
		$html    = mb_ereg_replace_callback( $pattern, array( $this, 'hide_comment_callback' ), $html, 'msri' );

		$pattern = '';

		return mb_ereg_replace_callback( $pattern, array( $this, 'hide_declaration_callback' ), $html, 'msri' );
	}

	/**
	 * Callback to convert HTML comment to wpml comment.
	 *
	 * @param array $matches Regex match groups.
	 *
	 * @return string
	 */
	public function hide_comment_callback( $matches ) {
		return '';
	}

	/**
	 * Callback to convert HTML declaration to wpml declaration.
	 *
	 * @param array $matches Regex match groups.
	 *
	 * @return string
	 */
	public function hide_declaration_callback( $matches ) {
		return '';
	}

	/**
	 * Convert wpml comments to HTML comments.
	 *
	 * Both passes decode their payload through restore_encoded_content_callback().
	 *
	 * @param string $html HTML to process.
	 *
	 * @return string
	 */
	private function restore_comments( $html ) {
		$pattern = '';
		$html    = mb_ereg_replace_callback( $pattern, array( $this, 'restore_encoded_content_callback' ), $html, 'msri' );

		$pattern = '';

		return mb_ereg_replace_callback( $pattern, array( $this, 'restore_encoded_content_callback' ), $html, 'msri' );
	}

	/**
	 * Callback to convert wpml base64 encoded content.
	 *
	 * @param array $matches Regex match groups; group 1 is base64-decoded.
	 *
	 * @return string
	 */
	public function restore_encoded_content_callback( $matches ) {
		return base64_decode( $matches[1] );
	}

	/**
	 * Convert self-closing tags to wpml self-closing tags.
	 *
	 * First masks every tag whose name is in the list below (with or without
	 * a trailing slash), then masks any remaining tag that ends in "/>".
	 *
	 * @param string $html HTML to process.
	 *
	 * @return false|string
	 */
	private function hide_self_closing_tags( $html ) {
		$self_closing_tags = array(
			'area',
			'base',
			'basefont',
			'br',
			'col',
			'command',
			'embed',
			'frame',
			'hr',
			'img',
			'input',
			'isindex',
			'link',
			'meta',
			'param',
			'source',
			'track',
			'wbr',
			'keygen',
			'menuitem',
			'path',
			'polyline',
		);

		foreach ( $self_closing_tags as $self_closing_tag ) {
			// The lookahead requires the tag name to end here, so that "col"
			// does not also swallow "<colgroup>" nor "frame" "<frameset>";
			// masking those would make their closing tags count as errors.
			$pattern = '<\s*?' . $self_closing_tag . '(?=[\s/>])((?:.|\s)*?)(>|/>)';
			$html    = mb_ereg_replace_callback( $pattern, array( $this, 'hide_sct_callback' ), $html, 'msri' );
		}

		// Any other tag written in the explicit self-closing form.
		$pattern = '<\s*?[^>]*/>';

		return mb_ereg_replace_callback( $pattern, array( $this, 'hide_sct_callback' ), $html, 'msri' );
	}

	/**
	 * Callback to convert self-closing tags to wpml self-closing tags.
	 *
	 * @param array $matches Regex match groups.
	 *
	 * @return string
	 */
	public function hide_sct_callback( $matches ) {
		return '';
	}

	/**
	 * Convert wpml self-closing tags to HTML self-closing tags.
	 *
	 * @param string $html HTML to process.
	 *
	 * @return false|string
	 */
	private function restore_self_closing_tags( $html ) {
		$pattern = '';

		return mb_ereg_replace_callback( $pattern, array( $this, 'restore_sct_callback' ), $html, 'msri' );
	}

	/**
	 * Callback to convert wpml self-closing tags to self-closing tags.
	 *
	 * @param array $matches Regex match groups; group 1 is wrapped in "<" and ">".
	 *
	 * @return string
	 */
	public function restore_sct_callback( $matches ) {
		return '<' . $matches[1] . '>';
	}

	/**
	 * Convert wpml comments to initial HTML.
	 *
	 * Restores CDATA first, then html fragments.
	 *
	 * @param string $html HTML to process.
	 *
	 * @return false|string
	 */
	public function restore_html( $html ) {
		$html = $this->restore_cdata( $html );

		return $this->restore_html_fragments( $html );
	}

	/**
	 * Convert wpml fragments to HTML fragments.
	 *
	 * @param string $html HTML to process.
	 *
	 * @return false|string
	 */
	private function restore_html_fragments( $html ) {
		$pattern = '';

		return mb_ereg_replace_callback( $pattern, array( $this, 'restore_html_fragment_callback' ), $html, 'msri' );
	}

	/**
	 * Callback to convert wpml fragment to HTML fragment.
	 *
	 * @param array $matches Regex match groups; group 1 is returned as is.
	 *
	 * @return string
	 */
	public function restore_html_fragment_callback( $matches ) {
		return $matches[1];
	}

	/**
	 * Convert scripts to wpml scripts.
	 *
	 * @param string $html HTML to process.
	 *
	 * @return false|string
	 */
	private function hide_scripts( $html ) {
		$pattern = '<\s*?script\s*?>((?:.|\s)*?)';

		return mb_ereg_replace_callback( $pattern, array( $this, 'hide_script_callback' ), $html, 'msri' );
	}

	/**
	 * Callback to convert script to wpml script.
	 *
	 * @param array $matches Regex match groups.
	 *
	 * @return string
	 */
	public function hide_script_callback( $matches ) {
		return '';
	}

	/**
	 * Convert wpml scripts to scripts.
	 *
	 * @param string $html HTML to process.
	 *
	 * @return false|string
	 */
	private function restore_scripts( $html ) {
		$pattern = '';

		return mb_ereg_replace_callback( $pattern, array( $this, 'restore_encoded_content_callback' ), $html, 'msri' );
	}

	/**
	 * Convert CDATA to wpml cdata.
	 *
	 * @param string $html HTML to process.
	 *
	 * @return false|string
	 */
	private function hide_cdata( $html ) {
		$pattern = '';

		return mb_ereg_replace_callback( $pattern, array( $this, 'hide_cdata_callback' ), $html, 'msri' );
	}

	/**
	 * Callback to convert CDATA to wpml cdata.
	 *
	 * @param array $matches Regex match groups.
	 *
	 * @return string
	 */
	public function hide_cdata_callback( $matches ) {
		return '';
	}

	/**
	 * Convert wpml cdata to CDATA.
	 *
	 * @param string $html HTML to process.
	 *
	 * @return false|string
	 */
	private function restore_cdata( $html ) {
		$pattern = '';

		return mb_ereg_replace_callback( $pattern, array( $this, 'restore_encoded_content_callback' ), $html, 'msri' );
	}

	/**
	 * Convert styles to wpml styles.
	 *
	 * @param string $html HTML to process.
	 *
	 * @return false|string
	 */
	private function hide_styles( $html ) {
		$pattern = '<\s*?style\s*?>((?:.|\s)*?)';

		return mb_ereg_replace_callback( $pattern, array( $this, 'hide_style_callback' ), $html, 'msri' );
	}

	/**
	 * Callback to convert style to wpml style.
	 *
	 * @param array $matches Regex match groups.
	 *
	 * @return string
	 */
	public function hide_style_callback( $matches ) {
		return '';
	}

	/**
	 * Convert wpml styles to styles.
	 *
	 * @param string $html HTML to process.
	 *
	 * @return false|string
	 */
	private function restore_styles( $html ) {
		$pattern = '';

		return mb_ereg_replace_callback( $pattern, array( $this, 'restore_encoded_content_callback' ), $html, 'msri' );
	}

	/**
	 * Open tag encountered in html.
	 *
	 * Pushes the lower-cased tag name onto the stack of open tags.
	 *
	 * @param string $tag Tag name.
	 */
	private function open_tag( $tag ) {
		$tag = mb_strtolower( $tag );
		array_push( $this->tags, $tag );
	}

	/**
	 * Close tag encountered in html.
	 *
	 * A close that matches the most recently opened tag pops it and succeeds.
	 * Any other close counts as one error; when the tag is open further down
	 * the stack, everything above it and the tag itself are discarded.
	 *
	 * @param string $tag Tag name.
	 *
	 * @return bool Closed successfully.
	 */
	private function close_tag( $tag ) {
		$tag = mb_strtolower( $tag );

		if ( end( $this->tags ) === $tag ) {
			array_pop( $this->tags );

			return true;
		}

		$this->error_count ++;

		if ( in_array( $tag, $this->tags, true ) ) {
			// Drop the unclosed tags sitting above the matching one...
			do {
				array_pop( $this->tags );
			} while ( $this->tags && end( $this->tags ) !== $tag );

			// ...and then the matching tag itself.
			array_pop( $this->tags );
		}

		return false;
	}
}