diff options
author | 2022-07-01 15:10:30 +0200 | |
---|---|---|
committer | 2022-07-01 15:10:30 +0200 | |
commit | 4f75591060d95208a301bc6bf460d875631b29cc (patch) | |
tree | 4e37d86840e8d990a563ba75d3de6f84a53cc2de /lib/html.php | |
parent | 66568e3a39c61546c09a47a5688914a0bdf3c60c (diff) | |
download | rss-bridge-4f75591060d95208a301bc6bf460d875631b29cc.tar.gz rss-bridge-4f75591060d95208a301bc6bf460d875631b29cc.tar.zst rss-bridge-4f75591060d95208a301bc6bf460d875631b29cc.zip |
Reformat codebase v4 (#2872)
Reformat code base to PSR12
Co-authored-by: rssbridge <noreply@github.com>
Diffstat (limited to 'lib/html.php')
-rw-r--r-- | lib/html.php | 202 |
1 files changed, 102 insertions, 100 deletions
diff --git a/lib/html.php b/lib/html.php index 69bd1424..e82d5e0e 100644 --- a/lib/html.php +++ b/lib/html.php @@ -1,4 +1,5 @@ <?php + /** * This file is part of RSS-Bridge, a PHP project capable of generating RSS and * Atom feeds for websites that don't have one. @@ -6,9 +7,9 @@ * For the full license information, please view the UNLICENSE file distributed * with this source code. * - * @package Core - * @license http://unlicense.org/ UNLICENSE - * @link https://github.com/rss-bridge/rss-bridge + * @package Core + * @license http://unlicense.org/ UNLICENSE + * @link https://github.com/rss-bridge/rss-bridge */ /** @@ -25,27 +26,29 @@ * @todo Check if this implementation is still necessary, because simplehtmldom * already removes some of the tags (search for `remove_noise` in simple_html_dom.php). */ -function sanitize($html, - $tags_to_remove = array('script', 'iframe', 'input', 'form'), - $attributes_to_keep = array('title', 'href', 'src'), - $text_to_keep = array()){ - - $htmlContent = str_get_html($html); - - foreach($htmlContent->find('*') as $element) { - if(in_array($element->tag, $text_to_keep)) { - $element->outertext = $element->plaintext; - } elseif(in_array($element->tag, $tags_to_remove)) { - $element->outertext = ''; - } else { - foreach($element->getAllAttributes() as $attributeName => $attribute) { - if(!in_array($attributeName, $attributes_to_keep)) - $element->removeAttribute($attributeName); - } - } - } - - return $htmlContent; +function sanitize( + $html, + $tags_to_remove = ['script', 'iframe', 'input', 'form'], + $attributes_to_keep = ['title', 'href', 'src'], + $text_to_keep = [] +) { + $htmlContent = str_get_html($html); + + foreach ($htmlContent->find('*') as $element) { + if (in_array($element->tag, $text_to_keep)) { + $element->outertext = $element->plaintext; + } elseif (in_array($element->tag, $tags_to_remove)) { + $element->outertext = ''; + } else { + foreach ($element->getAllAttributes() as $attributeName => $attribute) { + if (!in_array($attributeName, $attributes_to_keep)) { + $element->removeAttribute($attributeName); + } + } + } + } + + return $htmlContent; } /** @@ -74,23 +77,18 @@ function sanitize($html, * @param string $htmlContent The HTML content * @return string The HTML content with all ocurrences replaced */ -function backgroundToImg($htmlContent) { - - $regex = '/background-image[ ]{0,}:[ ]{0,}url\([\'"]{0,}(.*?)[\'"]{0,}\)/'; - $htmlContent = str_get_html($htmlContent); - - foreach($htmlContent->find('*') as $element) { - - if(preg_match($regex, $element->style, $matches) > 0) { - - $element->outertext = '<img style="display:block;" src="' . $matches[1] . '" />'; - - } - - } - - return $htmlContent; - +function backgroundToImg($htmlContent) +{ + $regex = '/background-image[ ]{0,}:[ ]{0,}url\([\'"]{0,}(.*?)[\'"]{0,}\)/'; + $htmlContent = str_get_html($htmlContent); + + foreach ($htmlContent->find('*') as $element) { + if (preg_match($regex, $element->style, $matches) > 0) { + $element->outertext = '<img style="display:block;" src="' . $matches[1] . '" />'; + } + } + + return $htmlContent; } /** @@ -104,26 +102,27 @@ function backgroundToImg($htmlContent) { * @param string $server Fully qualified URL to the page containing relative links * @return object Content with fixed URLs. */ -function defaultLinkTo($content, $server){ - $string_convert = false; - if (is_string($content)) { - $string_convert = true; - $content = str_get_html($content); - } - - foreach($content->find('img') as $image) { - $image->src = urljoin($server, $image->src); - } - - foreach($content->find('a') as $anchor) { - $anchor->href = urljoin($server, $anchor->href); - } - - if ($string_convert) { - $content = $content->outertext; - } - - return $content; +function defaultLinkTo($content, $server) +{ + $string_convert = false; + if (is_string($content)) { + $string_convert = true; + $content = str_get_html($content); + } + + foreach ($content->find('img') as $image) { + $image->src = urljoin($server, $image->src); + } + + foreach ($content->find('a') as $anchor) { + $anchor->href = urljoin($server, $anchor->href); + } + + if ($string_convert) { + $content = $content->outertext; + } + + return $content; } /** @@ -135,12 +134,13 @@ function defaultLinkTo($content, $server){ * @return string|bool Extracted string, e.g. `John Doe`, or false if the * delimiters were not found. */ -function extractFromDelimiters($string, $start, $end) { - if (strpos($string, $start) !== false) { - $section_retrieved = substr($string, strpos($string, $start) + strlen($start)); - $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end)); - return $section_retrieved; - } return false; +function extractFromDelimiters($string, $start, $end) +{ + if (strpos($string, $start) !== false) { + $section_retrieved = substr($string, strpos($string, $start) + strlen($start)); + $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end)); + return $section_retrieved; + } return false; } /** @@ -151,13 +151,14 @@ function extractFromDelimiters($string, $start, $end) { * @param string $end End delimiter, e.g. `</script>` * @return string Cleaned string, e.g. `foobar` */ -function stripWithDelimiters($string, $start, $end) { - while(strpos($string, $start) !== false) { - $section_to_remove = substr($string, strpos($string, $start)); - $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); - $string = str_replace($section_to_remove, '', $string); - } - return $string; +function stripWithDelimiters($string, $start, $end) +{ + while (strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } + return $string; } /** @@ -170,28 +171,29 @@ function stripWithDelimiters($string, $start, $end) { * * @todo This function needs more documentation to make it maintainable. */ -function stripRecursiveHTMLSection($string, $tag_name, $tag_start){ - $open_tag = '<' . $tag_name; - $close_tag = '</' . $tag_name . '>'; - $close_tag_length = strlen($close_tag); - if(strpos($tag_start, $open_tag) === 0) { - while(strpos($string, $tag_start) !== false) { - $max_recursion = 100; - $section_to_remove = null; - $section_start = strpos($string, $tag_start); - $search_offset = $section_start; - do { - $max_recursion--; - $section_end = strpos($string, $close_tag, $search_offset); - $search_offset = $section_end + $close_tag_length; - $section_to_remove = substr($string, $section_start, $section_end - $section_start + $close_tag_length); - $open_tag_count = substr_count($section_to_remove, $open_tag); - $close_tag_count = substr_count($section_to_remove, $close_tag); - } while ($open_tag_count > $close_tag_count && $max_recursion > 0); - $string = str_replace($section_to_remove, '', $string); - } - } - return $string; +function stripRecursiveHTMLSection($string, $tag_name, $tag_start) +{ + $open_tag = '<' . $tag_name; + $close_tag = '</' . $tag_name . '>'; + $close_tag_length = strlen($close_tag); + if (strpos($tag_start, $open_tag) === 0) { + while (strpos($string, $tag_start) !== false) { + $max_recursion = 100; + $section_to_remove = null; + $section_start = strpos($string, $tag_start); + $search_offset = $section_start; + do { + $max_recursion--; + $section_end = strpos($string, $close_tag, $search_offset); + $search_offset = $section_end + $close_tag_length; + $section_to_remove = substr($string, $section_start, $section_end - $section_start + $close_tag_length); + $open_tag_count = substr_count($section_to_remove, $open_tag); + $close_tag_count = substr_count($section_to_remove, $close_tag); + } while ($open_tag_count > $close_tag_count && $max_recursion > 0); + $string = str_replace($section_to_remove, '', $string); + } + } + return $string; } /** @@ -202,8 +204,8 @@ function stripRecursiveHTMLSection($string, $tag_name, $tag_start){ * @param string $string Input string in Markdown format * @return string output string in HTML format */ -function markdownToHtml($string) { - - $Parsedown = new Parsedown(); - return $Parsedown->text($string); +function markdownToHtml($string) +{ + $Parsedown = new Parsedown(); + return $Parsedown->text($string); } |