aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorGravatar Anshul Gupta <ansg191@anshulg.com> 2024-11-17 13:27:55 -0800
committerGravatar Anshul Gupta <ansg191@anshulg.com> 2024-11-17 13:27:55 -0800
commitc90c08d05755f8c94be0ea8131b063701fb6dedb (patch)
tree1e083e84172a63399abcd60aeb77c13e767820f4 /core
parent333882de824732324aa5b8dd6f0fea70732cba16 (diff)
parent4f2a445302056a4de201927060a1da7bfbd13f11 (diff)
downloadminiflux-ai-main.tar.gz
miniflux-ai-main.tar.zst
miniflux-ai-main.zip
Merge branch 'image-processing'HEADmain
Diffstat (limited to 'core')
-rw-r--r--core/image.py25
1 files changed, 25 insertions, 0 deletions
diff --git a/core/image.py b/core/image.py
new file mode 100644
index 0000000..e699d79
--- /dev/null
+++ b/core/image.py
@@ -0,0 +1,25 @@
+from bs4 import BeautifulSoup
+from typing import Optional
+
+
+def contains_image(html: str) -> Optional[str]:
+    """
+    Return the source URL of the first image in the HTML, if it is a supported type.
+
+    This function parses the provided HTML string and looks at the first image
+    (`<img>`) tag only. The `src` attribute of that tag is returned when its
+    file extension is one of `png`, `jpg`, `jpeg`, `gif` or `webp`
+    (case-insensitive). Any query string or fragment in the URL is ignored
+    when determining the extension, but the returned value is the original,
+    unmodified `src`.
+
+    :param html: A string containing HTML content to be parsed.
+    :type html: str
+    :return: The `src` attribute value of the first image tag, or None if
+        there is no image tag, the tag has no `src`, or the extension is not
+        one of the supported types.
+    :rtype: Optional[str]
+    """
+    soup = BeautifulSoup(html, "html.parser")
+    image = soup.find("img")
+    if image is None:
+        return None
+
+    src = image.get("src")
+    if not src:
+        return None
+
+    # Strip the fragment and query string first: "photo.png?w=640" must be
+    # recognised as a PNG rather than as the extension "png?w=640".
+    path = src.partition("#")[0].partition("?")[0]
+    # Require an actual "." so a bare word such as "png" is not mistaken for
+    # a file name with an extension.
+    _, dot, extension = path.rpartition(".")
+    if dot and extension.lower() in ("png", "jpg", "jpeg", "gif", "webp"):
+        return src
+    return None