diff options
author | 2024-11-17 13:27:55 -0800 | |
---|---|---|
committer | 2024-11-17 13:27:55 -0800 | |
commit | c90c08d05755f8c94be0ea8131b063701fb6dedb (patch) | |
tree | 1e083e84172a63399abcd60aeb77c13e767820f4 | |
parent | 333882de824732324aa5b8dd6f0fea70732cba16 (diff) | |
parent | 4f2a445302056a4de201927060a1da7bfbd13f11 (diff) | |
download | miniflux-ai-main.tar.gz miniflux-ai-main.tar.zst miniflux-ai-main.zip |
-rw-r--r-- | core/image.py | 25 | ||||
-rw-r--r-- | main.py | 11 | ||||
-rw-r--r-- | requirements.txt | 1 |
3 files changed, 37 insertions, 0 deletions
diff --git a/core/image.py b/core/image.py
new file mode 100644
index 0000000..e699d79
--- /dev/null
+++ b/core/image.py
@@ -0,0 +1,25 @@
+from bs4 import BeautifulSoup
+from typing import Optional
+
+
+def contains_image(html: str) -> Optional[str]:
+    """
+    Extracts the source URL of the first image found in the given HTML content.
+
+    This function parses the provided HTML string to locate the first image
+    (`<img>`) tag. If an image tag is found, it returns the value of the `src`
+    attribute. If no image is found, it returns None.
+
+    :param html: A string containing HTML content to be parsed.
+    :type html: str
+    :return: The `src` attribute value of the first found image tag, or None if
+    no image tag is found.
+    :rtype: Optional[str]
+    """
+    soup = BeautifulSoup(html, "html.parser")
+    image = soup.find("img", recursive=True)
+    if image:
+        src = image.get("src")
+        if src and src.lower().split('.')[-1] in ['png', 'jpeg', 'gif', 'webp']:
+            return src
+    return None
@@ -3,6 +3,7 @@ import time
 import traceback
 from common.logger import logger
 from core.entry_filter import filter_entry
+from core.image import contains_image
 import miniflux
 from markdownify import markdownify as md
@@ -20,8 +21,18 @@ def process_entry(entry):
     for agent in config['agents'].items():
         messages = [
             {"role": "system", "content": agent[1]['prompt']},
+            {"role": "user", "content": "The following is the title:\n---\n " + entry['title']},
+            {"role": "user", "content": "The following is the author:\n---\n " + entry['author']},
             {"role": "user", "content": "The following is the input content:\n---\n " + md(entry['content']) }
         ]
+
+        image_url = contains_image(entry['content'])
+        if image_url:
+            messages.append({"role": "user", "content": [
+                {"type": "text", "text": "The following is the first image in the content:"},
+                {"type": "image_url", "image_url": {"url": image_url}}
+            ]})
+
         # filter, if AI is not generating, and in allow_list, or not in deny_list
         if filter_entry(config, agent, entry):
             completion = llm_client.chat.completions.create(
diff --git a/requirements.txt b/requirements.txt
index 7599e77..8688e7b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ openai
 markdownify
 markdown
 PyYAML
+bs4