diff options
author | 2024-10-12 01:08:06 +0800 | |
---|---|---|
committer | 2024-10-12 01:08:06 +0800 | |
commit | 92fa74553341d4cdac134b0cafdaf16fa94b3afa (patch) | |
tree | 1deb2b9ed46ec962d4d7bbb4d773b6cb7e699243 | |
parent | e775f7b66c52cc2096938bfa2265a25248a3a7da (diff) | |
download | miniflux-ai-92fa74553341d4cdac134b0cafdaf16fa94b3afa.tar.gz miniflux-ai-92fa74553341d4cdac134b0cafdaf16fa94b3afa.tar.zst miniflux-ai-92fa74553341d4cdac134b0cafdaf16fa94b3afa.zip |
Support wildcard URLs in deny/allow lists
-rw-r--r-- | common/logger.py | 10 | ||||
-rw-r--r-- | core/__init__.py | 0 | ||||
-rw-r--r-- | core/entry_filter.py | 31 | ||||
-rw-r--r-- | main.py | 24 | ||||
-rw-r--r-- | tests/test_filter.py | 125 |
5 files changed, 170 insertions, 20 deletions
# ---------------------------------------------------------------------------
# Files added by this commit alongside core/entry_filter.py. They are separate
# modules and are reproduced here as reference text only.
#
# common/logger.py (new file, mode 100644, blob cd05a28):
#
#     import logging
#     from yaml import safe_load
#
#     config = safe_load(open('config.yml', encoding='utf8'))
#     logger = logging.getLogger(__name__)
#     logger.setLevel(config.get('log_level', 'INFO'))
#     formatter = logging.Formatter('%(asctime)s - %(filename)s - %(lineno)d - %(levelname)s - %(message)s')
#     console = logging.StreamHandler()
#     console.setFormatter(formatter)
#     logger.addHandler(console)
#
# core/__init__.py (new empty file, mode 100644, blob e69de29)
#
# core/entry_filter.py (new file, mode 100644) follows.
# ---------------------------------------------------------------------------
import fnmatch


def _matches_any(url, patterns):
    """Return True when *url* matches at least one shell-style pattern.

    A bare string is treated as a single pattern.  Without this, a list
    written as a scalar in the config would be iterated character by
    character, and any ``*`` inside it would match every URL.
    """
    if isinstance(patterns, str):
        patterns = [patterns]
    # fnmatchcase: fnmatch.fnmatch() runs both arguments through
    # os.path.normcase(), which makes URL matching platform-dependent.
    return any(fnmatch.fnmatchcase(url, pattern) for pattern in patterns)


def filter_entry(config, agent, entry):
    """Decide whether *agent* should process *entry*.

    Args:
        config: Mapping with an ``'agents'`` mapping of agent name to agent
            settings.  Each agent's ``'title'`` (and ``'<pre'`` when any
            agent enables ``'style_block'``) is a prefix that marks content
            as already processed.
        agent: ``(name, settings)`` pair, as yielded by
            ``config['agents'].items()``.  ``settings`` may define
            ``allow_list`` / ``deny_list`` (or the legacy ``whitelist`` /
            ``blacklist``) holding ``fnmatch``-style URL patterns.
        entry: Mapping with a ``'content'`` string and
            ``entry['feed']['site_url']``.

    Returns:
        ``False`` when the content already starts with a processed-marker
        prefix.  Otherwise, if an allow list is configured, ``True`` only
        when the site URL matches one of its patterns (the deny list is
        then ignored).  Otherwise, if a deny list is configured, ``True``
        only when the site URL matches none of its patterns.  ``True`` when
        neither list is configured.

    Note:
        Patterns use ``fnmatch`` syntax, so ``*``, ``?`` and ``[...]`` are
        wildcards; a literal ``[`` in a URL pattern must be written ``[[]``.
    """
    agents = config['agents'].values()

    # An empty title would make str.startswith() match every entry, so
    # missing or empty titles are ignored.
    processed_prefixes = [a['title'] for a in agents if a.get('title')]
    if any(a.get('style_block') for a in agents):
        processed_prefixes.append('<pre')

    settings = agent[1]

    # TODO: whitelist/blacklist are kept for backward compatibility only,
    # to be removed.
    allow_list = settings.get('allow_list')
    if allow_list is None:
        allow_list = settings.get('whitelist')
    deny_list = settings.get('deny_list')
    if deny_list is None:
        deny_list = settings.get('blacklist')

    # Content that already carries an agent's output is never processed again.
    if entry['content'].startswith(tuple(processed_prefixes)):
        return False

    site_url = entry['feed']['site_url']
    if allow_list is not None:
        return _matches_any(site_url, allow_list)
    if deny_list is not None:
        return not _matches_any(site_url, deny_list)
    return True
@@ -1,7 +1,8 @@ import concurrent.futures import time -import logging import traceback +from common.logger import logger +from core.entry_filter import filter_entry import miniflux from markdownify import markdownify as md @@ -13,33 +14,16 @@ config = safe_load(open('config.yml', encoding='utf8')) miniflux_client = miniflux.Client(config['miniflux']['base_url'], api_key=config['miniflux']['api_key']) llm_client = OpenAI(base_url=config['llm']['base_url'], api_key=config['llm']['api_key']) -logger = logging.getLogger(__name__) -logger.setLevel(config.get('log_level', 'INFO')) -formatter = logging.Formatter('%(asctime)s - %(filename)s - %(lineno)d - %(levelname)s - %(message)s') -console = logging.StreamHandler() -console.setFormatter(formatter) -logger.addHandler(console) - def process_entry(entry): llm_result = '' - start_with_list = [name[1]['title'] for name in config['agents'].items()] - style_block = [name[1]['style_block'] for name in config['agents'].items()] - [start_with_list.append('<pre') for i in style_block if i] for agent in config['agents'].items(): - # Todo Compatible with whitelist/blacklist parameter, to be removed - allow_list = agent[1].get('allow_list') if agent[1].get('allow_list') is not None else agent[1].get('whitelist') - deny_list = agent[1]['deny_list'] if agent[1].get('deny_list') is not None else agent[1].get('blacklist') - messages = [ {"role": "system", "content": agent[1]['prompt']}, {"role": "user", "content": "The following is the input content:\n---\n " + md(entry['content']) } ] # filter, if AI is not generating, and in allow_list, or not in deny_list - if ((not entry['content'].startswith(tuple(start_with_list))) and - (((allow_list is not None) and (entry['feed']['site_url'] in allow_list)) or - (deny_list is not None and entry['feed']['site_url'] not in deny_list) or - (allow_list is None and deny_list is None))): + if filter_entry(config, agent, entry): completion = llm_client.chat.completions.create( 
model=config['llm']['model'], messages= messages, @@ -47,7 +31,7 @@ def process_entry(entry): ) response_content = completion.choices[0].message.content - logger.info(f"\nagents:{agent[0]} \nfeed_title:{entry['title']} \nresult:{response_content}") + logger.info(f"agents:{agent[0]} feed_title:{entry['title']} result:{response_content}") if agent[1]['style_block']: llm_result = (llm_result + '<pre style="white-space: pre-wrap;"><code>\n' diff --git a/tests/test_filter.py b/tests/test_filter.py new file mode 100644 index 0000000..5aa4add --- /dev/null +++ b/tests/test_filter.py @@ -0,0 +1,125 @@ +import unittest +from yaml import safe_load +from core.entry_filter import filter_entry + +test_config = ''' +{ + "test_style_block": { + "agents": { + "test": { + "title": "🌐AI 翻译", + "style_block": true, + "allow_list": , + "deny_list": + } + } + }, + "test_allow_list": { + "agents": { + "test": { + "title": "🌐AI 翻译", + "style_block": false, + "allow_list": [ + "https://9to5mac.com/", + "https://home.kpmg/*" + ], + "deny_list": + } + } + }, + "test_deny_list": { + "agents": { + "test": { + "title": "🌐AI 翻译", + "style_block": false, + "allow_list": , + "deny_list": [ + "https://9to5mac.com/", + "https://home.kpmg/cn/zh/home/insights.html" + ] + } + } + }, + "test_None": { + "agents": { + "test": { + "title": "🌐AI 翻译", + "style_block": false, + "allow_list": , + "deny_list": + } + } + } +} +''' + +test_entries = ''' +{ + "test_style_block": + { + "entry": + { + "content": '<pre', + "feed": + { + "site_url": "https://weibo.com/1906286443/OAih1wghK", + }, + }, + "result": False, + }, + "test_allow_list": + { + "entry": + { + "content": '123', + "feed": + { + "site_url": "https://home.kpmg/cn/zh/home/insights.html", + }, + }, + "result": True, + }, + "test_deny_list": + { + "entry": + { + "content": '123', + "feed": + { + "site_url": "https://weibo.com/1906286443/OAih1wghK", + }, + }, + "result": True, + }, + "test_None": + { + "entry": + { + "content": '123', + "feed": + { + 
"site_url": "https://weibo.com/1906286443/OAih1wghK", + }, + }, + "result": True, + }, +} + +''' + +configs = safe_load(test_config) +entries = safe_load(test_entries) + +class MyTestCase(unittest.TestCase): + def test_entry_filter(self): + i = 0 + + for agent in configs.items(): + entry = entries[list(configs.keys())[i]] + result = filter_entry(configs['test_style_block'], agent, entry['entry']) + self.assertEqual(result, entry['result']) + i += 1 + + +if __name__ == '__main__': + unittest.main() |