aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Qetesh <4559341+Qetesh@users.noreply.github.com> 2024-10-12 01:08:06 +0800
committerGravatar Qetesh <4559341+Qetesh@users.noreply.github.com> 2024-10-12 01:08:06 +0800
commit92fa74553341d4cdac134b0cafdaf16fa94b3afa (patch)
tree1deb2b9ed46ec962d4d7bbb4d773b6cb7e699243
parente775f7b66c52cc2096938bfa2265a25248a3a7da (diff)
downloadminiflux-ai-92fa74553341d4cdac134b0cafdaf16fa94b3afa.tar.gz
miniflux-ai-92fa74553341d4cdac134b0cafdaf16fa94b3afa.tar.zst
miniflux-ai-92fa74553341d4cdac134b0cafdaf16fa94b3afa.zip
Support wildcard URLs in deny/allow lists
-rw-r--r--common/logger.py10
-rw-r--r--core/__init__.py0
-rw-r--r--core/entry_filter.py31
-rw-r--r--main.py24
-rw-r--r--tests/test_filter.py125
5 files changed, 170 insertions, 20 deletions
diff --git a/common/logger.py b/common/logger.py
new file mode 100644
index 0000000..cd05a28
--- /dev/null
+++ b/common/logger.py
@@ -0,0 +1,10 @@
+import logging
+from yaml import safe_load
+
+# Load the configuration once at import time. The context manager closes the
+# file handle as soon as parsing is done, and an empty file yields {}.
+with open('config.yml', encoding='utf8') as config_file:
+    config = safe_load(config_file) or {}
+
+# Shared application logger; its level comes from the optional `log_level`
+# key and defaults to INFO.
+logger = logging.getLogger(__name__)
+logger.setLevel(config.get('log_level', 'INFO'))
+formatter = logging.Formatter('%(asctime)s - %(filename)s - %(lineno)d - %(levelname)s - %(message)s')
+console = logging.StreamHandler()
+console.setFormatter(formatter)
+logger.addHandler(console)
diff --git a/core/__init__.py b/core/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/core/__init__.py
diff --git a/core/entry_filter.py b/core/entry_filter.py
new file mode 100644
index 0000000..62ab9f5
--- /dev/null
+++ b/core/entry_filter.py
@@ -0,0 +1,31 @@
+import fnmatch
+
+def filter_entry(config, agent, entry):
+    """Decide whether ``agent`` should process ``entry``.
+
+    Args:
+        config: Parsed configuration; ``config['agents']`` maps agent names to
+            their settings (``title`` and, optionally, ``style_block``).
+        agent: ``(name, settings)`` pair, as yielded by
+            ``config['agents'].items()``. ``settings`` may carry ``allow_list``
+            / ``deny_list`` (or the legacy ``whitelist`` / ``blacklist``) as
+            lists of shell-style wildcard patterns.
+        entry: Entry dict providing ``content`` and ``feed.site_url``.
+
+    Returns:
+        True when the entry should be processed, False otherwise.
+    """
+    settings = agent[1]
+    agent_settings = config['agents'].values()
+
+    # Content starting with any agent title, or with '<pre' when at least one
+    # agent uses a style block, is skipped (presumably it already carries
+    # generated output).
+    skip_prefixes = [item['title'] for item in agent_settings]
+    if any(item.get('style_block') for item in agent_settings):
+        skip_prefixes.append('<pre')
+    if entry['content'].startswith(tuple(skip_prefixes)):
+        return False
+
+    # TODO: whitelist/blacklist are legacy parameter names, to be removed.
+    allow_list = settings.get('allow_list')
+    if allow_list is None:
+        allow_list = settings.get('whitelist')
+    deny_list = settings.get('deny_list')
+    if deny_list is None:
+        deny_list = settings.get('blacklist')
+
+    def matches(patterns):
+        # The site URL is looked up lazily, only when a pattern is tested.
+        return any(fnmatch.fnmatch(entry['feed']['site_url'], pattern)
+                   for pattern in patterns)
+
+    # An allow list takes precedence: when present, the deny list is ignored.
+    if allow_list is not None:
+        return matches(allow_list)
+    if deny_list is not None:
+        return not matches(deny_list)
+    # Neither list configured: every entry is eligible.
+    return True
diff --git a/main.py b/main.py
index 9cf56c2..514f410 100644
--- a/main.py
+++ b/main.py
@@ -1,7 +1,8 @@
import concurrent.futures
import time
-import logging
import traceback
+from common.logger import logger
+from core.entry_filter import filter_entry
import miniflux
from markdownify import markdownify as md
@@ -13,33 +14,16 @@ config = safe_load(open('config.yml', encoding='utf8'))
miniflux_client = miniflux.Client(config['miniflux']['base_url'], api_key=config['miniflux']['api_key'])
llm_client = OpenAI(base_url=config['llm']['base_url'], api_key=config['llm']['api_key'])
-logger = logging.getLogger(__name__)
-logger.setLevel(config.get('log_level', 'INFO'))
-formatter = logging.Formatter('%(asctime)s - %(filename)s - %(lineno)d - %(levelname)s - %(message)s')
-console = logging.StreamHandler()
-console.setFormatter(formatter)
-logger.addHandler(console)
-
def process_entry(entry):
llm_result = ''
- start_with_list = [name[1]['title'] for name in config['agents'].items()]
- style_block = [name[1]['style_block'] for name in config['agents'].items()]
- [start_with_list.append('<pre') for i in style_block if i]
for agent in config['agents'].items():
- # Todo Compatible with whitelist/blacklist parameter, to be removed
- allow_list = agent[1].get('allow_list') if agent[1].get('allow_list') is not None else agent[1].get('whitelist')
- deny_list = agent[1]['deny_list'] if agent[1].get('deny_list') is not None else agent[1].get('blacklist')
-
messages = [
{"role": "system", "content": agent[1]['prompt']},
{"role": "user", "content": "The following is the input content:\n---\n " + md(entry['content']) }
]
# filter, if AI is not generating, and in allow_list, or not in deny_list
- if ((not entry['content'].startswith(tuple(start_with_list))) and
- (((allow_list is not None) and (entry['feed']['site_url'] in allow_list)) or
- (deny_list is not None and entry['feed']['site_url'] not in deny_list) or
- (allow_list is None and deny_list is None))):
+ if filter_entry(config, agent, entry):
completion = llm_client.chat.completions.create(
model=config['llm']['model'],
messages= messages,
@@ -47,7 +31,7 @@ def process_entry(entry):
)
response_content = completion.choices[0].message.content
- logger.info(f"\nagents:{agent[0]} \nfeed_title:{entry['title']} \nresult:{response_content}")
+ logger.info(f"agents:{agent[0]} feed_title:{entry['title']} result:{response_content}")
if agent[1]['style_block']:
llm_result = (llm_result + '<pre style="white-space: pre-wrap;"><code>\n'
diff --git a/tests/test_filter.py b/tests/test_filter.py
new file mode 100644
index 0000000..5aa4add
--- /dev/null
+++ b/tests/test_filter.py
@@ -0,0 +1,125 @@
+import unittest
+from yaml import safe_load
+from core.entry_filter import filter_entry
+
+test_config = '''
+{
+ "test_style_block": {
+ "agents": {
+ "test": {
+ "title": "🌐AI 翻译",
+ "style_block": true,
+ "allow_list": ,
+ "deny_list":
+ }
+ }
+ },
+ "test_allow_list": {
+ "agents": {
+ "test": {
+ "title": "🌐AI 翻译",
+ "style_block": false,
+ "allow_list": [
+ "https://9to5mac.com/",
+ "https://home.kpmg/*"
+ ],
+ "deny_list":
+ }
+ }
+ },
+ "test_deny_list": {
+ "agents": {
+ "test": {
+ "title": "🌐AI 翻译",
+ "style_block": false,
+ "allow_list": ,
+ "deny_list": [
+ "https://9to5mac.com/",
+ "https://home.kpmg/cn/zh/home/insights.html"
+ ]
+ }
+ }
+ },
+ "test_None": {
+ "agents": {
+ "test": {
+ "title": "🌐AI 翻译",
+ "style_block": false,
+ "allow_list": ,
+ "deny_list":
+ }
+ }
+ }
+}
+'''
+
+test_entries = '''
+{
+ "test_style_block":
+ {
+ "entry":
+ {
+ "content": '<pre',
+ "feed":
+ {
+ "site_url": "https://weibo.com/1906286443/OAih1wghK",
+ },
+ },
+ "result": False,
+ },
+ "test_allow_list":
+ {
+ "entry":
+ {
+ "content": '123',
+ "feed":
+ {
+ "site_url": "https://home.kpmg/cn/zh/home/insights.html",
+ },
+ },
+ "result": True,
+ },
+ "test_deny_list":
+ {
+ "entry":
+ {
+ "content": '123',
+ "feed":
+ {
+ "site_url": "https://weibo.com/1906286443/OAih1wghK",
+ },
+ },
+ "result": True,
+ },
+ "test_None":
+ {
+ "entry":
+ {
+ "content": '123',
+ "feed":
+ {
+ "site_url": "https://weibo.com/1906286443/OAih1wghK",
+ },
+ },
+ "result": True,
+ },
+}
+
+'''
+
+# Parse both fixtures; `configs` and `entries` are keyed by the same scenario
+# names (test_style_block, test_allow_list, test_deny_list, test_None).
+configs = safe_load(test_config)
+entries = safe_load(test_entries)
+
+class MyTestCase(unittest.TestCase):
+    """Run ``filter_entry`` against every scenario defined in ``configs``."""
+
+    def test_entry_filter(self):
+        # Each scenario is filtered with its own config and its own agent
+        # pair, so the allow/deny lists under test are the ones consulted.
+        for scenario, config in configs.items():
+            expected = entries[scenario]
+            for agent in config['agents'].items():
+                with self.subTest(scenario=scenario, agent=agent[0]):
+                    result = filter_entry(config, agent, expected['entry'])
+                    self.assertEqual(result, expected['result'])
+
+
+if __name__ == '__main__':
+ unittest.main()