author    | 2024-09-21 15:59:57 +0800
committer | 2024-09-21 15:59:57 +0800
commit    | 2f366fed45a202db861ef1d869fef3899ee4b66b (patch)
tree      | f3377762722963d40f98fb870567017e38634433
parent    | 309e685238c3865d0bc841efb78f80f8d6633c02 (diff)
add logger, llm timeout/max_workers; replace whitelist/blacklist with allow_list/deny_list
-rw-r--r-- | README.md         | 18
-rw-r--r-- | config.sample.yml | 14
-rw-r--r-- | main.py           | 43
3 files changed, 53 insertions, 22 deletions
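The commit introduces three new optional knobs: a top-level `log_level` plus `timeout` and `max_workers` under `llm`. A minimal sketch of how they resolve to defaults when omitted, mirroring the `config.get(...)` fallbacks visible in the main.py diff below; the sample YAML values are placeholders, not part of the commit:

```python
from yaml import safe_load

# Placeholder config echoing config.sample.yml; timeout/max_workers omitted on purpose.
sample = safe_load("""
log_level: "INFO"   # accepted values per the sample config: INFO, DEBUG, WARN, ERROR
llm:
  base_url: http://host.docker.internal:11434/v1
  api_key: ollama
  model: llama3.1:latest
""")

log_level   = sample.get('log_level', 'INFO')               # default: INFO
timeout     = sample.get('llm', {}).get('timeout', 60)      # default: 60 s per LLM request
max_workers = sample.get('llm', {}).get('max_workers', 4)   # default: 4 worker threads

print(log_level, timeout, max_workers)  # -> INFO 60 4
```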
diff --git a/README.md b/README.md
@@ -28,11 +28,14 @@ This project fetches RSS subscription content from Miniflux via API and utilizes
 
 The repository includes a template configuration file: `config.sample.yml`. Modify the `config.yml` to set up:
 - **Miniflux**: Base URL and API key.
-- **LLM**: Model settings, API key, and endpoint.
-- **Agents**: Define each agent's prompt, whitelist/blacklist filters, and output style(`style_block` parameter controls whether the output is formatted as a code block in Markdown).
+- **LLM**: Model settings, API key, and endpoint. Optional `timeout` and `max_workers` parameters for the multithreaded LLM calls.
+- **Agents**: Define each agent's prompt, allow_list/deny_list filters, and output style (`style_block` parameter controls whether the output is formatted as a code block in Markdown).
 
 Example `config.yml`:
 
 ```yaml
+# INFO、DEBUG、WARN、ERROR
+log_level: "INFO"
+
 miniflux:
   base_url: https://your.server.com
   api_key: Miniflux API key here
@@ -41,21 +44,24 @@ llm:
   base_url: http://host.docker.internal:11434/v1
   api_key: ollama
   model: llama3.1:latest
+# timeout: 60
+# max_workers: 4
 
 agents:
   summary:
     title: "💡AI 摘要"
     prompt: "Please summarize the content of the article under 50 words in Chinese. Do not add any additional Character、markdown language to the result text. 请用不超过50个汉字概括文章内容。结果文本中不要添加任何额外的字符、Markdown语言。"
     style_block: true
-    blacklist:
+    deny_list:
       - https://xxxx.net
-    whitelist:
+    allow_list:
+
   translate:
     title: "🌐AI 翻译"
     prompt: "You are a highly skilled translation engine with expertise in the news media sector. Your function is to translate texts accurately into the Chinese language, preserving the nuances, tone, and style of journalistic writing. Do not add any explanations or annotations to the translated text."
     style_block: false
-    blacklist:
-    whitelist:
+    deny_list:
+    allow_list:
       - https://www.xxx.com/
 ```
diff --git a/config.sample.yml b/config.sample.yml
index a9d4c39..83e1436 100644
--- a/config.sample.yml
+++ b/config.sample.yml
@@ -1,3 +1,6 @@
+# INFO、DEBUG、WARN、ERROR
+log_level: "INFO"
+
 miniflux:
   base_url: https://your.server.com
   api_key: Miniflux API key here
@@ -6,19 +9,22 @@ llm:
   base_url: http://host.docker.internal:11434/v1
   api_key: ollama
   model: llama3.1:latest
+# timeout: 60
+# max_workers: 4
 
 agents:
   summary:
     title: "💡AI 摘要"
     prompt: "Please summarize the content of the article under 50 words in Chinese. Do not add any additional Character、markdown language to the result text. 请用不超过50个汉字概括文章内容。结果文本中不要添加任何额外的字符、Markdown语言。"
     style_block: true
-    blacklist:
+    deny_list:
       - https://xxxx.net
-    whitelist:
+    allow_list:
+
   translate:
     title: "🌐AI 翻译"
     prompt: "You are a highly skilled translation engine with expertise in the news media sector. Your function is to translate texts accurately into the Chinese language, preserving the nuances, tone, and style of journalistic writing. Do not add any explanations or annotations to the translated text."
     style_block: false
-    blacklist:
-    whitelist:
+    deny_list:
+    allow_list:
       - https://www.xxx.com/
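The rename keeps the filter semantics intact: an agent processes an entry when the feed's site URL appears in its allow_list, or when a deny_list is set and the URL is not on it, or when neither list is configured; the legacy whitelist/blacklist keys are still honored as fallbacks. A pure-function restatement of the predicate that the main.py diff below implements inline (the function name and the uniform use of `.get` are mine, for illustration):

```python
def should_process(agent_cfg: dict, site_url: str) -> bool:
    """Illustrative restatement of the inline filter in main.py; not part of the commit."""
    # New keys take precedence; deprecated whitelist/blacklist act as fallbacks.
    allow_list = agent_cfg.get('allow_list')
    if allow_list is None:
        allow_list = agent_cfg.get('whitelist')
    deny_list = agent_cfg.get('deny_list')
    if deny_list is None:
        deny_list = agent_cfg.get('blacklist')

    if allow_list is not None and site_url in allow_list:
        return True                                   # explicitly allowed
    if deny_list is not None and site_url not in deny_list:
        return True                                   # deny_list set, URL not on it
    return allow_list is None and deny_list is None   # no filters configured: process all

print(should_process({'deny_list': ['https://xxxx.net']}, 'https://example.org'))  # True
print(should_process({'deny_list': ['https://xxxx.net']}, 'https://xxxx.net'))     # False
```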
\ No newline at end of file
diff --git a/main.py b/main.py
--- a/main.py
+++ b/main.py
@@ -1,16 +1,25 @@
 import concurrent.futures
 import time
+import logging
+import traceback
 import miniflux
 from markdownify import markdownify as md
 import markdown
 from openai import OpenAI
-from yaml import load, Loader
+from yaml import safe_load
 
-config = load(open('config.yml', encoding='utf8'), Loader=Loader)
+config = safe_load(open('config.yml', encoding='utf8'))
 miniflux_client = miniflux.Client(config['miniflux']['base_url'], api_key=config['miniflux']['api_key'])
 llm_client = OpenAI(base_url=config['llm']['base_url'], api_key=config['llm']['api_key'])
 
+logger = logging.getLogger(__name__)
+logger.setLevel(config.get('log_level', 'INFO'))
+formatter = logging.Formatter('%(asctime)s - %(filename)s - %(lineno)d - %(levelname)s - %(message)s')
+console = logging.StreamHandler()
+console.setFormatter(formatter)
+logger.addHandler(console)
+
 def process_entry(entry):
     llm_result = ''
     start_with_list = [name[1]['title'] for name in config['agents'].items()]
@@ -18,18 +27,27 @@ def process_entry(entry):
     [start_with_list.append('<pre') for i in style_block if i]
 
     for agent in config['agents'].items():
+        # TODO: accept the legacy whitelist/blacklist parameters for compatibility; to be removed
+        allow_list = agent[1].get('allow_list') if agent[1].get('allow_list') is not None else agent[1].get('whitelist')
+        deny_list = agent[1]['deny_list'] if agent[1].get('deny_list') is not None else agent[1].get('blacklist')
+
         messages = [
            {"role": "system", "content": agent[1]['prompt']},
            {"role": "user", "content": "The following is the input content:\n---\n " + md(entry['content'])}
        ]
-        # filter, if AI is not generating, and in whitelist, or not in blacklist
+        # filter: skip entries that already contain AI output, then apply allow_list/deny_list
        if ((not entry['content'].startswith(tuple(start_with_list))) and
-                (((agent[1]['whitelist'] is not None) and (entry['feed']['site_url'] in agent[1]['whitelist'])) or
-                 (agent[1]['blacklist'] is not None and entry['feed']['site_url'] not in agent[1]['blacklist']) or
-                 (agent[1]['whitelist'] is None and agent[1]['blacklist'] is None))):
-            completion = llm_client.chat.completions.create( model=config['llm']['model'], messages= messages, timeout=15 )
+                (((allow_list is not None) and (entry['feed']['site_url'] in allow_list)) or
+                 (deny_list is not None and entry['feed']['site_url'] not in deny_list) or
+                 (allow_list is None and deny_list is None))):
+            completion = llm_client.chat.completions.create(
+                model=config['llm']['model'],
+                messages=messages,
+                timeout=config.get('llm', {}).get('timeout', 60)
+            )
+
            response_content = completion.choices[0].message.content
-            print(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime()), agent[0], entry['feed']['feed_url'], response_content)
+            logger.info(f"\nagents:{agent[0]} \nfeed_title:{entry['title']} \nresult:{response_content}")
            if agent[1]['style_block']:
                llm_result = (llm_result + '<pre style="white-space: pre-wrap;"><code>\n'
@@ -45,16 +63,17 @@ def process_entry(entry):
 while True:
    entries = miniflux_client.get_entries(status=['unread'], limit=10000)
    start_time = time.time()
-    print(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime()), 'Fetched unread entries: ' + str(len(entries['entries']))) if len(entries['entries']) > 0 else print(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime()), 'No new entries')
+    logger.info('Fetched unread entries: ' + str(len(entries['entries']))) if len(entries['entries']) > 0 else logger.info('No new entries')
 
-    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
+    with concurrent.futures.ThreadPoolExecutor(max_workers=config.get('llm', {}).get('max_workers', 4)) as executor:
        futures = [executor.submit(process_entry, i) for i in entries['entries']]
        for future in concurrent.futures.as_completed(futures):
            try:
                data = future.result()
            except Exception as e:
-                print('generated an exception: %s' % e)
+                logger.error(traceback.format_exc())
+                logger.error('generated an exception: %s' % e)
 
    if len(entries['entries']) > 0 and time.time() - start_time >= 3:
-        print(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime()), 'Done')
+        logger.info('Done')
    time.sleep(60)
\ No newline at end of file
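For reference, the logger wiring added at the top of main.py can be exercised standalone. One caveat worth noting: Python's logging module spells the warning level WARNING, but it also accepts the 'WARN' alias listed in the sample config comment, so all four listed values work with `setLevel`. A self-contained sketch, with a placeholder config and an illustrative output line:

```python
import logging

config = {'log_level': 'INFO'}  # placeholder standing in for the parsed config.yml

logger = logging.getLogger(__name__)
logger.setLevel(config.get('log_level', 'INFO'))  # 'WARN' is accepted as an alias of WARNING
formatter = logging.Formatter('%(asctime)s - %(filename)s - %(lineno)d - %(levelname)s - %(message)s')
console = logging.StreamHandler()
console.setFormatter(formatter)
logger.addHandler(console)

logger.info('Fetched unread entries: 3')
# e.g. 2024-09-21 15:59:57,000 - sketch.py - 14 - INFO - Fetched unread entries: 3
```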