Optimize, use ymlv0.3

author: Qetesh <4559341+Qetesh@users.noreply.github.com> 2024-08-19 16:50:11 +0800
committer: Qetesh <4559341+Qetesh@users.noreply.github.com> 2024-08-19 16:50:11 +0800
commit: 8f77416b1790b877cbbca1f1eca5d320c5a693fa (patch)
tree: edcb99b6c780636aa4ff2ae78c794ce810041684
parent: ed78d9a9006b367452dd8711176dc23330edc320 (diff)
download: miniflux-ai-8f77416b1790b877cbbca1f1eca5d320c5a693fa.tar.gz
miniflux-ai-8f77416b1790b877cbbca1f1eca5d320c5a693fa.tar.zst
miniflux-ai-8f77416b1790b877cbbca1f1eca5d320c5a693fa.zip
5 files changed, 225 insertions, 85 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..287a2f0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,162 @@
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+config.yml
diff --git a/config.sample.yml b/config.sample.yml
new file mode 100644
index 0000000..d56863f
--- /dev/null
+++ b/config.sample.yml
@@ -0,0 +1,24 @@
+miniflux:
+  base_url: https://your.server.com
+  api_key: Miniflux API key here
+
+llm:
+  base_url: http://host.docker.internal:11434/v1  # passed to the OpenAI client as base_url
+  api_key: ollama
+  model: llama3.1:latest
+
+agents:  # every agent sends the entry content to the LLM with its own prompt
+  summary:
+    title: "💡AI 摘要"  # prefixed to the output; entries whose content already starts with an agent title are skipped
+    prompt: "Please summarize the content of the article in 50 words in Chinese.  Do not add any explanations or annotations to the result text. 请用 50 个字的中文总结文章的内容。"
+    style_block: true  # true: wrap the output in a <pre><code> block
+    blacklist:  # feeds whose site_url exactly equals an item are skipped
+      - https://xxxx.net
+    whitelist:  # empty: no whitelist for this agent
+  translate:
+    title: "🌐AI 翻译"
+    prompt: "You are a highly skilled translation engine with expertise in the news media sector. Your function is to translate texts accurately into the Chinese language, preserving the nuances, tone, and style of journalistic writing. Do not add any explanations or annotations to the translated text."
+    style_block: false
+    blacklist:  # empty: no blacklist for this agent
+    whitelist:  # with blacklist empty, only feeds whose site_url exactly equals an item are processed
+      - https://www.xxx.com/
+\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index dae993c..1aa8e7d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -6,5 +6,5 @@ services:
         restart: always
         environment:
             TZ: Asia/Shanghai
-        env_file:
-            - .env
-\ No newline at end of file
+        volumes:
+            - ./config.yml:/app/config.yml
+\ No newline at end of file
diff --git a/main.py b/main.py
index 17b24ff..9180760 100644
--- a/main.py
+++ b/main.py
@@ -1,94 +1,47 @@
-import os
+import concurrent.futures
 import time
 
-from openai import OpenAI
 import miniflux
 from markdownify import markdownify as md
-import concurrent.futures
-
-miniflux_base_url = os.getenv('miniflux_base_url')
-miniflux_api_key = os.getenv('miniflux_api_key')
-llm_base_url = os.getenv('llm_base_url')
-llm_api_key = os.getenv('llm_api_key')
-llm_model = os.getenv('llm_model')
-feed_blacklist = os.getenv('feed_blacklist').split(',')
+from openai import OpenAI
+from yaml import safe_load
 
-miniflux_client = miniflux.Client(miniflux_base_url, api_key=miniflux_api_key)
-llm_client = OpenAI(base_url=llm_base_url, api_key=llm_api_key)
+with open('config.yml', encoding='utf8') as config_file: config = safe_load(config_file)  # safe_load builds plain data only; the with-block closes the file
+miniflux_client = miniflux.Client(config['miniflux']['base_url'], api_key=config['miniflux']['api_key'])
+llm_client = OpenAI(base_url=config['llm']['base_url'], api_key=config['llm']['api_key'])
 
 def process_entry(entry):
-    if (not entry['content'].startswith('摘要')) & (entry['feed']['site_url'] not in feed_blacklist):
-        completion = llm_client.chat.completions.create(
-            model=llm_model,
-            messages=[
-                {
-                    "role": "system",
-                    "content": (
-                        "You are a highly skilled AI assistant capable of understanding and summarizing complex content from various "
-                        "Your task is to read the provided content, understand the main points, and produce a concise summary in Chinese."
-                        "Limit the summary to 50 words and 2 sentences. Do not add any additional text."
-                    )
-                },
-                {
-                    "role": "user",
-                    "content": (
-                        "Summarize the following content in Chinese: 'The latest advancements in AI chip technology have enabled "
-                        "faster processing speeds and lower energy consumption. These innovations are paving the way for more efficient "
-                        "machine learning models, and companies are rapidly adopting these technologies to stay competitive.'"
-                    )
-                },
-                {
-                    "role": "assistant",
-                    "content": (
-                        "最新的AI芯片技术取得了突破，使处理速度更快、能耗更低。这些创新为更高效的机器学习模型铺平了道路，企业纷纷采用这些技术以保持竞争力。"
-                    )
-                },
-                {
-                    "role": "user",
-                    "content": (
-                        "Summarize the following content in Chinese: 'The government has announced new policies aimed at reducing "
-                        "carbon emissions by 2030. These measures include investing in renewable energy, imposing stricter regulations "
-                        "on industries, and promoting electric vehicles. Experts believe these policies will significantly reduce the "
-                        "country's carbon footprint.'"
-                    )
-                },
-                {
-                    "role": "assistant",
-                    "content": (
-                        "政府宣布了到2030年减少碳排放的新政策，包括投资可再生能源、加强行业监管和推广电动汽车。专家认为这些政策将显著减少国家的碳足迹。"
-                    )
-                },
-                {
-                    "role": "user",
-                    "content": (
-                        "Summarize the following content in Chinese: 'Participants are debating the pros and cons of remote work. "
-                        "Some argue that it increases productivity and work-life balance, while others believe it leads to isolation and "
-                        "decreased collaboration. Overall, the consensus is that remote work is beneficial if managed properly.'"
-                    )
-                },
-                {
-                    "role": "assistant",
-                    "content": (
-                        "论坛讨论了远程工作的利弊。有人认为它提高了生产力和平衡了工作与生活，有人则认为它导致孤立和减少了协作。总体而言，大家认为远程工作在管理得当的情况下是有益的。"
-                    )
-                },
-                {
-                    "role": "user",
-                    "content": (
-                            "Summarize the following content in Chinese: '" + md(entry['content']) + "'"
-                    )
-                }
-            ]
-        )
-        llm_result = completion.choices[0].message.content
-        print(llm_result)
-        miniflux_client.update_entry(entry['id'], content='摘要：' + llm_result + '<hr><br />' + entry['content'])
-    return None
+    """Prepend the output of every applicable agent to the entry's content; skip entries already annotated."""
+    agents = config['agents']
+    # Prefixes that mark content as already annotated: every agent title, plus '<pre' when any agent uses style_block.
+    markers = tuple(a['title'] for a in agents.values()) + (('<pre',) if any(a['style_block'] for a in agents.values()) else ())
+    if entry['content'].startswith(markers):
+        return
+    site_url, llm_result, markdown = entry['feed']['site_url'], '', None
+    for name, agent in agents.items():
+        whitelist, blacklist = agent.get('whitelist'), agent.get('blacklist')  # a missing key behaves like an empty one
+        # Run the agent when the site is whitelisted, or not blacklisted, or when neither list is configured.
+        if not ((whitelist is not None and site_url in whitelist) or
+                (blacklist is not None and site_url not in blacklist) or
+                (whitelist is None and blacklist is None)):
+            continue
+        markdown = md(entry['content']) if markdown is None else markdown  # convert once, and only when an agent runs
+        messages = [{"role": "system", "content": agent['prompt']}, {"role": "user", "content": "The following is the input content:\n---\n " + markdown}]
+        completion = llm_client.chat.completions.create(model=config['llm']['model'], messages=messages)
+        response_content = completion.choices[0].message.content
+        print(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime()), name, entry['feed']['feed_url'], response_content)
+        body = f"{agent['title']}：{response_content}"
+        llm_result += (f"<pre style=\"white-space: pre-wrap;\"><code>\n{body}</code></pre>" if agent['style_block'] else body) + "<hr><br />"
+    if llm_result:
+        miniflux_client.update_entry(entry['id'], content=llm_result + entry['content'])
 
 while True:
     # Fetch entries with status unread
     entries = miniflux_client.get_entries(status=['unread'], limit=10000)
-
+    print(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime()), f"Fetched new entries: {len(entries['entries'])}" if entries['entries'] else 'No new entries')
     with concurrent.futures.ThreadPoolExecutor() as executor:
         futures = [executor.submit(process_entry, i) for i in entries['entries']]
+    failed = [f for f in futures if f.exception() is not None]  # all futures are done here; surface worker errors instead of dropping them
+    if len(entries['entries']) > 0:
+        print(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime()), 'Done' if not failed else f'Done, {len(failed)} entries failed: {failed[0].exception()!r}')
     time.sleep(60)
 \ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index dff3612..4596549 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
-miniflux~=1.0.1
-openai~=1.40.3
-markdownify~=0.13.1
-\ No newline at end of file
+miniflux>=1.0.1,<2
+openai>=1.40.3,<2
+markdownify>=0.13.1,<1
+PyYAML>=6.0,<7
+\ No newline at end of file
author	Qetesh <4559341+Qetesh@users.noreply.github.com>	2024-08-19 16:50:11 +0800
committer	Qetesh <4559341+Qetesh@users.noreply.github.com>	2024-08-19 16:50:11 +0800
commit	8f77416b1790b877cbbca1f1eca5d320c5a693fa (patch)
tree	edcb99b6c780636aa4ff2ae78c794ce810041684
parent	ed78d9a9006b367452dd8711176dc23330edc320 (diff)
download	miniflux-ai-8f77416b1790b877cbbca1f1eca5d320c5a693fa.tar.gz miniflux-ai-8f77416b1790b877cbbca1f1eca5d320c5a693fa.tar.zst miniflux-ai-8f77416b1790b877cbbca1f1eca5d320c5a693fa.zip