# Source code for folderbot.config

"""Configuration loading for self-bot."""

import logging
import os
import tomllib
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional

logger = logging.getLogger(__name__)


# Read rules: glob patterns used when the config's "read_rules" table does
# not supply its own lists.
DEFAULT_INCLUDE_PATTERNS = [
    "**/*.md",
    "**/*.txt",
]
DEFAULT_EXCLUDE_PATTERNS = [
    "**/docs/**",
    "folderbot/**",
    ".folderbot/**",
    "logs/**",
    ".git/**",
]
DEFAULT_APPEND_ALLOWED_PATTERNS = [
    "**/todo.md",
    "**/todos.md",
]

# File watching: glob patterns used when the config's "watch" table does not
# supply its own lists.
DEFAULT_WATCH_INCLUDE = [
    "**/*.md",
]
DEFAULT_WATCH_EXCLUDE = [
    ".git/**",
    ".folderbot/**",
    "**/__pycache__/**",
    "**/*.pyc",
]


@dataclass
class WatchConfig:
    """Settings for the file-change watcher.

    The watcher itself is always running; whether a given user receives
    notifications is decided per user through the
    enable_file_notifications/disable_file_notifications tools.
    """

    # Include patterns; each instance gets its own copy of DEFAULT_WATCH_INCLUDE.
    include: list[str] = field(default_factory=lambda: list(DEFAULT_WATCH_INCLUDE))
    # Exclude patterns; each instance gets its own copy of DEFAULT_WATCH_EXCLUDE.
    exclude: list[str] = field(default_factory=lambda: list(DEFAULT_WATCH_EXCLUDE))
    # NOTE(review): presumably the quiet period (in seconds) before a change is
    # reported -- confirm against the watcher implementation.
    debounce_seconds: float = 2.0
@dataclass
class ReadRules:
    """Pattern lists that make up the read rules.

    Every field defaults to a fresh copy of the matching module-level
    DEFAULT_*_PATTERNS list, so instances never share list objects.
    """

    # Defaults to a copy of DEFAULT_INCLUDE_PATTERNS.
    include: list[str] = field(
        default_factory=lambda: list(DEFAULT_INCLUDE_PATTERNS)
    )
    # Defaults to a copy of DEFAULT_EXCLUDE_PATTERNS.
    exclude: list[str] = field(
        default_factory=lambda: list(DEFAULT_EXCLUDE_PATTERNS)
    )
    # Defaults to a copy of DEFAULT_APPEND_ALLOWED_PATTERNS.
    append_allowed: list[str] = field(
        default_factory=lambda: list(DEFAULT_APPEND_ALLOWED_PATTERNS)
    )
[docs] def migrate_yaml_to_toml(yaml_path: Path) -> Path | None: """Migrate a YAML config file to TOML format. Args: yaml_path: Path to the .yaml config file. Returns: Path to the new .toml file, or None if migration was skipped. """ if not yaml_path.exists(): return None toml_path = yaml_path.with_suffix(".toml") if toml_path.exists(): return None # Don't overwrite existing TOML try: import yaml # Only needed for migration except ImportError: logger.warning( f"Found {yaml_path} but pyyaml not installed. " "Install pyyaml to auto-migrate: pip install pyyaml" ) return None try: import tomlkit with open(yaml_path) as f: data = yaml.safe_load(f) or {} toml_path.write_text(tomlkit.dumps(data)) yaml_path.unlink() logger.info(f"Migrated {yaml_path}{toml_path}") return toml_path except Exception as e: logger.error(f"Failed to migrate {yaml_path}: {e}") return None
def find_config_path(start_dir: Optional[Path] = None) -> Path:
    """Locate ``.folderbot/config.toml`` under the given directory.

    When the TOML file is missing but ``.folderbot/config.yaml`` exists, a
    YAML-to-TOML migration is attempted before returning.

    Args:
        start_dir: Directory expected to contain ``.folderbot/``. Defaults to
            Path.cwd().

    Returns:
        Path to ``.folderbot/config.toml``. The file may not exist; the caller
        should check.
    """
    base_dir = Path.cwd() if start_dir is None else start_dir
    config_dir = base_dir / ".folderbot"
    toml_path = config_dir / "config.toml"

    if toml_path.exists():
        return toml_path

    # No TOML yet: give a legacy YAML config the chance to be migrated.
    legacy_yaml = config_dir / "config.yaml"
    if legacy_yaml.exists():
        migrate_yaml_to_toml(legacy_yaml)
    return toml_path
def _is_local_config(config_path: Path) -> bool: """Check if a config path is inside a .folderbot/ directory.""" return config_path.parent.name == ".folderbot"
@dataclass
class Config:
    """Resolved settings for one bot.

    Instances are normally built with :meth:`load`, which combines a TOML
    config file with environment-variable overrides.
    """

    telegram_token: str
    root_folder: Path
    allowed_user_ids: list[int]
    api_key: str = ""
    user_name: str = "User"
    read_rules: ReadRules = field(default_factory=ReadRules)
    watch_config: WatchConfig = field(default_factory=WatchConfig)
    db_path: Path = field(default_factory=lambda: Path(".folderbot/sessions.db"))
    todo_path: Path = field(default_factory=lambda: Path(".folderbot/todos.md"))
    model: str = "anthropic/claude-sonnet-4-20250514"
    max_context_chars: int = 100_000
    max_history_chars: int = 50_000
    whisper_model: str = "base"
    google_api_key: str = ""
    google_cx: str = ""
    tools: dict[str, dict[str, Any]] = field(default_factory=dict)

    @classmethod
    def load(
        cls, config_path: Optional[Path] = None, bot_name: Optional[str] = None
    ) -> "Config":
        """Load config from a TOML file and/or environment variables.

        Environment variables take precedence over values from the file:
        TELEGRAM_BOT_TOKEN, SELF_BOT_MODEL, FOLDERBOT_API_KEY, SELF_BOT_ROOT,
        SELF_BOT_ALLOWED_IDS (comma-separated integers), SELF_BOT_DB_PATH,
        SELF_BOT_TODO_PATH, GOOGLE_API_KEY and GOOGLE_CX.

        A config file that exists but is not valid TOML is logged and then
        treated as empty, so settings can still come from the environment.

        Args:
            config_path: Path to config file. If None, it is resolved with
                find_config_path(), i.e. .folderbot/config.toml under the
                current working directory.
            bot_name: Name of bot to load (for multi-bot configs). If None and
                config has multiple bots, raises an error. If config has only
                one bot, it's selected automatically.

        Returns:
            The fully resolved Config.

        Raises:
            ValueError: If no Telegram token or no allowed user IDs are
                configured, if SELF_BOT_ALLOWED_IDS contains a non-integer
                entry, or if the requested/implicit bot cannot be selected.
        """
        raw_config: dict[str, Any] = {}

        # Find config file if not explicitly provided
        if config_path is None:
            config_path = find_config_path()

        if config_path.exists():
            with open(config_path, "rb") as f:
                try:
                    raw_config = tomllib.load(f)
                except tomllib.TOMLDecodeError as e:
                    # Keep the best-effort fallback to an empty config, but say
                    # why: otherwise the user only sees an unrelated
                    # "token not set" error further down.
                    logger.warning(f"Ignoring {config_path}: invalid TOML ({e})")
                    raw_config = {}

        # Determine effective config (handle multi-bot structure)
        config_data = cls._resolve_bot_config(raw_config, bot_name)

        # Environment variables override the config file
        telegram_token = os.environ.get("TELEGRAM_BOT_TOKEN") or config_data.get(
            "telegram_token"
        )

        # Resolve API key: env var > provider-specific config key > generic api_key
        model = os.environ.get("SELF_BOT_MODEL") or config_data.get(
            "model", "anthropic/claude-sonnet-4-20250514"
        )
        provider = model.split("/")[0] if "/" in model else ""
        # google_api_key is reserved for Google Custom Search, not the LLM key
        provider_key_name = (
            f"{provider}_api_key" if provider and provider != "google" else ""
        )
        api_key = (
            os.environ.get("FOLDERBOT_API_KEY")
            or (config_data.get(provider_key_name, "") if provider_key_name else "")
            or config_data.get("api_key", "")
        )

        if not telegram_token:
            raise ValueError("TELEGRAM_BOT_TOKEN not set (env var or config.toml)")

        # Root folder — for local config, default to parent of .folderbot/
        root_folder_str = os.environ.get("SELF_BOT_ROOT") or config_data.get(
            "root_folder"
        )
        if root_folder_str:
            root_folder = Path(root_folder_str).expanduser().resolve()
        elif _is_local_config(config_path):
            # .folderbot/config.toml → parent is .folderbot/ → parent.parent is root
            root_folder = config_path.parent.parent.resolve()
        else:
            root_folder = Path.cwd()

        # Allowed user IDs
        allowed_ids_str = os.environ.get("SELF_BOT_ALLOWED_IDS")
        if allowed_ids_str:
            # Skip blank entries so a trailing or doubled comma ("1,2,") does
            # not crash in int(''); genuinely non-numeric entries still raise.
            allowed_user_ids = [
                int(part.strip())
                for part in allowed_ids_str.split(",")
                if part.strip()
            ]
        else:
            allowed_user_ids = config_data.get("allowed_user_ids", [])

        if not allowed_user_ids:
            raise ValueError("No allowed_user_ids configured")

        # Read rules
        read_rules_data = config_data.get("read_rules", {})
        read_rules = ReadRules(
            include=read_rules_data.get("include", DEFAULT_INCLUDE_PATTERNS.copy()),
            exclude=read_rules_data.get("exclude", DEFAULT_EXCLUDE_PATTERNS.copy()),
            append_allowed=read_rules_data.get(
                "append_allowed", DEFAULT_APPEND_ALLOWED_PATTERNS.copy()
            ),
        )

        # Watch config
        watch_data = config_data.get("watch", {})
        watch_config = WatchConfig(
            include=watch_data.get("include", DEFAULT_WATCH_INCLUDE.copy()),
            exclude=watch_data.get("exclude", DEFAULT_WATCH_EXCLUDE.copy()),
            debounce_seconds=watch_data.get("debounce_seconds", 2.0),
        )

        # Other settings
        user_name = config_data.get("user_name", "User")

        # Paths default to files inside <root_folder>/.folderbot/
        db_path_str = os.environ.get("SELF_BOT_DB_PATH") or config_data.get("db_path")
        if db_path_str:
            db_path = Path(db_path_str).expanduser()
        else:
            db_path = root_folder / ".folderbot" / "sessions.db"

        todo_path_str = os.environ.get("SELF_BOT_TODO_PATH") or config_data.get(
            "todo_path"
        )
        if todo_path_str:
            todo_path = Path(todo_path_str).expanduser()
        else:
            todo_path = root_folder / ".folderbot" / "todos.md"

        max_context_chars = config_data.get("max_context_chars", 100_000)
        max_history_chars = config_data.get("max_history_chars", 50_000)

        # Parse [tools] section
        tools: dict[str, dict[str, Any]] = dict(config_data.get("tools", {}))

        # Whisper model: [tools.whisper].model > flat whisper_model > default
        # NOTE(review): no environment override is read for this setting.
        whisper_tool_cfg = tools.get("whisper", {})
        whisper_model = whisper_tool_cfg.get(
            "model", config_data.get("whisper_model", "base")
        )

        # Google keys: env > [tools.web_search].* > flat * > default
        web_search_cfg = tools.get("web_search", {})
        google_api_key = os.environ.get("GOOGLE_API_KEY") or web_search_cfg.get(
            "google_api_key", config_data.get("google_api_key", "")
        )
        google_cx = os.environ.get("GOOGLE_CX") or web_search_cfg.get(
            "google_cx", config_data.get("google_cx", "")
        )

        return cls(
            telegram_token=telegram_token,
            api_key=api_key,
            root_folder=root_folder,
            allowed_user_ids=allowed_user_ids,
            user_name=user_name,
            read_rules=read_rules,
            watch_config=watch_config,
            db_path=db_path,
            todo_path=todo_path,
            model=model,
            max_context_chars=max_context_chars,
            max_history_chars=max_history_chars,
            whisper_model=whisper_model,
            google_api_key=google_api_key,
            google_cx=google_cx,
            tools=tools,
        )

    @classmethod
    def _resolve_bot_config(
        cls, raw_config: dict[str, Any], bot_name: Optional[str]
    ) -> dict[str, Any]:
        """Resolve the effective config, handling multi-bot structure.

        If raw_config has a non-empty 'bots' section, merge global settings
        with the selected bot's settings (bot settings win; the merge is
        shallow). Otherwise, return raw_config as-is (backward compatible flat
        format).

        Args:
            raw_config: Parsed config file contents.
            bot_name: Bot to select, or None to auto-select when exactly one
                bot is defined.

        Returns:
            The flat settings dict for the selected bot.

        Raises:
            ValueError: If bot_name is not defined under 'bots', or if it is
                None while several bots are defined.
        """
        bots = raw_config.get("bots")

        if not bots:
            # Flat config format (backward compatible)
            return raw_config

        # Multi-bot config format
        if bot_name:
            if bot_name not in bots:
                available = ", ".join(bots.keys())
                raise ValueError(
                    f"Bot '{bot_name}' not found in config. Available bots: {available}"
                )
            selected_bot = bot_name
        elif len(bots) == 1:
            # Auto-select the only bot
            selected_bot = next(iter(bots.keys()))
        else:
            # Multiple bots, need to specify which one
            available = ", ".join(bots.keys())
            raise ValueError(
                f"Config has multiple bots. Please specify which bot to run: {available}"
            )

        # Global settings (everything except 'bots') form the base; the
        # selected bot's own settings override them key by key.
        merged: dict[str, Any] = {
            key: value for key, value in raw_config.items() if key != "bots"
        }
        merged.update(bots[selected_bot])
        return merged