"""Configuration loading for self-bot."""
import logging
import os
import tomllib
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional
logger = logging.getLogger(__name__)

# --- Read rules -----------------------------------------------------------
# Fallback pattern lists used when the config does not provide its own.
DEFAULT_INCLUDE_PATTERNS = [
    "**/*.md",
    "**/*.txt",
]
DEFAULT_EXCLUDE_PATTERNS = [
    "**/docs/**",
    "folderbot/**",
    ".folderbot/**",
    "logs/**",
    ".git/**",
]
DEFAULT_APPEND_ALLOWED_PATTERNS = [
    "**/todo.md",
    "**/todos.md",
]

# --- File watching --------------------------------------------------------
# Fallback pattern lists for the file watcher.
DEFAULT_WATCH_INCLUDE = [
    "**/*.md",
]
DEFAULT_WATCH_EXCLUDE = [
    ".git/**",
    ".folderbot/**",
    "**/__pycache__/**",
    "**/*.pyc",
]
[docs]
@dataclass
class WatchConfig:
    """Settings for the file-change watcher.

    The watcher itself is always running; whether a user is notified is
    decided per user through the enable_file_notifications /
    disable_file_notifications tools.
    """

    # Patterns selecting which paths are watched / ignored. Each instance gets
    # its own list so mutating one config never touches the module defaults.
    include: list[str] = field(default_factory=lambda: list(DEFAULT_WATCH_INCLUDE))
    exclude: list[str] = field(default_factory=lambda: list(DEFAULT_WATCH_EXCLUDE))
    # NOTE(review): presumably the quiet period before a change is reported;
    # confirm against the watcher implementation.
    debounce_seconds: float = 2.0
[docs]
@dataclass
class ReadRules:
    """Pattern lists describing which files may be read or appended to."""

    # Every field starts as a fresh copy of the matching module-level default,
    # so instances never share (or mutate) the default lists.
    include: list[str] = field(default_factory=lambda: list(DEFAULT_INCLUDE_PATTERNS))
    exclude: list[str] = field(default_factory=lambda: list(DEFAULT_EXCLUDE_PATTERNS))
    append_allowed: list[str] = field(
        default_factory=lambda: list(DEFAULT_APPEND_ALLOWED_PATTERNS)
    )
[docs]
def migrate_yaml_to_toml(yaml_path: Path) -> Path | None:
"""Migrate a YAML config file to TOML format.
Args:
yaml_path: Path to the .yaml config file.
Returns:
Path to the new .toml file, or None if migration was skipped.
"""
if not yaml_path.exists():
return None
toml_path = yaml_path.with_suffix(".toml")
if toml_path.exists():
return None # Don't overwrite existing TOML
try:
import yaml # Only needed for migration
except ImportError:
logger.warning(
f"Found {yaml_path} but pyyaml not installed. "
"Install pyyaml to auto-migrate: pip install pyyaml"
)
return None
try:
import tomlkit
with open(yaml_path) as f:
data = yaml.safe_load(f) or {}
toml_path.write_text(tomlkit.dumps(data))
yaml_path.unlink()
logger.info(f"Migrated {yaml_path} → {toml_path}")
return toml_path
except Exception as e:
logger.error(f"Failed to migrate {yaml_path}: {e}")
return None
[docs]
def find_config_path(start_dir: Optional[Path] = None) -> Path:
    """Return the location of .folderbot/config.toml under a directory.

    When config.toml is absent but a config.yaml sits in the same
    .folderbot/ directory, an automatic YAML-to-TOML migration is attempted
    first.

    Args:
        start_dir: Directory expected to contain .folderbot/. Defaults to
            Path.cwd().

    Returns:
        Path to .folderbot/config.toml. The file may not exist, so the caller
        should check.
    """
    base_dir = Path.cwd() if start_dir is None else start_dir
    config_dir = base_dir / ".folderbot"
    toml_path = config_dir / "config.toml"

    if toml_path.exists():
        return toml_path

    # No TOML yet: try to upgrade a legacy YAML config if one is present.
    legacy_yaml = config_dir / "config.yaml"
    if legacy_yaml.exists():
        migrate_yaml_to_toml(legacy_yaml)
    return toml_path
def _is_local_config(config_path: Path) -> bool:
    """Tell whether *config_path* sits directly inside a .folderbot/ directory.

    Only the name of the immediate parent directory is compared.
    """
    containing_dir = config_path.parent
    return containing_dir.name == ".folderbot"
[docs]
@dataclass
class Config:
    """Runtime settings for a single bot.

    Instances are normally built with :meth:`Config.load`, which combines the
    TOML config file with environment-variable overrides.
    """

    # Required settings (no defaults).
    telegram_token: str
    root_folder: Path
    allowed_user_ids: list[int]
    # LLM API key; empty when none was configured.
    api_key: str = ""
    user_name: str = "User"
    read_rules: ReadRules = field(default_factory=ReadRules)
    watch_config: WatchConfig = field(default_factory=WatchConfig)
    db_path: Path = field(default_factory=lambda: Path(".folderbot/sessions.db"))
    todo_path: Path = field(default_factory=lambda: Path(".folderbot/todos.md"))
    model: str = "anthropic/claude-sonnet-4-20250514"
    max_context_chars: int = 100_000
    max_history_chars: int = 50_000
    whisper_model: str = "base"
    # Google Custom Search credentials (not the LLM key).
    google_api_key: str = ""
    google_cx: str = ""
    # Raw [tools] table from the config, keyed by tool name.
    tools: dict[str, dict[str, Any]] = field(default_factory=dict)

    @classmethod
    def load(
        cls, config_path: Optional[Path] = None, bot_name: Optional[str] = None
    ) -> "Config":
        """Load config from a TOML file and/or environment variables.

        Where a setting has an environment variable, that variable takes
        precedence over the value in the file. A config file that cannot be
        parsed as TOML is logged and ignored, so loading continues with
        environment variables and defaults only.

        Args:
            config_path: Path to config file. If None, ``find_config_path()``
                is used, which looks for .folderbot/config.toml in the
                current working directory.
            bot_name: Name of bot to load (for multi-bot configs). If None and
                config has multiple bots, raises an error. If config has only
                one bot, it's selected automatically.

        Returns:
            The populated Config.

        Raises:
            ValueError: If no Telegram token or no allowed user IDs are
                configured, if SELF_BOT_ALLOWED_IDS is not a comma-separated
                list of integers, or if the bot selection is invalid.
        """
        raw_config: dict[str, Any] = {}

        # Find config file if not explicitly provided
        if config_path is None:
            config_path = find_config_path()

        if config_path.exists():
            with open(config_path, "rb") as f:
                try:
                    raw_config = tomllib.load(f)
                except tomllib.TOMLDecodeError as err:
                    # Keep the best-effort fallback to env vars/defaults, but
                    # make the reason visible instead of failing later with a
                    # misleading "token not set" error.
                    logger.warning(
                        "Ignoring config file %s: invalid TOML (%s)",
                        config_path,
                        err,
                    )
                    raw_config = {}

        # Determine effective config (handle multi-bot structure)
        config_data = cls._resolve_bot_config(raw_config, bot_name)

        # Environment variables override the TOML file
        telegram_token = os.environ.get("TELEGRAM_BOT_TOKEN") or config_data.get(
            "telegram_token"
        )

        # Resolve API key: env var > provider-specific config key > generic api_key
        model = os.environ.get("SELF_BOT_MODEL") or config_data.get(
            "model", "anthropic/claude-sonnet-4-20250514"
        )
        provider = model.split("/")[0] if "/" in model else ""
        # google_api_key is reserved for Google Custom Search, not the LLM key
        provider_key_name = (
            f"{provider}_api_key" if provider and provider != "google" else ""
        )
        api_key = (
            os.environ.get("FOLDERBOT_API_KEY")
            or (config_data.get(provider_key_name, "") if provider_key_name else "")
            or config_data.get("api_key", "")
        )

        if not telegram_token:
            raise ValueError("TELEGRAM_BOT_TOKEN not set (env var or config.toml)")

        # Root folder — for local config, default to parent of .folderbot/
        root_folder_str = os.environ.get("SELF_BOT_ROOT") or config_data.get(
            "root_folder"
        )
        if root_folder_str:
            root_folder = Path(root_folder_str).expanduser().resolve()
        elif _is_local_config(config_path):
            # .folderbot/config.toml → parent is .folderbot/ → parent.parent is root
            root_folder = config_path.parent.parent.resolve()
        else:
            root_folder = Path.cwd()

        # Allowed user IDs
        allowed_ids_str = os.environ.get("SELF_BOT_ALLOWED_IDS")
        if allowed_ids_str:
            try:
                # Skip empty segments so values such as "1,2," or "1,,2" parse;
                # int() itself tolerates surrounding whitespace.
                allowed_user_ids = [
                    int(part) for part in allowed_ids_str.split(",") if part.strip()
                ]
            except ValueError as err:
                raise ValueError(
                    "SELF_BOT_ALLOWED_IDS must be a comma-separated list of "
                    f"integers, got {allowed_ids_str!r}"
                ) from err
        else:
            allowed_user_ids = config_data.get("allowed_user_ids", [])

        if not allowed_user_ids:
            raise ValueError("No allowed_user_ids configured")

        # Read rules
        read_rules_data = config_data.get("read_rules", {})
        read_rules = ReadRules(
            include=read_rules_data.get("include", DEFAULT_INCLUDE_PATTERNS.copy()),
            exclude=read_rules_data.get("exclude", DEFAULT_EXCLUDE_PATTERNS.copy()),
            append_allowed=read_rules_data.get(
                "append_allowed", DEFAULT_APPEND_ALLOWED_PATTERNS.copy()
            ),
        )

        # Watch config
        watch_data = config_data.get("watch", {})
        watch_config = WatchConfig(
            include=watch_data.get("include", DEFAULT_WATCH_INCLUDE.copy()),
            exclude=watch_data.get("exclude", DEFAULT_WATCH_EXCLUDE.copy()),
            debounce_seconds=watch_data.get("debounce_seconds", 2.0),
        )

        # Other settings
        user_name = config_data.get("user_name", "User")

        # Session DB and todo file default to locations under
        # <root_folder>/.folderbot/ unless overridden by env var or config.
        db_path_str = os.environ.get("SELF_BOT_DB_PATH") or config_data.get("db_path")
        if db_path_str:
            db_path = Path(db_path_str).expanduser()
        else:
            db_path = root_folder / ".folderbot" / "sessions.db"

        todo_path_str = os.environ.get("SELF_BOT_TODO_PATH") or config_data.get(
            "todo_path"
        )
        if todo_path_str:
            todo_path = Path(todo_path_str).expanduser()
        else:
            todo_path = root_folder / ".folderbot" / "todos.md"

        max_context_chars = config_data.get("max_context_chars", 100_000)
        max_history_chars = config_data.get("max_history_chars", 50_000)

        # Parse [tools] section
        tools: dict[str, dict[str, Any]] = dict(config_data.get("tools", {}))

        # Whisper model: [tools.whisper].model > flat whisper_model > default
        # (there is no environment-variable override for this setting)
        whisper_tool_cfg = tools.get("whisper", {})
        whisper_model = whisper_tool_cfg.get(
            "model", config_data.get("whisper_model", "base")
        )

        # Google keys: env > [tools.web_search].* > flat * > default
        web_search_cfg = tools.get("web_search", {})
        google_api_key = os.environ.get("GOOGLE_API_KEY") or web_search_cfg.get(
            "google_api_key", config_data.get("google_api_key", "")
        )
        google_cx = os.environ.get("GOOGLE_CX") or web_search_cfg.get(
            "google_cx", config_data.get("google_cx", "")
        )

        return cls(
            telegram_token=telegram_token,
            api_key=api_key,
            root_folder=root_folder,
            allowed_user_ids=allowed_user_ids,
            user_name=user_name,
            read_rules=read_rules,
            watch_config=watch_config,
            db_path=db_path,
            todo_path=todo_path,
            model=model,
            max_context_chars=max_context_chars,
            max_history_chars=max_history_chars,
            whisper_model=whisper_model,
            google_api_key=google_api_key,
            google_cx=google_cx,
            tools=tools,
        )

    @classmethod
    def _resolve_bot_config(
        cls, raw_config: dict[str, Any], bot_name: Optional[str]
    ) -> dict[str, Any]:
        """Resolve the effective config, handling multi-bot structure.

        If raw_config has a non-empty 'bots' section, global settings are
        merged with the selected bot's settings (bot settings win; the merge
        is shallow). Otherwise raw_config is returned as-is (backward
        compatible flat format).

        Args:
            raw_config: Parsed config file contents.
            bot_name: Bot to select, or None to auto-select when exactly one
                bot is defined.

        Returns:
            The flat settings dict to read values from.

        Raises:
            ValueError: If bot_name is not defined in the config, or if
                several bots are defined and bot_name is not given.
        """
        bots = raw_config.get("bots")
        if not bots:
            # Flat config format (backward compatible)
            return raw_config

        # Multi-bot config format
        if bot_name:
            if bot_name not in bots:
                available = ", ".join(bots)
                raise ValueError(
                    f"Bot '{bot_name}' not found in config. Available bots: {available}"
                )
            selected_bot = bot_name
        elif len(bots) == 1:
            # Auto-select the only bot
            selected_bot = next(iter(bots))
        else:
            # Multiple bots, need to specify which one
            available = ", ".join(bots)
            raise ValueError(
                f"Config has multiple bots. Please specify which bot to run: {available}"
            )

        # Global settings (everything except 'bots'), overridden key by key
        # with the selected bot's own settings.
        global_settings = {
            key: value for key, value in raw_config.items() if key != "bots"
        }
        return {**global_settings, **bots[selected_bot]}