"""
Feature flags synchronisation plugin (AV mode only).

In IM360 mode the Go resident-agent handles feature-flag sync.
In AV mode there is no resident-agent, so this plugin takes over.

Periodically POSTs the local file checksum to the API and writes
back any updated flags to ``/var/imunify360/feature_flags.json`` (legacy map
``{flag: true}`` on disk) and ``/var/imunify360/feature_flags`` (plain names,
one per line). The POSTed checksum is over the canonical JSON **array** of
enabled names, matching the correlation sync API—not over the on-disk map bytes.
"""

import asyncio
import json
import logging
import os
import urllib.error
import urllib.request

from defence360agent.contracts.config import Core
from defence360agent.contracts.plugins import MessageSource
from defence360agent.internals.feature_flags import (
    FLAGS_PATH,
    FLAGS_PLAIN_PATH,
    enabled_flag_names_sorted,
    legacy_feature_flags_map_bytes,
    plain_text_payload_for_enabled_flags,
    serialize_feature_flags_file_payload,
    sync_checksum_hex_from_flags_file,
)
from defence360agent.internals.iaid import (
    IAIDTokenError,
    IndependentAgentIDAPI,
)
from defence360agent.utils import Scope, atomic_rewrite

# Module-level logger used by the helpers and the plugin class in this file.
logger = logging.getLogger(__name__)

# Path of the feature-flags sync endpoint; it is appended to
# ``Core.API_BASE_URL`` when the request URL is built.
_SYNC_URL = "/api/sync/v1/feature-flags"


def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an int, else *default*.

    The lookup is deliberately forgiving: an unset or empty variable, or a
    value ``int()`` rejects (typo, stray text, ...), yields *default* rather
    than an exception. This function runs while the module is imported from
    the AV agent entry point, so raising here would bring the whole agent
    down over a single bad env var. Unparsable values are reported with a
    warning.
    """
    value = os.environ.get(name)
    if value:
        try:
            return int(value)
        except ValueError:
            logger.warning(
                "feature-flags: %s=%r is not an int, using default %d",
                name,
                value,
                default,
            )
    return default


# Default number of seconds between two regular sync attempts.
_DEFAULT_SYNC_INTERVAL = 3600
# Effective sync interval in seconds; overridable through the environment.
_SYNC_INTERVAL = _env_int(
    "I360_FEATURE_FLAGS_SYNC_INTERVAL_SEC", _DEFAULT_SYNC_INTERVAL
)
if _SYNC_INTERVAL <= 0:
    # A zero or negative interval would make the sync loop sleep for no
    # time at all and hit the API back-to-back, so treat it like any other
    # unusable value and fall back to the default.
    logger.warning(
        "feature-flags: sync interval %d is not positive, using default %d",
        _SYNC_INTERVAL,
        _DEFAULT_SYNC_INTERVAL,
    )
    _SYNC_INTERVAL = _DEFAULT_SYNC_INTERVAL
# Seconds to wait after start-up before the first sync attempt.
_INITIAL_DELAY = 10
# Seconds to wait before re-checking while the agent is not registered.
_UNREGISTERED_DELAY = 30
# Timeout, in seconds, passed to ``urlopen`` for the sync request.
_HTTP_TIMEOUT = 30


class FeatureFlagsSync(MessageSource):
    """Background plugin that keeps the on-disk feature flags in sync.

    ``create_source`` starts one long-running task which periodically POSTs
    the local flags checksum to the sync endpoint and writes back whatever
    flags the server returns.
    """

    SCOPE = Scope.AV

    # Class-level default so ``shutdown()`` is safe even if
    # ``create_source()`` was never called on this instance.
    _task = None

    async def create_source(self, loop, sink):
        """Store *loop* and *sink* and schedule the sync loop on *loop*."""
        self._loop = loop
        self._sink = sink
        self._task = loop.create_task(self._sync_loop())

    async def shutdown(self):
        """Cancel the sync task, if any, and wait until it has stopped."""
        if self._task is not None:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass

    def _local_checksum(self) -> str:
        """Return the sync checksum computed from the flags file."""
        return sync_checksum_hex_from_flags_file(FLAGS_PATH)

    async def _sync_loop(self):
        """Run sync attempts forever, sleeping between iterations.

        The sleep is ``_UNREGISTERED_DELAY`` while the agent is not
        registered, otherwise the delay returned by ``_do_sync`` or, when
        that is ``0``, ``_SYNC_INTERVAL``. Any failure other than
        cancellation is logged and the loop carries on.
        """
        await asyncio.sleep(_INITIAL_DELAY)
        while True:
            delay = _SYNC_INTERVAL
            try:
                if not IndependentAgentIDAPI.is_registered():
                    delay = _UNREGISTERED_DELAY
                else:
                    delay = await self._do_sync() or _SYNC_INTERVAL
            except asyncio.CancelledError:
                raise
            except Exception:
                logger.warning("feature flags sync failed", exc_info=True)
            await asyncio.sleep(delay)

    async def _do_sync(self) -> int:
        """Perform one sync round-trip with the API.

        Returns the delay (in seconds) requested by the server, or ``0``
        when the server gave none, gave an unusable one, or the attempt
        failed; the caller substitutes its default interval for ``0``.
        """
        try:
            token = await IndependentAgentIDAPI.get_token()
        except IAIDTokenError:
            logger.warning("no IAID token, skipping feature flags sync")
            return 0

        loop = asyncio.get_event_loop()
        # File access and the HTTP call are blocking, so both are pushed
        # to the default executor instead of running on the event loop.
        checksum = await loop.run_in_executor(None, self._local_checksum)
        payload = json.dumps({"checksum": checksum}).encode()

        url = Core.API_BASE_URL.rstrip("/") + _SYNC_URL
        req = urllib.request.Request(
            url,
            data=payload,
            headers={
                "Content-Type": "application/json",
                "X-Auth": token,
            },
            method="POST",
        )

        try:
            resp_body = await loop.run_in_executor(
                None, self._blocking_request, req
            )
        except urllib.error.HTTPError as e:
            # Non-5xx (404/403/4xx) is usually a server-side routing or
            # auth state, not an agent bug — keep it a one-line WARNING.
            # 5xx means the server actually misbehaved; keep the traceback.
            if 500 <= e.code < 600:
                logger.error(
                    "feature flags sync HTTP %s on %s: %s",
                    e.code,
                    url,
                    e.reason,
                    exc_info=e,
                )
            else:
                logger.warning(
                    "feature flags sync HTTP %s on %s: %s",
                    e.code,
                    url,
                    e.reason,
                )
            return 0
        except urllib.error.URLError as e:
            # DNS, connection refused, TLS, timeout — transient network
            # conditions, not bugs. One-line WARNING so logs stay readable.
            logger.warning(
                "feature flags sync connection failed on %s: %s",
                url,
                e.reason,
            )
            return 0
        except Exception:
            logger.error(
                "feature flags sync request failed on %s",
                url,
                exc_info=True,
            )
            return 0

        try:
            result = json.loads(resp_body)
        except ValueError:
            # ValueError covers json.JSONDecodeError as well as the
            # UnicodeDecodeError raised for bytes that cannot be decoded.
            logger.error("failed to parse feature flags response")
            return 0

        if not isinstance(result, dict):
            # Valid JSON that is not an object (null, list, number, ...)
            # has none of the fields read below.
            logger.error(
                "unexpected feature flags response type %s",
                type(result).__name__,
            )
            return 0

        # The delay ends up in asyncio.sleep() outside the sync loop's
        # try/except, so it has to be validated here: a bad value would
        # otherwise stop the loop for good or make it spin.
        server_delay = self._sanitize_delay(result.get("delay", 0))

        if result.get("changed") is False:
            logger.debug("feature flags unchanged, skipping write")
            return server_delay

        flags = result.get("flags")
        if flags is not None:
            await loop.run_in_executor(None, self._write_flags, flags)
        return server_delay

    @staticmethod
    def _sanitize_delay(value):
        """Return *value* if it is a usable sleep delay, otherwise ``0``.

        Usable means an int or float (bools excluded) that is positive and
        finite. ``None`` and zero quietly map to ``0``; anything else is
        reported with a warning and also mapped to ``0``.
        """
        if value is None:
            return 0
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            if value == 0:
                return 0
            # The chained comparison is false for negatives, NaN and
            # infinity alike.
            if 0 < value < float("inf"):
                return value
        logger.warning(
            "feature flags sync: ignoring invalid server delay %r", value
        )
        return 0

    @staticmethod
    def _blocking_request(req: urllib.request.Request) -> bytes:
        """Send *req* synchronously and return the raw response body."""
        with urllib.request.urlopen(req, timeout=_HTTP_TIMEOUT) as resp:
            return resp.read()

    @staticmethod
    def _write_flags(flags) -> None:
        """Persist *flags* to ``FLAGS_PATH`` and ``FLAGS_PLAIN_PATH``.

        A list is serialised with ``legacy_feature_flags_map_bytes``, any
        other value with ``serialize_feature_flags_file_payload``. A
        ``TypeError`` from serialisation skips the write with a warning; an
        ``OSError`` while writing is logged and not re-raised.
        """
        try:
            if isinstance(flags, list):
                data = legacy_feature_flags_map_bytes(flags)
            else:
                data = serialize_feature_flags_file_payload(flags)
        except TypeError:
            logger.warning(
                "feature flags sync: unexpected flags type %r, skipping write",
                type(flags).__name__,
            )
            return
        n_active = len(enabled_flag_names_sorted(flags))
        try:
            os.makedirs(os.path.dirname(FLAGS_PATH), exist_ok=True)
            # Atomic write-to-temp + rename so a crash mid-write can't
            # leave the flags file truncated/corrupt — otherwise readers
            # would fall back to defaults until the next sync.
            atomic_rewrite(FLAGS_PATH, data, backup=False)
            plain = plain_text_payload_for_enabled_flags(flags)
            atomic_rewrite(FLAGS_PLAIN_PATH, plain, backup=False)
            logger.info("feature flags synced: %d flags active", n_active)
        except OSError:
            logger.error("failed to write flags file", exc_info=True)
