# p2pwiki-ai/src/mediawiki.py

"""MediaWiki API client for P2P Foundation Wiki."""

import http.cookiejar
from typing import Optional
import httpx

from .config import settings


class MediaWikiClient:
    """Async client for the MediaWiki action API.

    Authentication is cookie based: session cookies are read from (and, after
    a successful :meth:`login`, written to) a Netscape-format cookie file whose
    path comes from ``settings.wiki_cookie_file``. Every API helper returns the
    decoded JSON response, or a small ``{"error": ...}`` dict when a
    precondition (such as obtaining a CSRF token) fails.
    """

    def __init__(self):
        self.api_url = settings.mediawiki_api_url
        self.cookie_file = settings.wiki_cookie_file
        # Parsed cookie cache; None means "not loaded yet / reload on next use".
        self._cookies: Optional[dict[str, str]] = None

    @staticmethod
    def _error_text(error, default: str) -> str:
        """Return a printable message for an ``error`` value.

        API errors arrive as dicts carrying an ``info`` field, whereas this
        client's own precondition failures are plain strings; both shapes are
        accepted here so callers never have to care which one they got.
        """
        if isinstance(error, dict):
            return error.get("info", default)
        return str(error) if error else default

    @staticmethod
    def _page_exists(page_id: str, page_info: dict) -> bool:
        """Tell whether a ``query.pages`` entry describes an existing page."""
        if str(page_id).startswith("-"):
            return False
        return "missing" not in page_info and "invalid" not in page_info

    def _load_cookies(self) -> dict[str, str]:
        """Load cookies from the Netscape cookie file as a name -> value dict.

        The result is cached once the file has been read (even when parsing
        failed, in which case the cache is empty). A missing file is *not*
        cached, so a cookie file created later is picked up.
        """
        if self._cookies is not None:
            return self._cookies

        if not self.cookie_file.exists():
            return {}

        cookies: dict[str, str] = {}
        jar = http.cookiejar.MozillaCookieJar(str(self.cookie_file))
        try:
            jar.load(ignore_discard=True, ignore_expires=True)
        except (OSError, ValueError) as e:
            # LoadError is an OSError; ValueError covers undecodable bytes.
            # Best effort: fall back to an unauthenticated session.
            print(f"Error loading cookies: {e}")
        else:
            cookies = {cookie.name: cookie.value for cookie in jar}

        self._cookies = cookies
        return cookies

    async def _api_call(self, params: dict, method: str = "GET") -> dict:
        """Make an API call to MediaWiki and return the decoded JSON body.

        Args:
            params: API parameters. The dict is not modified; ``format=json``
                is added to a copy.
            method: ``"GET"`` sends ``params`` as a query string; any other
                value sends them as a form-encoded POST body.

        Raises:
            httpx.HTTPStatusError: if the server answers with a 4xx/5xx status.
        """
        cookies = self._load_cookies()
        # Copy so the caller's dict is left untouched.
        request_params = {**params, "format": "json"}

        async with httpx.AsyncClient(cookies=cookies, timeout=30.0) as client:
            if method == "GET":
                resp = await client.get(self.api_url, params=request_params)
            else:
                resp = await client.post(self.api_url, data=request_params)

            resp.raise_for_status()
            return resp.json()

    async def get_csrf_token(self) -> str:
        """Get a CSRF token for edit/move operations ("" if none was returned)."""
        result = await self._api_call({
            "action": "query",
            "meta": "tokens",
            "type": "csrf"
        })
        # NOTE(review): MediaWiki is documented to hand anonymous sessions a
        # non-empty placeholder token, so an emptiness check by callers may not
        # detect a logged-out session - confirm whether that should be rejected.
        return result.get("query", {}).get("tokens", {}).get("csrftoken", "")

    async def get_page_info(self, title: str) -> Optional[dict]:
        """Return the ``prop=info`` record for ``title``, or None if it does not exist."""
        result = await self._api_call({
            "action": "query",
            "titles": title,
            "prop": "info"
        })
        pages = result.get("query", {}).get("pages", {})
        for page_id, page_info in pages.items():
            if self._page_exists(page_id, page_info):
                return page_info
        return None

    async def move_page(self, from_title: str, to_title: str, reason: str = "Approved draft article") -> dict:
        """Move a page (and its talk page) to a new title without leaving a redirect.

        Returns the raw API response, or ``{"error": <str>}`` when no CSRF
        token could be obtained.
        """
        token = await self.get_csrf_token()
        if not token:
            return {"error": "Could not get CSRF token - not authenticated"}

        return await self._api_call({
            "action": "move",
            "from": from_title,
            "to": to_title,
            "reason": reason,
            "movetalk": "1",
            "noredirect": "1",
            "token": token
        }, method="POST")

    async def get_page_content(self, title: str) -> Optional[str]:
        """Get the wikitext of the main slot of a page, or None if unavailable."""
        result = await self._api_call({
            "action": "query",
            "titles": title,
            "prop": "revisions",
            "rvprop": "content",
            "rvslots": "main"
        })
        pages = result.get("query", {}).get("pages", {})
        for page_id, page_info in pages.items():
            if not self._page_exists(page_id, page_info):
                continue
            revisions = page_info.get("revisions", [])
            if revisions:
                main_slot = revisions[0].get("slots", {}).get("main", {})
                return main_slot.get("*", "")
        return None

    async def edit_page(self, title: str, content: str, summary: str) -> dict:
        """Replace a page's content.

        Returns the raw API response, or ``{"error": <str>}`` when no CSRF
        token could be obtained.
        """
        token = await self.get_csrf_token()
        if not token:
            return {"error": "Could not get CSRF token - not authenticated"}

        return await self._api_call({
            "action": "edit",
            "title": title,
            "text": content,
            "summary": summary,
            "token": token
        }, method="POST")

    async def approve_draft(self, draft_title: str) -> dict:
        """
        Approve a draft article by moving it from Draft: namespace to main namespace.

        After a successful move, any ``{{Draft}}`` / ``{{Draft|...}}`` template
        is stripped from the moved page.

        Args:
            draft_title: The title in Draft namespace (e.g., "Draft:Article_Name" or just "Article_Name")

        Returns:
            On success: ``{"success": True, "from", "to", "url"}``, plus a
            ``"warning"`` entry when the page was moved but the template could
            not be removed. On failure: ``{"error": <message>}``.
        """
        import re
        import urllib.parse

        # Normalize the title: accept it with or without the "Draft:" prefix.
        to_title = draft_title.removeprefix("Draft:")
        from_title = f"Draft:{to_title}"
        if not to_title.strip():
            return {"error": "Draft title is empty"}

        # Check if draft exists
        if not await self.get_page_info(from_title):
            return {"error": f"Draft page not found: {from_title}"}

        # Check if target already exists
        if await self.get_page_info(to_title):
            return {"error": f"Target page already exists: {to_title}"}

        # Move the page
        result = await self.move_page(from_title, to_title, "Draft approved by administrator")
        if "error" in result:
            # The error may be an API dict or a plain string from move_page.
            return {"error": self._error_text(result["error"], "Move failed")}

        # Percent-encode so titles containing '?', '#', etc. give a working link.
        url_title = urllib.parse.quote(to_title.replace(' ', '_'), safe="/:;@$!*(),~")
        response = {
            "success": True,
            "from": from_title,
            "to": to_title,
            "url": f"https://wiki.p2pfoundation.net/{url_title}"
        }

        # Remove the {{Draft}} template from the approved article
        content = await self.get_page_content(to_title)
        if content:
            # Remove {{Draft|...}} template (handles various parameter formats)
            new_content = re.sub(r'\{\{Draft\|[^}]*\}\}\s*\n?', '', content, flags=re.IGNORECASE)
            new_content = re.sub(r'\{\{Draft\}\}\s*\n?', '', new_content, flags=re.IGNORECASE)

            if new_content != content:
                edit_result = await self.edit_page(to_title, new_content, "Removed draft template after approval")
                if "error" in edit_result:
                    # The move already happened, so report success with a warning
                    # rather than hiding the failed clean-up.
                    response["warning"] = (
                        "Page moved, but removing the draft template failed: "
                        + self._error_text(edit_result["error"], "Edit failed")
                    )

        return response

    async def list_draft_articles(self) -> list[dict]:
        """List all members of "Category:Draft articles pending review".

        Follows the API's ``continue`` markers so categories with more than
        one batch (100 entries) are returned in full.

        Returns:
            A list of ``{"title": ..., "timestamp": ...}`` dicts; ``timestamp``
            is "" when the API did not supply one.
        """
        query = {
            "action": "query",
            "list": "categorymembers",
            "cmtitle": "Category:Draft articles pending review",
            "cmlimit": "100",
            "cmprop": "title|timestamp"
        }

        articles: list[dict] = []
        continuation: dict = {}
        while True:
            result = await self._api_call({**query, **continuation})
            members = result.get("query", {}).get("categorymembers", [])
            articles.extend(
                {"title": m["title"], "timestamp": m.get("timestamp", "")} for m in members
            )
            # The API echoes the parameters needed to fetch the next batch.
            continuation = result.get("continue") or {}
            if not continuation:
                return articles

    async def check_auth(self) -> dict:
        """Check if we're authenticated and get user info.

        Returns:
            Dict with ``authenticated``, ``username``, ``groups``, ``rights``,
            ``is_admin`` (member of ``sysop``) and ``can_move`` (has the
            ``move`` right).
        """
        result = await self._api_call({
            "action": "query",
            "meta": "userinfo",
            "uiprop": "groups|rights"
        })
        userinfo = result.get("query", {}).get("userinfo", {})
        groups = userinfo.get("groups", [])
        rights = userinfo.get("rights", [])
        return {
            "authenticated": userinfo.get("id", 0) != 0,
            "username": userinfo.get("name", "Anonymous"),
            "groups": groups,
            "rights": rights,
            "is_admin": "sysop" in groups,
            "can_move": "move" in rights
        }

    async def login(self, username: str, password: str) -> dict:
        """
        Login to MediaWiki and save session cookies.

        The login token and the credentials are sent through the *same* HTTP
        client so the session cookie issued with the token accompanies the
        login request; the whole session cookie jar is then written to the
        cookie file.

        Returns:
            ``{"success": True, "username": ...}`` on success, otherwise
            ``{"success": False, "error": ...}``.

        Raises:
            httpx.HTTPStatusError: if either request gets a 4xx/5xx response.
        """
        # Start from a clean session: stale stored cookies must not leak in.
        async with httpx.AsyncClient(timeout=30.0) as client:
            # Get login token first
            token_resp = await client.get(
                self.api_url,
                params={
                    "action": "query",
                    "meta": "tokens",
                    "type": "login",
                    "format": "json"
                }
            )
            token_resp.raise_for_status()
            login_token = (
                token_resp.json().get("query", {}).get("tokens", {}).get("logintoken", "")
            )
            if not login_token:
                return {"success": False, "error": "Could not get login token"}

            # Perform login
            resp = await client.post(
                self.api_url,
                data={
                    "action": "login",
                    "lgname": username,
                    "lgpassword": password,
                    "lgtoken": login_token,
                    "format": "json"
                }
            )
            resp.raise_for_status()

            login_result = resp.json().get("login", {})
            if login_result.get("result") != "Success":
                return {
                    "success": False,
                    "error": login_result.get("reason", "Login failed")
                }

            # Save every cookie of the session, not just those set by the
            # final response, then drop the cache so the file is re-read.
            self._save_cookies(client.cookies)
            self._cookies = None
            return {
                "success": True,
                "username": login_result.get("lgusername")
            }

    def _save_cookies(self, cookies):
        """Save cookies to the cookie file in Netscape format.

        Args:
            cookies: Either a mapping of cookie name -> value, or an object
                exposing a ``jar`` attribute that iterates cookie objects with
                ``name`` and ``value`` (as httpx's cookie container does).

        The file holds session credentials, so it is created with (and, where
        possible, tightened to) owner-only permissions.
        """
        import os

        jar = getattr(cookies, "jar", None)
        if jar is not None:
            # Reading the jar directly avoids name-conflict errors on lookup.
            pairs = [(cookie.name, cookie.value) for cookie in jar]
        else:
            pairs = list(cookies.items())

        # MediaWiki cookies are typically for the wiki domain
        domain = ".p2pfoundation.net"
        lines = ["# Netscape HTTP Cookie File\n"]
        lines.extend(
            f"{domain}\tTRUE\t/\tFALSE\t0\t{name}\t{value}\n" for name, value in pairs
        )

        fd = os.open(self.cookie_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        # Default text encoding on purpose: the loader opens the file the same way.
        with os.fdopen(fd, "w") as f:
            f.writelines(lines)
        try:
            # A pre-existing file keeps its old mode, so tighten it explicitly.
            os.chmod(self.cookie_file, 0o600)
        except OSError:
            # Best effort only (e.g. file owned by another user).
            pass


# Global client instance, shared by importers of this module. Constructing it
# only copies the API URL and cookie-file path from `settings`; cookies are
# read and HTTP requests are made only when a method is called.
wiki_client = MediaWikiClient()