# Source: p2pwiki-ai/src/mediawiki.py (271 lines, 9.3 KiB, Python)

"""MediaWiki API client for P2P Foundation Wiki."""
import http.cookiejar
import logging
import re
from typing import Optional
from urllib.parse import quote

import httpx

from .config import settings
class MediaWikiClient:
    """Client for interacting with the MediaWiki API.

    Every request is sent to ``settings.mediawiki_api_url`` with the cookies
    stored in the Netscape-format file ``settings.wiki_cookie_file``.  A new
    ``httpx.AsyncClient`` is created per call; the parsed cookies are cached
    on the instance until :meth:`login` rewrites the cookie file.
    """

    # Base URL used to build the human-facing link returned by approve_draft().
    WIKI_BASE_URL = "https://wiki.p2pfoundation.net/"
    # Domain written into the cookie file for every saved cookie.
    COOKIE_DOMAIN = ".p2pfoundation.net"
    # Characters left unescaped when a page title is turned into a URL path.
    _URL_SAFE_TITLE_CHARS = ";@$!*(),/~:"
    # Matches {{Draft}} and {{Draft|...}} (any letter case) together with all
    # whitespace that follows the template.
    _DRAFT_TEMPLATE_RE = re.compile(r"\{\{Draft(?:\|[^}]*)?\}\}\s*", re.IGNORECASE)

    def __init__(self):
        self.api_url = settings.mediawiki_api_url
        self.cookie_file = settings.wiki_cookie_file
        # Cache for the parsed cookie file; None means "not loaded yet".
        self._cookies: Optional[dict[str, str]] = None

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _load_cookies(self) -> dict[str, str]:
        """Load cookies from the Netscape cookie file.

        Returns:
            Mapping of cookie name to value.  Empty when the file does not
            exist or cannot be parsed (the failure is logged, not raised).
        """
        if self._cookies is not None:
            return self._cookies

        if not self.cookie_file.exists():
            # Deliberately not cached, so a cookie file created later is
            # still picked up by the next call.
            return {}

        jar = http.cookiejar.MozillaCookieJar(str(self.cookie_file))
        cookies: dict[str, str] = {}
        try:
            jar.load(ignore_discard=True, ignore_expires=True)
        except (OSError, ValueError) as exc:
            # LoadError is an OSError subclass; ValueError covers undecodable
            # file content.  Best effort: continue without cookies.
            logging.getLogger(__name__).warning("Error loading cookies: %s", exc)
        else:
            cookies = {cookie.name: cookie.value for cookie in jar}

        self._cookies = cookies
        return cookies

    def _save_cookies(self, cookies):
        """Save cookies to Netscape cookie file format.

        Args:
            cookies: Any mapping exposing ``items()`` that yields
                ``(name, value)`` pairs.
        """
        lines = ["# Netscape HTTP Cookie File\n"]
        # Every cookie is written for COOKIE_DOMAIN, path "/", non-secure,
        # with expiry 0; _load_cookies() reads the file back with
        # ignore_expires=True, so the zero expiry does not drop them.
        lines.extend(
            f"{self.COOKIE_DOMAIN}\tTRUE\t/\tFALSE\t0\t{name}\t{value}\n"
            for name, value in cookies.items()
        )
        with open(self.cookie_file, "w") as f:
            f.writelines(lines)

    @staticmethod
    def _page_exists(page_id, page_info: dict) -> bool:
        """Return True when a ``query.pages`` entry describes an existing page.

        Missing or invalid titles are reported with negative page ids
        ("-1", "-2", ...) and a ``missing`` / ``invalid`` marker key.
        """
        if str(page_id).startswith("-"):
            return False
        return "missing" not in page_info and "invalid" not in page_info

    @staticmethod
    def _error_message(error, default: str) -> str:
        """Extract a readable message from an ``error`` value.

        The API reports errors as a dict carrying ``info``; this client's own
        methods report them as plain strings.  Both shapes are accepted.
        """
        if isinstance(error, dict):
            return error.get("info", default)
        return str(error) if error else default

    @classmethod
    def _page_url(cls, title: str) -> str:
        """Build the article URL for *title* (spaces become underscores)."""
        path = quote(title.replace(" ", "_"), safe=cls._URL_SAFE_TITLE_CHARS)
        return f"{cls.WIKI_BASE_URL}{path}"

    async def _api_call(self, params: dict, method: str = "GET") -> dict:
        """Make an API call to MediaWiki.

        Args:
            params: Request parameters; ``format=json`` is added to a copy,
                the caller's dict is left untouched.
            method: ``"GET"`` sends the parameters in the query string; any
                other value sends them as a POST form body.

        Returns:
            The decoded JSON response.

        Raises:
            httpx.HTTPStatusError: if the server answers with a 4xx/5xx status.
        """
        request_params = {**params, "format": "json"}
        async with httpx.AsyncClient(cookies=self._load_cookies(), timeout=30.0) as client:
            if method == "GET":
                resp = await client.get(self.api_url, params=request_params)
            else:
                resp = await client.post(self.api_url, data=request_params)
            resp.raise_for_status()
            return resp.json()

    # ------------------------------------------------------------------
    # Read operations
    # ------------------------------------------------------------------

    async def get_csrf_token(self) -> str:
        """Get a CSRF token for edit/move operations.

        Returns:
            The ``csrftoken`` value from the response, or ``""`` when the
            response does not contain one.
        """
        # NOTE(review): MediaWiki is expected to hand anonymous sessions a
        # non-empty placeholder token, so an empty string here may not be a
        # reliable "not logged in" signal - confirm against the target wiki.
        result = await self._api_call({
            "action": "query",
            "meta": "tokens",
            "type": "csrf",
        })
        return result.get("query", {}).get("tokens", {}).get("csrftoken", "")

    async def get_page_info(self, title: str) -> Optional[dict]:
        """Get information about a page.

        Returns:
            The ``prop=info`` entry of the first existing page in the
            response, or ``None`` when the title is missing or invalid.
        """
        result = await self._api_call({
            "action": "query",
            "titles": title,
            "prop": "info",
        })
        pages = result.get("query", {}).get("pages", {})
        for page_id, page_info in pages.items():
            if self._page_exists(page_id, page_info):
                return page_info
        return None

    async def get_page_content(self, title: str) -> Optional[str]:
        """Get the wikitext content of a page.

        Returns:
            The content of the ``main`` slot of the first returned revision,
            or ``None`` when the page does not exist or has no revisions.
        """
        result = await self._api_call({
            "action": "query",
            "titles": title,
            "prop": "revisions",
            "rvprop": "content",
            "rvslots": "main",
        })
        pages = result.get("query", {}).get("pages", {})
        for page_id, page_info in pages.items():
            if not self._page_exists(page_id, page_info):
                continue
            revisions = page_info.get("revisions", [])
            if revisions:
                main_slot = revisions[0].get("slots", {}).get("main", {})
                return main_slot.get("*", "")
        return None

    async def list_draft_articles(self) -> list[dict]:
        """List all articles in the draft-review category.

        The category is read in batches of 100; the ``continue`` block of
        each response is fed into the next request until none is returned.

        Returns:
            One ``{"title": ..., "timestamp": ...}`` dict per category member
            (``timestamp`` is ``""`` when the API omits it).
        """
        base_params = {
            "action": "query",
            "list": "categorymembers",
            "cmtitle": "Category:Draft articles pending review",
            "cmlimit": "100",
            "cmprop": "title|timestamp",
        }
        members: list[dict] = []
        continuation: dict = {}
        while True:
            result = await self._api_call({**base_params, **continuation})
            members.extend(result.get("query", {}).get("categorymembers", []))
            continuation = result.get("continue") or {}
            if not continuation:
                break
        return [{"title": m["title"], "timestamp": m.get("timestamp", "")} for m in members]

    async def check_auth(self) -> dict:
        """Check if we're authenticated and get user info.

        Returns:
            dict with ``authenticated`` (user id is non-zero), ``username``,
            ``groups``, ``rights``, ``is_admin`` ("sysop" group membership)
            and ``can_move`` ("move" right present).
        """
        result = await self._api_call({
            "action": "query",
            "meta": "userinfo",
            "uiprop": "groups|rights",
        })
        userinfo = result.get("query", {}).get("userinfo", {})
        groups = userinfo.get("groups", [])
        rights = userinfo.get("rights", [])
        return {
            "authenticated": userinfo.get("id", 0) != 0,
            "username": userinfo.get("name", "Anonymous"),
            "groups": groups,
            "rights": rights,
            "is_admin": "sysop" in groups,
            "can_move": "move" in rights,
        }

    # ------------------------------------------------------------------
    # Write operations
    # ------------------------------------------------------------------

    async def move_page(self, from_title: str, to_title: str, reason: str = "Approved draft article") -> dict:
        """Move a page from one title to another.

        The talk page is moved along and no redirect is left behind.

        Returns:
            The raw API response, or ``{"error": "<message>"}`` (a plain
            string) when no CSRF token could be obtained.
        """
        token = await self.get_csrf_token()
        if not token:
            return {"error": "Could not get CSRF token - not authenticated"}
        return await self._api_call({
            "action": "move",
            "from": from_title,
            "to": to_title,
            "reason": reason,
            "movetalk": "1",
            "noredirect": "1",
            "token": token,
        }, method="POST")

    async def edit_page(self, title: str, content: str, summary: str) -> dict:
        """Edit a page's content.

        Returns:
            The raw API response, or ``{"error": "<message>"}`` (a plain
            string) when no CSRF token could be obtained.
        """
        token = await self.get_csrf_token()
        if not token:
            return {"error": "Could not get CSRF token - not authenticated"}
        return await self._api_call({
            "action": "edit",
            "title": title,
            "text": content,
            "summary": summary,
            "token": token,
        }, method="POST")

    async def approve_draft(self, draft_title: str) -> dict:
        """
        Approve a draft article by moving it from Draft: namespace to main namespace.

        After a successful move, any ``{{Draft}}`` / ``{{Draft|...}}``
        template is stripped from the moved page.

        Args:
            draft_title: The title in Draft namespace (e.g., "Draft:Article_Name" or just "Article_Name")

        Returns:
            dict with success/error information: ``{"error": message}`` on
            failure, otherwise ``success``, ``from``, ``to`` and ``url``,
            plus ``warning`` when the page was moved but the template
            clean-up edit was rejected.
        """
        # Normalize the title: accept it with or without the "Draft:" prefix.
        to_title = draft_title.removeprefix("Draft:")
        from_title = f"Draft:{to_title}"

        if not await self.get_page_info(from_title):
            return {"error": f"Draft page not found: {from_title}"}
        if await self.get_page_info(to_title):
            return {"error": f"Target page already exists: {to_title}"}

        move_result = await self.move_page(from_title, to_title, "Draft approved by administrator")
        if "error" in move_result:
            # The error is a dict when it comes from the API but a plain
            # string when move_page() could not obtain a token.
            return {"error": self._error_message(move_result["error"], "Move failed")}

        approval = {
            "success": True,
            "from": from_title,
            "to": to_title,
            "url": self._page_url(to_title),
        }

        # Remove the {{Draft}} template from the approved article.
        content = await self.get_page_content(to_title)
        if content:
            new_content = self._DRAFT_TEMPLATE_RE.sub("", content)
            if new_content != content:
                edit_result = await self.edit_page(
                    to_title, new_content, "Removed draft template after approval"
                )
                if "error" in edit_result:
                    # The move already happened, so report this as a warning
                    # rather than as a failed approval.
                    detail = self._error_message(edit_result["error"], "Edit failed")
                    approval["warning"] = f"Draft template could not be removed: {detail}"
        return approval

    async def login(self, username: str, password: str) -> dict:
        """
        Login to MediaWiki and save session cookies.

        The login token and the credentials are sent through the same HTTP
        client, so the session cookie issued with the token accompanies the
        login request.

        Returns dict with success status and username (or ``error`` message).

        Raises:
            httpx.HTTPStatusError: if either request gets a 4xx/5xx status.
        """
        async with httpx.AsyncClient(timeout=30.0) as client:
            # Get login token first.
            token_resp = await client.get(
                self.api_url,
                params={
                    "action": "query",
                    "meta": "tokens",
                    "type": "login",
                    "format": "json",
                },
            )
            token_resp.raise_for_status()
            login_token = (
                token_resp.json().get("query", {}).get("tokens", {}).get("logintoken", "")
            )
            if not login_token:
                return {"success": False, "error": "Could not get login token"}

            # Perform login on the same client (same cookie jar).
            resp = await client.post(
                self.api_url,
                data={
                    "action": "login",
                    "lgname": username,
                    "lgpassword": password,
                    "lgtoken": login_token,
                    "format": "json",
                },
            )
            resp.raise_for_status()
            login_result = resp.json().get("login", {})
            if login_result.get("result") != "Success":
                return {
                    "success": False,
                    "error": login_result.get("reason", "Login failed"),
                }
            # Take every cookie collected during the exchange, not only the
            # ones set by the final response.
            session_cookies = {cookie.name: cookie.value for cookie in client.cookies.jar}

        self._save_cookies(session_cookies)
        self._cookies = None  # Clear cached cookies to reload
        return {
            "success": True,
            "username": login_result.get("lgusername"),
        }
# Global client instance, created once when this module is imported.
# MediaWikiClient.__init__ reads the API URL and cookie-file path from `settings`.
wiki_client = MediaWikiClient()