From 6e52f7b20bb455ac826e7e5649ad093a7ac31157 Mon Sep 17 00:00:00 2001 From: Jeff Emmett Date: Sat, 21 Feb 2026 14:41:32 -0700 Subject: [PATCH] Add CirrusSearch deployment script and backlog tasks Adds install-cirrussearch.sh to replace MySQL full-text search with Elasticsearch, fixing stopword issues (e.g. "Will Ruddick" returning 0 results). Also adds backlog tasks for HitCounters (done) and CirrusSearch installation. Co-Authored-By: Claude Opus 4.6 --- backlog/config.yml | 1 + ...rs-extension-and-restore-page-view-data.md | 37 +++ ...Search-Elasticsearch-to-fix-wiki-search.md | 41 +++ wiki_deploy/install-cirrussearch.sh | 296 ++++++++++++++++++ 4 files changed, 375 insertions(+) create mode 100644 backlog/tasks/task-2 - Install-HitCounters-extension-and-restore-page-view-data.md create mode 100644 backlog/tasks/task-3 - Install-CirrusSearch-Elasticsearch-to-fix-wiki-search.md create mode 100755 wiki_deploy/install-cirrussearch.sh diff --git a/backlog/config.yml b/backlog/config.yml index 0ba0890..005b61c 100644 --- a/backlog/config.yml +++ b/backlog/config.yml @@ -13,3 +13,4 @@ auto_commit: false bypass_git_hooks: false check_active_branches: true active_branch_days: 30 +task_prefix: "task" diff --git a/backlog/tasks/task-2 - Install-HitCounters-extension-and-restore-page-view-data.md b/backlog/tasks/task-2 - Install-HitCounters-extension-and-restore-page-view-data.md new file mode 100644 index 0000000..5c34b86 --- /dev/null +++ b/backlog/tasks/task-2 - Install-HitCounters-extension-and-restore-page-view-data.md @@ -0,0 +1,37 @@ +--- +id: task-2 +title: Install HitCounters extension and restore page view data +status: Done +assignee: [] +created_date: '2026-02-13 04:03' +updated_date: '2026-02-13 04:03' +labels: + - wiki + - extension + - mediawiki +dependencies: [] +priority: medium +--- + +## Description + + +Install the HitCounters MediaWiki extension on the p2pwiki (wiki.p2pfoundation.net) to restore the visitor counter from the old deployment. 
Restore 106M+ historical page views from the database backup. + + +## Acceptance Criteria + +- [x] #1 HitCounters v0.3.4 extension installed on p2pwiki container (MW 1.40) +- [x] #2 Extension configured in LocalSettings.php with $wgDisableCounters = false +- [x] #3 Database tables created via maintenance/update.php +- [x] #4 Historical page view data preserved (45,484 pages, 106M+ views) +- [x] #5 Page footer shows 'This page has been accessed X times' +- [x] #6 Special:PopularPages works and lists most viewed pages +- [x] #7 Deprecation warning (Language::convert) fixed with LanguageConverterFactory + + +## Implementation Notes + + +Cloned HitCounters REL1_40 branch into p2pwiki container extensions volume. Added wfLoadExtension('HitCounters') and $wgDisableCounters=false to LocalSettings.php at /opt/websites/p2pwiki/. Ran maintenance/update.php - hit_counter tables already existed from DB import with 45,484 pages and 106,355,286 total views. Fixed SpecialPopularPages.php deprecation by replacing Language::convert with MediaWikiServices::getInstance()->getLanguageConverterFactory()->getLanguageConverter(). Committed and pushed to Gitea (7633ea6). 
+ diff --git a/backlog/tasks/task-3 - Install-CirrusSearch-Elasticsearch-to-fix-wiki-search.md b/backlog/tasks/task-3 - Install-CirrusSearch-Elasticsearch-to-fix-wiki-search.md new file mode 100644 index 0000000..ebbc4f1 --- /dev/null +++ b/backlog/tasks/task-3 - Install-CirrusSearch-Elasticsearch-to-fix-wiki-search.md @@ -0,0 +1,41 @@ +--- +id: TASK-3 +title: Install CirrusSearch (Elasticsearch) to fix wiki search +status: To Do +assignee: [] +created_date: '2026-02-21 20:56' +labels: + - wiki + - search + - extension + - mediawiki + - elasticsearch +dependencies: [] +references: + - wiki_deploy/install-cirrussearch.sh + - 'https://www.mediawiki.org/wiki/Extension:CirrusSearch' + - 'https://www.mediawiki.org/wiki/Extension:Elastica' +priority: high +--- + +## Description + + +Wiki search at wiki.p2pfoundation.net fails for queries containing MySQL stopwords. Searching "Will Ruddick" returns 0 results because MySQL treats "will" as a stopword, even though there are 80+ articles mentioning Ruddick. + +Install CirrusSearch extension backed by Elasticsearch to replace MySQL full-text search. This provides fuzzy matching, proper relevance ranking, and no stopword issues. + +Deployment script: wiki_deploy/install-cirrussearch.sh +Run on Netcup server via interactive SSH session. 
+
+
+## Acceptance Criteria
+
+- [ ] #1 CirrusSearch and Elastica extensions installed (REL1_40) on p2pwiki container
+- [ ] #2 Elasticsearch 7.10.2 running as p2pwiki-elasticsearch container on p2pwiki-internal network
+- [ ] #3 Search index built for all ~23k articles
+- [ ] #4 Search for 'Will Ruddick' returns results (previously 0 due to MySQL stopword)
+- [ ] #5 Search for 'about governance' returns results
+- [ ] #6 CirrusSearch visible on Special:Version page
+- [ ] #7 Real-time search index updates enabled after initial indexing
+
diff --git a/wiki_deploy/install-cirrussearch.sh b/wiki_deploy/install-cirrussearch.sh
new file mode 100755
index 0000000..4e60948
--- /dev/null
+++ b/wiki_deploy/install-cirrussearch.sh
@@ -0,0 +1,308 @@
+#!/bin/bash
+#
+# Install CirrusSearch (Elasticsearch) on P2P Wiki
+#
+# Replaces MySQL full-text search with Elasticsearch to fix stopword issues
+# (e.g. "Will Ruddick" returns 0 results because MySQL drops "will").
+#
+# Run on the Netcup server via interactive SSH:
+#   scp wiki_deploy/install-cirrussearch.sh netcup:/tmp/
+#   ssh netcup
+#   bash /tmp/install-cirrussearch.sh
+#
+# Rollback (stop and remove the Elasticsearch service BEFORE deleting the
+# override file; the service is only defined there, so Compose cannot find it afterwards):
+#   cd /opt/websites/p2pwiki
+#   cp LocalSettings.php.pre-cirrussearch LocalSettings.php
+#   docker compose restart p2pwiki
+#   docker compose stop p2pwiki-elasticsearch && docker compose rm -f p2pwiki-elasticsearch
+#   rm docker-compose.override.yml
+#
+
+set -euo pipefail
+
+WIKI_DIR="/opt/websites/p2pwiki"
+COMPOSE_FILE="$WIKI_DIR/docker-compose.yml"
+LOCAL_SETTINGS="$WIKI_DIR/LocalSettings.php"
+WIKI_CONTAINER="p2pwiki"
+ES_CONTAINER="p2pwiki-elasticsearch"
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Logging helpers: print a colored severity tag followed by the message.
+# Only the color codes go through %b; the message itself is printed verbatim.
+# err writes to stderr; die reports the error via err and exits with status 1.
+log() { printf '%b[INFO]%b %s\n' "$GREEN" "$NC" "$*"; }
+warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*"; }
+err() { printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$*" >&2; }
+
+die() { err "$@"; exit 1; }
+
+# --- Preflight checks ---
+
+log "=== CirrusSearch Installation for P2P Wiki ==="
+echo
+
+[ -f "$COMPOSE_FILE" ] || die "docker-compose.yml not found at $COMPOSE_FILE"
+[ -f "$LOCAL_SETTINGS" ] || die "LocalSettings.php not found at $LOCAL_SETTINGS"
+
+# Check if CirrusSearch is already configured
+if grep -q 'CirrusSearch' "$LOCAL_SETTINGS" 2>/dev/null; then
+  die "CirrusSearch already appears in LocalSettings.php. Aborting."
+fi
+
+# Check if ES service already exists in compose file
+if grep -q "$ES_CONTAINER" "$COMPOSE_FILE" 2>/dev/null; then
+  die "Elasticsearch service already in docker-compose.yml. Aborting."
+fi
+
+# Step 2 overwrites docker-compose.override.yml. Refuse to clobber an override
+# that does not define the Elasticsearch service (i.e. one this script did not
+# write); a leftover from an earlier failed run of this script may be replaced.
+OVERRIDE_FILE="$WIKI_DIR/docker-compose.override.yml"
+if [ -e "$OVERRIDE_FILE" ] && ! grep -q "$ES_CONTAINER" "$OVERRIDE_FILE"; then
+  die "$OVERRIDE_FILE exists and does not define $ES_CONTAINER; refusing to overwrite it. Aborting."
+fi
+
+# --- Step 1: Backup LocalSettings.php ---
+
+log "Step 1: Backing up LocalSettings.php..."
+cp "$LOCAL_SETTINGS" "$LOCAL_SETTINGS.pre-cirrussearch"
+log "Backup saved to $LOCAL_SETTINGS.pre-cirrussearch"
+
+# --- Step 2: Add Elasticsearch to docker-compose.yml ---
+
+log "Step 2: Adding Elasticsearch service to docker-compose.yml..."
+
+# Back up compose file too (precaution only: this script never edits it)
+cp "$COMPOSE_FILE" "$COMPOSE_FILE.pre-cirrussearch"
+
+# Write a separate compose override file for Elasticsearch.
+# Docker Compose merges docker-compose.override.yml automatically.
+cat > "$WIKI_DIR/docker-compose.override.yml" <<'OVERRIDE_EOF'
+services:
+  p2pwiki-elasticsearch:
+    image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2
+    container_name: p2pwiki-elasticsearch
+    restart: unless-stopped
+    environment:
+      discovery.type: single-node
+      ES_JAVA_OPTS: "-Xms1g -Xmx1g"
+      xpack.security.enabled: "false"
+      bootstrap.memory_lock: "true"
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+    volumes:
+      - p2pwiki-es-data:/usr/share/elasticsearch/data
+    networks:
+      - p2pwiki-internal
+    healthcheck:
+      test: ["CMD-SHELL", "curl -sf http://localhost:9200/_cluster/health || exit 1"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 60s
+
+  p2pwiki:
+    depends_on:
+      - p2pwiki-db
+      - p2pwiki-elasticsearch
+
+volumes:
+  p2pwiki-es-data:
+
+networks:
+  p2pwiki-internal:
+OVERRIDE_EOF
+
+log "Created docker-compose.override.yml with Elasticsearch service"
+log "Verifying compose config merges correctly..."
+cd "$WIKI_DIR"
+docker compose config > /dev/null || die "Compose config validation failed. Check docker-compose.override.yml"
+log "Compose config validated"
+
+# --- Step 3: Start Elasticsearch ---
+
+log "Step 3: Starting Elasticsearch..."
+docker compose up -d p2pwiki-elasticsearch # still in $WIKI_DIR from the cd above
+
+log "Waiting for Elasticsearch to be ready..."
+# Poll the cluster health endpoint every 2 seconds, MAX_RETRIES times at most.
+RETRIES=0
+MAX_RETRIES=60
+until docker exec "$ES_CONTAINER" curl -sf http://localhost:9200/_cluster/health >/dev/null 2>&1; do
+  RETRIES=$((RETRIES + 1))
+  if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then
+    die "Elasticsearch failed to start after $((MAX_RETRIES * 2))s. Check: docker logs $ES_CONTAINER"
+  fi
+  sleep 2
+  printf '.'
+done
+echo
+log "Elasticsearch is ready!"
+
+# Show cluster health (URL quoted so the shell never treats '?' as a glob character)
+docker exec "$ES_CONTAINER" curl -s 'http://localhost:9200/_cluster/health?pretty'
+
+# --- Step 4: Install extensions ---
+
+log "Step 4: Installing Elastica and CirrusSearch extensions..."
+
+# Ensure git is available in the wiki container (apt errors stay visible on stderr)
+if ! docker exec "$WIKI_CONTAINER" which git >/dev/null 2>&1; then
+  log "Installing git in wiki container..."
+  docker exec "$WIKI_CONTAINER" apt-get update -qq
+  docker exec "$WIKI_CONTAINER" apt-get install -y -qq git >/dev/null || die "Failed to install git in the $WIKI_CONTAINER container"
+fi
+
+# Ensure composer is available (NOTE(review): the installer is piped to php unverified; consider checking its published checksum)
+if ! docker exec "$WIKI_CONTAINER" which composer >/dev/null 2>&1; then
+  log "Installing composer in wiki container..."
+  docker exec "$WIKI_CONTAINER" bash -c 'curl -sS https://getcomposer.org/installer | php -- --install-dir=/usr/local/bin --filename=composer'
+fi
+
+# Install Elastica (clone is skipped when the extension directory already exists)
+if docker exec "$WIKI_CONTAINER" test -d /var/www/html/extensions/Elastica; then
+  warn "Elastica extension already exists, skipping clone"
+else
+  log "Cloning Elastica (REL1_40)..."
+  docker exec "$WIKI_CONTAINER" git clone --depth 1 -b REL1_40 \
+    https://gerrit.wikimedia.org/r/mediawiki/extensions/Elastica.git \
+    /var/www/html/extensions/Elastica
+fi
+
+log "Installing Elastica dependencies..."
+docker exec -w /var/www/html/extensions/Elastica "$WIKI_CONTAINER" \
+  composer install --no-dev --no-interaction --quiet
+
+# Install CirrusSearch (clone is skipped when the extension directory already exists)
+if docker exec "$WIKI_CONTAINER" test -d /var/www/html/extensions/CirrusSearch; then
+  warn "CirrusSearch extension already exists, skipping clone"
+else
+  log "Cloning CirrusSearch (REL1_40)..."
+  docker exec "$WIKI_CONTAINER" git clone --depth 1 -b REL1_40 \
+    https://gerrit.wikimedia.org/r/mediawiki/extensions/CirrusSearch.git \
+    /var/www/html/extensions/CirrusSearch
+fi
+
+log "Installing CirrusSearch dependencies..."
+docker exec -w /var/www/html/extensions/CirrusSearch "$WIKI_CONTAINER" \
+  composer install --no-dev --no-interaction --quiet
+
+log "Extensions installed successfully"
+
+# --- Step 5: Configure CirrusSearch in LocalSettings.php ---
+
+log "Step 5: Configuring CirrusSearch in LocalSettings.php..."
+
+cat <<'PHPEOF' >> "$LOCAL_SETTINGS"
+
+# --- CirrusSearch (Elasticsearch) ---
+# Replaces MySQL full-text search for better results, fuzzy matching, no stopwords
+wfLoadExtension( 'Elastica' );
+wfLoadExtension( 'CirrusSearch' );
+$wgCirrusSearchServers = [ 'p2pwiki-elasticsearch' ];
+$wgSearchType = 'CirrusSearch';
+$wgCirrusSearchIndexBaseName = 'p2pwiki';
+
+# Temporarily disable real-time search updates during initial indexing
+$wgDisableSearchUpdate = true;
+PHPEOF
+
+log "LocalSettings.php updated"
+
+# --- Step 6: Restart wiki to load new config ---
+
+log "Step 6: Restarting wiki container..."
+docker compose restart "$WIKI_CONTAINER"
+
+# Give the container 10s, then poll Special:Version: at most 30 probes, 2s apart; never fatal
+sleep 10
+RETRIES=0
+while ! docker exec "$WIKI_CONTAINER" curl -sf http://localhost/wiki/Special:Version >/dev/null 2>&1; do
+  (( RETRIES += 1 ))
+  if (( RETRIES >= 30 )); then
+    warn "Wiki may not be fully ready yet, proceeding anyway..."
+    break
+  fi
+  sleep 2
+  printf '.'
+done
+echo
+
+# --- Step 7: Build search index ---
+
+log "Step 7: Building search index (this may take 5-20 minutes)..."
+echo
+
+# Index config first, then two ForceSearchIndex passes: page content (--skipLinks --indexOnSkip), then links (--skipParse)
+CIRRUS_MAINT="/var/www/html/extensions/CirrusSearch/maintenance"
+log "Creating Elasticsearch index mappings..."
+docker exec "$WIKI_CONTAINER" php "$CIRRUS_MAINT/UpdateSearchIndexConfig.php"
+
+log "Indexing page content..."
+docker exec "$WIKI_CONTAINER" php "$CIRRUS_MAINT/ForceSearchIndex.php" --skipLinks --indexOnSkip
+
+log "Indexing link data..."
+docker exec "$WIKI_CONTAINER" php "$CIRRUS_MAINT/ForceSearchIndex.php" --skipParse
+
+log "Search index built successfully!"
+
+# --- Step 8: Enable real-time search updates ---
+
+log "Step 8: Enabling real-time search updates..."
+
+# Remove the temporary "$wgDisableSearchUpdate = true;" line and the comment
+# above it (both appended in Step 5) in a single pass over LocalSettings.php.
+sed -i -e '/^\$wgDisableSearchUpdate = true;$/d' -e '/^# Temporarily disable real-time search updates during initial indexing$/d' "$LOCAL_SETTINGS"
+
+docker compose restart "$WIKI_CONTAINER"
+sleep 10
+
+# --- Step 9: Verify ---
+
+log "Step 9: Verifying search works..."
+echo
+
+# Test search via the MediaWiki API (a failed request leaves API_RESULT empty instead of aborting)
+API_RESULT=$(docker exec "$WIKI_CONTAINER" curl -sf \
+  'http://localhost/api.php?action=query&list=search&srsearch=Will+Ruddick&format=json' 2>/dev/null || true)
+
+if [ -n "$API_RESULT" ]; then
+  # Extract totalhits using grep (works without jq/python); falls back to 0 when the field is absent
+  TOTAL_HITS=$(echo "$API_RESULT" | grep -oP '"totalhits":\s*\K[0-9]+' || echo "0")
+  if [ "$TOTAL_HITS" -gt 0 ] 2>/dev/null; then
+    log "Search is working! 'Will Ruddick' returned $TOTAL_HITS results"
+    # Show first few title matches (best-effort: under pipefail a failed listing must not abort the install)
+    echo "$API_RESULT" | grep -oP '"title":\s*"\K[^"]+' | head -5 | while read -r title; do
+      echo " - $title"
+    done || true
+  else
+    warn "Search returned 0 results. The index may still be building."
+    warn "Wait a minute and try: https://wiki.p2pfoundation.net/index.php?search=Will+Ruddick"
+  fi
+else
+  warn "Could not reach the search API. Check manually:"
+  warn " https://wiki.p2pfoundation.net/index.php?search=Will+Ruddick"
+fi
+
+echo
+log "=== CirrusSearch installation complete! ==="
+echo
+echo "Verify at:"
+echo " - Search: https://wiki.p2pfoundation.net/index.php?search=Will+Ruddick"
+echo " - Version: https://wiki.p2pfoundation.net/wiki/Special:Version"
+echo
+# Order matters: the Elasticsearch service is defined only in the override file, so stop/remove it before deleting that file.
+echo "Rollback if needed:"
+echo " cd $WIKI_DIR"
+echo " cp LocalSettings.php.pre-cirrussearch LocalSettings.php"
+echo " docker compose restart p2pwiki"
+echo " docker compose stop p2pwiki-elasticsearch && docker compose rm -f p2pwiki-elasticsearch"
+echo " rm docker-compose.override.yml"