User:Jhedden/notes/ElasticSearch
Appearance
< User:Jhedden | notes
Script to remote reindex or compare indexes between clusters
#!/bin/bash
# Copyright 2020 Wikimedia Foundation Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Fail fast: abort on command errors, on a failure anywhere in a pipeline,
# and on expansion of an unset variable.
set -o errexit
set -o pipefail
set -o nounset

# When "true", logmsg prints every message instead of only CHANGE messages.
VERBOSE="false"
# Cluster that receives the _reindex and _settings requests.
LOCAL_ES='http://localhost:9200'
# Cluster whose index list is read and that is used as the reindex source.
REMOTE_ES='http://tools-elastic-01.tools.eqiad.wmflabs:80'

# The first argument is the password used for the remote reindex source.
# "${1:-}" is needed because nounset would otherwise abort with an
# "unbound variable" error before the message below could be printed.
if [ -n "${1:-}" ]; then
  REMOTE_PASS=$1
else
  echo "ERROR: No remote password provided" >&2
  # Exit non-zero so callers can tell that nothing was done.
  exit 1
fi
# Print a log message, filtered by level.
# Messages with level CHANGE are always printed; any other level is printed
# only when VERBOSE is "true".
# Globals:   VERBOSE (read; treated as "false" when unset)
# Arguments: $1 - level (CHANGE or anything else, e.g. INFO)
#            $2 - message text
# Outputs:   the message followed by a newline on stdout
logmsg () {
  # Keep both values local so a call never overwrites the caller's variables.
  local level="$1"
  local msg="${2-}"
  case "$level" in
    CHANGE)
      # printf prints the message verbatim; echo would treat a message such
      # as "-n" as an option and print nothing.
      printf '%s\n' "$msg"
      ;;
    *)
      if [ "${VERBOSE:-false}" = "true" ]; then
        printf '%s\n' "$msg"
      fi
      ;;
  esac
}
# Load all the remote indexes into an array, one compact JSON object per
# element. The listing is captured in a variable first so that
# errexit/pipefail stop the run when the remote cluster cannot be reached or
# its reply cannot be parsed; a command substitution placed directly inside a
# redirection would hide that failure.
REMOTE_LISTING=$(curl -sSf "$REMOTE_ES/_cat/indices?format=json" | jq -c '.[]')
DATA=()
if [ -n "$REMOTE_LISTING" ]; then
  mapfile -t DATA <<< "$REMOTE_LISTING"
fi

# Walk the entries by position; ${#DATA[@]} is safe under nounset even when
# the remote cluster has no indexes at all.
for (( i = 0; i < ${#DATA[@]}; i++ )); do
  unset INDEX HEALTH LOCAL_INDEX
  declare -A INDEX

  # Split the remote entry into its index name and document count.
  key=""
  value=""
  IFS='|' read -r key value \
    < <(jq -r '. | "\(.index)|\(."docs.count")"' <<< "${DATA[i]}") || true
  if [ -z "$key" ]; then
    echo "WARNING: skipping unparsable remote entry: ${DATA[i]}" >&2
    continue
  fi
  INDEX[name]=$key
  INDEX[docs]=$value

  # Skip internal .tasks index
  if [ "${INDEX[name]}" = ".tasks" ]; then
    continue
  fi

  logmsg "INFO" "Checking index: ${INDEX[name]}"

  # Ask the local cluster about the index once and reuse the reply below.
  # A JSON object carrying "status": 404 means the index is missing locally;
  # any other reply (normally a one-element array) is recorded as 0.
  LOCAL_REPLY=$(curl -sS "$LOCAL_ES/_cat/indices/${INDEX[name]}?format=json")
  HEALTH=$(jq -r 'if type == "object" then (.status // 0) else 0 end' \
    <<< "$LOCAL_REPLY" 2>/dev/null) || HEALTH=0
  HEALTH=${HEALTH:-0}

  if [ "$HEALTH" = '404' ]; then
    # Start a remote reindex if the index doesn't exist locally.
    logmsg "CHANGE" "Starting remote reindex on ${INDEX[name]}"
    # Build the request body with jq so that quotes or backslashes in the
    # password or index name are escaped instead of breaking the JSON.
    jq -c -n \
      --arg host "$REMOTE_ES" \
      --arg password "$REMOTE_PASS" \
      --arg index "${INDEX[name]}" \
      '{
        source: {
          remote: {host: $host, username: "reindex", password: $password},
          index: $index,
          size: "200"
        },
        dest: {index: $index}
      }' \
      | curl -HContent-Type:application/json -XPOST "$LOCAL_ES/_reindex?pretty" -d @-

    # Configure replicas on the new index
    logmsg "CHANGE" "Adding replicas on ${INDEX[name]}"
    curl -s -HContent-Type:application/json -XPUT \
      "$LOCAL_ES/${INDEX[name]}/_settings" \
      -d '{"index.number_of_replicas" : 2}'
  else
    # If the index exists locally, compare .docs.count between remote and local
    logmsg "INFO" "Found existing index ${INDEX[name]} checking doc count"
    declare -A LOCAL_INDEX
    key=""
    value=""
    IFS='|' read -r key value \
      < <(jq -r '.[0] | "\(.index)|\(."docs.count")"' <<< "$LOCAL_REPLY" 2>/dev/null) || true
    # Fall back to placeholders so an unparsable local reply is reported as
    # out of sync instead of aborting the run under nounset.
    LOCAL_INDEX[name]=${key:-${INDEX[name]}}
    LOCAL_INDEX[docs]=${value:-unknown}

    if [ "${LOCAL_INDEX[docs]}" = "${INDEX[docs]}" ]; then
      logmsg "INFO" "doc count in sync on index ${INDEX[name]}"
    else
      logmsg "CHANGE" "OUT OF SYNC index ${INDEX[name]} found: ${LOCAL_INDEX[docs]} expected: ${INDEX[docs]}"
    fi
  fi
done