#!/bin/sh
# IDfr 2022/03
#
# Sisyphe corpus indexation.
#
# Pushes a Sisyphe "analyse-logs.json" output file into an Elasticsearch index
# named "analyse-<corpus>" through the _bulk endpoint, then imports a Kibana
# dashboard generated from a template.
#
# Usage:
#   indexCorpus.sh <FullPath> <Type>
#     FullPath  path to the JSON output file; its base name must be
#               analyse-logs.json. The corpus name is the parent directory
#               name with everything up to the first '-' removed.
#     Type      generique | detail  ("detaille" is accepted as well)
#
# Environment:
#   ELASTIC_URL   base URL used for both Elasticsearch and Kibana requests
#   ELASTIC_PORT  port used for the index DELETE and _bulk requests
#   KIBANA_PORT   port used for the saved-objects import request
#
# Exit status: 0 on success, non-zero when an argument is invalid, a required
# variable is missing, a bulk request fails, the dashboard import fails or the
# analysis type is unknown.

# Debugging
#set -x

# --- Configuration -----------------------------------------------------------
# The real ESC byte is stored so that the colours do not depend on whether the
# shell's echo interprets backslash escapes.
BLUE=$(printf '\033[1;36m')
NC=$(printf '\033[0m')
filePattern="analyse-logs.json"
filePatternReadyForCurl="analyse-logs_curlReady.json"
# Relative to the current working directory of the caller.
dashboardTemplateFolder=./kibanatemplates
genericDashboardTemplate="dashboard-generique.ndjson"
detailDashboardTemplate="dashboard-detail.ndjson"
chunksMaxLines=4000
# Inserts a bulk action line before every document.
# NOTE(review): "\n" in the replacement and the "i" flag are GNU sed
# extensions - confirm the target hosts ship GNU sed.
bulkSedScript='s/{"corpusname":/{ "index" : {} }\n{"corpusname":/gi'

# --- Helpers -----------------------------------------------------------------

# Print one highlighted separator line.
banner() {
  printf '%s\n' "${BLUE}#################${NC}"
}

# Print the help text on stdout.
usage() {
  cat <<EOF

${BLUE}IDfr${NC} - Sisyphe Corpus indexation - 2022/03

${BLUE}#################${NC}
Please add args
${BLUE}#################${NC}

Script Usage : indexCorpus.sh \$FullPath \$Type
 \$FullPath = full path to json out file
 \$Type = analysis type ${BLUE}generique|detail${NC}

Exemple : $0 /applis/panist/home/sisyphe_out/1660119726-elsevier2019-2022-08-10-generique/analyse-logs.json generique

EOF
}

# Convert one JSON file to bulk format and POST it to the corpus index.
# Globals:   bulkSedScript, ELASTIC_URL, ELASTIC_PORT, corpusName (read)
# Arguments: $1 - source JSON file
#            $2 - scratch file receiving the bulk-ready payload
# Returns:   0 on success, non-zero if the conversion or the request failed
bulkIndex() {
  sed "$bulkSedScript" "$1" > "$2" || return 1
  # --fail turns HTTP errors (>= 400) into a non-zero exit status; the response
  # body is discarded, as before.
  curl -k --noproxy '*' --fail -XPOST \
    "${ELASTIC_URL}:${ELASTIC_PORT}/analyse-${corpusName}/_bulk?pretty" \
    -H "Content-Type: application/json" \
    --data-binary "@$2" > /dev/null 2>&1
}

# Instantiate a dashboard template for the corpus and import it into Kibana.
# Globals:   corpusNameSed, ELASTIC_URL, KIBANA_PORT (read)
# Arguments: $1 - template file
#            $2 - scratch file receiving the generated dashboard
# Returns:   0 on success, non-zero if the template is unreadable, the
#            substitution failed or curl reported an error
importDashboard() {
  if [ ! -r "$1" ]; then
    printf '%s\n' "Dashboard template not found : $1" >&2
    return 1
  fi
  : "${KIBANA_PORT:?KIBANA_PORT must be set}"
  # Only the first occurrence of each placeholder on a line is replaced.
  sed -e "s/CORPUSNAMEREPLACE/${corpusNameSed}/" \
      -e "s/DASHBOARDTITLEGEN/${corpusNameSed}/" \
      "$1" > "$2" || return 1
  printf '%s\n' "Creating dashboard"
  # The Kibana response is deliberately left on stdout.
  curl -k --noproxy '*' -X POST \
    "${ELASTIC_URL}:${KIBANA_PORT}/api/saved_objects/_import?createNewCopies=true" \
    -H "kbn-xsrf: true" \
    --form "file=@$2"
}

# --- Help and argument checks ------------------------------------------------
if [ "$#" -ne 2 ]; then
  usage
  exit 1
fi

fullPath=$1
analysisType=$2

# The input file must exist...
if [ ! -f "$fullPath" ]; then
  printf '%s\n' "File is missing" >&2
  exit 1
fi

# ...and carry the expected name.
fileName=${fullPath##*/}
if [ "$fileName" != "$filePattern" ]; then
  printf '%s\n' "Unexpected file name : ${fileName} instead of ${filePattern}" >&2
  exit 1
fi

: "${ELASTIC_URL:?ELASTIC_URL must be set}"
: "${ELASTIC_PORT:?ELASTIC_PORT must be set}"

# Resolve the containing folder to an absolute path so that relative paths
# still yield the real parent directory name.
outPath=$(CDPATH= cd -- "$(dirname -- "$fullPath")" && pwd) || {
  printf '%s\n' "Cannot resolve folder of ${fullPath}" >&2
  exit 1
}
absPath="${outPath}/${fileName}"
outCurlFolder="${outPath}/out.curl"

# Corpus name = parent directory name without its leading timestamp field.
corpusName=$(printf '%s\n' "${outPath##*/}" | cut -d '-' -f2-)
if [ -z "$corpusName" ]; then
  printf '%s\n' "Cannot derive corpus name from ${fullPath}" >&2
  exit 1
fi
# Escape the characters that are special in a sed replacement (\ & /).
corpusNameSed=$(printf '%s\n' "$corpusName" | sed 's|[\\&/]|\\&|g')

banner
printf '%s\n' "Corpus : ${corpusName}"
banner

# Scratch folder for the converted JSON and the generated dashboard; it is
# removed on every exit path once created.
mkdir -p -- "$outCurlFolder" || exit 1
trap 'rm -rf -- "$outCurlFolder"' EXIT
trap 'exit 1' HUP INT TERM

exitStatus=0

# Drop any existing Elasticsearch index of the same name. Best effort: the
# result is intentionally ignored (the index may simply not exist).
curl -k --noproxy '*' -XDELETE \
  "${ELASTIC_URL}:${ELASTIC_PORT}/analyse-${corpusName}" > /dev/null 2>&1

# Large imports are split into chunks of at most $chunksMaxLines lines, see
# https://www.elastic.co/guide/en/app-search/current/limits.html
nbLinesImport=$(wc -l < "$absPath" | awk '{print $1}')
if [ "$nbLinesImport" -gt "$chunksMaxLines" ]; then
  printf '%s\n' "${fullPath} has ${nbLinesImport} lines, splitting it before indexing"
  mkdir -p -- "${outCurlFolder}/split" || exit 1
  split -a 4 -l "$chunksMaxLines" "$absPath" "${outCurlFolder}/split/split" || exit 1
  printf '%s\n' "Converting file for bulk"
  for chunkPath in "${outCurlFolder}/split/"*; do
    # An unmatched glob stays literal; skip it.
    [ -f "$chunkPath" ] || continue
    chunk=${chunkPath##*/}
    if ! bulkIndex "$chunkPath" "${outCurlFolder}/${chunk}-curlReady"; then
      printf '%s\n' "Bulk indexing failed for chunk ${chunk}" >&2
      exitStatus=1
    fi
    rm -f -- "${outCurlFolder}/${chunk}-curlReady"
  done
  rm -rf -- "${outCurlFolder}/split"
else
  printf '%s\n' "Sending to Index"
  if ! bulkIndex "$absPath" "${outCurlFolder}/${filePatternReadyForCurl}"; then
    printf '%s\n' "Bulk indexing failed for ${fullPath}" >&2
    exitStatus=1
  fi
fi

# Import the dashboard matching the analysis type. "detail" is the value shown
# in the usage text, "detaille" the one historically tested by this script.
case "$analysisType" in
  generique)
    printf '%s\n' "Generic Analysis"
    importDashboard "${dashboardTemplateFolder}/${genericDashboardTemplate}" \
      "${outCurlFolder}/dashboardGEN.ndjson" || exitStatus=1
    ;;
  detail | detaille)
    printf '%s\n' "Detailled dashboard"
    importDashboard "${dashboardTemplateFolder}/${detailDashboardTemplate}" \
      "${outCurlFolder}/dashboardDET.ndjson" || exitStatus=1
    ;;
  *)
    printf '%s\n' "Unknown Analysis type : ${analysisType}" >&2
    exitStatus=1
    ;;
esac

# The scratch folder is removed by the EXIT trap.
exit "$exitStatus"