Newer
Older
sisyphe-go / indexCorpus.sh
#!/bin/sh
# IDfr 2022/03

# Debugging: uncomment the next line to trace every command
#set -x

#Conf Part
# Colour codes are stored as real escape bytes (produced by printf) instead of
# the literal text '\033[...'. The literal form only turns into colour when
# echo itself interprets backslashes, which depends on which shell /bin/sh is;
# raw bytes are passed through unchanged by every echo and printf.
BLUE=$(printf '\033[1;36m')
NC=$(printf '\033[0m')
# Only a file with exactly this name is accepted as input.
filePattern="analyse-logs.json"
# Name of the bulk-formatted copy written into the buffer folder.
filePatternReadyForCurl="analyse-logs_curlReady.json"
# Folder holding the Kibana dashboard templates (relative to the current directory).
dashboardTemplateFolder=./kibanatemplates
genericDashboardTemplate="dashboard-generique.ndjson"
detailDashboardTemplate="dashboard-detail.ndjson"
# Input files with more lines than this are split into chunks before indexing.
chunksMaxLines=4000

#Help and args check
# Exactly two positional arguments are required: the full path to the JSON
# output file and the analysis type. Anything else prints the usage text and
# stops with a non-zero status so that calling scripts can detect the misuse.
if [ "$#" -ne 2 ]; then
	# printf '%b' renders the colour variables whether they contain backslash
	# escape text or raw escape bytes; plain echo only handles one of the two,
	# depending on the shell behind /bin/sh.
	printf '\n'
	printf '%bIDfr%b - Sisyphe Corpus indexation - 2022/03\n' "$BLUE" "$NC"
	printf '\n'
	printf '%b#################%b\n' "$BLUE" "$NC"
	printf 'Please add args\n'
	printf '%b#################%b\n' "$BLUE" "$NC"
	printf '\n'
	printf 'Script Usage : indexCorpus.sh $FullPath $Type\n'
	printf ' $FullPath = full path to json out file\n'
	printf ' $Type = analysis type %bgenerique|detail%b\n' "$BLUE" "$NC"
	printf '\n'
	printf 'Exemple : %s /applis/panist/home/sisyphe_out/1660119726-elsevier2019-2022-08-10-generique/analyse-logs.json generique\n' "$0"
	printf '\n'
	# 2 is the conventional status for a usage error; a bare "exit" here would
	# return the status of the last printf, i.e. success.
	exit 2
fi
fullPath=$1
analysisType=$2

#Check if $fullPath exists, index its content, then import the matching dashboard
# Reads : fullPath, analysisType, filePattern, filePatternReadyForCurl,
#         chunksMaxLines, dashboardTemplateFolder, genericDashboardTemplate,
#         detailDashboardTemplate, BLUE, NC.
#         ELASTIC_URL, ELASTIC_PORT and KIBANA_PORT are not assigned in this
#         block; presumably they come from the caller's environment (TODO confirm).
# Sets  : outPath, outCurlFolder, fileName, corpusName, nbLinesImport.
# Exits : status 1 when the input file is missing or is not named $filePattern.

# Convert the JSON-lines file $1 to bulk format (an '{ "index" : {} }' action
# line inserted before every '{"corpusname":' document), write it to $2 and
# POST it to the _bulk endpoint of the index URL $3.
# curl output is discarded, as before, so a rejected bulk request is not reported.
bulkIndexFile() {
	sed 's/{"corpusname":/{ "index" : {} }\n{"corpusname":/gi' "$1" > "$2"
	curl -k --noproxy '*' -XPOST "$3/_bulk?pretty" -H "Content-Type: application/json" --data-binary "@$2" > /dev/null 2>&1
}

# Quoting matters here: an unquoted empty path turns the test into '[ -f ]',
# which is true.
if [ ! -f "$fullPath" ]; then
	echo "File is missing"
	exit 1
fi

outPath=$(dirname -- "$fullPath")
outCurlFolder="${outPath}/out.curl"
#extract json file name from path
fileName=${fullPath##*/}
#check if json file = $filePattern
if [ "$fileName" != "$filePattern" ]; then
	echo "Unexpected file name : $fileName instead of $filePattern"
	exit 1
fi

#get corpus name from path: parent folder name without its first '-' separated field (the timestamp)
corpusName=$(printf '%s\n' "$fullPath" | awk -F "/" '{print $(NF-1)}' | cut -d '-' -f2-)
# Escaped copy for use as a sed replacement ('/', '&' and '\' are special there).
corpusNameSed=$(printf '%s' "$corpusName" | sed 's|[\\/&]|\\&|g')
elasticIndexUrl="${ELASTIC_URL}:${ELASTIC_PORT}/analyse-${corpusName}"

printf '%b#################%b\n' "$BLUE" "$NC"
printf 'Corpus : %s\n' "$corpusName"
printf '%b#################%b\n' "$BLUE" "$NC"

#create buffer folder for json convert and kibana dashboard template
mkdir -p -- "$outCurlFolder"
# Delete any existing elasticsearch index with the same name
curl -k --noproxy '*' -XDELETE "$elasticIndexUrl" > /dev/null 2>&1

#Check if import file is too big - https://www.elastic.co/guide/en/app-search/current/limits.html - Split in chunks with $chunksMaxLines max lines
nbLinesImport=$(wc -l < "$fullPath" | awk '{print $1}')
if [ "$nbLinesImport" -gt "$chunksMaxLines" ]; then
	echo "$fullPath has $nbLinesImport lines, splitting it before indexing"
	mkdir -p -- "$outCurlFolder/split"
	split -a 4 -l "$chunksMaxLines" "$fullPath" "$outCurlFolder/split/split"
	echo "Converting file for bulk"
	# Glob instead of parsing ls, so folder names with spaces stay intact.
	for chunk in "$outCurlFolder"/split/*; do
		# Skip the literal pattern if the glob matched nothing.
		[ -e "$chunk" ] || continue
		chunkReady="$outCurlFolder/${chunk##*/}-curlReady"
		bulkIndexFile "$chunk" "$chunkReady" "$elasticIndexUrl"
		rm -f -- "$chunkReady"
	done
	rm -rf -- "$outCurlFolder/split"
else
	echo "Sending to Index"
	bulkIndexFile "$fullPath" "$outCurlFolder/$filePatternReadyForCurl" "$elasticIndexUrl"
fi

#Import Dashboard - check type first
# The usage text advertises "detail" while the historical spelling tested here
# was "detaille"; both are accepted.
dashboardFile=""
case "$analysisType" in
	generique)
		echo "Generic Analysis"
		dashboardTemplate=$genericDashboardTemplate
		dashboardFile="$outCurlFolder/dashboardGEN.ndjson"
		;;
	detail|detaille)
		echo "Detailled dashboard"
		dashboardTemplate=$detailDashboardTemplate
		dashboardFile="$outCurlFolder/dashboardDET.ndjson"
		;;
	*)
		# No exit here: the data is already indexed and the buffer folder
		# still has to be removed by the code that follows this block.
		echo "Unknown Analysis type : $analysisType"
		;;
esac

if [ -n "$dashboardFile" ]; then
	# Fill the placeholders in the template (first occurrence on each line, as before).
	sed -e "s/CORPUSNAMEREPLACE/${corpusNameSed}/" -e "s/DASHBOARDTITLEGEN/${corpusNameSed}/" \
		"$dashboardTemplateFolder/$dashboardTemplate" > "$dashboardFile"
	echo "Creating dashboard"
	curl -k --noproxy '*' -X POST "${ELASTIC_URL}:${KIBANA_PORT}/api/saved_objects/_import?createNewCopies=true" -H "kbn-xsrf: true" --form "file=@${dashboardFile}"
fi

#Removing buffer folder
# The path is quoted so that a folder name containing spaces or glob characters
# removes only that folder (unquoted, each word would be deleted separately),
# and the removal is skipped entirely if the variable is empty.
if [ -n "${outCurlFolder:-}" ]; then
	rm -rf -- "$outCurlFolder"
fi