diff --git a/indexCorpus.sh b/indexCorpus.sh
old mode 100755
new mode 100644
index e55f77f..a365f95
--- a/indexCorpus.sh
+++ b/indexCorpus.sh
@@ -62,11 +62,11 @@ then
     echo $fullPath" has "$nbLinesImport" lines, splitting it before indexing"
     mkdir -p $outCurlFolder"/"$corpusName"/split"
-    split -l $chunksMaxLines $outCurlFolder"/"$corpusName"/"$filePatternReadyForCurl $outCurlFolder"/"$corpusName"/split/split"
+    split -l $chunksMaxLines $fullPath $outCurlFolder"/"$corpusName"/split/split"
     for chunk in `ls $outCurlFolder"/"$corpusName"/split/"`
     do
         echo "indexing "$chunk
         #convert json file to json ready for bulk
-        sed 's/{"corpusname":/{ "index" : {} }\n{"corpusname":/gi' $outCurlFolder"/"$corpusName"/"$chunk > $outCurlFolder"/"$corpusName"/"$chunk"-curlReady"
+        sed 's/{"corpusname":/{ "index" : {} }\n{"corpusname":/gi' $outCurlFolder"/"$corpusName"/split/"$chunk > $outCurlFolder"/"$corpusName"/"$chunk"-curlReady"
         curl -k --noproxy '*' -XPOST $ELASTIC_URL":"$ELASTIC_PORT"/analyse-"$corpusName"/_bulk?pretty" -H "Content-Type: application/json" --data-binary "@"$outCurlFolder"/"$corpusName"/"$chunk"-curlReady" > /dev/null 2>&1
         rm $outCurlFolder"/"$corpusName"/"$chunk"-curlReady"
     done