diff --git a/bash/harvest-perf.sh b/bash/harvest-perf.sh index 9dc8e12..4f28ea0 100755 --- a/bash/harvest-perf.sh +++ b/bash/harvest-perf.sh @@ -12,9 +12,11 @@ # PAGE_SIZE=10 -ISTEX_QUERY="hypertext" +# ISTEX_QUERY="hypertext" +# ISTEX_QUERY="changement%20AND%20climat" +ISTEX_QUERY="global%20AND%20warming%20AND%20denying" ISTEX_URI="https://api.istex.fr/document/?q=$ISTEX_QUERY&size=$PAGE_SIZE" -# ISTEX_URI="https://192.168.128.14/document/?q=$ISTEX_QUERY&size=$PAGE_SIZE" +# ISTEX_URI="http://192.168.128.14:53332/document/?q=$ISTEX_QUERY&size=$PAGE_SIZE" # Outil JQ permettant de manipuler le JSON en ligne de commande # http://stedolan.github.io/jq/ @@ -24,6 +26,7 @@ # et en déduire le nombre de page de résultats à télécharger FIRST_PAGE=$(curl -s $ISTEX_URI) TOTAL_DOC=$(echo $FIRST_PAGE | $JQ '.total') +export TOTAL_OBJDOC="$TOTAL_DOC" TOTAL_PAGE=$(wcalc -q "ceil($TOTAL_DOC/$PAGE_SIZE) - 1") echo "--> Téléchargement de $TOTAL_DOC documents" diff --git a/bash/perf.sh b/bash/perf.sh index 4ed7e69..e50561c 100755 --- a/bash/perf.sh +++ b/bash/perf.sh @@ -47,4 +47,4 @@ SIZE_TOTAL=$(du -s "$CORPUS_DIR" | cut -f 1) EXEC_TIME=$(cat "$TIMEFILE") -echo "$SESSION;$NB_PDF;$NB_XML;$NB_JSON;$NB_ZIP;$NB_TEI;$NB_MODS;$NB_TXT;$NB_TOTAL;$SIZE_TOTAL;$EXEC_TIME" >> "$RESULT_DIR/collecte.csv" +echo "$SESSION;$NB_PDF;$NB_XML;$NB_JSON;$NB_ZIP;$NB_TEI;$NB_MODS;$NB_TXT;$TOTAL_OBJDOC;$NB_TOTAL;$SIZE_TOTAL;$EXEC_TIME" >> "$RESULT_DIR/collecte.csv"