#HTTP_HEADER{Content-Type: application/json; charset=#CHARSET}
[(#REM) Emits a JSON array. The first element carries the column labels under header, a per-column filter type under filtreCol and per-column CSS class names under classes. Every following element is one record with the same values duplicated under html and search. ]
[(#ENV{id}|setenv{id_jsonl})]
[{
  "header":{
    "id": "id jsonl",
    "site": "Site",
    "date_aspi": "Date archive",
    "num_page": "Numéro page",
    "title": "Meta title",
    "desc": "Meta description",
    "keywords": "Meta keywords",
    "trackers": "Trackers",
    "rezos": "Rézos socios",
    "titres": "titres",
    "liens_ext": "Liens externes",
    "liens_int": "Liens internes",
    "text": "Texte"
  },
  "filtreCol" : {
    "site" : "select",
    "date_aspi": "select",
    "num_page": "select",
    "title": "input",
    "desc": "input",
    "keywords": "input",
    "trackers": "select",
    "rezos": "select",
    "titres": "input",
    "liens_ext": "input",
    "liens_int": "input",
    "text": "input"
  },
  "classes":{
    "num_page": "w80p",
    "title": "w120p",
    "desc": "w120p",
    "keywords": "w200p",
    "trackers": "w100p",
    "rezos": "w100p",
    "liens_int": "maxw400p",
    "text": "minw400p"
  }
}
[(#REM) File-level state. The record counter starts at 1. The site and the archive date are computed from the base name of the file by the project filters recup_site and recup_aspi. ]
#SET{id_jsonl,1}
[(#SET{site,#FILE|basename|recup_site})]
[(#SET{date_aspi,#FILE|basename|recup_aspi})]
[(#REM) The list of line numbers holds 1 alone, or 1 and the line count when the file has more than one line. ]
#SET{liste_lignes,#LISTE{1}}
#SET{trop_gros,#CONFIG{corpus_web/taille_max}|mult{1000000}}
#SET{nb_lignes, #FILE|nombre_ligne}
[(#GET{nb_lignes}|!={1}|oui) #SET{liste_lignes,#LISTE{1,#GET{nb_lignes}}}]
[(#REM) Pick line 1 on the first pass and the last line otherwise, then decode that line as JSON into content. NOTE review: the loop tags that provide the counter, the key and the value are not visible in this excerpt. ]
[(#SET{num_ligne,[(#COMPTEUR_BOUCLE|=={1}|?{1,#GET{nb_lignes}})]})]
[(#SET{content,[(#FILE|affiche_ligne{#GET{num_ligne}}|json_decode{true})]})]
[(#REM) Reset every per-record field before extraction. The text field is reset with the others so that a record without boilerpipe text emits an empty string instead of the text kept from an earlier record. ]
#SET{num_page,Page #VALEUR}
#SET{title,''}
#SET{keywords,''}
#SET{description,''}
#SET{titres,''}
#SET{trackers,''}
#SET{rezos,''}
#SET{liens_ext,''}
#SET{liens_int,''}
#SET{text,''}
[(#REM) Text fields. Line breaks are swapped for a placeholder, the angle bracket replacements run, then the line breaks are put back. The line breaks inside the replace arguments are significant and must stay unindented. NOTE review: the angle bracket replacements look like no-ops as written, confirm the intended escaping. ]
[(#CLE|=={extractionContent}|oui)
  [(#CLE|=={boilerpipe:text}|oui) [(#SET{text, [(#VALEUR|print|supprimer_com|replace{
,§§}|replace{
,§§}|replace{<,<}|replace{>,>}|replace{§§,
}|replace{"\n",
})]})] ]
  [(#CLE|=={htmlmeta:head:title}|oui) [(#SET{title, [(#VALEUR|print|replace{
,§§}|replace{
,§§}|replace{<,<}|replace{>,>}|replace{§§,
}|replace{"\n",
})]})] ]
  [(#CLE|=={htmlmeta:head:keywords}|oui) [(#SET{keywords, [(#VALEUR|print|replace{
,§§}|replace{
,§§}|replace{<,<}|replace{>,>}|replace{§§,
}|replace{"\n",
}|replace{',', ', '})]})] ]
  [(#CLE|=={htmlmeta:head:description}|oui) [(#SET{description, [(#VALEUR|print|replace{
,§§}|replace{
,§§}|replace{<,<}|replace{>,>}|replace{§§,
}|replace{"\n",
})]})] ]
]
[(#REM) Fields computed from the raw page. The htmlBytes value is base64 decoded and handed to the project filters for headings, trackers, social networks and links. ]
[(#CLE|=={htmlBytes}|oui)
  [(#SET{titres, [(#VALEUR|base64_decode|recup_titres)]})]
  [(#SET{trackers, [(#VALEUR|base64_decode|trouve_trackers)]})]
  [(#SET{rezos, [(#VALEUR|base64_decode|trouve_rezos)]})]
  [(#SET{liens_ext, [(#VALEUR|base64_decode|recup_liens_ext)]})]
  [(#SET{liens_int, [(#VALEUR|base64_decode|recup_liens_int)]})]
]
[(#REM) One record. The leading comma separates it from the previous array element. String values go through json_encode so quoting and escaping are handled by the encoder. ]
,{
  "html": {
    "id": [(#GET{id_jsonl})],
    "site" : [(#GET{site}|json_encode)],
    "date_aspi": [(#GET{date_aspi}|json_encode)],
    "num_page": [(#GET{num_page}|json_encode)],
    "title": [(#GET{title}|json_encode)],
    "desc": [(#GET{description}|json_encode)],
    "keywords": [(#GET{keywords}|json_encode)],
    "trackers": [(#GET{trackers}|json_encode)],
    "rezos": [(#GET{rezos}|json_encode)],
    "titres": [(#GET{titres}|json_encode)],
    "liens_ext": [(#GET{liens_ext}|json_encode)],
    "liens_int": [(#GET{liens_int}|json_encode)],
    "text": [(#GET{text}|json_encode)]
  },
  "classes": { "titre":"transcription" },
  "search": {
    "id": [(#GET{id_jsonl})],
    "site" : [(#GET{site}|json_encode)],
    "date_aspi": [(#GET{date_aspi}|json_encode)],
    "num_page": [(#GET{num_page}|json_encode)],
    "title": [(#GET{title}|json_encode)],
    "desc": [(#GET{description}|json_encode)],
    "keywords": [(#GET{keywords}|json_encode)],
    "trackers": [(#GET{trackers}|json_encode)],
    "rezos": [(#GET{rezos}|json_encode)],
    "titres": [(#GET{titres}|json_encode)],
    "liens_ext": [(#GET{liens_ext}|json_encode)],
    "liens_int": [(#GET{liens_int}|json_encode)],
    "text": [(#GET{text}|json_encode)]
  }
}
[(#REM) Advance the record counter for the next element. ]
[(#SET{id_jsonl, #GET{id_jsonl}|plus{1}})]
]