diff --git a/corpus_web_fonctions.php b/corpus_web_fonctions.php
index 395ea17..f81d832 100644
--- a/corpus_web_fonctions.php
+++ b/corpus_web_fonctions.php
@@ -330,7 +330,7 @@ function recup_liens_int($html) {
foreach($match as $m) {
$m[7] = preg_replace('/<(img|(\/)?span)[^>]*>/si', '', $m[7]);
// liens exclus
- if(in_array($m[3], $url_exclus) || str_contains($m[3], '#comment') || str_contains($m[3], '/user/') || in_array($m[7], $txt_exclus) || str_replace([' ', ' '], '', $m[7]) === '') {
+ if(in_array($m[3], $url_exclus) || str_contains($m[3], '#comment') || str_contains($m[3], '/user/') || str_contains($m[3], '/members/') || str_contains($m[3], '/subscription/') || in_array($m[7], $txt_exclus) || str_replace([' ', ' '], '', $m[7]) === '') {
continue;
}
if (!array_key_exists($m[3], $liens_int)) {
@@ -347,3 +347,18 @@ function recup_liens_int($html) {
return count($l_int) ? "
" : '';
}
+
+function sans_li($html) { // Returns $html after three successive str_replace() passes; the name suggests list markup is being flattened to text — TODO confirm.
+ $html = str_replace(['', ''], '', $html); // NOTE(review): both search strings show as empty here; presumably the markup literals were lost from this view of the patch — confirm against the real file.
+ $html = str_replace('', "\r\n", $html); // Replacement is a CRLF line break; the search string also shows as empty here — confirm.
+ $html = str_replace(' ', " ", $html); // Replaces the quoted search text with an ordinary space; NOTE(review): the search may be a non-breaking space or an entity not distinguishable here — confirm.
+
+ return $html;
+}
+
+function sans_br($html) { // Returns $html after two successive str_replace() passes; the name suggests line-break markup is being converted to text — TODO confirm.
+ $html = str_replace('
', "\r\n", $html); // Replacement is a CRLF line break; NOTE(review): the search literal shows only as a bare line break here, presumably a markup literal was lost from this view — confirm.
+ $html = str_replace(' ', " ", $html); // Replaces the quoted search text with an ordinary space; NOTE(review): the search may be a non-breaking space or an entity not distinguishable here — confirm.
+
+ return $html;
+}
\ No newline at end of file
diff --git a/json_affiche_corpus.json.html b/json_affiche_corpus.json.html
index cf8bee7..147468a 100644
--- a/json_affiche_corpus.json.html
+++ b/json_affiche_corpus.json.html
@@ -94,12 +94,12 @@
"title": [(#GET{title}|json_encode)],
"desc": [(#GET{description}|json_encode)],
"keywords": [(#GET{keywords}|json_encode)],
- "trackers": [(#GET{trackers}|json_encode)],
- "rezos": [(#GET{rezos}|json_encode)],
- "titres": [(#GET{titres}|json_encode)],
- "liens_ext": [(#GET{liens_ext}|json_encode)],
- "liens_int": [(#GET{liens_int}|json_encode)],
- "text": [(#GET{text}|json_encode)]
+ "trackers": [(#GET{trackers}|sans_li|json_encode)],
+ "rezos": [(#GET{rezos}|sans_li|json_encode)],
+ "titres": [(#GET{titres}|sans_li|json_encode)],
+ "liens_ext": [(#GET{liens_ext}|sans_li|json_encode)],
+ "liens_int": [(#GET{liens_int}|sans_li|json_encode)],
+ "text": [(#GET{text}|sans_br|json_encode)]
}
} [(#SET{id_jsonl, #GET{id_jsonl}|plus{1}})]