[(#CLE|replace{htmlmeta:head:, meta }|replace{boilerpipe:, ''}|ucfirst)] :
- [(#VALEUR|print|replace{
,§§}|replace{
,§§}|replace{<,<}|replace{>,>}|replace{§§,
}|replace{"\n",
})]
+ [(#VALEUR|print|supprimer_com|replace{
,§§}|replace{
,§§}|replace{<,<}|replace{>,>}|replace{§§,
}|replace{"\n",
})]
]
diff --git a/corpus_web_fonctions.php b/corpus_web_fonctions.php
index de2b8d3..395ea17 100644
--- a/corpus_web_fonctions.php
+++ b/corpus_web_fonctions.php
@@ -191,4 +191,159 @@ function recup_aspi($nom_fichier_jsonl) {
$T = explode('_', $nom_fichier_jsonl)[1];
$T = explode('-', $T);
return $T[0].'-'.$T[1].'-'.$T[2];
-}
\ No newline at end of file
+}
+
+function supprimer_com($boiler_text) { // Cuts $boiler_text at the first "Poster un nouveau commentaire" marker and returns what precedes it.
+ return preg_replace('/Poster un nouveau commentaire.*/si', '', $boiler_text); // "i": case-insensitive marker; "s": "." also matches newlines, so ".*" consumes everything up to the end of the text
+}
+
+function trouve_trackers($html) {
+ // Tracker signatures searched for: google(-?)analytics | xiti.com | ranktrackr.net | seoposition.com
+ $trackers = [];
+ preg_match_all('/google(-?)analytics|xiti\.com|ranktrackr\.net|seoposition\.com/si', $html, $match, PREG_SET_ORDER);
+ if ($match) {
+ foreach($match as $m) {
+ if (strtolower(str_replace(['-', ' '], '', $m[0])) === 'googleanalytics') {
+ if (!in_array('google analytics', $trackers)) {
+ $trackers[] = 'google analytics';
+ }
+ } else {
+ if (!in_array(strtolower($m[0]), $trackers)) {
+ $trackers[] = strtolower($m[0]);
+ }
+ }
+ }
+ }
+
+ return count($trackers) ? "