From 1301403740fe3e9487f67a07870796f9e3dfb1f9 Mon Sep 17 00:00:00 2001 From: pierre Date: Sun, 4 Feb 2024 22:58:12 +0100 Subject: [PATCH] ... --- PrintRScript.py | 14 ++++++++++++-- corpus.py | 6 ++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/PrintRScript.py b/PrintRScript.py index 27c4d27..0699b3f 100755 --- a/PrintRScript.py +++ b/PrintRScript.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud -#Copyright (c) 2008-2020 Pierre Ratinaud -#modification pour python 3 : Laurent Mérat, 6x7 - mai 2020 +#Copyright (c) 2008-2011 Pierre Ratinaud #License: GNU/GPL #------------------------------------ @@ -1385,6 +1384,17 @@ class LabbeScript(PrintRScript): tab <- read.csv2("%s", header=TRUE, sep=';', row.names=1) """ % (ffr(self.pathout['tableafcm.csv'])) txt += """ + cs <- colSums(tab) + if (min(cs) == 0) { + print('empty columns !!') + vide <- which(cs==0) + print(vide) + tab <- tab[,-vide] + } + #print('#### RcppIramuteq for C++ Labbe ####') + #library(RcppIramuteq) + #dist.mat <- labbe(as.matrix(tab)) + #rownames(dist.mat) <- colnames(tab) dist.mat <- dist.labbe(tab) dist.mat <- as.dist(dist.mat, upper=F, diag=F) write.table(as.matrix(dist.mat), "%s", sep='\t') diff --git a/corpus.py b/corpus.py index 7ab0ebb..e6b0bf2 100644 --- a/corpus.py +++ b/corpus.py @@ -20,6 +20,9 @@ from operator import itemgetter from uuid import uuid4 import datetime from copy import copy +#------test spacy------------ +#import spacy +#nlp = spacy.load("fr_core_news_lg") #------------------------------------ # import des fichiers du projet @@ -1546,6 +1549,7 @@ class BuildSubCorpus(BuildCorpus): class BuildFromAlceste(BuildCorpus) : def read_corpus(self, infile) : + if self.dlg is not None : self.dlg.Pulse('textes : 0 - segments : 0') self.limitshow = 0 @@ -1565,6 +1569,8 @@ class BuildFromAlceste(BuildCorpus) : if self.testuci(line) : iduci += 1 if txt != [] : + #doc = nlp(' '.join(txt)) + #print([[word, word.pos_, word.lemma_] for word in doc]) iduce, idpara = self.treattxt(txt, iduce, idpara, iduci - 1) txt = [] self.corpus.ucis.append(Uci(iduci, line)) -- 2.7.4