...

author pierre <ratinaud@univ-tlse2.fr>

Sun, 4 Feb 2024 21:58:12 +0000 (22:58 +0100)

committer pierre <ratinaud@univ-tlse2.fr>

Sun, 4 Feb 2024 21:58:12 +0000 (22:58 +0100)
author pierre <ratinaud@univ-tlse2.fr>
Sun, 4 Feb 2024 21:58:12 +0000 (22:58 +0100)
committer pierre <ratinaud@univ-tlse2.fr>
Sun, 4 Feb 2024 21:58:12 +0000 (22:58 +0100)
diff --git a/PrintRScript.py b/PrintRScript.py

index 27c4d27..0699b3f 100755 (executable)
--- a/PrintRScript.py
+++ b/PrintRScript.py
@@ -1,7 +1,6 @@
  # -*- coding: utf-8 -*-
  #Author: Pierre Ratinaud
-#Copyright (c) 2008-2020 Pierre Ratinaud
-#modification pour python 3 : Laurent Mérat, 6x7 - mai 2020
+#Copyright (c) 2008-2011 Pierre Ratinaud
  #License: GNU/GPL
  
  #------------------------------------
@@ -1385,6 +1384,17 @@ class LabbeScript(PrintRScript):
          tab <- read.csv2("%s", header=TRUE, sep=';', row.names=1)
          """ % (ffr(self.pathout['tableafcm.csv']))
          txt += """
+        cs <- colSums(tab)
+        if (min(cs) == 0) {
+            print('empty columns !!')
+            vide <- which(cs==0)
+            print(vide)
+            tab <- tab[,-vide]
+        }
+        #print('#### RcppIramuteq for C++ Labbe ####')
+        #library(RcppIramuteq)
+        #dist.mat <- labbe(as.matrix(tab))
+        #rownames(dist.mat) <- colnames(tab)
          dist.mat <- dist.labbe(tab)
          dist.mat <- as.dist(dist.mat, upper=F, diag=F)
          write.table(as.matrix(dist.mat), "%s", sep='\t')
diff --git a/corpus.py b/corpus.py

index 7ab0ebb..e6b0bf2 100644 (file)
--- a/corpus.py
+++ b/corpus.py
@@ -20,6 +20,9 @@ from operator import itemgetter
  from uuid import uuid4
  import datetime
  from copy import copy
+#------test spacy------------
+#import spacy
+#nlp = spacy.load("fr_core_news_lg")
  
  #------------------------------------
  # import des fichiers du projet
@@ -1546,6 +1549,7 @@ class BuildSubCorpus(BuildCorpus):
  class BuildFromAlceste(BuildCorpus) :
  
      def read_corpus(self, infile) :
+
          if self.dlg is not None :
              self.dlg.Pulse('textes : 0 - segments : 0')
          self.limitshow = 0
@@ -1565,6 +1569,8 @@ class BuildFromAlceste(BuildCorpus) :
                      if self.testuci(line) :
                          iduci += 1
                          if txt != [] :
+                            #doc = nlp(' '.join(txt))
+                            #print([[word, word.pos_, word.lemma_] for word in doc])
                              iduce, idpara = self.treattxt(txt, iduce, idpara, iduci - 1)
                              txt = []
                              self.corpus.ucis.append(Uci(iduci, line))
author	pierre <ratinaud@univ-tlse2.fr>
	Sun, 4 Feb 2024 21:58:12 +0000 (22:58 +0100)
committer	pierre <ratinaud@univ-tlse2.fr>
	Sun, 4 Feb 2024 21:58:12 +0000 (22:58 +0100)
PrintRScript.py		patch | blob | history
corpus.py		patch | blob | history