Merge branch '3.0' of http://www.iramuteq.org/git/iramuteq into 3.0

author pierre <pierre.ratinaud@univ-tlse2.fr>

Mon, 12 Feb 2024 16:00:10 +0000 (17:00 +0100)

committer pierre <pierre.ratinaud@univ-tlse2.fr>

Mon, 12 Feb 2024 16:00:10 +0000 (17:00 +0100)
author pierre <pierre.ratinaud@univ-tlse2.fr>
Mon, 12 Feb 2024 16:00:10 +0000 (17:00 +0100)
committer pierre <pierre.ratinaud@univ-tlse2.fr>
Mon, 12 Feb 2024 16:00:10 +0000 (17:00 +0100)
diff --git a/PrintRScript.py b/PrintRScript.py

index 27c4d27..f8aaa96 100755 (executable)
--- a/PrintRScript.py
+++ b/PrintRScript.py
@@ -1,7 +1,6 @@
  # -*- coding: utf-8 -*-
  #Author: Pierre Ratinaud
-#Copyright (c) 2008-2020 Pierre Ratinaud
-#modification pour python 3 : Laurent Mérat, 6x7 - mai 2020
+#Copyright (c) 2008-2011 Pierre Ratinaud
  #License: GNU/GPL
  
  #------------------------------------
@@ -59,7 +58,7 @@ class PrintRScript:
              self.add('load("%s")' % ffr(val))
  
      def write(self):
-        with open(self.scriptout, 'w') as f:
+        with open(self.scriptout, 'w', encoding='utf8') as f:
              f.write(self.script)
  
  
@@ -263,7 +262,7 @@ def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, svdmethod = 'sv
      txt += """
      #save.image(file="%s")
      """ % (ffr(DicoPath['RData']))
-    fileout = open(DicoPath['Rchdtxt'], 'w')
+    fileout = open(DicoPath['Rchdtxt'], 'w', encoding='utf8')
      fileout.write(txt)
      fileout.close()
  
@@ -288,7 +287,7 @@ def RPamTxt(corpus, RscriptPath):
      txt += """
      save.image(file="%s")
      """ % DicoPath['RData']
-    fileout = open(DicoPath['Rchdtxt'], 'w')
+    fileout = open(DicoPath['Rchdtxt'], 'w', encoding='utf8')
      fileout.write(txt)
      fileout.close()
  
@@ -325,7 +324,7 @@ def RchdQuest(DicoPath, RscriptPath, nbcl = 10, mincl = 10):
      txt += """
      save.image(file="%s")
      """ % ffr(DicoPath['RData'])
-    fileout = open(DicoPath['Rchdquest'], 'w')
+    fileout = open(DicoPath['Rchdquest'], 'w', encoding='utf8')
      fileout.write(txt)
      fileout.close()
      
@@ -429,7 +428,7 @@ rm(tablesqrpsup)
  rm(tablesqrpet)
  save.image(file="%s")
  """ % ffr(DictChdTxtOut['RData'])
-    file = open(DictChdTxtOut['RTxtProfGraph'], 'w')
+    file = open(DictChdTxtOut['RTxtProfGraph'], 'w', encoding='utf8')
      file.write(txt)
      file.close()
  
@@ -468,7 +467,7 @@ def write_afc_graph(self):
      else:
          nodesfile = 'NULL'
          edgesfile = 'NULL'
-    with open(self.RscriptsPath['afc_graph'], 'r') as f:
+    with open(self.RscriptsPath['afc_graph'], 'r', encoding='utf8') as f:
          txt = f.read()
  #    self.DictPathOut['RData'], \
      scripts = txt % (ffr(self.RscriptsPath['Rgraph']),\
@@ -524,7 +523,7 @@ def print_simi3d(self):
      make.simi.afc(dm,chistabletot, lim=%i, alpha = %.2f, movie = %s)
      """ % (simi3d.spin_1.GetValue(), float(simi3d.slider_1.GetValue())/100, movie)
      tmpfile = tempfile.mktemp(dir=self.parent.TEMPDIR)
-    tmp = open(tmpfile,'w')
+    tmp = open(tmpfile,'w', encoding='utf8')
      tmp.write(txt)
      tmp.close()
      return tmpfile
@@ -1173,7 +1172,7 @@ class PrintSimiScript(PrintRScript):
          if (!is.null(graph.simi$com)) {
              com <- graph.simi$com
              colm <- rainbow(length(com))
-            if (vertex.size != 0 || graph.simi$halo) {
+            if (sum(vertex.size) != 0 || graph.simi$halo) {
                  vertex.label.color <- 'black'
                  vertex.col <- colm[membership(com)]
              } else {
@@ -1385,6 +1384,17 @@ class LabbeScript(PrintRScript):
          tab <- read.csv2("%s", header=TRUE, sep=';', row.names=1)
          """ % (ffr(self.pathout['tableafcm.csv']))
          txt += """
+        cs <- colSums(tab)
+        if (min(cs) == 0) {
+            print('empty columns !!')
+            vide <- which(cs==0)
+            print(vide)
+            tab <- tab[,-vide]
+        }
+        #print('#### RcppIramuteq for C++ Labbe ####')
+        #library(RcppIramuteq)
+        #dist.mat <- labbe(as.matrix(tab))
+        #rownames(dist.mat) <- colnames(tab)
          dist.mat <- dist.labbe(tab)
          dist.mat <- as.dist(dist.mat, upper=F, diag=F)
          write.table(as.matrix(dist.mat), "%s", sep='\t')
diff --git a/ProfList.py b/ProfList.py

index 6038edb..ebc9a53 100644 (file)
--- a/ProfList.py
+++ b/ProfList.py
@@ -116,12 +116,12 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
          self.InsertColumn(6, "forme", wx.LIST_FORMAT_RIGHT)
          self.InsertColumn(7, "p", wx.LIST_FORMAT_RIGHT)
          self.SetColumnWidth(0, 60)
-        self.SetColumnWidth(1, 70)
-        self.SetColumnWidth(2, 80)
-        self.SetColumnWidth(3, 100)
-        self.SetColumnWidth(4, 70)
-        self.SetColumnWidth(5, 60)
-        self.SetColumnWidth(6, 140)
+        self.SetColumnWidth(1, 100)
+        self.SetColumnWidth(2, 100)
+        self.SetColumnWidth(3, 120)
+        self.SetColumnWidth(4, 150)
+        self.SetColumnWidth(5, 100)
+        self.SetColumnWidth(6, 300)
          self.SetColumnWidth(7, wx.LIST_AUTOSIZE)
          #These two should probably be passed to init more cleanly
          #setting the numbers of items = number of elements in the dictionary
@@ -131,8 +131,10 @@ class ProfListctrlPanel(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.Col
          #mixins
          listmix.ListCtrlAutoWidthMixin.__init__(self)
          listmix.ColumnSorterMixin.__init__(self, len(classen[0]))
-        #sort by genre (column 2), A->Z ascending order (1)
          self.SortListItems(0, True)
+        #sort by genre (column 2), A->Z ascending order (1)
+
+
          #events
          #self.Bind(wx.EVT_LIST_ITEM_SELECTED, self.OnItemSelected)
          self.Bind(wx.EVT_LIST_ITEM_ACTIVATED, self.OnPopupTwo, self)
diff --git a/Rscripts/CHD.R b/Rscripts/CHD.R

index a480df9..4e64b01 100644 (file)
--- a/Rscripts/CHD.R
+++ b/Rscripts/CHD.R
@@ -230,7 +230,7 @@ CHD<-function(data.in, x=9, mode.patate = FALSE, svd.method, libsvdc.path=NULL){
                         print('longueur < 0')
                         malcl<-length(vdelta)
                         if ((it>1)&&(!is.logical(listsub[[it]]))&&(!is.logical(listsub[[it-1]]))){
-                               if (listsub[[it]]==listsub[[(it-1)]]){
+                               if (all(listsub[[it]]==listsub[[(it-1)]])){
                                         malcl<-0
                                 }
                         }
diff --git a/Rscripts/simi.R b/Rscripts/simi.R

index 2a4c8b7..bf42008 100755 (executable)
--- a/Rscripts/simi.R
+++ b/Rscripts/simi.R
@@ -376,7 +376,7 @@ plot.simi <- function(graph.simi, p.type = 'tkplot',filename=NULL, communities =
              require(tcltk)
              ReturnVal <- tkmessageBox(title="RGL 3 D",message="Cliquez pour commencer le film",icon="info",type="ok")
  
-            movie3d(spin3d(axis=c(0,1,0),rpm=6), movie = 'film_graph', frames = "tmpfilm", duration=10, clean=TRUE, top = TRUE, dir = movie)
+            movie3d(spin3d(axis=c(0,0,1),rpm=6), movie = 'film_graph', frames = "tmpfilm", duration=10, clean=TRUE, top = TRUE, dir = movie)
              ReturnVal <- tkmessageBox(title="RGL 3 D",message="Film fini !",icon="info",type="ok")
          }
          #play3d(spin3d(axis=c(0,1,0),rpm=6))
diff --git a/checkinstall.py b/checkinstall.py

index 79b7c45..7b8e51b 100644 (file)
--- a/checkinstall.py
+++ b/checkinstall.py
@@ -135,7 +135,7 @@ def FindRPAthWin32():
          for progpath in progpaths :
              rpath = os.path.join(progpath, "R")
              if os.path.exists(rpath) :
-                for maj in range(2,4) :
+                for maj in range(3,5) :
                      for i in range(0,30):
                          for j in range(0,20):
                              for poss in ['', 'i386', 'x64'] :
diff --git a/configparser.py b/configparser.py

index 5a52260..11baaaf 100644 (file)
--- a/configparser.py
+++ b/configparser.py
@@ -713,7 +713,7 @@ class RawConfigParser(MutableMapping):
          read_ok = []
          for filename in filenames:
              try:
-                with open(filename, encoding=encoding) as fp:
+                with open(filename, encoding='utf8') as fp:
                      self._read(fp, filename)
              except IOError:
                  continue
diff --git a/configuration/global.cfg b/configuration/global.cfg

index bfa07bd..8f28997 100644 (file)
--- a/configuration/global.cfg
+++ b/configuration/global.cfg
@@ -7,6 +7,6 @@ author = Pierre Ratinaud
  gpl-fr = gpl-2.0-fr.txt
  dev = Pierre Ratinaud (Université de Toulouse - Laboratoire LERASS - ratinaud@univ-tlse2.fr);Sébastien Déjean (Université de Toulouse);David Skalinder (Mash Strategy - davids@mash.uk.com);
  translators = Italian : Emanuele Fino;Portuguese : Brigido Vizeu Camargo;Spanish : Mariola Moreno;English : Pierre Ratinaud;French : Pierre Ratinaud
-version = 0.8 alpha 1
+version = 0.8 alpha 2
  licence = GNU GPL (v2)
-version_nb = 0.8.a1
+version_nb = 0.8.a2
diff --git a/configuration/iramuteq.cfg b/configuration/iramuteq.cfg

index 263dbdd..7d24336 100644 (file)
--- a/configuration/iramuteq.cfg
+++ b/configuration/iramuteq.cfg
@@ -5,8 +5,8 @@ language=french
  guilanguage=french
  R_mem = false
  R_max_mem = 1535
-version_nb = 0.8.a1
+version_nb = 0.8.a2
  rlibs = false
  libsvdc = false
  libsvdc_path = /usr/bin/svd
-rmirror = http://cran.rstudio.com/
-\ No newline at end of file
+rmirror = http://cran.rstudio.com/
diff --git a/corpus.py b/corpus.py

index 7ab0ebb..489d4f1 100644 (file)
--- a/corpus.py
+++ b/corpus.py
@@ -20,6 +20,9 @@ from operator import itemgetter
  from uuid import uuid4
  import datetime
  from copy import copy
+#------test spacy------------
+#import spacy
+#nlp = spacy.load("fr_core_news_lg")
  
  #------------------------------------
  # import des fichiers du projet
@@ -141,7 +144,7 @@ class Corpus :
  
      def read_corpus(self) :
          log.info('read corpus')
-        self.parametres['syscoding'] = sys.getdefaultencoding()
+        self.parametres['syscoding'] = 'utf8'
          if self.conncorpus is None :
              self.conn_all()
          res = self.ccorpus.execute('SELECT * FROM etoiles;')
@@ -563,7 +566,7 @@ class Corpus :
          self.make_iduces()
          actuci = ''
          actpara = False
-        with open(outf,'w') as f :
+        with open(outf,'w', encoding='utf8') as f :
              for uce in res :
                  if self.iduces[uce[0]].uci == actuci and self.iduces[uce[0]].para == actpara :
                      f.write(uce[1] + '\n')
@@ -586,7 +589,7 @@ class Corpus :
          longueur_max = max([len(val) for val in metas])
          first = ['column_%i' % i for i in range(longueur_max)]
          metas.insert(0, first)
-        with open(outf, 'w') as f :
+        with open(outf, 'w', encoding='utf8') as f :
              f.write('\n'.join(['\t'.join(line) for line in metas]))
  
      def export_corpus_classes(self, outf, alc = True, lem = False, uci = False) :
@@ -601,7 +604,7 @@ class Corpus :
              self.make_iduces()
          else :
              res = self.getallucis()
-        with open(outf, 'w') as f :
+        with open(outf, 'w', encoding='utf8') as f :
              for uce in res :
                  guce = uce[1]
                  if not uci :
@@ -624,7 +627,7 @@ class Corpus :
              self.make_iduces()
          else :
              res = self.getuciconcorde(sts)
-        with open(outf, 'w') as f :
+        with open(outf, 'w', encoding='utf8') as f :
              for uce in res :
                  guce = uce[1]
                  if not uci :
@@ -649,7 +652,7 @@ class Corpus :
              outf = os.path.join(rep, outf)
              if lem :
                  guce = ' '.join([self.formes[forme].lem for forme in guce.split()])
-            with open(outf, 'w') as f :
+            with open(outf, 'w', encoding='utf8') as f :
                  f.write(guce) #.encode('cp1252', errors = 'replace'))
  
      def export_tropes(self, fileout, classe, lem = False, uci = False) :
@@ -659,7 +662,7 @@ class Corpus :
              self.make_iduces()
          else :
              res = self.getuciconcorde(sts)
-        with open(fileout, 'w') as f :
+        with open(fileout, 'w', encoding='utf8') as f :
              for uce in res :
                  guce = uce[1]
                  if lem :
@@ -800,7 +803,7 @@ class Corpus :
          else :
              tab = [[lem] + [len(set(self.getlemuces(lem)).intersection(classe)) for classe in ucecl] for lem in actives]
          tab = [[line[0]] + [repr(val) for val in line[1:]] for line in tab if sum(line[1:]) >= 3]
-        with open(fileout, 'w') as f :
+        with open(fileout, 'w', encoding='utf8') as f :
              f.write('\n'.join([';'.join(line) for line in tab]))
  
      def make_etoiles(self) :
@@ -876,7 +879,7 @@ class Corpus :
          else :
              etoileuces = self.getetoileucis()
          etoileuces = dict([[et, etoileuces[et]] for et in etoileuces if len(etoileuces[et]) > 1])
-        with open(fileout, 'w') as f :
+        with open(fileout, 'w', encoding='utf8') as f :
              f.write('\n'.join([';'.join([et] + [repr(len(set(etoileuces[et]).intersection(classe))) for classe in ucecl]) for et in etoileuces])) #.encode(self.parametres['syscoding'])
          #etoiles = self.make_etoiles()
          #with open(fileout, 'w') as f :
@@ -891,9 +894,9 @@ class Corpus :
              ucecl[uce] = 0
          color = ['black'] + colors[len(self.lc) - 1]
          txt = '''<html>
-        <meta http-equiv="content-Type" content="text/html; charset=%s" />
+        <meta http-equiv="content-Type" content="text/html; charset=utf8" />
          <body>
-''' % sys.getdefaultencoding()
+'''
          if not uci :
              res = self.getalluces()
              self.make_iduces()
@@ -999,7 +1002,7 @@ class Corpus :
                  for taille_segment in range(lenmin,lenmax) :
                      d =self.count_from_list_cl([' '.join(uce[i:i+taille_segment]) for i in range(len(uce)-(taille_segment - 1))], d, b, len(self.lc))
          result = [[seg] + [str(val) for val in d[seg]] for seg in d if sum(d[seg]) >= effmin]
-        with open(fileout, 'w') as f :
+        with open(fileout, 'w', encoding='utf8') as f :
              f.write('\n'.join([';'.join(line) for line in result]))
  
      def make_proftype(self, outf) :
@@ -1014,7 +1017,7 @@ class Corpus :
                  res[gram][i] += sum([lemuceeff[uce] for uce in concern])
          res = [[gram] + [repr(val) for val in res[gram]] for gram in res]
          res.sort()
-        with open(outf, 'w') as f :
+        with open(outf, 'w', encoding='utf8') as f :
              f.write('\n'.join([';'.join(line) for line in res]))
  
      def make_ucecl_from_R(self, filein) :
@@ -1054,7 +1057,7 @@ class Corpus :
          log.info('%f' % (time() - t1))
          if outf is not None :
              toprint = '\n'.join([';'.join([repr(i), repr(occurrences[i]), repr(formescl[i]), repr(hapaxcl[i]), repr(lenclasses[i]), repr(float(hapaxcl[i])/float(formescl[i]))]) for i in occurrences])
-            with open(outf, 'w') as f :
+            with open(outf, 'w', encoding='utf8') as f :
                  f.write(toprint)
          else :
              return [[repr(occurrences[i]), repr(formescl[i]), repr(hapaxcl[i]), repr(lenclasses[i]), repr(float(hapaxcl[i])/float(formescl[i]))] for i in occurrences]
@@ -1118,21 +1121,21 @@ class Corpus :
          txt += """
          </body></html>
          """
-        with open('/tmp/testhapxuce.html','w') as f :
+        with open('/tmp/testhapxuce.html','w', encoding='utf8') as f :
              f.write(txt)
  
      def export_dictionary(self, fileout, syscoding) :
          listformes = [[self.formes[forme].freq, forme, self.formes[forme].lem, self.formes[forme].gram] for forme in self.formes]
          listformes.sort(reverse = True)
          listformes = [forme[1:] + [repr(forme[0])] for forme in listformes]
-        with open(fileout, 'w') as f :
+        with open(fileout, 'w', encoding='utf8') as f :
              f.write('\n'.join(['\t'.join(forme) for forme in listformes]))
  
      def export_lems(self, fileout, syscoding) :
          self.make_idformes()
          listlem = [[lem, '\t'.join(['\t'.join([self.idformes[forme].forme, repr(self.lems[lem].formes[forme])]) for forme in self.lems[lem].formes])] for lem in self.lems]
          listlem.sort()
-        with open(fileout, 'w') as f :
+        with open(fileout, 'w', encoding='utf8') as f :
              f.write('\n'.join(['\t'.join(lem) for lem in listlem]))
  
  
@@ -1546,6 +1549,7 @@ class BuildSubCorpus(BuildCorpus):
  class BuildFromAlceste(BuildCorpus) :
  
      def read_corpus(self, infile) :
+
          if self.dlg is not None :
              self.dlg.Pulse('textes : 0 - segments : 0')
          self.limitshow = 0
@@ -1565,6 +1569,8 @@ class BuildFromAlceste(BuildCorpus) :
                      if self.testuci(line) :
                          iduci += 1
                          if txt != [] :
+                            #doc = nlp(' '.join(txt))
+                            #print([[word, word.pos_, word.lemma_] for word in doc])
                              iduce, idpara = self.treattxt(txt, iduce, idpara, iduci - 1)
                              txt = []
                              self.corpus.ucis.append(Uci(iduci, line))
diff --git a/dialog.py b/dialog.py

index 93fd3e4..4143526 100755 (executable)
--- a/dialog.py
+++ b/dialog.py
@@ -559,11 +559,11 @@ memory for R""")
              parent.pref.set('iramuteq', 'libsvdc', 'false')
          parent.pref.set('iramuteq', 'libsvdc_path', self.fbb.GetValue())
          self.parent.pref.set('iramuteq', 'rmirror', self.mirror_value.GetValue())
-        file = open(parent.ConfigPath['preferences'], 'w')
+        file = open(parent.ConfigPath['preferences'], 'w', encoding='utf8')
          parent.pref.write(file)
          file.close()
          self.parent.PathPath.set('PATHS', 'rpath', self.Rpath_value.GetValue())
-        with open(self.parent.ConfigPath['path'], 'w') as f:
+        with open(self.parent.ConfigPath['path'], 'w', encoding='utf8') as f:
              self.parent.PathPath.write(f)
          self.Close()
  
@@ -1231,7 +1231,7 @@ class PrefSegProf(wx.Dialog) :
          bSizer1 = wx.BoxSizer( wx.VERTICAL )
          txt = _("Be carefull : computation of repeated segments profiles can be very long on large corpus")
          self.label = wx.StaticText( self, wx.ID_ANY, txt, wx.DefaultPosition, wx.DefaultSize, 0 )
-        bSizer1.Add( self.label, 0, wx.ALL | wx.ALIGN_CENTER_VERTICAL, 5 )
+        bSizer1.Add( self.label, 0, wx.ALL, 5 )
          fgSizer1 = wx.FlexGridSizer( 0, 2, 0, 0 )
          fgSizer1.SetFlexibleDirection( wx.BOTH )
          fgSizer1.SetNonFlexibleGrowMode( wx.FLEX_GROWMODE_SPECIFIED )
@@ -1269,7 +1269,7 @@ class PrefSegProf(wx.Dialog) :
          btn_ok.SetDefault()
          btnsizer.AddButton(btn_ok)
          btnsizer.Realize()
-        bSizer1.Add(btnsizer, 0, wx.ALIGN_CENTER_VERTICAL|wx.ALIGN_RIGHT, 5)
+        bSizer1.Add(btnsizer, 0, wx.ALIGN_RIGHT, 5)
          self.SetSizer( bSizer1 )
          self.Layout()
          bSizer1.Fit( self )
@@ -2499,7 +2499,7 @@ class message(wx.Frame):
          self.html = '<br>'.join([self.items[i] for i in range(0,len(self.items))])
          if dlg.ShowModal() == wx.ID_OK:
              path = dlg.GetPath()
-            with open(path, 'w') as f :
+            with open(path, 'w', encoding='utf8') as f :
                  f.write(self.html)
  
      def OnLink(self, evt):
@@ -3024,7 +3024,7 @@ class BarFrame :
                  parametres['tree'] = tree
              txt = barplot(table, parametres)
              tmpscript = tempfile.mktemp(dir=ira.TEMPDIR)
-            with open(tmpscript,'w') as f :
+            with open(tmpscript,'w', encoding='utf8') as f :
                  f.write(txt)
              exec_rcode(ira.RPath, tmpscript, wait = True)
              win = ImageViewer(ira, parametres, _("Graphic"), size=(700, 500))
diff --git a/elcategorizator.py b/elcategorizator.py

new file mode 100644 (file)

index 0000000..b9f531c
--- /dev/null
+++ b/elcategorizator.py
@@ -0,0 +1,1005 @@
+# -*- coding: utf-8 -*-
+#Author: Pierre Ratinaud
+#Copyright (c) 2022 Pierre Ratinaud
+#License: GNU/GPL
+
+import os
+import wx
+import wx.xrc
+#from wx.lib.splitter import MultiSplitterWindow
+from listlex import *
+import pickle
+import json
+
+
+
+class CategoDict :
+    """In-memory model of a word categorization, persisted as JSON.
+
+    ``self.cate`` is a dict with two keys:
+      - 'TOCATE' : {word: eff} for the words not assigned to any category ;
+      - 'CATE'   : {category: [total_eff, {word: eff}]} for categorized words,
+                   where total_eff is kept equal to the sum of the words' eff.
+    """
+
+    def __init__(self, pathout = None):
+        """Load the categorization.
+
+        pathout -- mapping giving the path of 'cate.json' ; when None, a small
+                   built-in sample categorization is used instead.
+        """
+        self.pathout = pathout
+        self.cate = self.readjson()
+        # number of distinct words (uncategorized + categorized), computed once at load time
+        self.lenwords = len(self.cate['TOCATE']) + sum(len(content[1]) for content in self.cate['CATE'].values())
+
+    def readjson(self):
+        """Return the categorization read from 'cate.json' (or the sample one when pathout is None)."""
+        if self.pathout is not None :
+            # read with the same encoding save() writes with, whatever the platform locale is
+            with open(self.pathout['cate.json'], 'r', encoding='utf8') as f :
+                cate = json.load(f)
+        else :
+            cate = {'TOCATE' : {'word1': 3, 'word2' : 2, 'word3' : 5}, 'CATE': {'cat1' : [34,{'word6':30, 'word7':4}], 'cat2' : [20,{'word20':20}]}}
+        return cate
+
+    def save(self) :
+        """Write the current categorization to 'cate.json' as indented JSON."""
+        with open(self.pathout['cate.json'], 'w', encoding='utf8') as f :
+            f.write(json.dumps(self.cate, indent=4))
+        print("json saved!")
+
+    def exportdict(self):
+        """Placeholder : not implemented."""
+        pass
+
+    def getcate(self) :
+        """Return {index: [category, total_eff]} for every category, in dict order."""
+        return {i: [name, content[0]] for i, (name, content) in enumerate(self.cate['CATE'].items())}
+
+    def getwordstocate(self) :
+        """Return {index: [word, eff]} for every uncategorized word, in dict order."""
+        return {i: [word, eff] for i, (word, eff) in enumerate(self.cate['TOCATE'].items())}
+
+    def getcatewords(self, cat) :
+        """Return {index: [word, eff]} for the words of category cat ({} if cat is unknown)."""
+        if cat not in self.cate['CATE'] :
+            return {}
+        return {i: [word, eff] for i, (word, eff) in enumerate(self.cate['CATE'][cat][1].items())}
+
+    def getwordscate(self) :
+        """Return {word: label} : the category for categorized words, the word itself otherwise."""
+        wc = {}
+        for word in self.cate['TOCATE'] :
+            wc[word] = word
+        for categorie in self.cate['CATE'] :
+            for word in self.cate['CATE'][categorie][1] :
+                wc[word] = categorie
+        return wc
+
+    def addwordincate(self, categorie, word, eff) :
+        """Move an uncategorized word (with its eff) into an existing category."""
+        self.cate['CATE'][categorie][1][word] = eff
+        self.cate['CATE'][categorie][0] += eff
+        del(self.cate['TOCATE'][word])
+
+    def addwordinwords(self, categorie, word, eff) :
+        """Move a word from categorie back to the uncategorized words.
+
+        The category is removed when its total eff drops to 0.
+        """
+        print(categorie, word, eff)
+        self.cate['TOCATE'][word] = eff
+        self.cate['CATE'][categorie][0] -= eff
+        del(self.cate['CATE'][categorie][1][word])
+        if self.cate['CATE'][categorie][0] == 0 :
+            del(self.cate['CATE'][categorie])
+
+    def findcatefromword(self, word) :
+        """Return the first category containing word, or None if it is in no category."""
+        for categorie in self.cate['CATE'] :
+            if word in self.cate['CATE'][categorie][1] :
+                return categorie
+        return None
+
+    def changewordcate(self, newcate, word, eff) :
+        """Move an already categorized word to the category newcate.
+
+        The old category is removed when its total eff drops to 0.
+        The word must currently belong to a category (KeyError otherwise).
+        """
+        oldcat = self.findcatefromword(word)
+        del(self.cate['CATE'][oldcat][1][word])
+        self.cate['CATE'][oldcat][0] -= eff
+        self.cate['CATE'][newcate][1][word] = eff
+        self.cate['CATE'][newcate][0] += eff
+        if self.cate['CATE'][oldcat][0] == 0 :
+            del(self.cate['CATE'][oldcat])
+
+    def addcatefromwordtocate(self, word, eff) :
+        """Create a category named after an uncategorized word and put the word in it.
+
+        Return False (and change nothing) if a category with that name exists, True otherwise.
+        """
+        if word in self.cate['CATE'] :
+            return False
+        else :
+            self.cate['CATE'][word]=[eff,{word:eff}]
+            del(self.cate['TOCATE'][word])
+            return True
+
+    def addcatefromwordcate(self, word, eff) :
+        """Create a category named after an already categorized word and move the word into it.
+
+        Return False (and change nothing) if a category with that name exists, True otherwise.
+        The old category is removed when its total eff drops to 0.
+        """
+        if word in self.cate['CATE'] :
+            return False
+        else :
+            # look the old category up before creating the new one, which also contains word
+            oldcat = self.findcatefromword(word)
+            self.cate['CATE'][word]=[eff,{word:eff}]
+            del(self.cate['CATE'][oldcat][1][word])
+            self.cate['CATE'][oldcat][0] -= eff
+            if self.cate['CATE'][oldcat][0] == 0 :
+                del(self.cate['CATE'][oldcat])
+            return True
+
+    def delcate(self, categorie) :
+        """Delete a category and send all its words back to the uncategorized words."""
+        for word in self.cate['CATE'][categorie][1] :
+            self.cate['TOCATE'][word] = self.cate['CATE'][categorie][1][word]
+        del(self.cate['CATE'][categorie])
+
+    def loadcate(self, infile) :
+        """Apply the categories of another JSON categorization file to the current words.
+
+        Only allowed when no category exists yet. Every category of infile is
+        created ; the words it lists are moved into it only if they are present
+        in the current uncategorized words, with their current eff (the eff
+        stored in infile is ignored).
+
+        Return False if categories already exist (nothing is loaded), True otherwise.
+        """
+        if self.cate['CATE'] != {} :
+            print("Categories should be empty")
+            return False
+        # same encoding as save(), so a saved categorization can be reloaded on any platform
+        with open(infile, 'r', encoding='utf8') as f :
+            newcate = json.load(f)
+        for categorie in newcate['CATE'] :
+            self.cate['CATE'][categorie] = [0,{}]
+            for word in newcate['CATE'][categorie][1] :
+                if word in self.cate['TOCATE'] :
+                    self.cate['CATE'][categorie][1][word] = self.cate['TOCATE'][word]
+                    self.cate['CATE'][categorie][0] += self.cate['TOCATE'][word]
+                    del(self.cate['TOCATE'][word])
+        return True
+
+    def makestat(self) :
+        """Return (nbtocat, totocat, nbcate, totcate, lenwordincate).
+
+        nbtocat       -- number of uncategorized words
+        totocat       -- sum of the eff of the uncategorized words
+        nbcate        -- number of categories
+        totcate       -- sum of the total eff of the categories
+        lenwordincate -- number of categorized words
+        """
+        totocat = sum(self.cate['TOCATE'].values())
+        nbtocat = len(self.cate['TOCATE'])
+        nbcate = len(self.cate['CATE'])
+        totcate = sum(content[0] for content in self.cate['CATE'].values())
+        lenwordincate = sum(len(content[1]) for content in self.cate['CATE'].values())
+        return nbtocat, totocat, nbcate, totcate, lenwordincate
+
+
+#cate = CategoDict()
+
+
+class ElCategorizator ( wx.Panel ):
+    """Panel used to sort words into categories.
+
+    It shows three lists side by side (words to categorize, categories, words
+    of a category), a row of buttons (save, exports, load) above them and a
+    statistics line below them. The data live in a CategoDict (self.cate).
+    """
+
+    def __init__( self, parent, pathout, tableau, id = wx.ID_ANY, pos = wx.DefaultPosition, size = wx.Size( 500,300 ), style = wx.TAB_TRAVERSAL, name = wx.EmptyString ):
+        """Build the panel.
+
+        parent  -- parent window
+        pathout -- object giving the output paths ('cate.json', 'tableout.csv',
+                   'dictionnary.txt') by key and the output directory as .dirout
+        tableau -- table object providing selected_col and select_col()
+        The remaining arguments are passed unchanged to wx.Panel.
+        """
+        wx.Panel.__init__ ( self, parent, id = id, pos = pos, size = size, style = style, name = name )
+        self.pathout = pathout
+        self.parent = parent
+        self.tableau = tableau
+
+        self.cate = CategoDict(self.pathout)
+        gsizer =  wx.BoxSizer( wx.VERTICAL )
+
+        # the three lists, side by side
+        bSizer1 = wx.BoxSizer( wx.HORIZONTAL )
+
+        self.m_listToCate = ListForWords(self, dlist = self.cate, first = ['eff'])
+        bSizer1.Add( self.m_listToCate, 2, wx.ALL|wx.EXPAND, 5 )
+
+        self.m_listCate = ListForCate(self, dlist = self.cate, first = ['eff'])
+        bSizer1.Add( self.m_listCate, 1, wx.ALL|wx.EXPAND, 5 )
+
+        self.m_listCateWords = ListForCateWords(self, dlist = self.cate, first = ['eff'])
+        bSizer1.Add( self.m_listCateWords, 1, wx.ALL|wx.EXPAND, 5 )
+
+        # the buttons
+        bSizer2 = wx.BoxSizer( wx.HORIZONTAL )
+
+        self.butsave = wx.Button( self, wx.ID_SAVE, u"Save", wx.DefaultPosition, wx.DefaultSize, 0 )
+        bSizer2.Add( self.butsave, 0, wx.ALL, 5 )
+
+        self.butcsv = wx.Button( self, wx.ID_ANY, u"Export Columns", wx.DefaultPosition, wx.DefaultSize, 0 )
+        bSizer2.Add( self.butcsv, 0, wx.ALL, 5 )
+
+        self.butdict = wx.Button( self, wx.ID_ANY, u"Export dictonary", wx.DefaultPosition, wx.DefaultSize, 0 )
+        bSizer2.Add( self.butdict, 0, wx.ALL, 5 )
+
+        self.butload = wx.Button( self, wx.ID_ANY, u"Load a categorization", wx.DefaultPosition, wx.DefaultSize, 0 )
+        bSizer2.Add( self.butload, 0, wx.ALL, 5 )
+
+        # the statistics line
+        bSizer3 = wx.BoxSizer( wx.HORIZONTAL )
+
+        # NOTE(review): nbword and wordtxt are not used in this class - presumably filled in by the lists ; confirm
+        self.nbword = """Words : {:d} ({:d}) | """
+
+        self.stat = """ Words to categorize : {:d} ({}%) - {:d} ({}%) -- Categories : {:d} - {:d} ({}%) - {:d} ({}%)"""
+        self.wordtxt = wx.StaticText(self, -1, "")
+        bSizer3.Add( self.wordtxt, 0, wx.ALL, 5 )
+        self.stattxt = wx.StaticText(self, -1, "")
+        bSizer3.Add( self.stattxt, 0, wx.ALL, 5 )
+
+
+        # buttons on top, lists in the middle (the only stretchable row), statistics at the bottom
+        gsizer.Add( bSizer2, 0, wx.EXPAND, 5 )
+        gsizer.Add( bSizer1, 2, wx.EXPAND, 5 )
+        gsizer.Add( bSizer3, 0, wx.EXPAND, 5 )
+
+        self.butsave.Bind(wx.EVT_BUTTON, self.OnSave)
+        self.butcsv.Bind(wx.EVT_BUTTON, self.OnCSV)
+        self.butdict.Bind(wx.EVT_BUTTON, self.OnDict)
+        # same colour as the one set after a save
+        self.butsave.SetBackgroundColour((14, 242, 14, 255))
+        self.butload.Bind(wx.EVT_BUTTON, self.OnLoad)
+        self.OnStat()
+        self.SetSizer( gsizer )
+        self.Layout()
+
+    def __del__( self ):
+        pass
+
+
+    def OnLoad(self, event) :
+        """Ask for a JSON categorization file and apply its categories.
+
+        Refused (with a console message) when categories already exist.
+        """
+        if len(self.cate.cate['CATE']) != 0 :
+            print("Categories should be empty")
+            event.Skip()
+            return
+        wildcard = "json|*.json|" \
+                   "All file|*.*"
+        dlg = wx.FileDialog(
+             self, message="Choose a file",
+             defaultDir=self.pathout.dirout,
+             defaultFile="",
+             wildcard=wildcard,
+             style=wx.FD_OPEN |
+                   wx.FD_CHANGE_DIR | wx.FD_FILE_MUST_EXIST |
+                   wx.FD_PREVIEW
+             )
+
+        if dlg.ShowModal() == wx.ID_OK:
+            paths = dlg.GetPaths()
+            path = paths[0]
+            self.cate.loadcate(path)
+            self.m_listCate.RefreshData(self.cate.getcate())
+            self.m_listToCate.RefreshData(self.cate.getwordstocate())
+            # the counts have changed : keep the statistics line in sync
+            self.OnStat()
+        dlg.Destroy()
+
+    def OnSave(self, event) :
+        """Save the categorization and reset the save button colour."""
+        self.cate.save()
+        self.butsave.SetBackgroundColour((14, 242, 14, 255))
+
+    def _categorized_table(self) :
+        """Return the selected columns with every word replaced by its category.
+
+        The first row is a header ('category1', 'category2', ...), one name per
+        selected column ; values unknown to the categorization are kept as is.
+        """
+        wordscate = self.cate.getwordscate()
+        newtab = [['category%i' % i for i in range(1, len(self.tableau.selected_col)+1)]]
+        for line in self.tableau.select_col(self.tableau.selected_col):
+            newtab.append([wordscate.get(word,word) for word in line])
+        return newtab
+
+    def OnCSV(self, event) :
+        """Export the categorized columns to 'tableout.csv' (tab separated, utf8)."""
+        newtab = self._categorized_table()
+        # explicit encoding : the locale default may not be able to encode every word
+        with open(self.pathout['tableout.csv'], 'w', encoding='utf8') as f :
+            f.write('\n'.join(['\t'.join(line) for line in newtab]))
+        print("csv exported !")
+
+    def OnDict(self, event):
+        """Export the categorization to 'dictionnary.txt' (utf8).
+
+        Each category is written with its total, followed by its words
+        (indented by a tab) ; the uncategorized words come last.
+        """
+        # explicit encoding : the locale default may not be able to encode every word
+        with open(self.pathout['dictionnary.txt'], 'w', encoding='utf8') as f :
+            for categorie in self.cate.cate['CATE'] :
+                f.write(categorie + ': \t' + repr(self.cate.cate['CATE'][categorie][0]) + '\n')
+                for word in self.cate.cate['CATE'][categorie][1] :
+                    f.write('\t' + word + ': \t' + repr(self.cate.cate['CATE'][categorie][1][word]) + '\n')
+            for word in self.cate.cate['TOCATE'] :
+                f.write(word + ':\t' + repr(self.cate.cate['TOCATE'][word]) + '\n')
+        print("dictionnary exported !")
+
+    def OnStat(self) :
+        """Recompute the statistics and display them in the statistics line."""
+        def percent(part, whole) :
+            # an empty categorization has nothing to divide by : show 0.0 instead of raising ZeroDivisionError
+            if whole == 0 :
+                return repr(0.0)
+            return repr(round((part/whole) * 100, 2))
+
+        nbtocat, totocat, nbcate, totcate, lenwordincate = self.cate.makestat()
+        totwords = totocat + totcate
+        lenwords = self.cate.lenwords
+        prtocat = percent(nbtocat, lenwords)
+        prtotocat = percent(totocat, totwords)
+        prcate = percent(totcate, totwords)
+        prwordincate = percent(lenwordincate, lenwords)
+        self.stattxt.SetLabel(self.stat.format(nbtocat, prtocat, totocat, prtotocat, nbcate, lenwordincate, prwordincate, totcate, prcate))
+
+    def OnAddToTable(self) :
+        """Build the categorized table.
+
+        NOTE(review): the table is built and then discarded, as in the original
+        code - this looks unfinished ; confirm what should be done with it.
+        """
+        self._categorized_table()
+
+
+
+#class ListPanel(wx.Panel) :
+#     def __init__(self, parent, gparent, List):
+#        wx.Panel.__init__(self, parent, style=wx.BORDER_SUNKEN)
+#        self.parent = parent
+#        self.cate = gparent.cate
+#        self.list = List(self, dlist = gparent.cate, first = ['eff'])
+
+
+#class ElCategorizator ( wx.Panel ):
+#
+#    def __init__( self, parent, pathout, tableau, id = wx.ID_ANY, pos = wx.DefaultPosition, size = wx.Size( 500,300 ), style = wx.TAB_TRAVERSAL, name = wx.EmptyString ):
+#        wx.Panel.__init__ ( self, parent, id = id, pos = pos, size = size, style = style, name = name )
+#        self.pathout = pathout
+#        self.parent = parent
+#        self.tableau = tableau
+#
+#        self.cate = CategoDict(self.pathout)
+#
+#        splitter = MultiSplitterWindow(self, style=wx.SP_LIVE_UPDATE)
+#        self.splitter = splitter
+#        sizer = wx.BoxSizer(wx.HORIZONTAL)
+#        sizer.Add(splitter, 1, wx.EXPAND)
+#        self.SetSizer(sizer)
+#
+#        panelwords = ListPanel(splitter, self, ListForWords)
+#        splitter.AppendWindow(panelwords, 150)
+#        panelcate = ListPanel(splitter, self, ListForCate)
+#        splitter.AppendWindow(panelcate, 150)
+#        panelwordscate = ListPanel(splitter, self, ListForCateWords)
+#        splitter.AppendWindow(panelwordscate, 150)
+#        self.m_listToCate = panelwords.list
+#        self.m_listCate = panelcate.list
+#        self.m_listCateWords = panelwordscate.list
+#
+#    def __del__( self ):
+#        pass
+#
+#    def OnSave(self, event) :
+#        self.cate.save()
+#        self.butsave.SetBackgroundColour((14, 242, 14, 255))
+#
+#    def OnCSV(self, event) :
+#        wordscate = self.cate.getwordscate()
+#        newtab = [['category%i' % i for i in range(1, len(self.tableau.selected_col)+1)]]
+#        for line in self.tableau.select_col(self.tableau.selected_col):
+#            newline = []
+#            for word in line :
+#                newline.append(wordscate.get(word,word))
+#            newtab.append(newline)
+#        with open(self.pathout['tableout.csv'], 'w') as f :
+#            f.write('\n'.join(['\t'.join(line) for line in newtab]))
+#        print("csv exported !")
+#
+#    def OnDict(self, event):
+#        with open(self.pathout['dictionnary.txt'], 'w') as f :
+#            for categorie in self.cate.cate['CATE'] :
+#                f.write(categorie + ': \t' + repr(self.cate.cate['CATE'][categorie][0]) + '\n')
+#                for word in self.cate.cate['CATE'][categorie][1] :
+#                    f.write('\t' + word + ': \t' + repr(self.cate.cate['CATE'][categorie][1][word]) + '\n')
+#            for word in self.cate.cate['TOCATE'] :
+#                f.write(word + ':\t' + repr(self.cate.cate['TOCATE'][word]) + '\n')
+#        print("dictionnary exported !")
+
+
+class ListForCate(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSorterMixin):
+    """
+    Virtual list control showing the categories.
+
+    Rows are read from the dict returned by self.cate.getcate(). The control
+    is a drag source and, through MyListDropCate, a drop target for the
+    "ListCtrlItems" data format. The parent window must provide cate,
+    m_listToCate, m_listCateWords, butsave and OnStat().
+    """
+
+    def __init__(self, parent, dlist = {}, first = [], usefirst = False, menu = True):
+        """
+        parent: window owning the shared ``cate`` object.
+        first: titles of the columns that follow 'Categories'.
+        dlist, usefirst, menu: accepted but not used by this constructor.
+        """
+        wx.ListCtrl.__init__( self, parent, -1, style=wx.LC_REPORT|wx.LC_VIRTUAL|wx.LC_HRULES|wx.LC_VRULES|wx.LC_EDIT_LABELS|wx.LC_SINGLE_SEL)
+        self.parent=parent
+        self.cate = self.parent.cate
+        self.dlist= self.cate.getcate()
+        self.first = first
+        self.il = wx.ImageList(20, 20)
+        a={"sm_up":"GO_UP","sm_dn":"GO_DOWN","w_idx":"WARNING","e_idx":"ERROR","i_idx":"QUESTION", "p_idx":"PLUS"}
+        # Store each image index as an attribute (self.sm_up, self.p_idx, ...)
+        # with setattr/getattr rather than exec() on generated source code.
+        for k,v in a.items():
+            bitmap = wx.ArtProvider.GetBitmap(getattr(wx, 'ART_%s' % v), wx.ART_TOOLBAR, (20,20))
+            setattr(self, k, self.il.Add(bitmap))
+        self.SetImageList(self.il, wx.IMAGE_LIST_SMALL)
+        # Two alternating row backgrounds, see OnGetItemAttr.
+        self.attr1 = wx.ListItemAttr()
+        self.attr1.SetBackgroundColour((230, 230, 230))
+        self.attr2 = wx.ListItemAttr()
+        self.attr2.SetBackgroundColour("light blue")
+        self.SetListFont()
+        self.selected = {}
+        for i, name in enumerate(['Categories'] + self.first) :
+            self.InsertColumn(i,name,wx.LIST_FORMAT_LEFT)
+        # Attributes read by ColumnSorterMixin and by the OnGetItem* callbacks.
+        self.itemDataMap = self.dlist
+        self.itemIndexMap = list(self.dlist.keys())
+        self.SetItemCount(len(self.dlist))
+        listmix.ListCtrlAutoWidthMixin.__init__(self)
+        listmix.ColumnSorterMixin.__init__(self, len(self.first) + 1)
+
+        self.SetColumnWidth(0, 300)
+        self.SetColumnWidth(1, wx.LIST_AUTOSIZE)
+
+        self.Bind(wx.EVT_LIST_BEGIN_DRAG, self.StartDrag)
+        self.Bind(wx.EVT_LIST_ITEM_FOCUSED, self.ShowWords)
+        self.Bind(wx.EVT_LIST_ITEM_ACTIVATED, self.OnBeginEdit)
+        self.Bind(wx.EVT_LIST_END_LABEL_EDIT, self.OnEndEdit)
+        self.Bind(wx.EVT_LIST_ITEM_SELECTED, self.OnItemSelected)
+        self.Bind(wx.EVT_LIST_COL_CLICK, self.OnSortColumn)
+
+        if self.GetItemCount() != 0 :
+            self.Select(0, on=1)
+
+        dt = MyListDropCate(self)
+        self.SetDropTarget(dt)
+
+    def OnSortColumn(self, evt) :
+        """Print the current row (debug output) and let the event propagate."""
+        # currentItem only exists once a row has been selected; reading it
+        # directly raised AttributeError and prevented evt.Skip() from running.
+        print(getattr(self, 'currentItem', None))
+        evt.Skip()
+
+    def SetListFont(self) :
+        """Use a 16 point default font for the list."""
+        self.SetFont(wx.Font(16, wx.FONTFAMILY_DEFAULT, wx.FONTSTYLE_NORMAL, wx.FONTWEIGHT_NORMAL))
+
+    def OnGetItemImage(self, item):
+        """Virtual list callback: every row uses the image stored as p_idx."""
+        return self.p_idx
+
+    def OnBeginEdit(self, event) :
+        """Start editing the label of the activated row."""
+        self.EditLabel(event.GetIndex())
+        event.Skip()
+
+    def OnEndEdit(self, event) :
+        """
+        Rename the edited category, then reload the list.
+
+        The rename is skipped when the edit was cancelled, when the new label
+        is empty or when it is already the name of a category.
+        """
+        newlabel = event.GetLabel()
+        idx = event.GetIndex()
+        oldlabel = self.GetItemText(idx)
+        categories = self.cate.cate['CATE']
+        # Without the first two tests a cancelled edit, which reports an
+        # empty label, renamed the category to ''.
+        if not event.IsEditCancelled() and newlabel and newlabel not in categories :
+            categories[newlabel] = categories[oldlabel]
+            del categories[oldlabel]
+        self.RefreshData(self.cate.getcate())
+
+    def ShowWords(self, event) :
+        """Show the words of the focused category in parent.m_listCateWords."""
+        index = event.GetIndex()
+        try :
+            data = self.cate.getcatewords(self.GetItemText(index))
+            self.parent.m_listCateWords.RefreshData(data)
+            self.parent.m_listCateWords.SetSelection(0)
+        except Exception :
+            # Best effort, as before, but no longer swallowing
+            # KeyboardInterrupt / SystemExit.
+            pass
+        event.Skip()
+
+    def RefreshData(self, data):
+        """
+        Replace the displayed rows by ``data`` and re-apply the current sort.
+
+        data: dict of rows, in the format of self.itemDataMap.
+        Also calls parent.OnStat() and tries to restore the selected row.
+        """
+        item = getattr(self, 'currentItem', 0)
+        self.itemDataMap = data
+        self.itemIndexMap = list(data.keys())
+        self.SetItemCount(len(data))
+        order = self._colSortFlag[self._col]
+        self.SortListItems(self._col, order)
+        self.parent.OnStat()
+        self.Refresh()
+        try :
+            self.SetSelection(item)
+            self.Focus(item)
+        except Exception :
+            # Restoring the selection is best effort only.
+            pass
+
+    def GetListCtrl(self):
+        """Required by ColumnSorterMixin: the list control is this object."""
+        return self
+
+    def GetSortImages(self):
+        """Required by ColumnSorterMixin: (descending, ascending) image indexes."""
+        return (self.sm_dn, self.sm_up)
+
+    def SortItems(self, sorter=None):
+        """
+        Sort the rows on column self._col and reselect the previous row.
+
+        sorter: ignored; the order is taken from self._colSortFlag.
+        """
+        # First field of the current row, used to find it again after sorting.
+        word = None
+        try :
+            word = self.GetItemData(self.currentItem)[0]
+        except Exception as e: print('word',e)
+
+        listTemp = sorted(self.itemDataMap.items(),
+            key=lambda x:x[1][self._col], reverse= (self._colSortFlag[self._col]!=True))
+        dlist = dict(listTemp)
+        self.itemDataMap = dlist
+        self.itemIndexMap = list(dlist.keys())
+        self.Refresh() # redraw the list
+        if word is None :
+            # No current row: nothing to reselect (this case used to surface
+            # as a caught NameError on ``word``).
+            return
+        try :
+            formes = [self.getColumnText(i, 0) for i in range(self.GetItemCount())]
+            idx = [i for i, val in enumerate(formes) if val == word][0]
+            self.SetSelection(idx)
+            self.Focus(idx)
+        except Exception as e: print(e)
+
+    def OnGetItemText(self, item, col):
+        """Virtual list callback: text of cell (item, col); numbers become str."""
+        index=self.itemIndexMap[item]
+        s = self.itemDataMap[index][col]
+        if isinstance(s, (int, float)):
+            return str(s)
+        else:
+            return s
+
+    def OnGetItemAttr(self, item):
+        """Virtual list callback: alternate the two row backgrounds."""
+        if item % 2 :
+           return self.attr1
+        else :
+           return self.attr2
+
+    def getselectedwords(self) :
+        """Return the first-column text of every selected row."""
+        last = self.GetFirstSelected()
+        words = [self.getColumnText(last, 0)]
+        following = self.GetNextSelected(last)
+        while following != -1:
+            last = following
+            words.append(self.getColumnText(last, 0))
+            following = self.GetNextSelected(last)
+        return words
+
+    def GetString(self):
+        """Return the first-column text of the first selected row."""
+        return self.getselectedwords()[0]
+
+    def GetSelections(self):
+        """Return the first-column text of every selected row."""
+        return self.getselectedwords()
+
+    def getColumnText(self, index, col):
+        """Return the text displayed in cell (index, col)."""
+        item = self.GetItem(index, col)
+        return item.GetText()
+
+    def GetItemData(self, item) :
+        """Return the stored data of the row displayed at position ``item``."""
+        index=self.itemIndexMap[item]
+        s = self.itemDataMap[index]
+        return s
+
+    def OnItemSelected(self, event):
+        """Remember the selected row in self.currentItem."""
+        self.currentItem = event.GetIndex()
+        event.Skip()
+
+    def SetSelection(self, index) :
+        """Deselect every row, then select row ``index``."""
+        for i in range(0, self.GetItemCount(), 1) :
+            self.Select(i, on=0)
+        self.Select(index, on=1)
+
+    def GetItemInfo(self, idx):
+        """
+        Collect all relevant data of a listitem, and put it in a list.
+
+        The last element, 'cate', identifies this list as the drag source.
+        """
+
+        l = []
+        l.append(idx) # We need the original index, so it is easier to eventualy delete it.
+        l.append(self.GetItemData(idx)) # Itemdata.
+        l.append(self.GetItemText(idx)) # Text first column.
+        for i in range(1, self.GetColumnCount()): # Possible extra columns.
+            l.append(self.GetItem(idx, i).GetText())
+        l.append('cate')
+        return l
+
+
+    def StartDrag(self, event):
+        """
+        Put together a data object for drag-and-drop _from_ this list.
+        """
+
+        l = []
+        idx = -1
+        while True: # Find all the selected items and put them in a list.
+            idx = self.GetNextItem(idx, wx.LIST_NEXT_ALL, wx.LIST_STATE_SELECTED)
+            if idx == -1:
+                break
+            l.append(self.GetItemInfo(idx))
+
+        if not l:
+            # Nothing selected: an empty payload would fail on l[0] below
+            # and on seq[0] in the Insert() of the drop target.
+            return
+
+        # Pickle the items list.
+        itemdata = pickle.dumps(l, 1)
+        # Create our own data format and use it
+        # in a Custom data object.
+        ldata = wx.CustomDataObject("ListCtrlItems")
+        ldata.SetData(itemdata)
+        # Now make a data object for the  item list.
+        data = wx.DataObjectComposite()
+        data.Add(ldata)
+
+        # Create drop source and begin drag-and-drop.
+        dropSource = wx.DropSource(self)
+        dropSource.SetData(data)
+        res = dropSource.DoDragDrop(flags=wx.Drag_DefaultMove)
+
+        # If move, we want to remove the item from this list.
+        # NOTE(review): GetItemInfo() of this class always ends with 'cate',
+        # so this branch is not taken for items built here.
+        if res == wx.DragMove and l[0][-1] != 'cate' :
+            # It's possible we are dragging/dropping from this list to this list.
+            # In which case, the index we are removing may have changed...
+
+            # Find correct position.
+            l.reverse() # Delete all the items, starting with the last item.
+            for i in l:
+                pos = self.FindItem(i[0], i[2])
+                self.DeleteItem(pos)
+
+
+    def Insert(self, x, y, seq):
+        """
+        Insert text at given x, y coordinates --- used with drag-and-drop.
+
+        seq: list of items built by the GetItemInfo() of the drag source; the
+        last element of each item names the source ('words' or 'catewords').
+        Items dropped on a row go into that category; items dropped outside
+        the rows create a new category from the first item.
+        """
+
+        # Find insertion point.
+        index, flags = self.HitTest((x, y))
+
+        if index == wx.NOT_FOUND: # Not clicked on an item.
+            if flags & (wx.LIST_HITTEST_NOWHERE|wx.LIST_HITTEST_ABOVE|wx.LIST_HITTEST_BELOW): # Empty list or below last item.
+                index = self.GetItemCount() # Append to end of list.
+            elif self.GetItemCount() > 0:
+                if y <= self.GetItemRect(0).y: # Clicked just above first item.
+                    index = -1 # Append to top of list.
+                else:
+                    index = self.GetItemCount() + 1 # Append to end of list.
+        print('Insert de ListForCate', index, flags)
+        word, eff = seq[0][1]
+        if seq[0][-1] == 'words' :
+            if index < self.GetItemCount() and index != -1 :
+                for val in seq :
+                    word, eff = val[1]
+                    self.cate.addwordincate(self.GetItemData(index)[0], word, eff)
+            else :
+                index = self.GetItemCount()
+                if not self.cate.addcatefromwordtocate(word, eff) :
+                    wx.MessageDialog(self, "This category name is already used", style=wx.OK|wx.CENTRE).ShowModal()
+            self.dlist = self.cate.getcate()
+            self.RefreshData(self.dlist)
+            self.parent.m_listToCate.RefreshData(self.cate.getwordstocate())
+            # When the creation was refused, index is one past the last row;
+            # reading that row used to raise IndexError.
+            if 0 <= index < self.GetItemCount() :
+                self.parent.m_listCateWords.RefreshData(self.parent.cate.getcatewords(self.GetItemData(index)[0]))
+                for i in range(0, self.GetItemCount(), 1):
+                    self.Select(i, on=0)
+                self.Select(index, on=1)
+                self.parent.butsave.SetBackgroundColour((255,0,0,255))
+        if seq[0][-1] == 'catewords' :
+            if index < self.GetItemCount() and index != -1 :
+                for val in seq :
+                    word, eff = val[1]
+                    if word not in self.cate.cate['CATE'][self.GetItemData(index)[0]][1] :
+                        self.cate.changewordcate(self.GetItemData(index)[0], word, eff)
+                        self.parent.butsave.SetBackgroundColour((255,0,0,255))
+            else :
+                index = self.GetItemCount()
+                if self.cate.addcatefromwordcate(word, eff) :
+                    self.parent.butsave.SetBackgroundColour((255,0,0,255))
+                else :
+                    wx.MessageDialog(self, "This category name is already used", style=wx.OK|wx.CENTRE).ShowModal()
+            self.dlist = self.cate.getcate()
+            self.RefreshData(self.dlist)
+            self.parent.m_listToCate.RefreshData(self.cate.getwordstocate())
+            # currentItem is only set once a row has been selected and may
+            # be stale after the refresh.
+            current = getattr(self, 'currentItem', None)
+            if current is not None and 0 <= current < self.GetItemCount() :
+                self.parent.m_listCateWords.RefreshData(self.parent.cate.getcatewords(self.GetItemData(current)[0]))
+
+
+
+#        for i in seq: # Insert the item data.
+#            idx = self.InsertItem(index, i[2])
+#            self.SetItemData(idx, i[1])
+#            for j in range(1, self.GetColumnCount()):
+#                try: # Target list can have more columns than source.
+#                    self.SetItem(idx, j, i[2+j])
+#                except:
+#                    pass # Ignore the extra columns.
+#            index += 1
+
+class ListForWords(ListForCate) :
+    """
+    Virtual list of the words that are not categorised yet.
+
+    Rows are read from self.cate.getwordstocate(). Dragged items are tagged
+    'words'; the list accepts items dragged from the two other lists.
+    """
+
+    def __init__(self, parent, dlist = {}, first = []):
+        """
+        parent: window owning the shared ``cate`` object.
+        first: titles of the columns that follow 'To categorize'.
+        dlist: accepted but not used by this constructor.
+        """
+        wx.ListCtrl.__init__( self, parent, -1, style=wx.LC_REPORT|wx.LC_VIRTUAL|wx.LC_HRULES|wx.LC_VRULES)
+        self.parent=parent
+        self.cate = self.parent.cate
+        self.dlist= self.cate.getwordstocate()
+        self.first = first
+        self.il = wx.ImageList(16, 16)
+        a={"sm_up":"GO_UP","sm_dn":"GO_DOWN","w_idx":"WARNING","e_idx":"ERROR","i_idx":"QUESTION"}
+        # Store each image index as an attribute (self.sm_up, self.i_idx, ...)
+        # with setattr/getattr rather than exec() on generated source code.
+        for k,v in a.items():
+            bitmap = wx.ArtProvider.GetBitmap(getattr(wx, 'ART_%s' % v), wx.ART_TOOLBAR, (16,16))
+            setattr(self, k, self.il.Add(bitmap))
+        self.SetImageList(self.il, wx.IMAGE_LIST_SMALL)
+        # Two alternating row backgrounds.
+        self.attr1 = wx.ListItemAttr()
+        self.attr1.SetBackgroundColour((230, 230, 230))
+        self.attr2 = wx.ListItemAttr()
+        self.attr2.SetBackgroundColour("light blue")
+        self.SetListFont()
+        self.selected = {}
+        for i, name in enumerate(['To categorize'] + self.first) :
+            self.InsertColumn(i,name,wx.LIST_FORMAT_LEFT)
+        # Attributes read by ColumnSorterMixin and by the OnGetItem* callbacks.
+        self.itemDataMap = self.dlist
+        self.itemIndexMap = list(self.dlist.keys())
+        self.SetItemCount(len(self.dlist))
+        listmix.ListCtrlAutoWidthMixin.__init__(self)
+        listmix.ColumnSorterMixin.__init__(self, len(self.first) + 1)
+        self.SetColumnWidth(0, 400)
+        self.SetColumnWidth(1, wx.LIST_AUTOSIZE)
+
+        self.SortListItems(1, False)
+
+        self.Bind(wx.EVT_LIST_BEGIN_DRAG, self.StartDrag)
+        self.Bind(wx.EVT_LIST_ITEM_ACTIVATED, self.OnDClick)
+        self.Bind(wx.EVT_LIST_ITEM_SELECTED, self.OnItemSelected)
+
+        dt = MyListDropCate(self)
+        self.SetDropTarget(dt)
+
+    def OnDClick(self, event) :
+        """Row activation: nothing to do here, let the event propagate."""
+        event.Skip()
+
+    def OnItemSelected(self, event):
+        """Remember the selected row in self.currentItem."""
+        self.currentItem = event.GetIndex()
+        event.Skip()
+
+    def OnGetItemImage(self, item):
+        """Virtual list callback: every row uses the image stored as i_idx."""
+        return self.i_idx
+
+    def GetItemInfo(self, idx):
+        """
+        Collect all relevant data of a listitem, and put it in a list.
+
+        The last element, 'words', identifies this list as the drag source.
+        """
+
+        l = []
+        l.append(idx) # We need the original index, so it is easier to eventualy delete it.
+        l.append(self.GetItemData(idx)) # Itemdata.
+        l.append(self.GetItemText(idx)) # Text first column.
+        for i in range(1, self.GetColumnCount()): # Possible extra columns.
+            l.append(self.GetItem(idx, i).GetText())
+        l.append('words')
+        return l
+
+
+    def StartDrag(self, event):
+        """
+        Put together a data object for drag-and-drop _from_ this list.
+        """
+
+        l = []
+        idx = -1
+        while True: # Find all the selected items and put them in a list.
+            idx = self.GetNextItem(idx, wx.LIST_NEXT_ALL, wx.LIST_STATE_SELECTED)
+            if idx == -1:
+                break
+            l.append(self.GetItemInfo(idx))
+
+        if not l:
+            # Nothing selected: an empty payload would fail on l[0] below
+            # and on seq[0] in the Insert() of the drop target.
+            return
+
+        # Pickle the items list.
+        itemdata = pickle.dumps(l, 1)
+        # Create our own data format and use it
+        # in a Custom data object.
+        ldata = wx.CustomDataObject("ListCtrlItems")
+        ldata.SetData(itemdata)
+        # Now make a data object for the  item list.
+        data = wx.DataObjectComposite()
+        data.Add(ldata)
+
+        # Create drop source and begin drag-and-drop.
+        dropSource = wx.DropSource(self)
+        dropSource.SetData(data)
+        res = dropSource.DoDragDrop(flags=wx.Drag_DefaultMove)
+
+
+        # If move, we want to remove the item from this list.
+        # NOTE(review): GetItemInfo() of this class always ends with 'words',
+        # so this branch is not taken for items built here.
+        if res == wx.DragMove and l[0][-1] != 'words':
+            # It's possible we are dragging/dropping from this list to this list.
+            # In which case, the index we are removing may have changed...
+
+            # Find correct position.
+            l.reverse() # Delete all the items, starting with the last item.
+            for i in l:
+                pos = self.FindItem(i[0], i[2])
+                print('detruit : ',pos)
+                self.DeleteItem(pos)
+
+
+    def Insert(self, x, y, seq):
+        """
+        Handle items dropped on this list --- used with drag-and-drop.
+
+        x, y: drop coordinates; not used, the drop position is irrelevant
+        because the rows are rebuilt from self.cate (the former hit-test
+        computed an index that was never read).
+        seq: list of items built by the GetItemInfo() of the drag source.
+        Items tagged 'catewords' are words sent back to the uncategorised
+        list; an item tagged 'cate' is a category to delete.
+        """
+        if not seq :
+            return
+        if seq[0][-1] == 'catewords' :
+            for val in seq :
+                word, eff = val[1]
+                categorie = self.cate.findcatefromword(word)
+                self.cate.addwordinwords(categorie, word, eff)
+            self.RefreshData(self.cate.getwordstocate())
+            self.parent.m_listCate.RefreshData(self.cate.getcate())
+            self.parent.m_listCateWords.RefreshData(self.cate.getcatewords(categorie))
+            self.parent.butsave.SetBackgroundColour((255,0,0,255))
+        elif seq[0][-1] == 'cate' :
+            categorie = seq[0][1][0]
+            self.cate.delcate(categorie)
+            self.RefreshData(self.cate.getwordstocate())
+            listcate = self.parent.m_listCate
+            listcate.RefreshData(self.cate.getcate())
+            if listcate.GetItemCount() > 0 :
+                listcate.SetSelection(0)
+                self.parent.m_listCateWords.RefreshData(self.cate.getcatewords(listcate.GetItemText(0)))
+            else :
+                # The last category was deleted: there is no row 0 to select
+                # or to read, so just empty the list of category words.
+                self.parent.m_listCateWords.RefreshData({})
+            self.parent.butsave.SetBackgroundColour((255,0,0,255))
+
+
+class ListForCateWords(ListForCate) :
+    """
+    Virtual list of the words of the category selected in parent.m_listCate.
+
+    The list starts empty and is filled through RefreshData(). Dragged items
+    are tagged 'catewords'; the list accepts items tagged 'words'.
+    """
+
+    def __init__(self, parent, dlist = {}, first = []):
+        """
+        parent: window owning the shared ``cate`` object.
+        first: titles of the columns that follow 'Contents'.
+        dlist: accepted but not used by this constructor.
+        """
+        wx.ListCtrl.__init__( self, parent, -1, style=wx.LC_REPORT|wx.LC_VIRTUAL|wx.LC_HRULES|wx.LC_VRULES)
+        self.parent=parent
+        self.cate = self.parent.cate
+        self.dlist= {}
+        self.first = first
+        self.il = wx.ImageList(16, 16)
+        a={"sm_up":"GO_UP","sm_dn":"GO_DOWN","p_idx":"TIP","e_idx":"ERROR","i_idx":"QUESTION"}
+        # Store each image index as an attribute (self.sm_up, self.p_idx, ...)
+        # with setattr/getattr rather than exec() on generated source code.
+        for k,v in a.items():
+            bitmap = wx.ArtProvider.GetBitmap(getattr(wx, 'ART_%s' % v), wx.ART_TOOLBAR, (16,16))
+            setattr(self, k, self.il.Add(bitmap))
+        self.SetImageList(self.il, wx.IMAGE_LIST_SMALL)
+        # Two alternating row backgrounds.
+        self.attr1 = wx.ListItemAttr()
+        self.attr1.SetBackgroundColour((230, 230, 230))
+        self.attr2 = wx.ListItemAttr()
+        self.attr2.SetBackgroundColour("light blue")
+        self.SetListFont()
+        self.selected = {}
+        for i, name in enumerate(['Contents'] + self.first) :
+            self.InsertColumn(i,name,wx.LIST_FORMAT_LEFT)
+        # Attributes read by ColumnSorterMixin and by the OnGetItem* callbacks.
+        self.itemDataMap = self.dlist
+        self.itemIndexMap = list(self.dlist.keys())
+        self.SetItemCount(len(self.dlist))
+        listmix.ListCtrlAutoWidthMixin.__init__(self)
+        listmix.ColumnSorterMixin.__init__(self, len(self.first) + 1)
+        self.SetColumnWidth(0, 300)
+        self.SetColumnWidth(1, wx.LIST_AUTOSIZE)
+
+        self.SortListItems(1, False)
+
+        self.Bind(wx.EVT_LIST_BEGIN_DRAG, self.StartDrag)
+        self.Bind(wx.EVT_LIST_ITEM_SELECTED, self.OnItemSelected)
+
+        dt = MyListDropCate(self)
+        self.SetDropTarget(dt)
+
+    def OnItemSelected(self, event):
+        """Remember the selected row in self.currentItem."""
+        self.currentItem = event.GetIndex()
+        event.Skip()
+
+
+    def GetItemInfo(self, idx):
+        """
+        Collect all relevant data of a listitem, and put it in a list.
+
+        The last element, 'catewords', identifies this list as the drag source.
+        """
+
+        l = []
+        l.append(idx) # We need the original index, so it is easier to eventualy delete it.
+        l.append(self.GetItemData(idx)) # Itemdata.
+        l.append(self.GetItemText(idx)) # Text first column.
+        for i in range(1, self.GetColumnCount()): # Possible extra columns.
+            l.append(self.GetItem(idx, i).GetText())
+        l.append('catewords')
+        return l
+
+
+    def StartDrag(self, event):
+        """
+        Put together a data object for drag-and-drop _from_ this list.
+
+        Nothing is removed from this list here; the rows are rebuilt by the
+        Insert() of the drop target.
+        """
+
+        l = []
+        idx = -1
+        while True: # Find all the selected items and put them in a list.
+            idx = self.GetNextItem(idx, wx.LIST_NEXT_ALL, wx.LIST_STATE_SELECTED)
+            if idx == -1:
+                break
+            l.append(self.GetItemInfo(idx))
+
+        if not l:
+            # Nothing selected: an empty payload would fail on seq[0] in the
+            # Insert() of the drop target.
+            return
+
+        # Pickle the items list.
+        itemdata = pickle.dumps(l, 1)
+        # Create our own data format and use it
+        # in a Custom data object.
+        ldata = wx.CustomDataObject("ListCtrlItems")
+        ldata.SetData(itemdata)
+        # Now make a data object for the  item list.
+        data = wx.DataObjectComposite()
+        data.Add(ldata)
+
+        # Create drop source and begin drag-and-drop.
+        dropSource = wx.DropSource(self)
+        dropSource.SetData(data)
+        res = dropSource.DoDragDrop(flags=wx.Drag_DefaultMove)
+        # Debug output. currentItem only exists once a category has been
+        # selected, hence getattr rather than a direct attribute read.
+        print('current')
+        print(getattr(self.parent.m_listCate, 'currentItem', None))
+
+
+    def Insert(self, x, y, seq):
+        """
+        Insert text at given x, y coordinates --- used with drag-and-drop.
+
+        seq: list of items built by the GetItemInfo() of the drag source.
+        Only items tagged 'words' are handled: they are added to the category
+        selected in parent.m_listCate. The drop position is only printed.
+        """
+        # Find insertion point.
+        index, flags = self.HitTest((x, y))
+        print('Insert de ListForCateWords', index,flags)
+        if not seq or seq[0][-1] != 'words' :
+            return
+        listcate = self.parent.m_listCate
+        selected = listcate.GetFirstSelected()
+        if selected == -1 :
+            # No category selected: -1 was used as a row index, which does
+            # not designate the intended category.
+            return
+        categorie = listcate.getColumnText(selected, 0)
+        for val in seq :
+            word, eff = val[1]
+            self.cate.addwordincate(categorie, word, eff)
+        self.dlist = self.cate.getwordstocate()
+        self.RefreshData(self.cate.getcatewords(categorie))
+        self.parent.m_listCate.RefreshData(self.cate.getcate())
+        self.parent.m_listToCate.RefreshData(self.dlist)
+        self.parent.butsave.SetBackgroundColour((255,0,0,255))
+
+
+class MyListDropCate(wx.DropTarget):
+    """
+    Drop target forwarding "ListCtrlItems" payloads to a list control.
+    """
+    def __init__(self, source):
+        """
+        Arguments:
+        source: list control that owns this target; its Insert(x, y, items)
+        method receives the dropped items.
+        """
+        wx.DropTarget.__init__(self)
+        self.dv = source
+
+        # Only the custom "ListCtrlItems" format is accepted.
+        self.data = wx.CustomDataObject("ListCtrlItems")
+        self.SetDataObject(self.data)
+
+    def OnData(self, x, y, d):
+        """
+        Called when OnDrop returns True: unpickle the payload and hand it to
+        the list control. The suggested drag result d is returned unchanged,
+        which tells the source what to do with the original data.
+        """
+        # Copy the data from the drag source to our data object.
+        if not self.GetData():
+            return d
+        # NOTE(review): pickle.loads() runs on whatever was dropped with the
+        # "ListCtrlItems" format; the payload is expected to come from the
+        # StartDrag() methods of the list controls - confirm no other source.
+        items = pickle.loads(self.data.GetData())
+        self.dv.Insert(x, y, items)
+        return d
diff --git a/functions.py b/functions.py

index a18fbc5..3472b77 100755 (executable)
--- a/functions.py
+++ b/functions.py
@@ -83,13 +83,13 @@ class TGen :
      def write(self, path = None):
          if path is None :
              path = self.path
-        with open(path, 'w') as f :
+        with open(path, 'w', encoding='utf8') as f :
              f.write('\n'.join(['\t'.join([val] + self.tgen[val]) for val in self.tgen]))
  
      def writetable(self, pathout, tgens, totocc):
          etoiles = list(totocc.keys())
          etoiles.sort()
-        with open(pathout, 'w') as f :
+        with open(pathout, 'w', encoding='utf8') as f :
              line = '\t'.join(['tgens'] + etoiles) + '\n'
              f.write(line)
              for t in tgens :
@@ -117,7 +117,9 @@ class History :
          self.read()
  
      def read(self) :
-        d = shelve.open(self.filein)
+        with open(self.filein, 'r') as fjson :
+            d = json.load(fjson)
+#        d = shelve.open(self.filein, protocol=1)
          self.history = d.get('history', [])
          self.matrix = d.get('matrix', [])
          self.ordercorpus = dict([[corpus['uuid'], i] for i, corpus in enumerate(self.history)])
@@ -125,13 +127,16 @@ class History :
          self.analyses = dict([[analyse['uuid'], analyse] for corpus in self.history for analyse in corpus.get('analyses', [])])
          self.matrixanalyse = dict([[mat['uuid'], mat] for mat in self.matrix])
          self.ordermatrix = dict([[matrix['uuid'], i] for i, matrix in enumerate(self.matrix)])
-        d.close()
+#        d.close()
  
      def write(self) :
-        d = shelve.open(self.filein)
+        d = {}
          d['history'] = self.history
          d['matrix'] = self.matrix
-        d.close()
+        with open(self.filein, 'w') as f :
+            f.write(json.dumps(d, indent=4, default=str))
+       #d = shelve.open(self.filein, protocol=1)
+       #d.close()
  
      def add(self, analyse) :
          log.info('add to history %s' % analyse.get('corpus_name', 'pas un corpus'))
@@ -364,7 +369,7 @@ class DoConf :
          if outfile is None :
              outfile = self.configfile
          outfile = normpath_win32(outfile)
-        with open(outfile, 'w') as f :
+        with open(outfile, 'w', encoding="utf-8") as f :
              f.write(txt)
              #self.conf.write(f)
  
@@ -384,7 +389,7 @@ class DoConf :
  
  
  def write_tab(tab, fileout) :
-        csvWriter = csv.writer(open(fileout, 'w'), delimiter=';', quoting = csv.QUOTE_NONNUMERIC)
+        csvWriter = csv.writer(open(fileout, 'w', newline='', encoding='utf8'), delimiter=';', quoting = csv.QUOTE_NONNUMERIC)
          csvWriter.writerows(tab)
  
  class BugDialog(wx.Dialog):
@@ -428,7 +433,7 @@ def CreateIraFile(DictPathOut, clusternb, corpname='corpus_name', section = 'ana
      AnalyseConf.set(section, 'clusternb', clusternb)
      AnalyseConf.set(section, 'corpus_name', corpname)
  
-    fileout = open(DictPathOut['ira'], 'w')
+    fileout = open(DictPathOut['ira'], 'w', encoding='utf8')
      AnalyseConf.write(fileout)
      fileout.close()
  
@@ -511,10 +516,10 @@ def treat_line_alceste(i, line) :
          line[5] = str(float(line[5].replace(',', '.')))[0:7]
      return [i, int(line[0]), int(line[1]), float(line[2]), float(line[3]), line[6], line[4], line[5]]
  
-def ReadProfileAsDico(File, Alceste=False, encoding = sys.getdefaultencoding()):
+def ReadProfileAsDico(File, Alceste=False, encoding = 'utf8'):
      dictlem = {}
      print('lecture des profiles')
-    FileReader = codecs.open(File, 'r', encoding)
+    FileReader = open(File, 'r', encoding='utf8')
      Filecontent = FileReader.readlines()
      FileReader.close()
      DictProfile = {}
@@ -674,7 +679,7 @@ def PlaySound(parent):
              print('pas de son')
  
  def ReadDicoAsDico(dicopath):
-    with codecs.open(dicopath, 'r', 'UTF8') as f:
+    with open(dicopath, 'r', encoding='UTF8') as f:
          content = f.readlines()
      lines = [line.rstrip('\n\r').replace('\n', '').replace('"', '').split('\t') for line in content if line != '']
      return dict([[line[0], line[1:]] for line in lines])
@@ -691,9 +696,9 @@ def ReadLexique(parent, lang = 'french', filein = None):
          else :
              parent.lexique = ReadDicoAsDico(filein)
  
-def ReadList(filein, encoding = sys.getdefaultencoding(), sep = ';'):
+def ReadList(filein, encoding = 'utf8', sep = ';'):
      #file = open(filein)
-    with codecs.open(filein, 'r', encoding) as f :
+    with open(filein, 'r', encoding='utf8') as f :
          content = f.read()
      content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
      #file = codecs.open(filein, 'r', encoding)
@@ -805,11 +810,12 @@ def launchcommand(mycommand):
      Popen(mycommand)
  
  def print_liste(filename,liste):
-    with open(filename,'w') as f :
+    with open(filename,'w', encoding='utf8') as f :
          for graph in liste :
              f.write(';'.join(graph) +'\n')
-def read_list_file(filename, encoding = sys.getdefaultencoding()):
-    with codecs.open(filename,'r', encoding) as f:
+
+def read_list_file(filename, encoding = 'utf8'):
+    with open(filename,'r', encoding='utf8') as f:
          content=f.readlines()
          ncontent=[line.replace('\n','').split(';') for line in content if line.strip() != '']
      return ncontent
@@ -880,7 +886,7 @@ def doconcorde(corpus, uces, mots, uci = False) :
  
  def getallstcarac(corpus, analyse) :
     pathout = PathOut(analyse['ira'])
-   profils =  ReadProfileAsDico(pathout['PROFILE_OUT'], Alceste, self.encoding)
+   profils =  ReadProfileAsDico(pathout['PROFILE_OUT'], Alceste, 'utf8')
     print(profils)
  
  def read_chd(filein, fileout):
@@ -1016,7 +1022,7 @@ def translateprofile(corpus, dictprofile, lf='it', lt='fr', maxword = 50) :
  
  def write_translation_profile(prof, lems, language, dictpathout) :
      if os.path.exists(dictpathout['translations.txt']) :
-        with codecs.open(dictpathout['translations.txt'], 'r', 'utf8') as f :
+        with open(dictpathout['translations.txt'], 'r', encoding='utf8') as f :
              translist = f.read()
          translist = [line.split('\t') for line in translist.splitlines()]
      else :
@@ -1034,13 +1040,13 @@ def write_translation_profile(prof, lems, language, dictpathout) :
              elif line[0] == '*****' :
                  rest[i] = ['*****','*','*', '*', '*', '*']
          toprint += rest
-    with open(dictpathout['translation_profile_%s.csv' % language], 'w') as f :
+    with open(dictpathout['translation_profile_%s.csv' % language], 'w', encoding='utf8') as f :
          f.write('\n'.join([';'.join(line) for line in toprint]))
-    with open(dictpathout['translation_words_%s.csv' % language], 'w') as f :
+    with open(dictpathout['translation_words_%s.csv' % language], 'w', encoding='utf8') as f :
          f.write('\n'.join(['\t'.join([val, lems[val]]) for val in lems]))
      if 'translation_profile_%s.csv' % language not in [val[0] for val in translist] :
          translist.append(['translation_profile_%s.csv' % language, 'translation_words_%s.csv' % language])
-        with open(dictpathout['translations.txt'], 'w') as f :
+        with open(dictpathout['translations.txt'], 'w', encoding='utf8') as f :
              f.write('\n'.join(['\t'.join(line) for line in translist]))
  
  def makesentidict(infile, language) :
diff --git a/guifunct.py b/guifunct.py

index 2b6afe7..2612792 100644 (file)
--- a/guifunct.py
+++ b/guifunct.py
@@ -163,7 +163,7 @@ class SelectColumn :
                  dial.Destroy()
                  column = [actives.index(val) for val in indexes]
                  column.sort()
-                with open(pathout, 'w') as f :
+                with open(pathout, 'w' ,encoding='utf8') as f :
                      f.write('\n'.join([repr(val) for val in column]))
                  self.ok = True
              else :
@@ -172,7 +172,7 @@ class SelectColumn :
              self.ok = True
              if selected is None :
                  selected = [i for i in range(0, len(actives))]
-            with open(pathout, 'w') as f :
+            with open(pathout, 'w', encoding='utf8') as f :
                  f.write('\n'.join([repr(i) for i in selected]))
  
  
@@ -950,7 +950,7 @@ class PrepSimi :
                      indexes.append(self.dial.listcol.getColumnText(last,0))
                  column = [actives.index(val) for val in indexes]
                  column.sort()
-                with open(pathout, 'w') as f :
+                with open(pathout, 'w', encoding='utf8') as f :
                      f.write('\n'.join([repr(val) for val in column]))
                  self.make_param()
                  self.dial.Destroy()
@@ -1288,15 +1288,15 @@ class ExportMetaTable :
  
  
  def redosimi(self, evt) :
-    with open(self.pathout['selected.csv'],'r') as f :
+    with open(self.pathout['selected.csv'],'r', encoding='utf8') as f :
          selected = f.read()
      selected = [int(val) for val in selected.splitlines()]
      if self.actives is None :
-        with codecs.open(self.pathout['actives.csv'], 'r') as f :
+        with open(self.pathout['actives.csv'], 'r', encoding='utf8') as f :
              self.actives = f.read()
          self.actives = self.actives.splitlines()#[act for act in self.actives.splitlines()]
      if os.path.exists(self.pathout['actives_nb.csv']) :
-        with open(self.pathout['actives_nb.csv'], 'r') as f :
+        with open(self.pathout['actives_nb.csv'], 'r', encoding='utf8') as f :
              act_nb = f.read()
              act_nb = act_nb.splitlines()
          dictcol = dict([[i, [self.actives[i], int(act_nb[i])]] for i, val in enumerate(self.actives)])
@@ -1313,7 +1313,7 @@ def redosimi(self, evt) :
          prep.parametres = self.parametres
          order_actives = [[i, act, self.corpus.getlemeff(act)] for i, act in enumerate(self.actives)]
          order_actives = sorted(order_actives, key=itemgetter(2), reverse = True)
-        with open(self.pathout['selected.csv'], 'w') as f :
+        with open(self.pathout['selected.csv'], 'w', encoding='utf8') as f :
              f.write('\n'.join([repr(order_actives[val][0]) for val in self.parametres['selected']]))
      if prep.val == wx.ID_OK or evt is None:
          self.parametres = prep.parametres
diff --git a/iramuteq.py b/iramuteq.py

index 5c61d5b..a8afd32 100755 (executable)
--- a/iramuteq.py
+++ b/iramuteq.py
@@ -56,6 +56,7 @@ from tabfrequence import Frequences, FreqMultiple
  from tableau import Tableau
  from tabrsimple import InputText
  from tabsimi import DoSimi
+from tabcatego import Categorisation
  from tabsplitvar import SplitMatrixFromVar
  from tabverges import Prototypical
  from textaslexico import Lexico
@@ -93,6 +94,7 @@ ID_CHECKCORPUS = wx.Window.NewControlId()
  ID_Tabcontent = wx.Window.NewControlId()
  ID_AFCM = wx.Window.NewControlId()
  ID_SIMI = wx.Window.NewControlId()
+ID_CATE = wx.Window.NewControlId()
  ID_CloseTab = wx.Window.NewControlId()
  ID_SaveTab = wx.Window.NewControlId()
  ID_CreateText = wx.Window.NewControlId()
@@ -182,8 +184,8 @@ class printer(object) :
      def flush(self):
          pass
  
-sys.stderr = writer()
-sys.stdout = printer()
+#sys.stderr = writer()
+#sys.stdout = printer()
  
  images_analyses = {
          'textroot' : 'textroot.png',
@@ -217,7 +219,8 @@ images_analyses = {
          'preferences' : 'preferences.png',
          'exportmetatable' : 'exportmetatable.png',
          'importdmi' : 'twitter.png',
-        'labbe' : 'spec.png'
+        'labbe' : 'spec.png',
+        'categorisation' : 'spec.png',
           }
  
  
@@ -237,6 +240,8 @@ class IraFrame(wx.Frame):
          log.info('version : %s' % ConfigGlob.get('DEFAULT', 'version'))
          print(size)
          wx.Frame.__init__(self, parent, id, title, pos, size, style)
+        #Font
+        self.SetFont(wx.Font(12, wx.FONTFAMILY_DEFAULT, wx.FONTSTYLE_NORMAL, wx.FONTWEIGHT_NORMAL))
          # configuration
          self.AppliPath = AppliPath
          self.images_path = os.path.join(AppliPath,'images')
@@ -352,6 +357,7 @@ class IraFrame(wx.Frame):
                         [ID_SIMI, _("Similarities Analysis"), 'simimatrix'],
                         [ID_proto, _("Prototypical Analysis"), 'proto'],
                         [ID_Splitfromvar, _("Split from variable"), 'subcorpusmeta'],
+                       [ID_CATE, _("ElCaTeGoRiZatoR"), 'categorisation'],
                        ]
          for analyse in matanalyses :
              if not isinstance(analyse, dict) :
@@ -489,10 +495,10 @@ class IraFrame(wx.Frame):
          #------------------------------------------------------------------------------------------------
          # fichier d'historique de Iramuteq
          #------------------------------------------------------------------------------------------------
-        #if not os.path.exists(os.path.join(UserConfigPath, 'history.db')) :
-        #    with open(os.path.join(UserConfigPath, 'history.db'), 'w') as f :
-        #        f.write('')
-        self.history = History(os.path.join(UserConfigPath, 'historyIramuteq'))
+        if not os.path.exists(os.path.join(UserConfigPath, 'history.db')) :
+            with open(os.path.join(UserConfigPath, 'history.db'), 'w') as f :
+                f.write('{}')
+        self.history = History(os.path.join(UserConfigPath, 'history.db'))
          # l'extension ".db" est ajoutée automatiquement par le module
  
          #------------------------------------------------------------------------------------------------
@@ -505,7 +511,7 @@ class IraFrame(wx.Frame):
              Name("lefttree").
              Caption(_("Historic")).
              Left().
-            MinSize(wx.Size(200,500)).
+            MinSize(wx.Size(300,400)).
              Layer(1).
              Position(1).
              CloseButton(False).
@@ -603,6 +609,7 @@ class IraFrame(wx.Frame):
          self.Bind(wx.EVT_MENU, self.OnAFCM, id=ID_AFCM)
          self.Bind(wx.EVT_MENU, self.OnProto, id=ID_proto)
          self.Bind(wx.EVT_MENU, self.OnSplitVar, id = ID_Splitfromvar)
+        self.Bind(wx.EVT_MENU, self.OnCategorisation, id = ID_CATE)
          #self.Bind(wx.EVT_MENU, self.OnRCode, id=ID_RCODE) #???
          #self.Bind(wx.EVT_MENU, self.OnSplitVar, id=ID_SPLITVAR) #???
          #self.Bind(wx.EVT_MENU, self.OnCheckcorpus, id = ID_CHECKCORPUS) #???
@@ -701,7 +708,12 @@ class IraFrame(wx.Frame):
          BestRPath = False
          if not CheckRPath(self.PathPath) :
              if sys.platform == 'win32':
-                BestRPath = FindRPAthWin32()
+                if os.path.exists(self.AppliPath + '\\R\\R\\bin\\x64\\R.exe') :  # test the very path that is assigned below
+                    BestRPath = self.AppliPath + '\\R\\R\\bin\\x64\\R.exe'
+                elif os.path.exists(self.AppliPath + '\\R\\R\\bin\\i386\\R.exe') :
+                    BestRPath = self.AppliPath + '\\R\\R\\bin\\i386\\R.exe'
+                else :
+                    BestRPath = FindRPAthWin32()
              elif os.path.exists(self.AppliPath + '/R/R') :
                  BestRPath = self.AppliPath + '/R/R'
              else:
@@ -759,7 +771,7 @@ class IraFrame(wx.Frame):
          if not menu_pos is None :
              self.mb.EnableTop(menu_pos, Show)
              self.mb.Refresh()
-        self._mgr.Update()
+        #self._mgr.Update()
  
      #--------------------------------------------------------------------
      # fin de __init__ du wx.Frame
@@ -913,13 +925,16 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, États-Unis."""
          if isinstance(parent, IraFrame) :
              npage = self.nb.GetPage(new)
              if 'parametres' in dir(npage) :
-                self.tree.GiveFocus(uuid=npage.parametres['uuid'])
+                #self.tree.GiveFocus(uuid=npage.parametres['uuid'])
                  if npage.parametres.get('matrix', False) :
                      self.ShowMenu('text', False)
                      self.ShowMenu('matrix', True)
                  elif npage.parametres.get('corpus', False) :
                      self.ShowMenu('text')
                      self.ShowMenu('matrix', False)
+        #self._mgr.Update()
+        #wx.CallAfter(self.nb.SendSizeEvent)
+        #self.Refresh()
  
      # action ou évènement ?
      def OnCloseTab(self, evt):
@@ -961,6 +976,9 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, États-Unis."""
                  pane.Hide()
          self._mgr.GetPane(panel).Show()
          self._mgr.Update()
+        print('show a pane refresh')
+        wx.CallAfter(self.nb.SendSizeEvent)
+        self.Refresh()
  
      # action ou évènement ?
      def OnAcceuil(self, event):
@@ -1010,6 +1028,10 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, États-Unis."""
      def OnSimiTab(self, event, matrix = None):
          self.analyse_matrix(DoSimi, matrix = matrix, analyse_type = 'simimatrix', dlgnb = 5)
  
+    def OnCategorisation(self, event, matrix = None) :
+        """Menu handler bound to ID_CATE : run the Categorisation analysis on matrix through analyse_matrix."""
+        self.analyse_matrix(Categorisation, matrix = matrix, analyse_type = 'categorisation', dlgnb = 1)
+
      def OnCHDReinert(self, event, matrix = None):
          #if matrix is None :
          #    matrix = self.tree.getmatrix()
diff --git a/layout.py b/layout.py

index 5fd493c..7c7edd9 100644 (file)
--- a/layout.py
+++ b/layout.py
@@ -38,6 +38,7 @@ from PrintRScript import write_afc_graph, print_simi3d, PrintSimiScript
  from profile_segment import ProfileSegment
  from listlex import *
  from Liste import *
+from elcategorizator import ElCategorizator
  from search_tools import SearchFrame
  from dialog import PrefGraph, PrefExport, PrefSimpleFile, PrefDendro, SimpleDialog, ImageViewer
  from guifunct import SelectColumn, PrepSimi, PrefSimi, redosimi
@@ -419,7 +420,7 @@ class OpenCHDS():
          for i in range(0, clnb) :
              clusternames[i] = ' '.join(['%i' % (i + 1), _('Cluster'),  '%i' % (i + 1)])
          if os.path.exists(self.pathout['classes_names.txt']) :
-            with codecs.open(self.pathout['classes_names.txt'], 'r', self.parent.syscoding) as f :
+            with open(self.pathout['classes_names.txt'], 'r', encoding='utf8') as f :
                  clusternames_ = f.read()
              clusternames_ =  dict([[i, ' '.join([repr(i + 1), line])] for i, line in enumerate(clusternames_.splitlines())])
              clusternames.update(clusternames_)
@@ -430,7 +431,7 @@ class OpenCHDS():
          panel = wx.Panel(parent, -1)
          sizer1 = wx.BoxSizer(wx.VERTICAL)
          if os.path.exists(DictPathOut['pre_rapport']):
-            with codecs.open(DictPathOut['pre_rapport'], 'r') as f :
+            with open(DictPathOut['pre_rapport'], 'r', encoding='utf8') as f :
                  txt = f.read()
              self.debtext = txt
          else :
@@ -543,7 +544,7 @@ class OpenCHDS():
              self.parametres['tgenspec'] = os.path.join(self.parametres['pathout'], 'tgenchi2.csv')
              TgenLayout(panel)
          if os.path.exists(self.dictpathout['translations.txt']) :
-            with codecs.open(self.dictpathout['translations.txt'], 'r', 'utf8') as f:
+            with open(self.dictpathout['translations.txt'], 'r', encoding='utf8') as f:
                  translist = f.read()
              translist = [line.split('\t') for line in translist.splitlines()]
              for line in translist :
@@ -562,7 +563,7 @@ class OpenCHDS():
  
      def opentrans(self, trans) :
          prof = ReadProfileAsDico(self.dictpathout[trans[0]], False)
-        with codecs.open(self.dictpathout[trans[1]], 'r') as f :
+        with open(self.dictpathout[trans[1]], 'r', encoding='utf8') as f :
              lems = f.read()
          lems = [line.split('\t') for line in lems.splitlines()]
          lems = dict(lems)
@@ -633,7 +634,7 @@ def PrintRapport(self, corpus, parametres, istxt = True):
  
      txt += ''.join([sep, '###########################', sep, _('time'), ' : %s' % parametres.get('time', ''), sep, '###########################', sep])
      # ecriture du resultat dans le fichier
-    with open(self.pathout['pre_rapport'], 'w') as f :
+    with open(self.pathout['pre_rapport'], 'w', encoding='utf8') as f :
          f.write(txt)
  
  
@@ -735,6 +736,7 @@ class dolexlayout :
          self.dictpathout = StatTxtPathOut(parametres['pathout'])
          #self.corpus.read_corpus_from_shelves(self.corpus.dictpathout['db'])
          self.parent = ira
+        self.corpus.parametres['syscoding'] = 'UTF8'
          self.encoding = self.corpus.parametres['syscoding']
          self.parametres = parametres
          self.DictSpec, first = ReadList(self.dictpathout['tablespecf'], self.corpus.parametres['syscoding'])
@@ -823,10 +825,10 @@ class StatLayout:
          lcle = {'total' :'total.csv', 'formes_actives':'formes_actives.csv', 'formes_supplémentaires':'formes_supplémentaires.csv', 'hapax': 'hapax.csv'}
          self.result = {}
          for key in lcle :
-            with codecs.open(self.pathout[lcle[key]], 'r', sys.getdefaultencoding()) as f :
+            with open(self.pathout[lcle[key]], 'r', encoding='utf-8') as f :
                  self.result[key] = [line.split(';') for line in f.read().splitlines()]
                  self.result[key] = dict([[i,[line[0],int(line[1]), line[2]]] for i, line in enumerate(self.result[key])])
-        with open(self.pathout['glob.txt'], 'r') as f :
+        with open(self.pathout['glob.txt'], 'r', encoding='utf-8') as f :
              self.result['glob'] = f.read()
  
  
@@ -1324,7 +1326,7 @@ class SimiLayout(DefaultTextLayout) :
          #saveAsGEXF(graph, filepath = fileout)
          """ % (ffr(self.pathout['RData.RData']), ffr(self.parent.RscriptsPath['simi']), fileout)
          filetmp = tempfile.mktemp()
-        with open(filetmp, 'w') as f :
+        with open(filetmp, 'w', encoding='utf8') as f :
              f.write(txt)
          exec_rcode(self.ira.RPath, filetmp)
          mss = wx.MessageDialog(self.ira, fileout, _('File exported'), wx.OK)
@@ -1397,6 +1399,11 @@ class ProtoLayout(DefaultMatLayout) :
          #self.ira.nb.SetSelection(self.ira.nb.GetPageCount() - 1)
          #self.ira.ShowAPane("Tab_content")
  
+class CateLayout(DefaultMatLayout) :
+    """Layout that adds an ElCategorizator panel as a new page of the self.ira.nb notebook."""
+    def dolayout(self) :
+        TabCate = ElCategorizator(self.ira.nb, self.pathout, self.tableau)
+        self.ira.nb.AddPage(TabCate, ' - '.join([_('ElCaTeGoRiZaToR'), self.parametres['name']]))
  
  class SimiMatLayout(DefaultMatLayout) :
  
diff --git a/openanalyse.py b/openanalyse.py

index e894bee..873f4b3 100644 (file)
--- a/openanalyse.py
+++ b/openanalyse.py
@@ -14,7 +14,7 @@ import logging
  # import des fichiers du projet
  #------------------------------------
  from chemins import ChdTxtPathOut, StatTxtPathOut, PathOut
-from layout import OpenCHDS, dolexlayout, StatLayout, WordCloudLayout, OpenCorpus, SimiLayout, SimiMatLayout, ProtoLayout, MatLayout, FreqLayout, Chi2Layout, LabbeLayout
+from layout import OpenCHDS, dolexlayout, StatLayout, WordCloudLayout, OpenCorpus, SimiLayout, SimiMatLayout, ProtoLayout, MatLayout, FreqLayout, Chi2Layout, LabbeLayout, CateLayout
  from corpus import Corpus, copycorpus
  from tableau import Tableau
  from functions import DoConf, ReadDicoAsDico
@@ -181,3 +181,6 @@ class OpenAnalyse():
              FreqLayout(self.parent, corpus, self.conf)
          elif self.conf['type'] == 'chi2' or self.conf['type'] == 'chi2mcnemar':
              Chi2Layout(self.parent, corpus, self.conf)
+        elif self.conf['type'] == 'categorisation' :
+            CateLayout(self.parent, corpus, self.conf)
+            print(self.conf)
diff --git a/profile_segment.py b/profile_segment.py

index 932a2d6..e432d96 100644 (file)
--- a/profile_segment.py
+++ b/profile_segment.py
@@ -68,7 +68,7 @@ class ProfileSegment() :
          PrintProfile(n1,to[4],NULL,to[5],NULL,clnb,"%s","%s")
          """ % (ffr(self.corpus.dictpathout['segments_classes']), ffr(self.dictpathout['prof_seg']), ffr(self.dictpathout['antiprof_seg']))
          fo = tempfile.mktemp(dir=self.parent.TEMPDIR)
-        with open(fo, 'w') as f :
+        with open(fo, 'w', encoding='utf8') as f :
              f.write(txt)
          pid = exec_rcode(self.parent.RPath, fo, wait=False)
          while pid.poll() == None :
@@ -79,7 +79,7 @@ class ProfileSegment() :
      def do_layout(self) :
          SelectTab = self.parent.nb.GetSelection()
          page = self.parent.nb.GetPage(SelectTab).TabChdSim
-        prof_seg = ReadProfileAsDico(self.dictpathout['prof_seg'], True, self.parent.syscoding)
+        prof_seg = ReadProfileAsDico(self.dictpathout['prof_seg'], True, 'utf8')
          prof_seg_nb = aui.AuiNotebook(self.parent, -1, wx.DefaultPosition)
          for i in range(0, len(self.corpus.lc)) :
              ntab = ProfListctrlPanel(self.parent, self, prof_seg[str(i + 1)], False, i + 1)
@@ -142,7 +142,7 @@ class ProfilType() :
              # write.csv2(to[[3]], file = "%s")
              # % (self.outprof)
          fo = tempfile.mktemp(dir=self.parent.TEMPDIR)
-        with open(fo, 'w') as f :
+        with open(fo, 'w', encoding='utf8') as f :
              f.write(txt)
          pid = exec_rcode(self.parent.RPath, fo, wait=False)
          while pid.poll() == None :
diff --git a/search_list.py b/search_list.py

index 4fd031f..6aff98b 100644 (file)
--- a/search_list.py
+++ b/search_list.py
@@ -73,10 +73,7 @@ class SearchList(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSort
          self.Bind(wx.EVT_COMMAND_RIGHT_CLICK, self.OnRightClick)
          # for wxGTK
          self.Bind(wx.EVT_RIGHT_UP, self.OnRightClick)
-        self.itemDataMap = dlist
-        self.itemIndexMap = list(dlist.keys())
-        self.SetItemCount(len(dlist))
-        listmix.ColumnSorterMixin.__init__(self, len(first)+2)
+
          #-----------------------------------------------------------
          first = ['id','formes']+first
          for i, name in enumerate(first) :
@@ -84,6 +81,10 @@ class SearchList(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSort
          self.SetColumnWidth(0, wx.LIST_AUTOSIZE)
          for i in range(1,len(first)-1):
              self.SetColumnWidth(i, 130)
+        self.itemDataMap = dlist
+        self.itemIndexMap = list(dlist.keys())
+        self.SetItemCount(len(dlist))
+        listmix.ColumnSorterMixin.__init__(self, len(first)+2)
          self.SortListItems(0, True)
  
      # Used by the ColumnSorterMixin, see wx/lib/mixins/listctrl.py
@@ -115,7 +116,10 @@ class SearchList(wx.ListCtrl, listmix.ListCtrlAutoWidthMixin, listmix.ColumnSort
      def OnGetItemText(self, item, col):
          index=self.itemIndexMap[item]
          s = self.itemDataMap[index][col]
-        return s
+        if isinstance(s, (int,float)):  # numeric cells are handed back as text, other values unchanged
+            return str(s)
+        else :
+            return s
  
      def OnGetItemData(self, item) :
          index = self.itemIndexMap[item]
diff --git a/search_tools.py b/search_tools.py

index 5dd03a0..bddaa5f 100644 (file)
--- a/search_tools.py
+++ b/search_tools.py
@@ -38,7 +38,7 @@ class SearchFrame(wx.Frame):
          dlg = wx.ProgressDialog("Traitements", "lecture du tableau...", maximum = 4, parent=self, style = wx.PD_APP_MODAL|wx.PD_AUTO_HIDE|wx.PD_ELAPSED_TIME)
          dlg.Center()
          dlg.Update(1)
-        with codecs.open(corpus.dictpathout['chisqtable'], 'r', parent.SysEncoding) as f :
+        with open(corpus.dictpathout['chisqtable'], 'r', encoding='utf8') as f :
              chisqtable = [line.replace('\n','').replace('"','').replace(',','.').split(';') for line in f]
          first = chisqtable[0]
          first.pop(0)
@@ -49,9 +49,9 @@ class SearchFrame(wx.Frame):
          #self.text_ctrl_1 = wx.TextCtrl(self, -1, "", style=wx.TE_MULTILINE)
          #nbactives = len(self.corpus.actives)
          dlg.Update(3)
-        with open(corpus.dictpathout['ContEtOut'], 'r') as f :
+        with open(corpus.dictpathout['ContEtOut'], 'r', encoding='utf8') as f :
              nbetoiles = len(f.readlines())
-        with open(corpus.dictpathout['Contout'], 'r') as f :
+        with open(corpus.dictpathout['Contout'], 'r', encoding='utf8') as f :
              nbactives = len(f.readlines())
          dlg.Update(4, "Ouverture...")
          self.liste = SearchList(self, parent, self.dchisqtable, first, nbactives, nbetoiles) 
diff --git a/tabcatego.py b/tabcatego.py

new file mode 100644 (file)

index 0000000..5f4052e
--- /dev/null
+++ b/tabcatego.py
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+#Author: Pierre Ratinaud
+#Copyright (c) 2008-2020 Pierre Ratinaud
+#modification pour python 3 : Laurent Mérat, 6x7 - mai 2020
+#License: GNU/GPL
+
+#------------------------------------
+# import des modules python
+#------------------------------------
+import os
+import tempfile
+import json
+import datetime
+from time import sleep
+from uuid import uuid4
+
+import langue
+langue.run()
+
+#------------------------------------
+# import des modules wx
+#------------------------------------
+import wx
+
+#------------------------------------
+# import des fichiers du projet
+#------------------------------------
+from chemins import ConstructPathOut, simipath, ffr, PathOut
+from functions import print_liste, exec_rcode, read_list_file, check_Rresult, indices_simi, treat_var_mod, normpath_win32
+from dialog import SelectColDial, FreqDialog
+from analysematrix import AnalyseMatrix
+from listlex import *
+from configparser import RawConfigParser
+
+
+class Categorisation(AnalyseMatrix):
+    """Matrix analysis run from OnCategorisation : writes cate.json and saves the tableau."""
+
+    def doparametres(self, dlg = None) :
+        """Ask which columns to categorise and set up the output directory.
+        Returns False (after setting self.parametres to None) when the dialog is cancelled."""
+        self.listactives = self.parametres.get('listactives', False)
+        self.actives = self.parametres.get('actives', False)
+        self.cmd = self.parametres.get('cmd', False)
+        self.dirout = self.parametres.get('pathout', False)
+        self.Source = None
+        if self.dirout :
+            self.pathout = PathOut(dirout = self.dirout)
+        if not self.parametres.get('isopen', False) :
+            if self.tableau is None :
+                self.tableau = self.parent.tableau  # was the undefined name 'parent' (NameError)
+            self.tableau.parametres['mineff'] = 0
+            dialcol = FreqDialog(self.parent, self.tableau.get_colnames(), _("Select columns"), size=(600, 250), showNA = False)
+            dialcol.CenterOnParent()
+            res = dialcol.ShowModal()
+            if res != wx.ID_OK :
+                dialcol.Destroy()
+                self.parametres = None
+                return False
+            if not self.actives :
+                self.tableau.selected_col = dialcol.m_listBox1.GetSelections()
+                actives = self.tableau.getactlistfromselection(self.tableau.selected_col)
+            else :
+                actives = self.actives
+            dialcol.Destroy()  # the selection has been read : release the dialog on the OK path too
+            if isinstance(actives, dict) :
+                actives = [[val, actives[val][0]] for val in actives]
+                self.tableau.actives = dict(actives)
+            self.tableau.make_listactives()
+            # NOTE(review): self.pathout is set above only when 'pathout' is given ; presumably AnalyseMatrix sets it otherwise - TODO confirm
+            if not self.pathout :
+                self.parametres['pathout'] = ConstructPathOut(self.parametres['pathout'], 'Categorisation')
+            else :
+                self.parametres['pathout'] = self.dirout
+            self.pathout.createdir(self.parametres['pathout'])
+            self.pathout.dirout = self.parametres['pathout']
+
+    def doanalyse(self) :
+        """Write cate.json and save the tableau to analyse.db, both located through self.pathout."""
+        self.dlg.Update(1)
+        self.DictForme = {}
+        self.Min = 10
+        self.Linecontent = []
+        self.tableau.dictpathout = self.pathout
+        # 'TOCATE' maps each active value to the first item of its tableau.actives entry ; 'CATE' starts empty
+        cate = {'TOCATE':{}, 'CATE':{}}
+        for val in self.tableau.actives :
+            cate['TOCATE'][val] = self.tableau.actives[val][0]
+        with open(self.pathout['cate.json'], 'w', encoding='utf8') as f :
+            f.write(json.dumps(cate))
+        self.tableau.save_tableau(self.pathout['analyse.db'])
+        self.dlg.Destroy()
+
diff --git a/tableau.py b/tableau.py

index 032b395..7325279 100644 (file)
--- a/tableau.py
+++ b/tableau.py
@@ -173,7 +173,6 @@ class Tableau() :
              self.read_ods()
          self.parametres['csvfile'] = os.path.join(self.parametres['pathout'], 'csvfile.csv')
          self.make_tmpfile()
-        print(self.parametres)
          DoConf().makeoptions(['matrix'],[self.parametres], self.parametres['ira'])
          self.parent.history.addMatrix(self.parametres)
  
@@ -181,7 +180,7 @@ class Tableau() :
          self.parametres['csvfile'] = os.path.join(self.parametres['pathout'], 'csvfile.csv')
          self.make_tmpfile()
          DoConf().makeoptions(['matrix'],[self.parametres], self.parametres['ira'])
-        self.parent.history.addMatrix(self.parametres)        
+        self.parent.history.addMatrix(self.parametres)
  
      def read_xls(self) :
          #FIXME : encodage
@@ -207,7 +206,7 @@ class Tableau() :
  
      def read_csv(self) :
          with codecs.open(self.parametres['originalpath'], 'r', self.parametres['encodage']) as f :
-            content = f.read() 
+            content = f.read()
          self.linecontent = [line.split(self.parametres['colsep']) for line in content.splitlines()]
          self.linecontent = [[val.replace('"','').replace(';',' ').replace('\t', ' ').strip() for val in line] for line in self.linecontent]
  
@@ -234,7 +233,7 @@ class Tableau() :
          else :
              self.rownames = [repr(i) for i in range(self.rownb)]
              self.idname = 'identifiant'
-        self.csvtable = [[self.idname] + self.colnames] + [[self.rownames[i]] + self.linecontent[i] for i in range(len(self.rownames))] 
+        self.csvtable = [[self.idname] + self.colnames] + [[self.rownames[i]] + self.linecontent[i] for i in range(len(self.rownames))]
          self.write_csvfile()
  
      def read_csvfile(self):
@@ -287,10 +286,9 @@ class Tableau() :
      def getactlistfromselection(self, listact) :
          selcol = self.select_col(listact)
          self.actives = self.make_dico(selcol)
-        return [[val, self.actives[val][0]] for val in self.actives]       
+        return [[val, self.actives[val][0]] for val in self.actives]
  
      def make_listactives(self) :
-        print(self.actives, self.parametres['mineff'])
          self.listactives = [val for val in self.actives if val != 'NA' and self.actives[val][0] >= self.parametres['mineff']]
  
      def write01(self, fileout, dico, linecontent) :
@@ -300,7 +298,7 @@ class Tableau() :
          for i, forme in enumerate(self.listactives) :
              for line in dico[forme][1] :
                  out[line][i] = '1'
-        #out = [[self.rownames[i]] + out[i] for i in range(len(linecontent))] 
+        #out = [[self.rownames[i]] + out[i] for i in range(len(linecontent))]
          #out.insert(0,[self.idname] + self.listactives)
          out.insert(0, self.listactives)
          with open(fileout, 'w') as f :
@@ -321,7 +319,7 @@ class Tableau() :
                      if forme[0] == '*':
                          UpdateDico(self.sups, forme, i)
                      else:
-                        UpdateDico(self.actives, forme, i)        
+                        UpdateDico(self.actives, forme, i)
          self.listactives = [val for val in self.actives if self.actives[val][0] >= self.parametres['mineff']]
          table = [['0' for i in range(len(self.listactives))] for j in range(self.rownb)]
          for i, val in enumerate(self.listactives) :
@@ -378,9 +376,9 @@ class Tableau() :
                          if sup in self.linecontent[uce]:
                              line[i + 1] += 1
              tablecontet.append([line[0]] + [repr(don) for don in line if type(don) == type(1)])
-            
+
          self.printtable(self.pathout['ContEtOut'], tablecontet)
-        self.printtable(self.pathout['Contout'], tablecont)        
+        self.printtable(self.pathout['Contout'], tablecont)
  
      def get_colnames(self) :
          return self.colnames[:]
diff --git a/textaslexico.py b/textaslexico.py

index ff1bc49..14f054b 100644 (file)
--- a/textaslexico.py
+++ b/textaslexico.py
@@ -148,7 +148,7 @@ class Lexico(AnalyseText) :
          save.image("%s")
          """ % ffr(self.dictpathout['RData'])
          tmpfile = tempfile.mktemp(dir=self.parent.TEMPDIR)
-        tmpscript = open(tmpfile, 'w')
+        tmpscript = open(tmpfile, 'w' ,encoding='utf8')
          tmpscript.write(txt)
          tmpscript.close()
          self.doR(tmpfile, dlg = self.dlg, message = 'R...')
diff --git a/textsimi.py b/textsimi.py

index 2a0ea52..08c9f63 100644 (file)
--- a/textsimi.py
+++ b/textsimi.py
@@ -59,7 +59,7 @@ class SimiTxt(AnalyseText):
          else :
              order_actives = [[i, act, self.corpus.getlemeff(act)] for i, act in enumerate(self.actives)]
              order_actives = sorted(order_actives, key=itemgetter(2), reverse = True)
-            with open(self.pathout['selected.csv'], 'w') as f :
+            with open(self.pathout['selected.csv'], 'w', encoding='utf8') as f :
                  f.write('\n'.join([repr(order_actives[val][0]) for val in self.parametres['selected']]))
              continu = True
          if continu :
@@ -129,7 +129,7 @@ class SimiTxt(AnalyseText):
          self.parametres['nbactives'] = len(self.actives)
          self.parametres['fromprof'] = False
          self.corpus.make_and_write_sparse_matrix_from_uces(self.actives, self.pathout['mat01.csv'], self.pathout['listeuce1.csv'])
-        with open(self.pathout['actives.csv'], 'w') as f :
+        with open(self.pathout['actives.csv'], 'w', encoding='utf8') as f :
              f.write('\n'.join(self.actives))
  
  
@@ -197,9 +197,9 @@ class SimiFromCluster(SimiTxt) :
          self.parametres['nbactives'] = len(self.actives)
          self.parametres['fromprof'] = True
          self.corpus.make_and_write_sparse_matrix_from_classe(self.actives, self.corpus.lc[self.numcluster], self.pathout['mat01.csv'])
-        with open(self.pathout['actives.csv'], 'w') as f :
+        with open(self.pathout['actives.csv'], 'w', encoding='utf8') as f :
              f.write('\n'.join(self.actives))
-        with open(self.pathout['actives_nb.csv'], 'w') as f :
+        with open(self.pathout['actives_nb.csv'], 'w', encoding='utf8') as f :
              f.write('\n'.join([repr(val) for val in self.lfreq]))
-        with open(self.pathout['actives_chi.csv'], 'w') as f :
+        with open(self.pathout['actives_chi.csv'], 'w', encoding='utf8') as f :
              f.write('\n'.join([repr(val) for val in self.lchi]))
diff --git a/textstat.py b/textstat.py

index 1effd2a..56e0942 100755 (executable)
--- a/textstat.py
+++ b/textstat.py
@@ -88,9 +88,9 @@ class Stat(AnalyseText) :
          open_file_graph("%s", width = 400, height = 400)
          barplot(table(stsize[,1]))
          dev.off()
-        """ % (self.pathout['stsize.csv'], self.pathout['segments_size.png'])
+        """ % (ffr(self.pathout['stsize.csv']), ffr(self.pathout['segments_size.png']))
          tmpscript = tempfile.mktemp(dir=self.parent.TEMPDIR)
-        with open(tmpscript, 'w') as f :
+        with open(tmpscript, 'w', encoding='utf8') as f :
              f.write(txt)
          pid = exec_rcode(self.parent.RPath, tmpscript, wait = False)
          while pid.poll() == None :
@@ -104,8 +104,8 @@ class Stat(AnalyseText) :
              if key != 'glob' :
                  dico = self.result[key]
                  toprint = [[dico[val][0],repr(dico[val][1]), dico[val][2]] for val in dico]
-                with open(self.pathout['%s.csv' % key], 'w') as f :
+                with open(self.pathout['%s.csv' % key], 'w',  encoding='utf8') as f :
                      f.write('\n'.join([';'.join([val for val in ligne]) for ligne in toprint]))
              else :
-                with open(self.pathout['%s.txt' % 'glob'], 'w') as f :
+                with open(self.pathout['%s.txt' % 'glob'], 'w',  encoding='utf8') as f :
                      f.write(self.result['glob'])
diff --git a/textwordcloud.py b/textwordcloud.py

index d8b6ada..aa669eb 100644 (file)
--- a/textwordcloud.py
+++ b/textwordcloud.py
@@ -95,7 +95,7 @@ class WordCloud(AnalyseText):
  
      def make_wordcloud(self) :
          act = ['\t'.join([act, repr(self.corpus.getlemeff(act))]) for act in self.actives]
-        with open(self.pathout['actives_eff.csv'], 'w') as f :
+        with open(self.pathout['actives_eff.csv'], 'w', encoding='utf8') as f :
              f.write('\n'.join(act))
  
  
@@ -115,7 +115,7 @@ class ClusterCloud(WordCloud):
              else: 
                  tokeep = 2
              prof = [[val[0], int(round(val[tokeep]))] for val in prof]
-            with open(self.pathout['actives_eff.csv'], 'w') as f :
+            with open(self.pathout['actives_eff.csv'], 'w', encoding='utf8') as f :
                  f.write('\n'.join(['\t'.join([val[0], repr(val[1])]) for val in prof]))
              dictcol = dict([[i, val] for i, val in enumerate(prof)])
              self.actives = [val[0] for val in prof]
diff --git a/tree.py b/tree.py

index 70e5d7b..4b61d84 100755 (executable)
--- a/tree.py
+++ b/tree.py
@@ -568,7 +568,7 @@ class LeftTree(CT.CustomTreeCtrl):
                  ucis_txt, ucestxt = doconcorde(corpus, nuces, page.la, uci = uci)
                  items = ['<br>'.join([ucis_txt[j], '<table bgcolor = #1BF0F7 border=0><tr><td><b>score : %.2f</b></td></tr></table><br>' % ntab2[j][0], ucestxt[j]]) for j, uce in enumerate(nuces)]
                  filename = self.page.pathout['st_caract_cl_%i.html' % (rcl+1)]
-                with open(filename, 'w') as f :
+                with open(filename, 'w', encoding='utf8') as f :
                      f.write('\n'.join(items))
              dlg.Destroy()
  
@@ -598,7 +598,7 @@ class LeftTree(CT.CustomTreeCtrl):
          uci = False
          fileout = os.path.join(os.path.dirname(self.page.pathout['ira']), 'segmented_corpus.txt')
          txt = self.page.corpus.make_cut_corpus(uci = uci)
-        with open(fileout, 'w') as f :
+        with open(fileout, 'w', encoding='utf8') as f :
              f.write(txt)
          msg = '\n'.join([_("Done !"), fileout])
          dlg = wx.MessageDialog(self.parent, msg, _("Segmented corpus"), wx.OK | wx.ICON_INFORMATION)
@@ -619,7 +619,7 @@ class LeftTree(CT.CustomTreeCtrl):
              else :
                  uci = True
              txt = self.page.corpus.make_colored_corpus(uci = uci)
-            with open(fileout, 'w') as f :
+            with open(fileout, 'w', encoding='utf8') as f :
                  f.write(txt)
              msg = ' !\n'.join([_("Done"), _("Open in a web browser ?")])
              dlg = wx.MessageDialog(self.parent, msg, "Corpus en couleur", wx.NO | wx.YES | wx.ICON_QUESTION)
@@ -657,7 +657,7 @@ class LeftTree(CT.CustomTreeCtrl):
                  find = True
                  break
          if not find :
-            open_antiprofil(self.page, self.page.dictpathout['ANTIPRO_OUT'], self.parent.syscoding)
+            open_antiprofil(self.page, self.page.dictpathout['ANTIPRO_OUT'], 'utf8')
              self.page.TabChdSim.SetSelection(self.page.TabChdSim.GetPageCount() - 1)
  
      def OnTranslate(self, evt) :
@@ -687,7 +687,7 @@ class LeftTree(CT.CustomTreeCtrl):
          if res == wx.ID_OK :
              fileout = dial.fbb.GetValue()
              dial.Destroy()
-            with open(fileout, 'w') as f :
+            with open(fileout, 'w', encoding='utf8') as f :
                  f.write(self.page.debtext + '\n' + GetTxtProfile(self.page.DictProfile, self.page.cluster_size))
              msg = "Fini !"
              dlg = wx.MessageDialog(self.parent, msg, _("Report"), wx.OK | wx.ICON_INFORMATION)
@@ -1099,6 +1099,7 @@ class LeftTree(CT.CustomTreeCtrl):
          if event is not None :
              item = event.GetItem()
              pydata = self.GetPyData(item)
+        print('selchange',pydata)
          if pydata is not None :
              if 'corpus_name' in pydata or 'corpus' in pydata :
                  self.ira.ShowMenu('matrix', False)
@@ -1118,8 +1119,11 @@ class LeftTree(CT.CustomTreeCtrl):
                          if self.page.parametres['uuid'] == pydata['uuid'] :
                              self.parent.nb.SetSelection(i)
                              break
-        if event is not None :
-            event.Skip()
+        #self.parent._mgr.Update()
+        #wx.CallAfter(self.parent.nb.SendSizeEvent)
+        self.parent.Refresh()
+        #if event is not None :
+        #    event.Skip()
  
      def OnSelChanging(self, event):
          item = event.GetItem()
author	pierre <pierre.ratinaud@univ-tlse2.fr>
	Mon, 12 Feb 2024 16:00:10 +0000 (17:00 +0100)
committer	pierre <pierre.ratinaud@univ-tlse2.fr>
	Mon, 12 Feb 2024 16:00:10 +0000 (17:00 +0100)
PrintRScript.py		patch \| blob \| history
ProfList.py		patch \| blob \| history
Rscripts/CHD.R		patch \| blob \| history
Rscripts/simi.R		patch \| blob \| history
checkinstall.py		patch \| blob \| history
configparser.py		patch \| blob \| history
configuration/global.cfg		patch \| blob \| history
configuration/iramuteq.cfg		patch \| blob \| history
corpus.py		patch \| blob \| history
dialog.py		patch \| blob \| history
elcategorizator.py	[new file with mode: 0644]	patch \| blob
functions.py		patch \| blob \| history
guifunct.py		patch \| blob \| history
iramuteq.py		patch \| blob \| history
layout.py		patch \| blob \| history
openanalyse.py		patch \| blob \| history
profile_segment.py		patch \| blob \| history
search_list.py		patch \| blob \| history
search_tools.py		patch \| blob \| history
tabcatego.py	[new file with mode: 0644]	patch \| blob
tableau.py		patch \| blob \| history
textaslexico.py		patch \| blob \| history
textsimi.py		patch \| blob \| history
textstat.py		patch \| blob \| history
textwordcloud.py		patch \| blob \| history
tree.py		patch \| blob \| history