www.iramuteq.org Git - iramuteq/blob - functions.py

   1 #!/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #Author: Pierre Ratinaud
   4 #Copyright (c) 2008-2012 Pierre Ratinaud
   5 #License: GNU/GPL
   6
   7 import wx
   8 import re
   9 from ConfigParser import ConfigParser
  10 from subprocess import Popen, call, PIPE
  11 import thread
  12 import os
  13 import ast
  14 import sys
  15 import csv
  16 import platform
  17 import traceback
  18 import codecs
  19 import locale
  20 import datetime
  21 from copy import copy
  22 from shutil import copyfile
  23 import shelve
  24 #from dialog import BugDialog
  25 import logging
  26
  27 log = logging.getLogger('iramuteq')
  28
  29
  30 indices_simi = [u'cooccurrence' ,'pourcentage de cooccurrence',u'Russel',u'Jaccard', 'Kulczynski1', 'Kulczynski2', 'Mountford', 'Fager', 'simple matching', 'Hamman', 'Faith', 'Tanimoto', 'Dice', 'Phi', 'Stiles', 'Michael', 'Mozley', 'Yule', 'Yule2', 'Ochiai', 'Simpson', 'Braun-Blanquet','Chi-squared', 'Phi-squared', 'Tschuprow', 'Cramer', 'Pearson', 'binomial']
  31
  32
  33
  34 def open_folder(folder):
  35     if sys.platform == "win32":
  36         os.startfile(folder)
  37     else:
  38         opener ="open" if sys.platform == "darwin" else "xdg-open"
  39         call([opener, folder])
  40
  41 def normpath_win32(path) :
  42     if not sys.platform == 'win32' :
  43         return path
  44     while '\\\\' in path :
  45         path = path.replace('\\\\', '\\')
  46     if path.startswith('\\') and not path.startswith('\\\\') :
  47         path = '\\' + path
  48     return path
  49
  50 class TGen :
  51     def __init__(self, path = None, encoding = 'utf8'):
  52         self.path = path
  53         self.tgen = {}
  54         self.encoding = encoding
  55
  56     def __getitem__(self, key):
  57         return self.tgen[key]
  58
  59     def read(self, path = None):
  60         if path is None :
  61             path = self.path
  62         with codecs.open(path, 'r', self.encoding) as f :
  63             tgen = f.read()
  64         tgen = [line.split('\t') for line in tgen.splitlines()]
  65         tgen = dict([[line[0], line[1:]] for line in tgen])
  66         self.tgen = tgen
  67         self.path = path
  68
  69     def write(self, path = None):
  70         if path is None :
  71             path = self.path
  72         with open(path, 'w') as f :
  73             f.write('\n'.join(['\t'.join([val] + self.tgen[val]) for val in self.tgen]).encode(self.encoding))
  74
  75     def writetable(self, pathout, tgens, totocc):
  76         etoiles = totocc.keys()
  77         etoiles.sort()
  78         with open(pathout, 'w') as f :
  79             line = '\t'.join([u'tgens'] + etoiles) + '\n'
  80             f.write(line.encode(self.encoding))
  81             for t in tgens :
  82                 line = '\t'.join([t] + [`tgens[t][et]` for et in etoiles]) + '\n'
  83                 f.write(line.encode(self.encoding))
  84             i = 0
  85             totname = 'total'
  86             while totname + `i` in tgens :
  87                 i += 1
  88             totname = totname + `i`
  89             line = '\t'.join([totname] + [`totocc[et]` for et in etoiles]) + '\n'
  90             f.write(line.encode(self.encoding))
  91
  92 class History :
  93     def __init__(self, filein, syscoding = 'utf8') :
  94         self.filein = filein
  95         self.syscoding = syscoding
  96         self.corpus = {}
  97         self.openedcorpus = {}
  98         self.openedmatrix = {}
  99         self.orph = []
 100         self.analyses = {}
 101         self.history = []
 102         self.opened = {}
 103         self.read()
 104
 105     def read(self) :
 106         d = shelve.open(self.filein)
 107         self.history = d.get('history', [])
 108         self.matrix = d.get('matrix', [])
 109         self.ordercorpus = dict([[corpus['uuid'], i] for i, corpus in enumerate(self.history)])
 110         self.corpus = dict([[corpus['uuid'], corpus] for corpus in self.history])
 111         self.analyses = dict([[analyse['uuid'], analyse] for corpus in self.history for analyse in corpus.get('analyses', [])])
 112         self.matrixanalyse = dict([[mat['uuid'], mat] for mat in self.matrix])
 113         self.ordermatrix = dict([[matrix['uuid'], i] for i, matrix in enumerate(self.matrix)])
 114         d.close()
 115
 116     def write(self) :
 117         d = shelve.open(self.filein)
 118         d['history'] = self.history
 119         d['matrix'] = self.matrix
 120         d.close()
 121
 122     def add(self, analyse) :
 123         log.info('add to history %s' % analyse.get('corpus_name', 'pas un corpus'))
 124         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 125         if tosave['uuid'] in self.corpus :
 126             log.info('problem : this uuid is already in history : %s' % tosave['uuid'])
 127             return
 128         if analyse.get('corpus', False) :
 129             if analyse['uuid'] in self.analyses :
 130                 return
 131             tosave['corpus'] = analyse['corpus']
 132             tosave['name'] = analyse['name']
 133             acorpus_uuid =  analyse['corpus']
 134             if acorpus_uuid in self.corpus :
 135                 if 'analyses' in self.history[self.ordercorpus[acorpus_uuid]] :
 136                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'].append(tosave)
 137                 else :
 138                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'] = [tosave]
 139             else :
 140                 self.orph.append(tosave)
 141         else :
 142             tosave['corpus_name'] = analyse['corpus_name']
 143             #self.ordercorpus[tosave['uuid']] = len(history)
 144             #self.corpus[tosave['uuid']] = analyse
 145             self.history.append(tosave)
 146         self.write()
 147         self.read()
 148
 149     def addMatrix(self, analyse) :
 150         tosave = analyse
 151         #tosave['matrix_name'] = analyse['matrix_name']
 152         tosave['analyses'] = []
 153         self.matrix.append(tosave)
 154         self.write()
 155         self.read()
 156
 157     def addMatrixAnalyse(self, analyse) :
 158         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type'], 'matrix' : analyse['matrix']}
 159         tosave['name'] = analyse['name']
 160         if tosave['matrix'] in self.ordermatrix :
 161             self.matrix[self.ordermatrix[tosave['matrix']]]['analyses'].append(tosave)
 162         self.write()
 163         self.read()
 164
 165     def addmultiple(self, analyses) :
 166         log.info('add multiple')
 167         for analyse in analyses :
 168             tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 169             corpus = analyse['corpus']
 170             tosave['corpus'] = corpus
 171             tosave['name'] = analyse['name']
 172             if corpus in self.corpus :
 173                 if 'analyses' in self.history[self.ordercorpus[corpus]] :
 174                     self.history[self.ordercorpus[corpus]]['analyses'].append(tosave)
 175                 else :
 176                     self.history[self.ordercorpus[corpus]]['analyses'] = [tosave]
 177         self.write()
 178         self.read()
 179
 180     def delete(self, analyse, corpus = False) :
 181         log.info('delete %s' % analyse.get('name', 'noname'))
 182         if corpus :
 183             self.history.pop(self.ordercorpus[analyse['uuid']])
 184             if analyse['uuid'] in self.openedcorpus :
 185                 del self.openedcorpus[analyse['uuid']]
 186             log.info('delete corpus : %s' % analyse['uuid'])
 187         elif analyse['uuid'] in self.analyses :
 188             todel = [i for i, ana in enumerate(self.corpus[analyse['corpus']]['analyses']) if ana['uuid'] == analyse['uuid']][0]
 189             self.history[self.ordercorpus[analyse['corpus']]]['analyses'].pop(todel)
 190         elif analyse['uuid'] in self.matrixanalyse :
 191             self.matrix = [mat for mat in self.matrix if mat['uuid'] != analyse['uuid']]
 192         self.write()
 193         self.read()
 194
 195     def addtab(self, analyse) :
 196         self.opened[analyse['uuid']] = analyse
 197
 198     def rmtab(self, analyse) :
 199         del self.opened[analyse['uuid']]
 200
 201     def update(self, analyse) :
 202         if 'matrix_name' in analyse :
 203             self.matrixanalyse[analyse['uuid']].update(analyse)
 204         elif 'corpus_name' in analyse :
 205             self.corpus[analyse['uuid']].update(analyse)
 206         elif 'corpus' in analyse :
 207             self.analyses[analyse['uuid']].update(analyse)
 208         else :
 209             toupdate = [an for an in self.matrixanalyse[analyse['matrix']]['analyses'] if an['uuid'] == analyse['uuid']]
 210             toupdate[0].update(analyse)
 211         self.write()
 212         self.read()
 213
 214     def clean(self) :
 215         corpustodel = [corpus for corpus in self.history if not os.path.exists(corpus['ira'])]
 216         print corpustodel
 217         for corpus in corpustodel :
 218             print 'cleaning :', corpus['corpus_name']
 219             self.delete(corpus, corpus = True)
 220         anatodel = [analyse for corpus in self.history for analyse in corpus.get('analyses', []) if not os.path.exists(analyse.get('ira', '/'))]
 221         for analyse in anatodel :
 222             print 'cleaning :', analyse['name']
 223             self.delete(analyse)
 224
 225     def __str__(self) :
 226         return str(self.history)
 227
 228 class DoConf :
 229     def __init__(self, configfile=None, diff = None, parametres = None) :
 230         self.configfile = configfile
 231         self.conf = ConfigParser()
 232
 233         if configfile is not None :
 234             configfile = normpath_win32(configfile)
 235             self.conf.readfp(codecs.open(configfile, 'r', 'utf8'))
 236         self.parametres = {}
 237         if parametres is not None :
 238             self.doparametres(parametres)
 239
 240     def doparametres(self, parametres) :
 241         return parametres
 242
 243     def getsections(self) :
 244         return self.conf.sections()
 245
 246     def getoptions(self, section = None, diff = None):
 247         parametres = {}
 248         if section is None :
 249             section = self.conf.sections()[0]
 250         for option in self.conf.options(section) :
 251             if self.conf.get(section, option).isdigit() :
 252                 parametres[option] = int(self.conf.get(section, option))
 253             elif self.conf.get(section, option) == 'False' :
 254                 parametres[option] = False
 255             elif self.conf.get(section, option) == 'True' :
 256                 parametres[option] = True
 257             elif self.conf.get(section, option).startswith('(') and self.conf.get(section, option).endswith(')') :
 258                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 259             elif self.conf.get(section, option).startswith('[') and self.conf.get(section, option).endswith(']') :
 260                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 261             else :
 262                 parametres[option] = self.conf.get(section, option)
 263         if 'type' not in parametres :
 264             parametres['type'] = section
 265         return parametres
 266
 267     def makeoptions(self, sections, parametres, outfile = None) :
 268         txt = ''
 269         for i, section in enumerate(sections) :
 270             txt += '[%s]\n' % section
 271             if not self.conf.has_section(section) :
 272                 self.conf.add_section(section)
 273             for option in parametres[i] :
 274                 if isinstance(parametres[i][option], int) :
 275                     self.conf.set(section, option, `parametres[i][option]`)
 276                     txt += '%s = %i\n' % (option, parametres[i][option])
 277                 elif isinstance(parametres[i][option], basestring) :
 278                     self.conf.set(section, option, parametres[i][option].encode('utf8'))
 279                     txt += '%s = %s\n' % (option, parametres[i][option])
 280                 elif isinstance(parametres[i][option], wx.Colour) :
 281                     self.conf.set(section, option, str(parametres[i][option]))
 282                     txt += '%s = %s\n' % (option, str(parametres[i][option]))
 283                 elif option == 'analyses' :
 284                     pass
 285                 else :
 286                     self.conf.set(section, option, `parametres[i][option]`)
 287                     txt += '%s = %s\n' % (option, `parametres[i][option]`)
 288         if outfile is None :
 289             outfile = self.configfile
 290         outfile = normpath_win32(outfile)
 291         with open(outfile, 'w') as f :
 292             f.write(txt.encode('utf8'))
 293             #self.conf.write(f)
 294
 295     def totext(self, parametres) :
 296         #txt = ['Corpus']
 297         txt = []
 298         for val in parametres :
 299             if isinstance(parametres[val], int) :
 300                 txt.append(' \t\t: '.join([val, `parametres[val]`]))
 301             elif isinstance(parametres[val], basestring) :
 302                 txt.append(' \t\t: '.join([val, parametres[val]]))
 303             elif val in ['listet', 'stars'] :
 304                 pass
 305             else :
 306                 txt.append(' \t\t: '.join([val, `parametres[val]`]))
 307         return '\n'.join(txt)
 308
 309
 310 def write_tab(tab, fileout) :
 311         writer = csv.writer(open(fileout, 'wb'), delimiter=';', quoting = csv.QUOTE_NONNUMERIC)
 312         writer.writerows(tab)
 313
 314 class BugDialog(wx.Dialog):
 315     def __init__(self, *args, **kwds):
 316         # begin wxGlade: MyDialog.__init__
 317         kwds["style"] = wx.DEFAULT_DIALOG_STYLE | wx.STAY_ON_TOP
 318         kwds["size"] = wx.Size(500, 200)
 319         wx.Dialog.__init__(self, *args, **kwds)
 320         self.SetTitle(kwds['title'])
 321         self.text_ctrl_1 = wx.TextCtrl(self, -1, "", style=wx.TE_MULTILINE)
 322         self.text_ctrl_1.SetBackgroundColour('#DDE8EB')
 323         self.button_1 = wx.Button(self, wx.ID_OK, "")
 324
 325         self.__set_properties()
 326         self.__do_layout()
 327         # end wxGlade
 328
 329     def __set_properties(self):
 330         # begin wxGlade: MyDialog.__set_properties
 331         self.SetMinSize(wx.Size(500, 200))
 332         self.text_ctrl_1.SetMinSize(wx.Size(500, 200))
 333
 334         # end wxGlade
 335
 336     def __do_layout(self):
 337         # begin wxGlade: MyDialog.__do_layout
 338         sizer_1 = wx.BoxSizer(wx.VERTICAL)
 339         sizer_1.Add(self.text_ctrl_1, 1, wx.EXPAND, 0)
 340         sizer_1.Add(self.button_1, 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
 341         self.SetSizer(sizer_1)
 342         sizer_1.Fit(self)
 343         self.Layout()
 344
 345
 346 def CreateIraFile(DictPathOut, clusternb, corpname='corpus_name', section = 'analyse'):
 347     AnalyseConf = ConfigParser()
 348     AnalyseConf.read(DictPathOut['ira'])
 349     AnalyseConf.add_section(section)
 350     date = datetime.datetime.now().ctime()
 351     AnalyseConf.set(section, 'date', str(date))
 352     AnalyseConf.set(section, 'clusternb', clusternb)
 353     AnalyseConf.set(section, 'corpus_name', corpname)
 354
 355     fileout = open(DictPathOut['ira'], 'w')
 356     AnalyseConf.write(fileout)
 357     fileout.close()
 358
 359 def sortedby(list, direct, *indices):
 360
 361     """
 362         sortedby: sort a list of lists (e.g. a table) by one or more indices
 363                   (columns of the table) and return the sorted list
 364
 365         e.g.
 366          for list = [[2,3],[1,2],[3,1]]:
 367          sortedby(list,1) will return [[3, 1], [1, 2], [2, 3]],
 368          sortedby(list,0) will return [[1, 2], [2, 3], [3, 1]]
 369     """
 370
 371     nlist = map(lambda x, indices=indices:
 372                  map(lambda i, x=x: x[i], indices) + [x],
 373                  list)
 374     if direct == 1:
 375         nlist.sort()
 376     elif direct == 2:
 377         nlist.sort(reverse=True)
 378     return map(lambda l: l[-1], nlist)
 379
 380 def add_type(line, dictlem):
 381     if line[4] in dictlem:
 382         line.append(dictlem[line[4]])
 383     else :
 384         line.append('')
 385     return line
 386
 387 def treat_line_alceste(i, line) :
 388     if line[0] == '*' or line[0] == '*****' :
 389         return line + ['']
 390     if line[5] == 'NA':
 391         print 'NA', line[5]
 392         pass
 393     elif float(line[5].replace(',', '.')) < 0.0001:
 394         line[5] = '< 0,0001'
 395     elif float(line[5].replace(',', '.')) > 0.05:
 396         line[5] = 'NS (%s)' % str(float(line[5].replace(',', '.')))[0:7]
 397     else:
 398         line[5] = str(float(line[5].replace(',', '.')))[0:7]
 399     return [i, int(line[0]), int(line[1]), float(line[2]), float(line[3]), line[6], line[4], line[5]]
 400
 401 def ReadProfileAsDico(File, Alceste=False, encoding = sys.getdefaultencoding()):
 402     dictlem = {}
 403     print 'lecture des profiles'
 404     FileReader = codecs.open(File, 'r', encoding)
 405     Filecontent = FileReader.readlines()
 406     FileReader.close()
 407     DictProfile = {}
 408     count = 0
 409     #rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace(',', '.').replace('\r','').split(';') for row in Filecontent]
 410     rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace('\r','').split(';') for row in Filecontent]
 411     rows.pop(0)
 412     ClusterNb = rows[0][2]
 413     rows.pop(0)
 414     clusters = [row[2] for row in rows if row[0] == u'**']
 415     valclusters = [row[1:4] for row in rows if row[0] == u'****']
 416     lp = [i for i, line in enumerate(rows) if line[0] == u'****']
 417     prof = [rows[lp[i] + 1:lp[i+1] - 1] for i in range(0, len(lp)-1)] + [rows[lp[-1] + 1:len(rows)]]
 418     if Alceste :
 419         prof = [[add_type(row, dictlem) for row in pr] for pr in prof]
 420         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 421     else :
 422         prof = [[line + [''] for line in pr] for pr in prof]
 423         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 424     for i, cluster in enumerate(clusters):
 425         DictProfile[cluster] = [valclusters[i]] + prof[i]
 426     return DictProfile
 427
 428 def GetTxtProfile(dictprofile, cluster_size) :
 429     proflist = []
 430     for classe in range(0, len(dictprofile)) :
 431         prof = dictprofile[str(classe + 1)]
 432         clinfo = cluster_size[classe]
 433         proflist.append('\n'.join([' '.join(['classe %i' % (classe + 1), '-', '%s uce sur %s - %s%%' % (clinfo[0], clinfo[1], clinfo[2])]), '\n'.join(['%5s|%5s|%6s|%6s|%8s|%8s|%20s\t%10s' % tuple([str(val) for val in line]) for line in prof if len(line)==8])]))
 434     return '\n\n'.join(proflist)
 435
 436 def formatExceptionInfo(maxTBlevel=5):
 437     cla, exc, trbk = sys.exc_info()
 438     try :
 439         excName = cla.__name__
 440     except :
 441         excName = 'None'
 442     try:
 443         excArgs = exc.args[0]
 444     except :
 445         excArgs = "<no args>"
 446     excTb = traceback.format_tb(trbk, maxTBlevel)
 447     return (excName, excArgs, excTb)
 448
 449
 450 #fonction des etudiants de l'iut
 451 def decoupercharact(chaine, longueur, longueurOptimale, separateurs = None) :
 452     """
 453         on part du dernier caractère, et on recule jusqu'au début de la chaîne.
 454         Si on trouve un '$', c'est fini.
 455         Sinon, on cherche le meilleur candidat. C'est-à-dire le rapport poids/distance le plus important.
 456     """
 457     separateurs = [[u'.', 60.0], [u'?', 60.0], [u'!', 60.0], [u'£$£', 60], [u':', 50.0], [u';', 40.0], [u',', 10.0], [u' ', 0.1]]
 458     trouve = False                 # si on a trouvé un bon séparateur
 459     iDecoupe = 0                # indice du caractere ou il faut decouper
 460
 461     # on découpe la chaine pour avoir au maximum 240 caractères
 462     longueur = min(longueur, len(chaine) - 1)
 463     chaineTravail = chaine[:longueur + 1]
 464     nbCar = longueur
 465     meilleur = ['', 0, 0]        # type, poids et position du meilleur separateur
 466
 467     # on vérifie si on ne trouve pas un '$'
 468     indice = chaineTravail.find(u'$')
 469     if indice > -1:
 470         trouve = True
 471         iDecoupe = indice
 472
 473     # si on ne trouve rien, on cherche le meilleur séparateur
 474     if not trouve:
 475         while nbCar >= 0:
 476             caractere = chaineTravail[nbCar]
 477             distance = abs(longueurOptimale - nbCar) + 1
 478             meilleureDistance = abs(longueurOptimale - meilleur[2]) + 1
 479
 480             # on vérifie si le caractére courant est une marque de ponctuation
 481             for s in separateurs:
 482                 if caractere == s[0]:
 483                     # si c'est une ponctuation
 484
 485                     if s[1] / distance > float(meilleur[1]) / meilleureDistance:
 486                         # print nbCar, s[0]
 487                         meilleur[0] = s[0]
 488                         meilleur[1] = s[1]
 489                         meilleur[2] = nbCar
 490                         trouve = True
 491                         iDecoupe = nbCar
 492
 493                     # et on termine la recherche
 494                     break
 495
 496             # on passe au caractère précédant
 497             nbCar = nbCar - 1
 498
 499     # si on a trouvé
 500     if trouve:
 501         fin = chaine[iDecoupe + 1:]
 502         retour = chaineTravail[:iDecoupe]
 503         return len(retour) > 0, retour.split(), fin
 504     # si on a rien trouvé
 505     return False, chaine.split(), ''
 506
 507
# User-facing (French) messages keyed by error code.  BugReport() looks up
# the text of a raised Exception here: a two-word message is read as
# "<code> <line number>" and the second word is appended to the matching
# text; any other message is looked up as a whole.
exceptions = {'paragrapheOT' : u"Un problème de formatage (présence d'un marqueur de paragraphe (-*) en dehors d'un texte) est survenu à la ligne ",
              'EmptyText' : u"Texte vide (probablement un problème de formatage du corpus). Le problème est apparu à la ligne ",
              'CorpusEncoding' : u"Problème d'encodage.",
              'TextBeforeTextMark' : u"Problème de formatage : du texte avant le premier marqueur de texte (****). Le problème est survenu à la ligne ",
              'MissingAnalyse' : u'Aucun fichier à cet emplacement :\n',
}
 514
 515 def BugReport(parent, error = None):
 516     for ch in parent.GetChildren():
 517         if "<class 'wx._windows.ProgressDialog'>" == str(type(ch)):
 518             ch.Destroy()
 519     excName, exc, excTb = formatExceptionInfo()
 520     if excName == 'Exception' :
 521         print exc
 522         if len(exc.split()) == 2 :
 523             mss, linenb = exc.split()
 524             if mss in exceptions :
 525                 txt = exceptions[mss] + linenb
 526             else :
 527                 txt = exc
 528         else :
 529             if exc in exceptions :
 530                 txt = exceptions[exc]
 531             else :
 532                 txt = exc
 533         title = "Information"
 534     else :
 535         txt = u'            !== BUG ==!       \n'
 536         txt += u'*************************************\n'
 537         txt += '\n'.join(excTb).replace('    ', ' ')
 538         txt += excName + '\n'
 539         txt += `exc`
 540         title = "Bug"
 541
 542     dial = BugDialog(parent, **{'title' : title})
 543     if 'Rerror' in dir(parent) :
 544         txt += parent.Rerror
 545         parent.Rerror = ''
 546     log.info(txt)
 547     dial.text_ctrl_1.write(txt)
 548     dial.CenterOnParent()
 549     dial.ShowModal()
 550     dial.Destroy()
 551
 552 def PlaySound(parent):
 553     if parent.pref.getboolean('iramuteq', 'sound') :
 554         try:
 555             if "gtk2" in wx.PlatformInfo:
 556                 error = Popen(['aplay','-q',os.path.join(parent.AppliPath,'son_fin.wav')])
 557             else :
 558                 sound = wx.Sound(os.path.join(parent.AppliPath, 'son_fin.wav'))
 559                 sound.Play(wx.SOUND_SYNC)
 560         except :
 561             print 'pas de son'
 562
 563 def ReadDicoAsDico(dicopath):
 564     with codecs.open(dicopath, 'r', 'UTF8') as f:
 565         content = f.readlines()
 566     lines = [line.rstrip('\n\r').replace(u'\n', '').replace('"', '').split('\t') for line in content if line != u'']
 567     return dict([[line[0], line[1:]] for line in lines])
 568
 569 def ReadLexique(parent, lang = 'french', filein = None):
 570     if lang != 'other' :
 571         if filein is None :
 572             parent.lexique = ReadDicoAsDico(parent.DictPath.get(lang, 'french'))
 573         else :
 574             parent.lexique = ReadDicoAsDico(filein)
 575     else :
 576         if filein is None :
 577             parent.lexique = {}
 578         else :
 579             parent.lexique = ReadDicoAsDico(filein)
 580
 581 def ReadList(filein, encoding = sys.getdefaultencoding(), sep = ';'):
 582     #file = open(filein)
 583     with codecs.open(filein, 'r', encoding) as f :
 584         content = f.read()
 585     content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
 586     #file = codecs.open(filein, 'r', encoding)
 587     #content = file.readlines()
 588     #file.close()
 589     first = content.pop(0)
 590     #first = first.replace('\n', '').replace('\r','').replace('\"', '').split(sep)
 591     dict = {}
 592     i = 0
 593     for line in content:
 594         #line = line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.')
 595         #line = line.split(';')
 596         nline = [line[0]]
 597         for val in line[1:]:
 598             if val == u'NA' :
 599                 don = ''
 600             else:
 601                 try:
 602                     don = int(val)
 603                 except:
 604                     don = float('%.5f' % float(val))
 605             nline.append(don)
 606         dict[i] = nline
 607         i += 1
 608     return dict, first
 609
 610 def exec_RCMD(rpath, command) :
 611     log.info('R CMD INSTALL %s' % command)
 612     rpath = rpath.replace('\\','\\\\')
 613     error = call(["%s" % rpath, 'CMD', 'INSTALL', "%s" % command])
 614     return error
 615
 616 def exec_rcode(rpath, rcode, wait = True, graph = False):
 617     log.info("R Script : %s" % rcode)
 618     needX11 = False
 619     if sys.platform == 'darwin' :
 620         try :
 621             macversion = platform.mac_ver()[0].split('.')
 622             if int(macversion[1]) < 5 :
 623                 needX11 = True
 624             else :
 625                 needX11 = False
 626         except :
 627             needX11 = False
 628
 629     rpath = rpath.replace('\\','\\\\')
 630     env = os.environ.copy()
 631     if sys.platform == 'darwin' and 'LC_ALL' not in env:
 632         env['LC_ALL'] = 'en_US.UTF-8'
 633     if not graph :
 634         if wait :
 635             if sys.platform == 'win32':
 636                 error = call(["%s" % rpath, "--vanilla","--slave","-f", "%s" % rcode])
 637             else :
 638                 error = call([rpath, '--slave', "--vanilla", "-f %s" % rcode, "--encoding=UTF-8"], env = env)
 639             return error
 640         else :
 641             if sys.platform == 'win32':
 642                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 643             else :
 644                 pid = Popen([rpath, '--slave', "--vanilla", "-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 645             return pid
 646     else :
 647         if wait :
 648             if sys.platform == 'win32':
 649                 error = call(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 650             elif sys.platform == 'darwin' and needX11:
 651                 os.environ['DISPLAY'] = ':0.0'
 652                 error = call([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], env = env)
 653             else :
 654                 error = call([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], env = env)
 655             return error
 656         else :
 657             if sys.platform == 'win32':
 658                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 659             elif sys.platform == 'darwin' and needX11:
 660                 os.environ['DISPLAY'] = ':0.0'
 661                 pid = Popen([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 662             else :
 663                 pid = Popen([rpath, '--vanilla','--slave',"-f %s" % rcode, "--encoding=UTF-8"], stderr = PIPE, env = env)
 664             return pid
 665
 666 def check_Rresult(parent, pid) :
 667     if isinstance(pid, Popen) :
 668         if pid.returncode != 0 :
 669             error = pid.communicate()
 670             error = [str(error[0]), error[1]]
 671             if error[1] is None :
 672                 error[1] = 'None'
 673             parent.Rerror = '\n'.join([str(pid.returncode), '\n'.join(error)])
 674             try :
 675                 raise Exception('\n'.join([u'Erreur R', '\n'.join(error[1:])]))
 676             except :
 677                 BugReport(parent)
 678             return False
 679         else :
 680             return True
 681     else :
 682         if pid != 0 :
 683             try :
 684                 raise Exception(u'Erreur R')
 685             except :
 686                 BugReport(parent)
 687             return False
 688         else :
 689             return True
 690
 691 def print_liste(filename,liste):
 692     with open(filename,'w') as f :
 693         for graph in liste :
 694             f.write(';'.join(graph)+'\n')
 695
 696 def read_list_file(filename, encoding = sys.getdefaultencoding()):
 697     with codecs.open(filename,'rU', encoding) as f :
 698         content=f.readlines()
 699         ncontent=[line.replace('\n','').split(';') for line in content if line.strip() != '']
 700     return ncontent
 701
 702
 703
 704
 705 def progressbar(self, maxi) :
 706     ira = wx.GetApp().GetTopWindow()
 707     parent = ira
 708     try :
 709         maxi = int(maxi)
 710     except :
 711         maxi = 1
 712     prog = wx.ProgressDialog("Traitements",
 713                              "Veuillez patienter...",
 714                              maximum=maxi,
 715                              parent=parent,
 716                              style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT
 717                              )
 718     prog.SetSize((400,150))
 719     prog.SetIcon(ira._icon)
 720     return prog
 721
 722 def treat_var_mod(variables) :
 723     var_mod = {}
 724     variables = list(set(variables))
 725     varmod = [variable.split('_') for variable in variables]
 726     vars = list(set([var[0] for var in varmod if len(var) >=2]))
 727     for var in vars :
 728         mods = ['_'.join(v) for v in varmod if v[0] == var]
 729         var_mod[var] = mods
 730
 731 #     for variable in variables :
 732 #         if u'_' in variable :
 733 #             forme = variable.split(u'_')
 734 #             var = forme[0]
 735 #             mod = forme[1]
 736 #             if not var in var_mod :
 737 #                 var_mod[var] = [variable]
 738 #             else :
 739 #                 if not mod in var_mod[var] :
 740 #                     var_mod[var].append(variable)
 741     return var_mod
 742
 743 def doconcorde(corpus, uces, mots, uci = False) :
 744     if not uci :
 745         ucestxt1 = [row for row in corpus.getconcorde(uces)]
 746     else :
 747         ucestxt1 = [row for row in corpus.getuciconcorde(uces)]
 748     ucestxt1 = dict(ucestxt1)
 749     ucestxt = []
 750     ucis_txt = []
 751     listmot = [corpus.getlems()[lem].formes for lem in mots]
 752     listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
 753     mothtml = ['<font color=red><b>%s</b></font>' % mot for mot in listmot]
 754     dmots = dict(zip(listmot, mothtml))
 755     for uce in uces :
 756         ucetxt = ucestxt1[uce].split()
 757         ucetxt = ' '.join([dmots.get(mot, mot) for mot in ucetxt])
 758         if not uci :
 759             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[corpus.getucefromid(uce).uci].etoiles) + '</b></p>')
 760         else :
 761             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[uce].etoiles) + '</b></p>')
 762         ucestxt.append(ucetxt)
 763     return ucis_txt, ucestxt
 764
 765
 766 def getallstcarac(corpus, analyse) :
 767    pathout = PathOut(analyse['ira'])
 768    profils =  ReadProfileAsDico(pathout['PROFILE_OUT'], Alceste, self.encoding)
 769    print profils