www.iramuteq.org Git - iramuteq/blob - functions.py

   1 # -*- coding: utf-8 -*-
   2 #Author: Pierre Ratinaud
   3 #Copyright (c) 2008-2020 Pierre Ratinaud
   4 #modification pour python 3 : Laurent Mérat, 6x7 - mai 2020
   5 #License: GNU/GPL
   6
   7 #------------------------------------
   8 # import des modules python
   9 #------------------------------------
  10 import re
  11 from subprocess import Popen, call, PIPE
  12 import _thread
  13 import os
  14 import ast
  15 import sys
  16 import csv
  17 import platform
  18 import traceback
  19 import codecs
  20 import locale
  21 import datetime
  22 from copy import copy
  23 from shutil import copyfile
  24 import shelve
  25 import json
  26 #from dialog import BugDialog
  27 import logging
  28 from operator import itemgetter
  29
  30 #------------------------------------
  31 # import des modules wx
  32 #------------------------------------
  33 import wx
  34 import wx.adv
  35
  36 #------------------------------------
  37 # import des fichiers du projet
  38 #------------------------------------
  39 from configparser import ConfigParser
  40
  41
# Logger registered under the name 'iramuteq'; used by the helpers in this file.
log = logging.getLogger('iramuteq')


# List of index names; presumably the similarity indices offered by the
# similarity analysis -- the consumer is not visible in this file, TODO confirm.
indices_simi = ['cooccurrence' ,'pourcentage de cooccurrence','Russel','Jaccard', 'Kulczynski1', 'Kulczynski2', 'Mountford', 'Fager', 'simple matching', 'Hamman', 'Faith', 'Tanimoto', 'Dice', 'Phi', 'Stiles', 'Michael', 'Mozley', 'Yule', 'Yule2', 'Ochiai', 'Simpson', 'Braun-Blanquet','Chi-squared', 'Phi-squared', 'Tschuprow', 'Cramer', 'Pearson', 'binomial']
  46
  47 def open_folder(folder):
  48     if sys.platform == "win32":
  49         os.startfile(folder)
  50     else:
  51         opener ="open" if sys.platform == "darwin" else "xdg-open"
  52         #call([opener, folder])
  53         call(["%s %s &" % (opener, folder)], shell=True)
  54
  55 def normpath_win32(path) :
  56     if not sys.platform == 'win32' :
  57         return path
  58     while '\\\\' in path :
  59         path = path.replace('\\\\', '\\')
  60     if path.startswith('\\') and not path.startswith('\\\\') :
  61         path = '\\' + path
  62     return path
  63
  64 class TGen :
  65     def __init__(self, path = None, encoding = 'utf8'):
  66         self.path = path
  67         self.tgen = {}
  68         self.encoding = encoding
  69
  70     def __getitem__(self, key):
  71         return self.tgen[key]
  72
  73     def read(self, path = None):
  74         if path is None :
  75             path = self.path
  76         with codecs.open(path, 'r', self.encoding) as f :
  77             tgen = f.read()
  78         tgen = [line.split('\t') for line in tgen.splitlines()]
  79         tgen = dict([[line[0], line[1:]] for line in tgen])
  80         self.tgen = tgen
  81         self.path = path
  82
  83     def write(self, path = None):
  84         if path is None :
  85             path = self.path
  86         with open(path, 'w') as f :
  87             f.write('\n'.join(['\t'.join([val] + self.tgen[val]) for val in self.tgen]))
  88
  89     def writetable(self, pathout, tgens, totocc):
  90         etoiles = list(totocc.keys())
  91         etoiles.sort()
  92         with open(pathout, 'w') as f :
  93             line = '\t'.join(['tgens'] + etoiles) + '\n'
  94             f.write(line)
  95             for t in tgens :
  96                 line = '\t'.join([t] + [repr(tgens[t][et]) for et in etoiles]) + '\n'
  97                 f.write(line)
  98             i = 0
  99             totname = 'total'
 100             while totname + repr(i) in tgens :
 101                 i += 1
 102             totname = totname + repr(i)
 103             line = '\t'.join([totname] + [repr(totocc[et]) for et in etoiles]) + '\n'
 104             f.write(line)
 105
 106 class History :
 107     def __init__(self, filein, syscoding = 'utf8') :
 108         self.filein = filein
 109         self.syscoding = syscoding
 110         self.corpus = {}
 111         self.openedcorpus = {}
 112         self.openedmatrix = {}
 113         self.orph = []
 114         self.analyses = {}
 115         self.history = []
 116         self.opened = {}
 117         self.read()
 118
 119     def read(self) :
 120         with open(self.filein, 'r') as fjson :
 121             d = json.load(fjson)
 122 #        d = shelve.open(self.filein, protocol=1)
 123         self.history = d.get('history', [])
 124         self.matrix = d.get('matrix', [])
 125         self.ordercorpus = dict([[corpus['uuid'], i] for i, corpus in enumerate(self.history)])
 126         self.corpus = dict([[corpus['uuid'], corpus] for corpus in self.history])
 127         self.analyses = dict([[analyse['uuid'], analyse] for corpus in self.history for analyse in corpus.get('analyses', [])])
 128         self.matrixanalyse = dict([[mat['uuid'], mat] for mat in self.matrix])
 129         self.ordermatrix = dict([[matrix['uuid'], i] for i, matrix in enumerate(self.matrix)])
 130 #        d.close()
 131
 132     def write(self) :
 133         d = {}
 134         d['history'] = self.history
 135         d['matrix'] = self.matrix
 136         with open(self.filein, 'w') as f :
 137             f.write(json.dumps(d, indent=4, default=str))
 138        #d = shelve.open(self.filein, protocol=1)
 139        #d.close()
 140
 141     def add(self, analyse) :
 142         log.info('add to history %s' % analyse.get('corpus_name', 'pas un corpus'))
 143         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 144         if tosave['uuid'] in self.corpus :
 145             log.info('problem : this uuid is already in history : %s' % tosave['uuid'])
 146             return
 147         if analyse.get('corpus', False) :
 148             if analyse['uuid'] in self.analyses :
 149                 return
 150             tosave['corpus'] = analyse['corpus']
 151             tosave['name'] = analyse['name']
 152             acorpus_uuid =  analyse['corpus']
 153             if acorpus_uuid in self.corpus :
 154                 if 'analyses' in self.history[self.ordercorpus[acorpus_uuid]] :
 155                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'].append(tosave)
 156                 else :
 157                     self.history[self.ordercorpus[acorpus_uuid]]['analyses'] = [tosave]
 158             else :
 159                 self.orph.append(tosave)
 160         else :
 161             tosave['corpus_name'] = analyse['corpus_name']
 162             #self.ordercorpus[tosave['uuid']] = len(history)
 163             #self.corpus[tosave['uuid']] = analyse
 164             self.history.append(tosave)
 165         self.write()
 166         self.read()
 167
 168     def addMatrix(self, analyse) :
 169         tosave = analyse
 170         #tosave['matrix_name'] = analyse['matrix_name']
 171         tosave['analyses'] = []
 172         self.matrix.append(tosave)
 173         self.write()
 174         self.read()
 175
 176     def addMatrixAnalyse(self, analyse) :
 177         tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type'], 'matrix' : analyse['matrix']}
 178         tosave['name'] = analyse['name']
 179         if tosave['matrix'] in self.ordermatrix :
 180             self.matrix[self.ordermatrix[tosave['matrix']]]['analyses'].append(tosave)
 181         self.write()
 182         self.read()
 183
 184     def addmultiple(self, analyses) :
 185         log.info('add multiple')
 186         for analyse in analyses :
 187             tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
 188             corpus = analyse['corpus']
 189             tosave['corpus'] = corpus
 190             tosave['name'] = analyse['name']
 191             if corpus in self.corpus :
 192                 if 'analyses' in self.history[self.ordercorpus[corpus]] :
 193                     self.history[self.ordercorpus[corpus]]['analyses'].append(tosave)
 194                 else :
 195                     self.history[self.ordercorpus[corpus]]['analyses'] = [tosave]
 196         self.write()
 197         self.read()
 198
 199     def delete(self, analyse, corpus = False) :
 200         log.info('delete %s' % analyse.get('name', 'noname'))
 201         if corpus :
 202             self.history.pop(self.ordercorpus[analyse['uuid']])
 203             if analyse['uuid'] in self.openedcorpus :
 204                 del self.openedcorpus[analyse['uuid']]
 205             log.info('delete corpus : %s' % analyse['uuid'])
 206         elif analyse['uuid'] in self.analyses :
 207             todel = [i for i, ana in enumerate(self.corpus[analyse['corpus']]['analyses']) if ana['uuid'] == analyse['uuid']][0]
 208             self.history[self.ordercorpus[analyse['corpus']]]['analyses'].pop(todel)
 209         elif analyse['uuid'] in self.matrixanalyse :
 210             self.matrix = [mat for mat in self.matrix if mat['uuid'] != analyse['uuid']]
 211         elif analyse.get('matrix', False) in self.matrixanalyse :
 212             analyses = self.matrix[self.ordermatrix[analyse['matrix']]]['analyses']
 213             topop = [i for i, val in enumerate(analyses) if analyse['uuid'] == val['uuid']][0]
 214             analyses.pop(topop)
 215             self.matrix[self.ordermatrix[analyse['matrix']]]['analyses'] = analyses
 216         self.write()
 217         self.read()
 218
 219     def addtab(self, analyse) :
 220         self.opened[analyse['uuid']] = analyse
 221
 222     def rmtab(self, analyse) :
 223         del self.opened[analyse['uuid']]
 224
 225     def update(self, analyse) :
 226         if 'matrix_name' in analyse :
 227             self.matrixanalyse[analyse['uuid']].update(analyse)
 228         elif 'corpus_name' in analyse :
 229             self.corpus[analyse['uuid']].update(analyse)
 230         elif 'corpus' in analyse :
 231             self.analyses[analyse['uuid']].update(analyse)
 232         else :
 233             toupdate = [an for an in self.matrixanalyse[analyse['matrix']]['analyses'] if an['uuid'] == analyse['uuid']]
 234             toupdate[0].update(analyse)
 235         self.write()
 236         self.read()
 237
 238     def clean(self) :
 239         corpustodel = [corpus for corpus in self.history if not os.path.exists(corpus['ira'])]
 240         print(corpustodel)
 241         for corpus in corpustodel :
 242             print('cleaning :', corpus['corpus_name'])
 243             self.delete(corpus, corpus = True)
 244         anatodel = [analyse for corpus in self.history for analyse in corpus.get('analyses', []) if not os.path.exists(analyse.get('ira', '/'))]
 245         for analyse in anatodel :
 246             print('cleaning :', analyse['name'])
 247             self.delete(analyse)
 248
 249     def dostat(self):
 250         todel = {}
 251         tokens = 0
 252         corpusnb = {}
 253         subnb = 0
 254         analysenb = 0
 255         hours = 0
 256         minutes = 0
 257         secondes = 0
 258         ha = 0
 259         ma = 0
 260         sa = 0
 261         for corpus in self.history :
 262             analysenb += len(corpus.get('analyses', []))
 263             analyses = corpus.get('analyses', [])
 264             for analyse in analyses :
 265                 if os.path.exists(analyse['ira']) :
 266                     ana = DoConf(analyse['ira']).getoptions()
 267                     if 'time' in ana :
 268                         time = ana['time'].split()
 269                         ha += int(time[0].replace('h','')) * 3600
 270                         ma += int(time[1].replace('m','')) * 60
 271                         sa += int(time[2].replace('s',''))
 272             if os.path.exists(corpus['ira']) :
 273                 param = DoConf(corpus['ira']).getoptions()
 274                 time = param.get('time','0h 0m 0s')
 275                 time = time.split()
 276                 hours += int(time[0].replace('h','')) * 3600
 277                 minutes += int(time[1].replace('m','')) * 60
 278                 secondes += int(time[2].replace('s',''))
 279                 if param.get('originalpath', False) :
 280                     if param['originalpath'] in corpusnb :
 281                         corpusnb[param['originalpath']] += 1
 282                         tokens += int(param['occurrences'])
 283                     else :
 284                         corpusnb[param['originalpath']] = 1
 285                     #print param
 286                 else :
 287                     subnb += 1
 288             else :
 289                 if corpus['ira'] in todel :
 290                     todel['ira'] += 1
 291                 else :
 292                     todel['ira'] = 1
 293         print('Nbr total de corpus : %s' % len(self.history))
 294         corpus_nb = len(corpusnb) + len(todel)
 295         print('Nbr de corpus différents : %s' % corpus_nb)
 296         lentodel = len(todel)
 297         print('Nbr de corpus à supprimer : %s' % lentodel)
 298         print('Nbr de sous corpus : %s' % subnb)
 299         print("Nbr total d'occurrences : %s" % tokens)
 300         print('Moyenne occurrences par corpus : %f' % (tokens/corpus_nb))
 301         print('---------------------')
 302         print("Nbr total d'analyses : %s" % analysenb)
 303         print('Temps total indexation : %f h' % ((hours+minutes+secondes) / 3600))
 304         print('Temps total analyses :  %f h' % ((ha+ma+sa) / 3600))
 305
 306     def __str__(self) :
 307         return str(self.history)
 308
 309 class DoConf :
 310     def __init__(self, configfile=None, diff = None, parametres = None) :
 311         self.configfile = configfile
 312         self.conf = ConfigParser(interpolation=None) # pourquoi ce paramètre ???
 313
 314         if configfile is not None :
 315             configfile = normpath_win32(configfile)
 316             self.conf.read_file(codecs.open(configfile, 'r', 'utf8'))
 317         self.parametres = {}
 318         if parametres is not None :
 319             self.doparametres(parametres)
 320
 321     def doparametres(self, parametres) :
 322         return parametres
 323
 324     def getsections(self) :
 325         return self.conf.sections()
 326
 327     def getoptions(self, section = None, diff = None):
 328         parametres = {}
 329         if section is None :
 330             section = self.conf.sections()[0]
 331         for option in self.conf.options(section) :
 332             if self.conf.get(section, option).isdigit() :
 333                 parametres[option] = int(self.conf.get(section, option))
 334             elif self.conf.get(section, option) == 'False' :
 335                 parametres[option] = False
 336             elif self.conf.get(section, option) == 'True' :
 337                 parametres[option] = True
 338             elif self.conf.get(section, option).startswith('(') and self.conf.get(section, option).endswith(')') :
 339                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 340             elif self.conf.get(section, option).startswith('[') and self.conf.get(section, option).endswith(']') :
 341                 parametres[option] = ast.literal_eval(self.conf.get(section, option))
 342             else :
 343                 parametres[option] = self.conf.get(section, option)
 344         if 'type' not in parametres :
 345             parametres['type'] = section
 346         return parametres
 347
 348     def makeoptions(self, sections, parametres, outfile = None) :
 349         txt = ''
 350         for i, section in enumerate(sections) :
 351             txt += '[%s]\n' % section
 352             if not self.conf.has_section(section) :
 353                 self.conf.add_section(section)
 354             for option in parametres[i] :
 355                 if isinstance(parametres[i][option], int) :
 356                     self.conf.set(section, option, repr(parametres[i][option]))
 357                     txt += '%s = %i\n' % (option, parametres[i][option])
 358                 elif isinstance(parametres[i][option], str) :
 359                     self.conf.set(section, option, parametres[i][option])
 360                     txt += '%s = %s\n' % (option, parametres[i][option])
 361                 elif isinstance(parametres[i][option], wx.Colour) :
 362                     self.conf.set(section, option, str(parametres[i][option]))
 363                     txt += '%s = %s\n' % (option, str(parametres[i][option]))
 364                 elif option == 'analyses' :
 365                     pass
 366                 else :
 367                     self.conf.set(section, option, repr(parametres[i][option]))
 368                     txt += '%s = %s\n' % (option, repr(parametres[i][option]))
 369         if outfile is None :
 370             outfile = self.configfile
 371         outfile = normpath_win32(outfile)
 372         with open(outfile, 'w') as f :
 373             f.write(txt)
 374             #self.conf.write(f)
 375
 376     def totext(self, parametres) :
 377         #txt = ['Corpus']
 378         txt = []
 379         for val in parametres :
 380             if isinstance(parametres[val], int) :
 381                 txt.append(' \t\t: '.join([val, repr(parametres[val])]))
 382             elif isinstance(parametres[val], str) :
 383                 txt.append(' \t\t: '.join([val, parametres[val]]))
 384             elif val in ['listet', 'stars'] :
 385                 pass
 386             else :
 387                 txt.append(' \t\t: '.join([val, repr(parametres[val])]))
 388         return '\n'.join(txt)
 389
 390
 391 def write_tab(tab, fileout) :
 392         csvWriter = csv.writer(open(fileout, 'w'), delimiter=';', quoting = csv.QUOTE_NONNUMERIC)
 393         csvWriter.writerows(tab)
 394
 395 class BugDialog(wx.Dialog):
 396     def __init__(self, *args, **kwds):
 397         # begin wxGlade: MyDialog.__init__
 398         kwds["style"] = wx.DEFAULT_DIALOG_STYLE | wx.STAY_ON_TOP
 399         kwds["size"] = wx.Size(500, 200)
 400         wx.Dialog.__init__(self, *args, **kwds)
 401         self.SetTitle(kwds['title'])
 402         self.text_ctrl_1 = wx.TextCtrl(self, -1, "", style=wx.TE_MULTILINE)
 403         self.text_ctrl_1.SetBackgroundColour('#DDE8EB')
 404         self.button_1 = wx.Button(self, wx.ID_OK, "")
 405
 406         self.__set_properties()
 407         self.__do_layout()
 408         # end wxGlade
 409
 410     def __set_properties(self):
 411         # begin wxGlade: MyDialog.__set_properties
 412         self.SetMinSize(wx.Size(500, 200))
 413         self.text_ctrl_1.SetMinSize(wx.Size(500, 200))
 414
 415         # end wxGlade
 416
 417     def __do_layout(self):
 418         # begin wxGlade: MyDialog.__do_layout
 419         sizer_1 = wx.BoxSizer(wx.VERTICAL)
 420         sizer_1.Add(self.text_ctrl_1, 1, wx.EXPAND, 0)
 421         sizer_1.Add(self.button_1, 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
 422         self.SetSizer(sizer_1)
 423         sizer_1.Fit(self)
 424         self.Layout()
 425
 426
 427 def CreateIraFile(DictPathOut, clusternb, corpname='corpus_name', section = 'analyse'):
 428     AnalyseConf = ConfigParser()
 429     AnalyseConf.read(DictPathOut['ira'])
 430     AnalyseConf.add_section(section)
 431     date = datetime.datetime.now().ctime()
 432     AnalyseConf.set(section, 'date', str(date))
 433     AnalyseConf.set(section, 'clusternb', clusternb)
 434     AnalyseConf.set(section, 'corpus_name', corpname)
 435
 436     fileout = open(DictPathOut['ira'], 'w')
 437     AnalyseConf.write(fileout)
 438     fileout.close()
 439
 440 def multisort(liste2d, ordre, indices_tri):
 441
 442     """
 443     methode destinée à remplacer 'comp' qui a disparu en Python 3
 444         tri de tuples sur l'un des éléments du tuple
 445         en principe, elle doit renvoyer les éléments triés selon le principe d'avant
 446         tel que décrit dans la docstring de 'sortedby'
 447
 448         probablement à améliorer pour la rendre d'usage plus général
 449         en acceptant un nombre variable de parametres ???
 450     """
 451
 452     indices_triTuple = indices_tri.Tuple(int, ...)
 453     for key in reversed(indices_tri):
 454         liste2d.sort(key=attrgetter(key), reverse=ordre)
 455     return liste2d
 456
 457 def sortedby(liste2d, direct, *indices):
 458
 459     """
 460         sortedby: sort a list of lists (e.g. a table) by one or more indices
 461                   (columns of the table) and return the sorted list
 462
 463         e.g.
 464          for list = [[2,3],[1,2],[3,1]]:
 465          sortedby(list,1) will return [[3, 1], [1, 2], [2, 3]],
 466          sortedby(list,0) will return [[1, 2], [2, 3], [3, 1]]
 467
 468          elle n'est pas remplacée par la méthode 'multisort' ???
 469
 470     """
 471
 472 # iramuteq original
 473 #    nlist = map(lambda x, indices=indices:
 474 #                 map(lambda i, x=x: x[i], indices) + [x],
 475 #                 list)
 476
 477 # iramuteq passé à 2to3
 478 #    nlist = list(map(lambda x, indices=indices:
 479 #                 list(map(lambda i, x=x: x[i], indices)) + [x],
 480 #                 liste2d))
 481
 482     for key in reversed(indices):
 483         liste2d.sort(key=itemgetter(key), reverse=(direct==2))
 484     return liste2d
 485
 486
 487 #    if direct == 1:
 488 #        nlist.sort()
 489 #         sorted_list = multisort(liste2d, direct, *indices)
 490
 491 #    elif direct == 2:
 492 #        nlist.sort(reverse=True)
 493 #         sorted_list = multisort(liste2d, direct, *indices)
 494
 495 #    return [l[-1] for l in nlist]
 496 #    return sorted_list
 497
 498 def add_type(line, dictlem):
 499     if line[4] in dictlem:
 500         line.append(dictlem[line[4]])
 501     else :
 502         line.append('')
 503     return line
 504
 505 def treat_line_alceste(i, line) :
 506     if line[0] == '*' or line[0] == '*****' :
 507         return line + ['']
 508     if line[5] == 'NA':
 509         print('NA', line[5])
 510         pass
 511     elif float(line[5].replace(',', '.')) < 0.0001:
 512         line[5] = '< 0,0001'
 513     elif float(line[5].replace(',', '.')) > 0.05:
 514         line[5] = 'NS (%s)' % str(float(line[5].replace(',', '.')))[0:7]
 515     else:
 516         line[5] = str(float(line[5].replace(',', '.')))[0:7]
 517     return [i, int(line[0]), int(line[1]), float(line[2]), float(line[3]), line[6], line[4], line[5]]
 518
 519 def ReadProfileAsDico(File, Alceste=False, encoding = sys.getdefaultencoding()):
 520     dictlem = {}
 521     print('lecture des profiles')
 522     FileReader = codecs.open(File, 'r', encoding)
 523     Filecontent = FileReader.readlines()
 524     FileReader.close()
 525     DictProfile = {}
 526     count = 0
 527     #rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace(',', '.').replace('\r','').split(';') for row in Filecontent]
 528     rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace('\r','').split(';') for row in Filecontent]
 529     rows.pop(0)
 530     ClusterNb = rows[0][2]
 531     rows.pop(0)
 532     clusters = [row[2] for row in rows if row[0] == '**']
 533     valclusters = [row[1:4] for row in rows if row[0] == '****']
 534     lp = [i for i, line in enumerate(rows) if line[0] == '****']
 535     prof = [rows[lp[i] + 1:lp[i+1] - 1] for i in range(0, len(lp)-1)] + [rows[lp[-1] + 1:len(rows)]]
 536     if Alceste :
 537         prof = [[add_type(row, dictlem) for row in pr] for pr in prof]
 538         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 539     else :
 540         prof = [[line + [''] for line in pr] for pr in prof]
 541         prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
 542     for i, cluster in enumerate(clusters):
 543         DictProfile[cluster] = [valclusters[i]] + prof[i]
 544     return DictProfile
 545
 546 def GetTxtProfile(dictprofile, cluster_size) :
 547     proflist = []
 548     for classe in range(0, len(dictprofile)) :
 549         prof = dictprofile[str(classe + 1)]
 550         clinfo = cluster_size[classe]
 551         proflist.append('\n'.join([' '.join(['classe %i' % (classe + 1), '-', '%s uce sur %s - %s%%' % (clinfo[0], clinfo[1], clinfo[2])]), '\n'.join(['%5s|%5s|%6s|%6s|%8s|%8s|%20s\t%10s' % tuple([str(val) for val in line]) for line in prof if len(line)==8])]))
 552     return '\n\n'.join(proflist)
 553
 554 def formatExceptionInfo(maxTBlevel=5):
 555     cla, exc, trbk = sys.exc_info()
 556     try :
 557         excName = cla.__name__
 558     except :
 559         excName = 'None'
 560     try:
 561         excArgs = exc.args[0]
 562     except :
 563         excArgs = "<no args>"
 564     excTb = traceback.format_tb(trbk, maxTBlevel)
 565     return (excName, excArgs, excTb)
 566
 567
 568 #fonction des etudiants de l'iut
 569 def decoupercharact(chaine, longueur, longueurOptimale, separateurs = None) :
 570     """
 571         on part du dernier caractère, et on recule jusqu'au début de la chaîne.
 572         Si on trouve un '$', c'est fini.
 573         Sinon, on cherche le meilleur candidat. C'est-à-dire le rapport poids/distance le plus important.
 574     """
 575     separateurs = [['.', 60.0], ['?', 60.0], ['!', 60.0], ['£$£', 60], [':', 50.0], [';', 40.0], [',', 10.0], [' ', 0.1]]
 576     trouve = False                 # si on a trouvé un bon séparateur
 577     iDecoupe = 0                # indice du caractere ou il faut decouper
 578
 579     # on découpe la chaine pour avoir au maximum 240 caractères
 580     longueur = min(longueur, len(chaine) - 1)
 581     chaineTravail = chaine[:longueur + 1]
 582     nbCar = longueur
 583     meilleur = ['', 0, 0]        # type, poids et position du meilleur separateur
 584
 585     # on vérifie si on ne trouve pas un '$'
 586     indice = chaineTravail.find('$')
 587     if indice > -1:
 588         trouve = True
 589         iDecoupe = indice
 590
 591     # si on ne trouve rien, on cherche le meilleur séparateur
 592     if not trouve:
 593         while nbCar >= 0:
 594             caractere = chaineTravail[nbCar]
 595             distance = abs(longueurOptimale - nbCar) + 1
 596             meilleureDistance = abs(longueurOptimale - meilleur[2]) + 1
 597
 598             # on vérifie si le caractére courant est une marque de ponctuation
 599             for s in separateurs:
 600                 if caractere == s[0]:
 601                     # si c'est une ponctuation
 602
 603                     if s[1] / distance > float(meilleur[1]) / meilleureDistance:
 604                         # print nbCar, s[0]
 605                         meilleur[0] = s[0]
 606                         meilleur[1] = s[1]
 607                         meilleur[2] = nbCar
 608                         trouve = True
 609                         iDecoupe = nbCar
 610
 611                     # et on termine la recherche
 612                     break
 613
 614             # on passe au caractère précédant
 615             nbCar = nbCar - 1
 616
 617     # si on a trouvé
 618     if trouve:
 619         fin = chaine[iDecoupe + 1:]
 620         retour = chaineTravail[:iDecoupe]
 621         return len(retour) > 0, retour.split(), fin
 622     # si on a rien trouvé
 623     return False, chaine.split(), ''
 624
 625
# Messages shown to the user, keyed by the code carried as the first argument
# of a raised Exception; BugReport looks the code up here and, when the
# argument is made of two words, appends the second one to the message.
exceptions = {'paragrapheOT' : "Un problème de formatage (présence d'un marqueur de paragraphe (-*) en dehors d'un texte) est survenu à la ligne ",
              'EmptyText' : "Texte vide (probablement un problème de formatage du corpus). Le problème est apparu à la ligne ",
              'CorpusEncoding' : "Problème d'encodage.",
              'TextBeforeTextMark' : "Problème de formatage : du texte avant le premier marqueur de texte (****). Le problème est survenu à la ligne ",
              'MissingAnalyse' : 'Aucun fichier à cet emplacement :\n',
}
 632
 633 def BugReport(parent, error = None):
 634     for ch in parent.GetChildren():
 635         if "<class 'wx._windows.ProgressDialog'>" == str(type(ch)):
 636             ch.Destroy()
 637     excName, exc, excTb = formatExceptionInfo()
 638     if excName == 'Exception' :
 639         print(exc)
 640         if len(exc.split()) == 2 :
 641             mss, linenb = exc.split()
 642             if mss in exceptions :
 643                 txt = exceptions[mss] + linenb
 644             else :
 645                 txt = exc
 646         else :
 647             if exc in exceptions :
 648                 txt = exceptions[exc]
 649             else :
 650                 txt = exc
 651         title = "Information"
 652     else :
 653         txt = '\n            !== BUG ==!       \n'
 654         txt += '*************************************\n'
 655         txt += '\n'.join(excTb).replace('    ', ' ')
 656         txt += excName + '\n'
 657         txt += repr(exc)
 658         title = "Bug"
 659
 660     dial = BugDialog(parent, **{'title' : title})
 661     if 'Rerror' in dir(parent) :
 662         txt += parent.Rerror
 663         parent.Rerror = ''
 664     log.info(txt)
 665     dial.text_ctrl_1.write(txt)
 666     dial.CenterOnParent()
 667     dial.ShowModal()
 668     dial.Destroy()
 669
 670 def PlaySound(parent):
 671     if parent.pref.getboolean('iramuteq', 'sound') :
 672         try:
 673             if "gtk2" in wx.PlatformInfo:
 674                 error = Popen(['aplay','-q',os.path.join(parent.AppliPath,'son_fin.wav')])
 675             else :
 676                 sound = wx.adv.Sound(os.path.join(parent.AppliPath, 'son_fin.wav'))
 677                 sound.Play(wx.adv.SOUND_SYNC)
 678         except :
 679             print('pas de son')
 680
 681 def ReadDicoAsDico(dicopath):
 682     with codecs.open(dicopath, 'r', 'UTF8') as f:
 683         content = f.readlines()
 684     lines = [line.rstrip('\n\r').replace('\n', '').replace('"', '').split('\t') for line in content if line != '']
 685     return dict([[line[0], line[1:]] for line in lines])
 686
 687 def ReadLexique(parent, lang = 'french', filein = None):
 688     if lang != 'other' :
 689         if filein is None :
 690             parent.lexique = ReadDicoAsDico(parent.DictPath.get(lang, 'french'))
 691         else :
 692             parent.lexique = ReadDicoAsDico(filein)
 693     else :
 694         if filein is None :
 695             parent.lexique = {}
 696         else :
 697             parent.lexique = ReadDicoAsDico(filein)
 698
 699 def ReadList(filein, encoding = sys.getdefaultencoding(), sep = ';'):
 700     #file = open(filein)
 701     with codecs.open(filein, 'r', encoding) as f :
 702         content = f.read()
 703     content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
 704     #file = codecs.open(filein, 'r', encoding)
 705     #content = file.readlines()
 706     #file.close()
 707     first = content.pop(0)
 708     #first = first.replace('\n', '').replace('\r','').replace('\"', '').split(sep)
 709     dict = {}
 710     i = 0
 711     for line in content:
 712         #line = line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.')
 713         #line = line.split(';')
 714         nline = [line[0]]
 715         for val in line[1:]:
 716             if val == 'NA' :
 717                 don = ''
 718             else:
 719                 try:
 720                     don = int(val)
 721                 except:
 722                     don = float('%.5f' % float(val))
 723             nline.append(don)
 724         dict[i] = nline
 725         i += 1
 726     return dict, first
 727
 728 def exec_RCMD(rpath, command) :
 729     log.info('R CMD INSTALL %s' % command)
 730     rpath = rpath.replace('\\','\\\\')
 731     error = call(["%s" % rpath, 'CMD', 'INSTALL', "%s" % command])
 732     return error
 733
 734 def exec_rcode(rpath, rcode, wait = True, graph = False):
 735     log.info("R Script : %s" % rcode)
 736     needX11 = False
 737     if sys.platform == 'darwin' :
 738         try :
 739             macversion = platform.mac_ver()[0].split('.')
 740             if int(macversion[1]) < 5 :
 741                 needX11 = True
 742             else :
 743                 needX11 = False
 744         except :
 745             needX11 = False
 746     rpath = rpath.replace('\\','\\\\')
 747     env = os.environ.copy()
 748     if sys.platform == 'darwin' and 'LC_ALL' not in env:
 749         env['LC_ALL'] = 'en_US.UTF-8'
 750     if not graph :
 751         if wait :
 752             if sys.platform == 'win32':
 753                 error = call(["%s" % rpath, "--vanilla","--slave","-f", "%s" % rcode])
 754             else :
 755                 error = call([rpath, '--slave', "--vanilla", "--encoding=UTF-8", "-f %s" % rcode], env = env)
 756             return error
 757         else :
 758             if sys.platform == 'win32':
 759                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 760             else :
 761                 pid = Popen([rpath, '--slave', "--vanilla", "--encoding=UTF-8", "-f %s" % rcode], stderr = PIPE, env = env, encoding='UTF-8') #PIPE ou STDOUT ?
 762             return pid
 763     else :
 764         if wait :
 765             if sys.platform == 'win32':
 766                 error = call(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 767             elif sys.platform == 'darwin' and needX11:
 768                 os.environ['DISPLAY'] = ':0.0'
 769                 error = call([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], env = env, encoding='UTF-8')
 770             else :
 771                 error = call([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], env = env, encoding='UTF-8')
 772             return error
 773         else :
 774             if sys.platform == 'win32':
 775                 pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
 776             elif sys.platform == 'darwin' and needX11:
 777                 os.environ['DISPLAY'] = ':0.0'
 778                 pid = Popen([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], stderr = PIPE, env = env, encoding='UTF-8')
 779             else :
 780                 pid = Popen([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], stderr = PIPE, env = env, encoding='UTF-8')
 781             return pid
 782
 783 def check_Rresult(parent, pid) :
 784     if isinstance(pid, Popen) :
 785         if pid.returncode != 0 :
 786             error = pid.communicate()
 787             error = [str(error[0]), error[1]]
 788             if error[1] is None :
 789                 error[1] = 'None'
 790             parent.Rerror = '\n'.join([str(pid.returncode), '\n'.join(error)])
 791             try :
 792                 raise Exception('\n'.join(['Erreur R', '\n'.join(error[1:])]))
 793             except :
 794                 BugReport(parent)
 795             return False
 796         else :
 797             return True
 798     else :
 799         if pid != 0 :
 800             try :
 801                 raise Exception('Erreur R')
 802             except :
 803                 BugReport(parent)
 804             return False
 805         else :
 806             return True
 807
 808
def launchcommand(mycommand):
    """Start ``mycommand`` as a child process through ``Popen``.

    The call does not wait for the process and the ``Popen`` object is
    discarded, so nothing is returned to the caller.
    """
    Popen(mycommand)
 811
 812 def print_liste(filename,liste):
 813     with open(filename,'w') as f :
 814         for graph in liste :
 815             f.write(';'.join(graph) +'\n')
 816 def read_list_file(filename, encoding = sys.getdefaultencoding()):
 817     with codecs.open(filename,'r', encoding) as f:
 818         content=f.readlines()
 819         ncontent=[line.replace('\n','').split(';') for line in content if line.strip() != '']
 820     return ncontent
 821
 822 def progressbar(self, maxi):
 823     ira = wx.GetApp().GetTopWindow()
 824     parent = ira
 825     try:
 826         maxi = int(maxi)
 827     except:
 828         maxi = 1
 829     prog = wx.ProgressDialog("Traitements",
 830                              "Veuillez patienter...",
 831                              maximum=maxi,
 832                              parent=parent,
 833                              style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT
 834                              )
 835                              # parent ???
 836     # le ABORT n'est pas géré à tous les coups ???
 837     prog.SetSize((400,150))
 838     #prog.SetIcon(ira._icon)
 839     return prog
 840
 841 def treat_var_mod(variables) :
 842     var_mod = {}
 843     variables = list(set(variables))
 844     varmod = [variable.split('_') for variable in variables]
 845     vars = list(set([var[0] for var in varmod if len(var) >=2]))
 846     for var in vars :
 847         mods = ['_'.join(v) for v in varmod if v[0] == var]
 848         var_mod[var] = mods
 849
 850 #     for variable in variables :
 851 #         if '_' in variable :
 852 #             forme = variable.split('_')
 853 #             var = forme[0]
 854 #             mod = forme[1]
 855 #             if not var in var_mod :
 856 #                 var_mod[var] = [variable]
 857 #             else :
 858 #                 if not mod in var_mod[var] :
 859 #                     var_mod[var].append(variable)
 860     return var_mod
 861
 862 def doconcorde(corpus, uces, mots, uci = False) :
 863     if not uci :
 864         ucestxt1 = [row for row in corpus.getconcorde(uces)]
 865     else :
 866         ucestxt1 = [row for row in corpus.getuciconcorde(uces)]
 867     ucestxt1 = dict(ucestxt1)
 868     ucestxt = []
 869     ucis_txt = []
 870     listmot = [corpus.getlems()[lem].formes for lem in mots]
 871     listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
 872     mothtml = ['<font color=red><b>%s</b></font>' % mot for mot in listmot]
 873     dmots = dict(list(zip(listmot, mothtml)))
 874     for uce in uces :
 875         ucetxt = ucestxt1[uce].split()
 876         ucetxt = ' '.join([dmots.get(mot, mot) for mot in ucetxt])
 877         if not uci :
 878             uciid = corpus.getucefromid(uce).uci
 879             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[corpus.getucefromid(uce).uci].etoiles) + '<a href="%i_%i"> *%i_%i</a></b></p>' % (uciid, uce, uciid, uce))
 880         else :
 881             ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[uce].etoiles) + '</b></p>')
 882         ucestxt.append(ucetxt)
 883     return ucis_txt, ucestxt
 884
 885
 886 def getallstcarac(corpus, analyse) :
 887    pathout = PathOut(analyse['ira'])
 888    profils =  ReadProfileAsDico(pathout['PROFILE_OUT'], Alceste, self.encoding)
 889    print(profils)
 890
 891 def read_chd(filein, fileout):
 892     with open(filein, 'r') as f :
 893         content = f.read()
 894     #content = [line[3:].replace('"',"").replace(' ','') for line in content.splitlines()]
 895     content = [line.split('\t') for line in content.splitlines()]
 896     chd = {'name':1, 'children':[]}
 897     mere={}
 898     for i, line in enumerate(content) :
 899         if i == 0 :
 900             chd['children'] = [{'name': line[1],'size' : content[i+1][0]}, {'name':line[2], 'size': content[i+1][1]}]
 901             mere[line[1]] = chd['children'][0]
 902             mere[line[2]] = chd['children'][1]
 903         elif not i % 2 :
 904             if 'children' in mere[line[0]]:
 905                 mere[line[0]]['children'].append({'name': line[1],'size' : content[i+1][0]})
 906                 mere[line[1]] = mere[line[0]]['children'][-1]
 907                 mere[line[0]]['children'].append({'name': line[2],'size' : content[i+1][1]})
 908                 mere[line[2]] = mere[line[0]]['children'][-1]
 909             else :
 910                 mere[line[0]]['children'] = [{'name': line[1],'size' : content[i+1][0]}, {'name':line[2], 'size': content[i+1][1]}]
 911                 mere[line[1]] = mere[line[0]]['children'][-2]
 912                 mere[line[2]] = mere[line[0]]['children'][-1]
 913     with open(fileout, 'w') as f :
 914         f.write(json.dumps(chd))
 915
 916
 917 translation_languages = {"Afrikaans":"af", "Albanian":"sq", "Amharic":"am", "Arabic":"ar", "Armenian":"hy", "Azeerbaijani":"az", "Basque":"eu", "Belarusian":"be", "Bengali":"bn", "Bosnian":"bs", "Bulgarian":"bg", "Catalan":"ca", "Cebuano":"ceb", "Chichewa":"ny", "Chinese (Simplified)":"zh-CN", "Chinese (Traditional)":"zh-TW", "Corsican":"co", "Croatian":"hr", "Czech":"cs", "Danish":"da", "Dutch":"nl", "English":"en", "Esperanto":"eo", "Estonian":"et", "Filipino":"tl", "Finnish":"fi", "French":"fr", "Frisian":"fy", "Galician":"gl", "Georgian":"ka", "German":"de", "Greek":"el", "Gujarati":"gu", "Haitian Creole":"ht", "Hausa":"ha", "Hawaiian":"haw", "Hebrew":"iw", "Hindi":"hi", "Hmong":"hmn ", "Hungarian":"hu", "Icelandic":"is", "Igbo":"ig", "Indonesian":"id", "Irish":"ga", "Italian":"it", "Japanese":"ja", "Javanese":"jw", "Kannada":"kn", "Kazakh":"kk", "Khmer":"km", "Korean":"ko", "Kurdish":"ku", "Kyrgyz":"ky", "Lao":"lo", "Latin":"la", "Latvian":"lv", "Lithuanian":"lt", "Luxembourgish":"lb", "Macedonian":"mk", "Malagasy":"mg", "Malay":"ms", "Malayalam":"ml", "Maltese":"mt", "Maori":"mi", "Marathi":"mr", "Mongolian":"mn", "Burmese":"my", "Nepali":"ne", "Norwegian":"no", "Pashto":"ps", "Persian":"fa", "Polish":"pl", "Portuguese":"pt", "Punjabi":"ma", "Romanian":"ro", "Russian":"ru", "Samoan":"sm", "Scots Gaelic":"gd", "Serbian":"sr", "Sesotho":"st", "Shona":"sn", "Sindhi":"sd", "Sinhala":"si", "Slovak":"sk", "Slovenian":"sl", "Somali":"so", "Spanish":"es", "Sundanese":"su", "Swahili":"sw", "Swedish":"sv", "Tajik":"tg", "Tamil":"ta", "Telugu":"te", "Thai":"th", "Turkish":"tr", "Ukrainian":"uk", "Urdu":"ur", "Uzbek":"uz", "Vietnamese":"vi", "Welsh":"cy", "Xhosa":"xh", "Yiddish":"yi", "Yoruba":"yo", "Zulu":"zu", }
 918
 919
 920 def gettranslation(words, lf, lt) :
 921     import urllib.request, urllib.error, urllib.parse
 922     import json
 923     agent = {'User-Agent':
 924     "Mozilla/4.0 (\
 925     compatible;\
 926     MSIE 6.0;\
 927     Windows NT 5.1;\
 928     SV1;\
 929     .NET CLR 1.1.4322;\
 930     .NET CLR 2.0.50727;\
 931     .NET CLR 3.0.04506.30\
 932     )"}
 933     base_link = "https://translate.googleapis.com/translate_a/single?client=gtx&sl=%s&tl=%s&dt=t&q=%s"
 934     print(len(words))
 935     totrans = urllib.parse.quote('\n'.join(words))
 936     link = base_link % (lf, lt, totrans)
 937     request = urllib.request.Request(link, headers=agent)
 938     raw_data = urllib.request.urlopen(request).read()
 939     data = json.loads(raw_data)
 940     return [line[0].replace("'", '_').replace(' | ', '|').replace(' ', '_').replace('-','_').replace('\n','') for line in data[0]]
 941
 942 def makenprof(prof, trans, deb=0) :
 943     nprof=[]
 944     if deb == 0 :
 945         nprof.append(prof[0])
 946     for i, val in enumerate(trans) :
 947         line = prof[deb+i+1][:]
 948         line[6] = val
 949         nprof.append(line)
 950     return nprof
 951
 952 def treatempty(val) :
 953     if val.strip() == '' :
 954         return '_'
 955     else :
 956         return val
 957
def translateprofile(corpus, dictprofile, lf='it', lt='fr', maxword = 50) :
    """Translate the forms (column 6) of every class profile.

    ``dictprofile`` maps '1', '2', ... to a list of rows. Within a profile
    the row ['*****', '*', '*', '*', '*', '*', '', ''] and the row
    ['*', '*', '*', '*', '*', '*', '', ''] act as separators: rows before
    the first one, rows between the two, and rows after the second one are
    handled as three distinct parts. The first two parts are translated
    with gettranslation() from ``lf`` to ``lt`` (at most ``maxword`` rows
    are considered for each); the last part is copied unchanged.

    Returns ``(nprof, lems)``: the translated profiles, keyed like
    ``dictprofile``, and a dictionary mapping every translated form to its
    original form. ``corpus`` is not used.
    """
    nprof = {}
    lems = {}
    for i in range(len(dictprofile)) :
        prof = dictprofile[repr(i+1)]
        # lenact: index of the first separator found ('*****' first, then
        # '*', else the profile length). lensup starts at -1 only when the
        # '*****' separator exists, so that the count computed below does
        # not include that separator row itself.
        try :
            lenact = prof.index(['*****', '*', '*', '*', '*', '*', '', ''])
            lensup = -1
        except ValueError:
            try :
                lenact = prof.index(['*', '*', '*', '*', '*', '*', '', ''])
                lensup = 0
            except ValueError:
                lenact = len(prof)
                lensup = 0
        # lensup: number of rows between position lenact and the '*'
        # separator (or the end of the profile when there is none).
        try :
            lensup += prof.index(['*', '*', '*', '*', '*', '*', '', ''])
            lensup = lensup - lenact
        except ValueError:
            lensup += len(prof) - lenact
        if lenact != 0 :
            if lenact > maxword :
                nlenact = maxword
            else :
                nlenact = lenact
            # Row 0 is skipped; forms are sent with '_' turned into spaces.
            actori = [line[6] for line in prof[1:nlenact]]
            act = [val.replace('_', ' ') for val in actori]
            act = gettranslation(act, lf, lt)
            for j, val in enumerate(actori) :
                if act[j] not in lems :
                    lems[act[j]] = val
                else :
                    # Translation already used for another form: append
                    # '+' until the key is unique.
                    while act[j] in lems :
                        act[j] = act[j] + "+"
                    lems[act[j]] = val
            nprof[repr(i+1)] = makenprof(prof, act)

        if lensup != 0 :
            if lensup > maxword :
                nlensup = maxword
            else :
                nlensup = lensup
            # NOTE(review): this slice stops at lenact+nlensup, i.e. it
            # holds nlensup-1 rows, so the last row before the '*'
            # separator is not translated - confirm this is intended.
            supori = [line[6] for line in prof[(1+lenact):(lenact+nlensup)]]
            sup = [val.replace('_', ' ') for val in supori]
            sup = [treatempty(val) for val in sup]
            sup = gettranslation(sup, lf, lt)
            for j, val in enumerate(supori) :
                if sup[j] not in lems :
                    lems[sup[j]] = val
                else :
                    while sup[j] in lems :
                        sup[j] = sup[j] + "+"
                    lems[sup[j]] = val
            # Relies on nprof[repr(i+1)] having been created by the
            # previous block.
            nprof[repr(i+1)].append(['*****', '*', '*', '*', '*', '*', '', ''])
            nprof[repr(i+1)] += makenprof(prof, sup, deb=lenact)

        # Copy the rows following the '*' separator unchanged; any error
        # here (typically no such separator) is silently ignored.
        try :
            lenet = prof.index(['*', '*', '*', '*', '*', '*', '', ''])
            nprof[repr(i+1)].append(['*', '*', '*', '*', '*', '*', '', ''])
            nprof[repr(i+1)] += prof[(lenet+1):]
        except :
            pass
    return nprof, lems
1021
def write_translation_profile(prof, lems, language, dictpathout) :
    """Write a translated profile and its word list, then register them.

    Parameters:
        prof: translated profiles keyed '1', '2', ...; each value is a list
            whose first row is a header list and whose other rows are
            profile rows or the '*' / '*****' separator rows.
        lems: mapping written as tab-separated "key<TAB>value" lines.
        language: suffix used in the output file names.
        dictpathout: mapping from file name to path; the keys
            'translations.txt', 'translation_profile_<language>.csv' and
            'translation_words_<language>.csv' are looked up.

    'translations.txt' lists the (profile, words) file names already
    written; the current pair is added once.
    """
    profile_name = 'translation_profile_%s.csv' % language
    words_name = 'translation_words_%s.csv' % language

    if os.path.exists(dictpathout['translations.txt']) :
        with codecs.open(dictpathout['translations.txt'], 'r', 'utf8') as f :
            translist = [line.split('\t') for line in f.read().splitlines()]
    else :
        translist = []

    toprint = [
        ['','','','','',''],
        ['***', 'nb classes', repr(len(prof)), '***', '', ''],
    ]
    for num in range(1, len(prof) + 1) :
        classe = prof[repr(num)]
        toprint.append(['**', 'classe', repr(num), '**', '', ''])
        toprint.append(['****'] + classe[0] + ['****'])
        for line in classe[1:] :
            if line[0] == '*' :
                toprint.append(['*', '*', '*', '*', '*', '*'])
            elif line[0] == '*****' :
                toprint.append(['*****','*','*', '*', '*', '*'])
            else :
                pvalue = line[7].replace('< 0,0001', '0.00009').replace('NS (','').replace(')','')
                toprint.append([repr(line[1]), repr(line[2]), repr(line[3]), repr(line[4]), line[6], pvalue])

    # NOTE(review): these two files are still written with the platform
    # default encoding, as before; readers elsewhere may depend on it.
    with open(dictpathout[profile_name], 'w') as f :
        f.write('\n'.join([';'.join(line) for line in toprint]))
    with open(dictpathout[words_name], 'w') as f :
        f.write('\n'.join(['\t'.join([val, lems[val]]) for val in lems]))

    if profile_name not in [val[0] for val in translist] :
        translist.append([profile_name, words_name])
        # Written as UTF-8 because that is how the file is read above.
        with open(dictpathout['translations.txt'], 'w', encoding='utf8') as f :
            f.write('\n'.join(['\t'.join(line) for line in translist]))
1050
def makesentidict(infile, language, outfile = '/tmp/tgenemo.csv') :
    """Turn a tab-separated sentiment lexicon into one word list per sentiment.

    ``infile`` (UTF-8) must start with a title row containing the ten
    sentiment columns ('Positive', 'Negative', 'Anger', ...) and a column
    whose title contains '(fr)'. For every sentiment, the lower-cased
    '(fr)' words of the rows flagged '1' are written on one tab-separated
    line of ``outfile``, preceded by the lower-cased sentiment name.

    Parameters:
        infile: path of the lexicon to read.
        language: currently unused; the '(fr)' column is always selected.
        outfile: path of the file to write; defaults to the historical
            hard-coded location.

    Raises:
        ValueError: if a sentiment column is missing from the title row.
        IndexError: if no title contains '(fr)' or a row is too short.
    """
    with codecs.open(infile,'r', 'utf8') as f :
        content = f.read()
    rows = [line.split('\t') for line in content.splitlines()]
    titles = rows.pop(0)
    senti = ['Positive', 'Negative', 'Anger', 'Anticipation', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise', 'Trust']
    columns = [titles.index(sent) for sent in senti]
    frtitle = [val for val in titles if '(fr)' in val]
    frid = titles.index(frtitle[0])
    # Extract everything before opening the output so that a malformed row
    # fails without truncating an existing file.
    entries = [(row[frid].lower(), [row[col] for col in columns]) for row in rows]
    with open(outfile, 'w') as f :
        for position, sent in enumerate(senti) :
            words = [word for word, flags in entries if flags[position] == '1']
            f.write('\t'.join([sent.lower()] + words) + '\n')
1076
def countsentfromprof(prof, encoding, sentidict) :
    """Print three successive views of a ';'-separated count file.

    The file ``prof`` is read with ``encoding``; the raw split rows, the
    rows with every value after the first converted to ``int``, and the
    resulting dictionary keyed by the first column are printed in turn.
    Nothing is returned and ``sentidict`` is not used.
    """
    with codecs.open(prof, 'r', encoding) as f :
        rows = [raw.split(';') for raw in f.read().splitlines()]
    print(rows)
    counts = [[row[0], list(map(int, row[1:]))] for row in rows]
    print(counts)
    print(dict(counts))
1086
1087 def iratolexico(infile, outfile, encoding) :
1088     with codecs.open(infile, 'r', encoding) as f :
1089         for line in f :
1090             if line.startswith('**** ') :
1091                 line = line.split()
1092