www.iramuteq.org Git - iramuteq/blob - iraopen.py

   1 #!/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #Author: Pierre Ratinaud
   4 #Copyright (c) 2010 Pierre Ratinaud
   5 #License: GNU/GPL
   6
   7 import os
   8 from optparse import OptionParser
   9 import sys
  10 reload(sys)
  11 import locale
  12 import codecs
  13 sys.setdefaultencoding(locale.getpreferredencoding())
  14 from chemins import ConstructConfigPath, ConstructDicoPath, ConstructRscriptsPath, ChdTxtPathOut
  15 from functions import ReadLexique
  16 from ConfigParser import *
  17 #######################################
  18 from textchdalc import AnalyseAlceste
  19 from textdist import PamTxt
  20 #from textafcuci import AfcUci
  21 from textstat import Stat
  22 from corpus import Corpus
  23 import tempfile
  24 import pickle
  25 from word_stat import *
  26 from textclassechd import ClasseCHD
  27
  28 AppliPath = os.path.abspath(os.path.dirname(os.path.realpath(sys.argv[0])))
  29 if os.getenv('HOME') != None:
  30     user_home = os.getenv('HOME')
  31 else:
  32     user_home = os.getenv('HOMEPATH')
  33 UserConfigPath = os.path.abspath(os.path.join(user_home, '.iramuteq'))
  34
  35 class CmdLine :
  36     def __init__(self) :
  37         self.DictPath = ConstructDicoPath(AppliPath)
  38         self.ConfigPath = ConstructConfigPath(UserConfigPath)
  39
  40         parser = OptionParser()
  41
  42         parser.add_option("-f", "--file", dest="filename", help="chemin du corpus", metavar="FILE", default=False)
  43         parser.add_option("-t", "--type", dest="type_analyse", help="type d'analyse", metavar="TYPE D'ANALYSE", default='alceste')
  44
  45         parser.add_option("-c", "--conf", dest="configfile", help="chemin du fichier de configuration", metavar="CONF", default=False)
  46         parser.add_option("-e", "--enc", dest="encodage", help="encodage du corpus", metavar="ENC", default=locale.getpreferredencoding())
  47         parser.add_option("-l", "--lang", dest="language", help="langue du corpus", metavar="LANG", default='french')
  48         (options, args) = parser.parse_args()
  49         print args
  50         print options
  51         if options.filename :
  52             self.filename = os.path.abspath(options.filename)
  53             self.conf = RawConfigParser()
  54             self.conf.read(self.filename)
  55             print self.conf.sections()
  56
  57             if 'analyse' in self.conf.sections() :
  58                 print 'zerzerz'
  59                 DictPathOut=ChdTxtPathOut(os.path.dirname(self.filename))
  60                 self.pathout = os.path.dirname(self.filename)
  61                 self.DictPathOut=DictPathOut
  62                 self.corpus = Corpus(self)
  63                 self.corpus.dictpathout = self.DictPathOut
  64                 self.corpus.read_corpus_from_shelves(self.DictPathOut['db'])
  65                 self.corpus.parametre['analyse'] = 'alceste'
  66                 self.corpus.make_lem_type_list()
  67                 for i in range(1,18) :
  68                     ClasseCHD(self, self.corpus, i, True)
  69                 zerzerzer
  70                 #ll = self.corpus.find_segments_doublon(15,1000000)
  71                 #with open('extrait_doublons.csv' ,'w') as f :
  72                 #    f.write('\n'.join([';'.join([`v[0]`,v[1]]) for v in ll]))
  73                 #print ll
  74                 #self.corpus.count_uci_from_list('/home/pierre/fac/lerass/bouquin_indentite/liste_mot_chercher_uci.txt')
  75                 #print 'start pickle'
  76                 #output = open('testpickle.pkl', 'r')
  77                 #pickle.dump(self.corpus.formes, output, -1)
  78                 #formes = pickle.load(output)
  79                 #output.close()
  80                 #print 'finish pickle'
  81                 #sdfsdfs
  82                 #listin = '/home/pierre/fac/identite/Personnages.csv'
  83                 #with codecs.open(listin, 'r', 'cp1252') as f :
  84                 #    content = f.read()
  85                 #content = content.replace('"','').splitlines()
  86                 #print content
  87                 #self.corpus.make_and_write_sparse_matrix_from_uce_list(content, '/home/pierre/fac/identite/personnages.mm')
  88                 #print 'zerzer'
  89     #            print 'EXTRACT NR'
  90     #            self.corpus.extractnr()
  91                 #listin = [u'droit', u'devoir']
  92                 #make_word_stat(self.corpus, listin)
  93                 Alceste=True
  94             fsdfsdfd
  95             self.filename = os.path.abspath(options.filename)
  96             self.corpus_encodage = options.encodage
  97             self.corpus_lang = options.language
  98             if options.configfile :
  99                 self.ConfigPath[options.type_analyse] = os.path.abspath(options.configfile)
 100             self.TEMPDIR = tempfile.mkdtemp('iramuteq')
 101             self.RscriptsPath = ConstructRscriptsPath(AppliPath)
 102             self.PathPath = ConfigParser()
 103             self.PathPath.read(self.ConfigPath['path'])
 104             self.RPath = self.PathPath.get('PATHS', 'rpath')
 105             self.pref = RawConfigParser()
 106             self.pref.read(self.ConfigPath['preferences'])
 107             #print 'PAS DE CODECS POUR CABLE'
 108             with codecs.open(self.filename, 'r', self.corpus_encodage) as f:
 109                 self.content = f.read()
 110             self.content = self.content.replace('\r','')
 111             ReadLexique(self, lang = options.language)
 112             if options.type_analyse == 'alceste' :
 113             #    print 'ATTENTION : BIGGGGGGGGGGGGGGGGGGG'
 114             #    self.Text = AnalyseAlceste(self, cmd = True, big = True)
 115                 self.Text = AnalyseAlceste(self, cmd = True)
 116             elif options.type_analyse == 'pam' :
 117                 self.Text = PamTxt(self, cmd = True)
 118             elif options.type_analyse == 'afcuci' :
 119                 self.Text = AfcUci(self, cmd = True)
 120             elif options.type_analyse == 'stat' :
 121                 self.Text = Stat(self, cmd = True)
 122             #print self.Text.corpus.hours, 'h', self.Text.corpus.minutes,'min', self.Text.corpus.seconds, 's'
 123 #            self.Text.corpus.make_colored_corpus('colored.html')
 124
 125 if __name__ == '__main__':
 126     __name__ = 'Main'
 127     CmdLine()
 128