当前位置: 首页>>代码示例>>Python>>正文


Python Utils.readSentences方法代码示例

本文整理汇总了Python中Utils.Utils.readSentences方法的典型用法代码示例。如果您正苦于以下问题:Python Utils.readSentences方法的具体用法?Python Utils.readSentences怎么用?Python Utils.readSentences使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Utils.Utils的用法示例。


在下文中一共展示了Utils.readSentences方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: Utils

# 需要导入模块: from Utils import Utils [as 别名]
# 或者: from Utils.Utils import readSentences [as 别名]
          ## Both the phrases are same, for dual structure principle
          if l1PhraseTag == l2PhraseTag or l2PhraseTag in self.__phraseMap[l1PhraseTag]:
            # Debugging !!
            #sys.stderr.write("Alignment: "+str(align)+"\n")
            #sys.stderr.write("L2Sequence: "+str(l2Sequence)+" Same Const: "+l1PhraseTag+"\n")
            ##############
            break
    return l1Sequence
  
if __name__ == "__main__":
  # Manual smoke run: load the parallel and monolingual corpora, then
  # generate one code-switched sentence from the first aligned pair.

  # Parallel (parsed) corpora and their word alignments.
  l1Data = "/usr0/home/pgadde/Work/CodeSwitching/Hinglish/Data/Parallel/Good2.0/trainSingle/hinParse.wx"
  l2Data = "/usr0/home/pgadde/Work/CodeSwitching/Hinglish/Data/Parallel/Good2.0/trainSingle/engParse.wx"
  l1Aligns = "/usr0/home/pgadde/Work/CodeSwitching/Hinglish/Data/Parallel/Good2.0/trainSingle/hinAlign.wx"
  l2Aligns = "/usr0/home/pgadde/Work/CodeSwitching/Hinglish/Data/Parallel/Good2.0/trainSingle/engAlign.wx"

  # Monolingual data; each name is rebound below from a path to the loaded
  # sentences.
  pureL1 = "/usr0/home/pgadde/Work/CodeSwitching/Hinglish/Data/HinPOS/hindiTrain.wx"
  pureL2 = "/usr0/home/pgadde/Work/CodeSwitching/Hinglish/Data/EngPOS/train.0-18.tsv"

  U = Utils()
  parL1 = U.readSentences(l1Data)
  parL2 = U.readSentences(l2Data)
  align = U.readAligns(l1Aligns, l2Aligns)
  pureL1 = U.readSentencesPlain(pureL1)
  pureL2 = U.readSentencesPlain(pureL2)

  # Show the first alignment, then feed the first sentence pair to the handler.
  firstAlign = align[0]
  print(firstAlign)

  CS = CSHandler()
  CS.updateHandler(parL1[0], parL2[0], firstAlign, 0)
  print(CS.csSentence(4))
  
开发者ID:phanigadde,项目名称:CSRelated,代码行数:32,代码来源:CSHandler.py

示例2: __init__

# 需要导入模块: from Utils import Utils [as 别名]
# 或者: from Utils.Utils import readSentences [as 别名]

#.........这里部分代码省略.........
    self.align = []
    self.pureL1 = []
    self.pureL2 = []
    self.L1Tags = set()
    self.L2Tags = set()
    self.commonTags = set()
    self.posMap = {}
    
    ## Pre-processing
    self.genPosMap()
    
    ## Others
    self.__utils = Utils()
    
  def addLangTags(self, WordTags, lTag):
    """Return a copy of every entry in WordTags with lTag appended.

    Each entry is copied into a fresh list (one level deep), so neither
    WordTags nor its entries are modified.
    """
    return [list(entry) + [lTag] for entry in WordTags]
  
  def makeLD(self, wordsTagsLangs):
    """Return deep copies of the entries with element 2 fused into element 1.

    For every entry, element 1 becomes "<element 1>_<element 2>"; the other
    elements are kept. The input entries are left untouched.
    """
    fused = []
    for entry in wordsTagsLangs:
      clone = copy.deepcopy(entry)
      suffix = '_' + clone[2]
      clone[1] = clone[1] + suffix
      fused.append(clone)
    return fused
  
  def genPosMap(self):
    """Load both tag-map files into posMap and rebuild the tag sets.

    Each non-blank line of self.l1MapFile and self.l2MapFile is split on
    whitespace: the first field is the source tag, the second the tag it
    maps to. The L1 file is read first, so for a tag present in both files
    the L2 mapping wins in self.posMap (entries already in posMap are kept).

    Afterwards self.L1Tags / self.L2Tags hold the source tags seen in the
    respective file and self.commonTags holds their intersection.

    Differences from the previous version:
      * each file is opened once and closed deterministically;
      * blank lines are skipped instead of raising IndexError;
      * L2Tags is rebuilt from scratch, like L1Tags, rather than being
        extended with whatever it contained before.

    Raises:
      IndexError: if a non-blank line has fewer than two fields.
      IOError/OSError: if a map file cannot be opened.
    """
    tagSets = []
    for mapFile in (self.l1MapFile, self.l2MapFile):
      tags = set()
      with open(mapFile) as handle:
        for line in handle:
          fields = line.split()
          if not fields:
            # Tolerate empty / whitespace-only lines (e.g. trailing newline).
            continue
          srcTag, uniTag = fields[0], fields[1]
          self.posMap[srcTag] = uniTag
          tags.add(srcTag)
      tagSets.append(tags)

    self.L1Tags, self.L2Tags = tagSets
    self.commonTags = self.L1Tags & self.L2Tags
  
  def map2Uni(self, wordTagsLangs):
    """Replace each entry's tag (element 1) with its mapping from posMap.

    Tags that have no entry in self.posMap are replaced by 'X'.

    NOTE: the entries are modified in place; the returned list is a new
    list holding the very same entry objects that were passed in. This
    matches the historical behaviour and is kept for existing callers.

    The lookup uses dict.get instead of a bare ``except:``, so only a
    missing tag triggers the 'X' fallback; unrelated errors (and
    KeyboardInterrupt) are no longer silently swallowed.
    """
    newLine = []
    for wordTagLang in wordTagsLangs:
      wordTagLang[1] = self.posMap.get(wordTagLang[1], 'X')
      newLine.append(wordTagLang)
    return newLine
  
  def mapLD2Uni(self, wordTagsLangs):
    """Return deep copies of the entries with a mapped, language-suffixed tag.

    For each entry, the part of element 1 before the first "_" is looked up
    in self.posMap ('X' if it is missing) and element 1 of the copy becomes
    "<mapped tag>_<element 2>". The input entries are not modified.

    The lookup uses dict.get instead of a bare ``except:``, so only a
    missing tag triggers the 'X' fallback and other errors propagate.
    """
    newLine = []
    for entry in wordTagsLangs:
      wordTagLang = copy.deepcopy(entry)
      tag = wordTagLang[1].split("_")[0]
      lang = wordTagLang[2]
      wordTagLang[1] = self.posMap.get(tag, 'X') + "_" + lang
      newLine.append(wordTagLang)
    return newLine
  
  def map2UniControl(self, wordTagsLangs):
    """Replace each entry's tag with "<mapped tag>_<element 2>", in place.

    Element 1 of every entry is looked up in self.posMap ('X' if it is
    missing) and overwritten with the mapped tag, an underscore and
    element 2 of the same entry.

    NOTE: the entries are modified in place; the returned list is a new
    list holding the same entry objects that were passed in. This matches
    the historical behaviour and is kept for existing callers.

    The lookup uses dict.get instead of a bare ``except:``, so only a
    missing tag triggers the 'X' fallback and other errors propagate.
    """
    newLine = []
    for wordTagLang in wordTagsLangs:
      lang = wordTagLang[2]
      wordTagLang[1] = self.posMap.get(wordTagLang[1], 'X') + '_' + lang
      newLine.append(wordTagLang)
    return newLine
  
  def loadData(self, l1Data, l2Data, l1Aligns, l2Aligns, pureL1Data, pureL2Data):
    """Read all corpora through the Utils helper and report their sizes.

    Fills self.parL1 / self.parL2 (readSentences), self.align (readAligns)
    and self.pureL1 / self.pureL2 (readSentencesPlain), then writes one
    "<name>:<length>" line per container to stderr.
    """
    reader = self.__utils
    self.parL1 = reader.readSentences(l1Data)
    self.parL2 = reader.readSentences(l2Data)
    self.align = reader.readAligns(l1Aligns, l2Aligns)
    self.pureL1 = reader.readSentencesPlain(pureL1Data)
    self.pureL2 = reader.readSentencesPlain(pureL2Data)

    loaded = (
      ("parL1:", self.parL1),
      ("parL2:", self.parL2),
      ("align:", self.align),
      ("pureL1:", self.pureL1),
      ("pureL2:", self.pureL2),
    )
    for label, items in loaded:
      sys.stderr.write(label + str(len(items)) + "\n")
开发者ID:phanigadde,项目名称:CSRelated,代码行数:104,代码来源:DataHandler.py

示例3: __init__

# 需要导入模块: from Utils import Utils [as 别名]
# 或者: from Utils.Utils import readSentences [as 别名]
class DataGenerator:
  def __init__(self, outDir):
    """Set up configuration, empty data containers and helper objects.

    Writes a trace line to stderr, stores the hard-coded map-file paths and
    generation settings, creates the helper instances, and runs the
    pre-processing steps. The data containers stay empty until loadData()
    is called.

    Args:
      outDir: stored as the output directory. __genTrainData concatenates it
        directly with the output file name, so it has to end with a path
        separator.
    """
    sys.stderr.write("DataGenerator: Constructor\n")
    ## Languages and Order
    # Index 0 / index 1 are handed to CSHandler.updateLIDTags below, in that order.
    self.__LID = ["HI","EN"]
    # Hard-coded absolute paths to the tag-map files.
    self.__l2MapFile = "/usr0/home/pgadde/Work/CodeSwitching/Hinglish/Data/UniMaps/en-ptb.map"
    self.__l1MapFile = "/usr0/home/pgadde/Work/CodeSwitching/Hinglish/Data/UniMaps/hi-hyd.map"
    ## Data containers
    # Filled by loadData().
    self.__parL1 = []
    self.__parL2 = []
    self.__align = []
    self.__pureL1 = []
    self.__pureL2 = []
    self.__outputDir = outDir
    self.__posMap = {}
    # NOTE(review): `dd` is presumably collections.defaultdict imported under
    # an alias -- the import is not visible here, confirm.
    self.__phraseMap = dd(list)
    self.__csInstance = CSHandler()
    self.__utils = Utils()
    self.__Tree = Dependencytree()
    
    ## Generation Variants
    self.__csVariants = [0,1,2,3,4]
    # NOTE: __tagsetVariants and __dataRange are reassigned in the
    # "Real test overwrites" section below; the values set here do not survive.
    self.__tagsetVariants = ["",".uni"]
    self.__dataRange = range(50,900,50)
    ##self.__dataRange = [200]
    # (first, second) pairs; __genTrainData uses first/(first+second) of the
    # total for the pure portion and labels the output file "Pure<first>" /
    # "CS<second>".
    self.__splits = [(50,50),(60,40),(70,30),(80,20),(90,10)]
    self.__csHash = set()
    ##LID stuff
    self.__L1Tags = set()
    self.__L2Tags = set()
    self.__commonTags = set()
    ## Pre processing
    # __genPosMap / __genPhraseMap are defined elsewhere in the class (not
    # shown here); presumably they populate __posMap and __phraseMap -- confirm.
    self.__genPosMap()
    self.__genPhraseMap()
    self.__csInstance.updatePhraseMap(self.__phraseMap)
    self.__csInstance.updateLIDTags(self.__LID[0], self.__LID[1])
    
    ## Real test overwrites
    # Everything assigned from here on replaces the defaults set above.
    #self.__csVariants = [1,2,3,4]
    self.__tagsetVariants = [""]
    self.__dataRange = [400]
    # Keyed by the same integers that appear in __csVariants; where this dict
    # is consumed is not visible in this excerpt.
    self.__dataRanges = {0:range(40,601,40), 1:range(40,601,40), 2:range(35,540,35), 3:range(30,451,30), 4:range(15,231,15)}
    #self.__dataRanges = {0:[880], 1:[880], 2:[800], 3:[630], 4:[330]}
    #self.__dataRanges = {0:[60], 1:[60], 2:[60], 3:[60], 4:[60]}
    #self.__splits = [(50,50)]
    #for i in range(0,51,5):
    #  split = (100-i, i)
    #  self.__splits.append(split)
    # Appended to every output file name in __genTrainData.
    self.__fileSuffix = ""
 
  def loadData(self, l1Data, l2Data, l1Aligns, l2Aligns, pureL1Data, pureL2Data):
    """Load the parallel, alignment and pure corpora and log their sizes.

    The parallel files go through Utils.readSentences, the two alignment
    files through Utils.readAligns and the pure files through
    Utils.readSentencesPlain. After all five containers are filled, one
    "<name>:<length>" line per container is written to stderr.
    """
    utils = self.__utils
    self.__parL1 = utils.readSentences(l1Data)
    self.__parL2 = utils.readSentences(l2Data)
    self.__align = utils.readAligns(l1Aligns, l2Aligns)
    self.__pureL1 = utils.readSentencesPlain(pureL1Data)
    self.__pureL2 = utils.readSentencesPlain(pureL2Data)

    log = sys.stderr.write
    for label, container in (
        ("parL1:", self.__parL1),
        ("parL2:", self.__parL2),
        ("align:", self.__align),
        ("pureL1:", self.__pureL1),
        ("pureL2:", self.__pureL2)):
      log(label+str(len(container))+"\n")
  
  def __genTrainData(self):
    statusCount = 0
    for data in self.__dataRange:
      #control = 0
      #while 1:
      for Split in self.__splits:
      #for control in range(3):
        #if control == 3:
        #  break
        #pr= int(control*1.0/2 * data)
        pr= int((Split[0]*1.0/(Split[0]+Split[1])) * data)
        tr = data - pr
        pr = pr/2
        
        print pr
        random.seed()
        pIndicesL1 = random.sample(range(len(self.__pureL1)),pr)
        pIndicesL2 = random.sample(range(len(self.__pureL2)),pr)
        
        for csType in self.__csVariants:
          self.__csHash = set()
          ##sys.stderr.write("csType:"+str(csType)+'\n')
          # Debugging !!
          #switch = ""
          #############
          #for tag in self.__tagsetVariants:
            # Debugging !!
            #if switch == "yes":
            #    break
            ###################
            #sys.stderr.write(outputDir+"Train"+cs+str(len(trainVariants[tr]))+"Pure"+str(len(pureVariants[pr]))+tag+"\n")
          
          sys.stderr.write(self.__outputDir+"TrainCSType"+str(csType)+"CS"+str(Split[1])+"Pure"+str(Split[0])+"Total"+str(data)+self.__fileSuffix+'\n')
          dataFile = open(self.__outputDir+"TrainCSType"+str(csType)+"CS"+str(Split[1])+"Pure"+str(Split[0])+"Total"+str(data)+self.__fileSuffix,'w')
          ##dataFileUni = open(self.__outputDir+"TrainCSType"+str(csType)+"CS"+str(Split[1])+"Pure"+str(Split[0])+"Total"+str(data)+".uni",'w')
          ##dataFileUniq = open(self.__outputDir+"TrainCSType"+str(csType)+"CS"+str(Split[1])+"Pure"+str(Split[0])+"Total"+str(data)+".uniq",'w')
          ##dataFileUniUniq = open(self.__outputDir+"TrainCSType"+str(csType)+"CS"+str(Split[1])+"Pure"+str(Split[0])+"Total"+str(data)+".uni.uniq",'w')
          
#.........这里部分代码省略.........
开发者ID:phanigadde,项目名称:CSRelated,代码行数:103,代码来源:DataGenerator.py


注:本文中的Utils.Utils.readSentences方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。