做词性标注时,出现如下错误
源代码如下:
- # -*- coding: utf-8 -*-
- import sys
- import os
class StanfordCoreNLP():
    """Base helper that assembles the Java classpath for Stanford CoreNLP.

    Attributes:
        root: Directory (as passed by the caller) that contains the jars.
        tempsrcpath: Name of the scratch file used to hand text to Java.
        jarlist: File names of the jars placed on the classpath.
        jarpath: Classpath string built from ``root`` and ``jarlist``; every
            entry, including the last, is followed by ``os.pathsep``.
    """

    def __init__(self, jarpath):
        """Remember the jar directory and build the classpath string.

        Args:
            jarpath: Directory holding the CoreNLP jar files.
        """
        self.root = jarpath
        self.tempsrcpath = "tempsrc"
        self.jarlist = [
            "ejml-0.23.jar",
            "javax.json.jar",
            "jollyday.jar",
            "joda-time.jar",
            "protobuf.jar",
            "slf4j-api.jar",
            "slf4j-simple.jar",
            "stanford-corenlp-3.8.0.jar",
            "xom.jar",
        ]
        self.jarpath = ""
        self.buildjars()

    def buildjars(self):
        """Append every jar in ``jarlist`` to ``self.jarpath``.

        ``os.pathsep`` is used instead of a hard-coded ``";"`` so the
        classpath is valid on POSIX (``":"``) as well as Windows (``";"``).
        ``os.path.join`` gives the same result as plain concatenation when
        ``root`` already ends with a separator, and inserts the missing one
        when it does not.
        """
        for jar in self.jarlist:
            self.jarpath += os.path.join(self.root, jar) + os.pathsep

    def savefile(self, path, sent):
        """Write ``sent`` to ``path``, overwriting any existing file.

        Args:
            path: Destination file path.
            sent: Text to store. Text strings are encoded as UTF-8; bytes
                are written unchanged.
        """
        # The file is opened in binary mode, so text must be encoded first;
        # writing a Python 3 ``str`` directly raises TypeError.
        if not isinstance(sent, bytes):
            sent = sent.encode("utf-8")
        with open(path, "wb") as fp:
            fp.write(sent)

    def delfile(self, path):
        """Remove the file at ``path`` (raises ``OSError`` if it is missing)."""
        os.remove(path)
-
class StanfordPOSTagger(StanfordCoreNLP):
    """Run the Stanford ``MaxentTagger`` class through a ``java`` command.

    Attributes:
        modelpath: Path of the tagger model passed to ``-model``.
        classfier: Fully qualified Java class that is launched.
        delimiter: Value passed to ``-tagSeparator``.
        cmdline: Shell command prefix shared by ``tag`` and ``tagfile``.
    """

    def __init__(self, jarpath, modelpath):
        """Build the command line for the given jars and model.

        Args:
            jarpath: Directory holding the CoreNLP jar files.
            modelpath: Path of the tagger model file.
        """
        StanfordCoreNLP.__init__(self, jarpath)
        self.modelpath = modelpath
        self.classfier = "edu.stanford.nlp.tagger.maxent.MaxentTagger"
        # A single backslash character.
        # NOTE(review): on a POSIX shell an unquoted backslash escapes the
        # space that follows it in the command line -- confirm the intended
        # separator for non-Windows use.
        self.delimiter = "\\"
        self.__buildcmd()
        print(jarpath)
        print(modelpath)

    def __buildcmd(self):
        """Assemble ``self.cmdline`` from classpath, class, model and separator."""
        self.cmdline = (
            'java -mx1g -cp "' + self.jarpath + '" ' + self.classfier
            + ' -model "' + self.modelpath + '" -tagSeparator ' + self.delimiter
        )
        print(self.cmdline)

    def tag(self, sent):
        """Tag ``sent`` and return whatever the Java process prints to stdout.

        The text is written to the scratch file ``self.tempsrcpath``, the
        tagger is run on that file, and the scratch file is removed again
        even when running the command or reading its output fails.

        Args:
            sent: Text to tag; forwarded to ``savefile``.

        Returns:
            The captured standard output of the tagger command.
        """
        self.savefile(self.tempsrcpath, sent)
        try:
            # Quote the path the same way __buildcmd quotes its paths.
            pipe = os.popen(
                self.cmdline + ' -textFile "' + self.tempsrcpath + '"', 'r'
            )
            try:
                tagtxt = pipe.read()
            finally:
                pipe.close()
        finally:
            self.delfile(self.tempsrcpath)
        return tagtxt

    def tagfile(self, inputpath, outpath):
        """Tag the file ``inputpath`` and redirect the output to ``outpath``.

        Both paths are wrapped in double quotes so that paths containing
        spaces survive the shell.

        Args:
            inputpath: File containing the text to tag.
            outpath: File that receives the tagger's standard output.
        """
        os.system(
            self.cmdline + ' -textFile "' + inputpath + '" > "' + outpath + '"'
        )


雷达卡



京公网安备 11010802022788号







