Python统计词频时,如何避免IndexError: list index out of range长尾词问题?
- 内容介绍
- 文章标签
- 相关推荐
本文共计532个文字,预计阅读时间需要3分钟。
该代码片段似乎是一个Python脚本的开头部分,其中包含了一些导入语句和一个文件读取操作。以下是简化后的版本:
pythonimport os, string, codecsimport sys, time
def read_file(): wordlist=[] aimfilmra=该楼层疑似违规已被系统折叠隐藏此楼查看此楼 import os, string, codecs import sys, time return wordlist
该楼层疑似违规已被系统折叠隐藏此楼查看此楼importos,string,codecsimportsys,timedefreadfile():wordlist[]aimfilmra该楼层疑似违规已被系统折叠 隐藏此楼查看此楼
import os,string,codecs
import sys,time
def readfile():
wordlist[]
aimfilmraw_input(anter your file nema :)
baseopen(aimfilm,r)
baseinfobase.readlines()
tagfopen(tag.txt,r)
tagfinfotagf.readlines()
for i in tagfinfo:
tagsi.split( )
for i in baseinfo:
wordsi.split( )
for word in words:
if word ! \\tand word ! \\n and word! and word ! and word>2:
wordword.replace(\\t,)
wordword.replace(\\n,)
wordword.replace( ,)
wordword.replace(.\\n,)
if word!:
wordlist.append(word)
tags[.,",,,!,?,(,)]
for x in range(len(tags)):
#global tags
tagtags[x]
for k in range(len(wordlist)):
if tag in wordlist[k]: #用符号分割
wordswordlist[k].split(tag)
del wordlist[k]
for j in range(len(words)): #去掉判断后的空字符
if words[j]!:
wordlist.append(words[j])
base.close()
tagf.close()
return wordlist
def getstr(word,count,allwordnum):
countstrword--------str(count)#--------str(allwordnum)
return countstr
if __name__"__main__":
wordcnt{}
wordlistreadfile()
wordlistallwordlist
allwordnumlen(wordlistall)
outdataopen(count.txt,w)
print ******************************************
print ******************************************
print(u"start......")
print------------------------------------------------------------------------
for i in wordlistall:
if i in wordcnt:
wordcnt[i]1
else:
wordcnt[i]1
for word,cnt in wordcnt.iteritems():
print word--------str(cnt)#--------str(allwordnum)
outdata.write(getstr(word,cnt,allwordnum)\n)
print------------------------------------------------------------------------
print(u"finish")
#print(upress any key to exit)
outdata.close()
词频统计有的文件报这个错小白求教
Traceback (most recent call last):
File "cptj.py", line 50, in
wordlistreadfile()
File "cptj.py", line 33, in readfile
if tag in wordlist[k]: #用符号分割
IndexError: list index out of range
本文共计532个文字,预计阅读时间需要3分钟。
该代码片段似乎是一个Python脚本的开头部分,其中包含了一些导入语句和一个文件读取操作。以下是简化后的版本:
pythonimport os, string, codecsimport sys, time
def read_file(): wordlist=[] aimfilmra=该楼层疑似违规已被系统折叠隐藏此楼查看此楼 import os, string, codecs import sys, time return wordlist
该楼层疑似违规已被系统折叠隐藏此楼查看此楼importos,string,codecsimportsys,timedefreadfile():wordlist[]aimfilmra该楼层疑似违规已被系统折叠 隐藏此楼查看此楼
import os,string,codecs
import sys,time
def readfile():
wordlist[]
aimfilmraw_input(anter your file nema :)
baseopen(aimfilm,r)
baseinfobase.readlines()
tagfopen(tag.txt,r)
tagfinfotagf.readlines()
for i in tagfinfo:
tagsi.split( )
for i in baseinfo:
wordsi.split( )
for word in words:
if word ! \\tand word ! \\n and word! and word ! and word>2:
wordword.replace(\\t,)
wordword.replace(\\n,)
wordword.replace( ,)
wordword.replace(.\\n,)
if word!:
wordlist.append(word)
tags[.,",,,!,?,(,)]
for x in range(len(tags)):
#global tags
tagtags[x]
for k in range(len(wordlist)):
if tag in wordlist[k]: #用符号分割
wordswordlist[k].split(tag)
del wordlist[k]
for j in range(len(words)): #去掉判断后的空字符
if words[j]!:
wordlist.append(words[j])
base.close()
tagf.close()
return wordlist
def getstr(word,count,allwordnum):
countstrword--------str(count)#--------str(allwordnum)
return countstr
if __name__"__main__":
wordcnt{}
wordlistreadfile()
wordlistallwordlist
allwordnumlen(wordlistall)
outdataopen(count.txt,w)
print ******************************************
print ******************************************
print(u"start......")
print------------------------------------------------------------------------
for i in wordlistall:
if i in wordcnt:
wordcnt[i]1
else:
wordcnt[i]1
for word,cnt in wordcnt.iteritems():
print word--------str(cnt)#--------str(allwordnum)
outdata.write(getstr(word,cnt,allwordnum)\n)
print------------------------------------------------------------------------
print(u"finish")
#print(upress any key to exit)
outdata.close()
词频统计有的文件报这个错小白求教
Traceback (most recent call last):
File "cptj.py", line 50, in
wordlistreadfile()
File "cptj.py", line 33, in readfile
if tag in wordlist[k]: #用符号分割
IndexError: list index out of range

