本萌新正尝试跟这个教程走一遍NLP基础,然后就扒下来了一段程序,作用如下:
给定一个CSV文件(corpusFile),然后将CSV信息所对应的推文写入另一个CSV文件(tweetDataFile)
然而跑了好几次,毫无反应,根本不见有推文进入第二个CSV文件,故此求助
import twitter
def buildTrainingSet(corpusFile, tweetDataFile):
import csv
import time
corpus = []
with open(corpusFile,'r') as csvfile:
lineReader = csv.reader(csvfile,delimiter=',', quotechar="\"")
for row in lineReader:
corpus.append({"tweet_id":row[2], "label":row[1], "topic":row[0]})
rate_limit = 180
sleep_time = 900/180
trainingDataSet = []
for tweet in corpus:
try:
status = twitter_api.GetStatus(tweet["tweet_id"])
print("Tweet fetched" + status.text)
tweet["text"] = status.text
trainingDataSet.append(tweet)
time.sleep(sleep_time)
except:
continue
# now we write them to the empty CSV file
with open(tweetDataFile,'w') as csvfile:
linewriter = csv.writer(csvfile,delimiter=',',quotechar="\"")
for tweet in trainingDataSet:
try:
linewriter.writerow([tweet["tweet_id"], tweet["text"], tweet["label"], tweet["topic"]])
except Exception as e:
print(e)
return trainingDataSet
corpusFile =r"C:\Users\34346\Desktop\TrainingFile.csv"
tweetDataFile =r"C:\Users\34346\Desktop\tweetDataFile.csv"
trainingData = buildTrainingSet(corpusFile, tweetDataFile)