我编写了这个简单的代码,它可以在我的github简介上找到(它只是下载一些twitter数据并存储在一个excel文件中)。
这是我在python中使用OOP的第一次实验之一:代码可以工作(按预期执行其任务),但在本例中可能并不显式地需要OOP实现。
我是否可以将这些代码显示为我的编码器组合的一部分,或者需要进行一些调整?
这是代码:
from twitterHandler import Twitter_User
import pandas as pd
import threading
if __name__=='__main__':
usersIDS = {'UserName1':1234,'UserName2':1234,
'UserName3':1234,'UserName4':1234}
threads = {}
excel_writer = pd.ExcelWriter("Twitter User's Report.xlsx", engine='openpyxl')
def get_data(user_id): #get best last tweets and store them in a excel file
user = Twitter_User.Twitter_User(user_id,2000)
user.get_tweets()
#print(user.get_tweets())
best_tweets = user.most_liked_rt()
#print(best_tweets)
best_tweets.to_excel(excel_writer, '{}'.format(user.name),index=False)
excel_writer.save()
excel_writer.close()
for user_name,user_id in usersIDS.items():
try:
t = threading.Thread(target=get_data,args=(user_id,))
threads[user_name] = t
print('Starting to get data for: {}'.format(user_name))
t.start()
except Exception as e:
print('Something wrong happens: ',e)
for name,t in threads.items():
t.join()
print('Process for {} Stopped'.format(name))import tweepy
import pandas as pd
import twitterHandler.twitter_data
import collections
auth = tweepy.OAuthHandler(twitterHandler.twitter_data.consumer_key, twitterHandler.twitter_data.consumer_secret)
auth.set_access_token(twitterHandler.twitter_data.access_token, twitterHandler.twitter_data.access_secret)
api = tweepy.API(auth)
class Twitter_User():
'''Get information about a specific Twitter User'''
def __init__(self, id, count=200):
self.id = id
self.count = count
self.data = None
self.like_average = None
self.rt_average = None
try:
self.user = api.get_user(self.id)
self.name = self._user._json['name']
self.screen_name = self._user._json['screen_name']
self.location = self._user._json['location']
self.description = self._user._json['description']
self.url = self._user._json['url']
self.followers = self._user._json['followers_count']
except tweepy.TweepError as e:
print(e.response.text)
return
except tweepy.RateLimitError:
rate = api.rate_limit_status()
print(rate)
return
def get_tweets(self): #store last n tweets in a dataframe
simple_list = []
for status in tweepy.Cursor(api.user_timeline, id=self.id).items(self.count):
array = [status._json["text"].strip(), status._json["favorite_count"],
status._json["created_at"], status._json["retweet_count"],
[h["text"] for h in status._json["entities"]["hashtags"]],status._json["lang"]]
simple_list.append(array)
self.data = pd.DataFrame(simple_list, columns=["Text", "Like", "Created at", "Retweet", "Hashtags","Lang"])
self.data = self.data[~self.data["Text"].str.startswith('RT')]
return self.data
def most_liked_rt(self): #return a df of tweets where the number of like and rt is greater than respective averages
self.like_average = self.data["Like"].mean()
self.rt_average = self.data["Retweet"].mean()
return self.data.loc[(self.data['Like'] > self.like_average) & (self.data['Retweet'] > self.rt_average)]
def count_hashtags(self,df): #give the most used hashtags in the tweets df - to use with the return df of most_liked_rt()
h_tags_cloud = []
h_tags = df[['Hashtags', 'Created at']]
h_tags = h_tags[h_tags["Hashtags"].map(len) != 0]
h_tags_list = h_tags["Hashtags"].tolist()
h_tags_counter = collections.Counter()
for h_inner_list in h_tags_list:
for h_element in h_inner_list:
h_tags_cloud.append(h_element)
h_tags_counter.update(h_tags_cloud)
h_tags_df = pd.DataFrame.from_dict(h_tags_counter,orient="index").sort_values(0,ascending=False)
h_tags_df = h_tags_df.rename(columns={0: 'HashTags Freq'})
return h_tags_dfTwitterData.py
consumer_key = ''
consumer_secret = ''
access_token = ''
access_secret = ''发布于 2018-01-24 14:18:55
我花了一些时间和你的项目,我有一些事情需要改进(在我看来ofc)。
Get some data from a Twitter user是不够的:)为您的API添加一些描述,可能尝试为codecov添加一些徽章等等。usersIDS = {'UserName1':1234,'UserName2':1234,'UserName3':1234,'UserName4':1234}
还有:这是什么?
h_tags = h_tags[h_tags“标签”.map(len) != 0]
看上去像个虫子。即使这样做应该做什么,那么它看起来非常奇怪,在我看来,应该被重构为更易读的东西。
你的构造函数看起来也很奇怪。在构造函数中捕获异常有点难闻。
https://codereview.stackexchange.com/questions/185876
复制相似问题