-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTwitterAPI.py
More file actions
113 lines (96 loc) · 3.28 KB
/
TwitterAPI.py
File metadata and controls
113 lines (96 loc) · 3.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import tweepy
from textblob import TextBlob
import re
def get_tweets(search_query, number_tweets=50):
    """
    Searches Twitter and returns the scrubbed text together with the raw results
    :param search_query: search query used to find tweets (String)
    :param number_tweets: number of tweets to request (Integer)
    :return: tuple (text, tweets) -- text is the list of non-empty scrubbed
             tweet strings, tweets is whatever api.search returned
    """
    # NOTE(review): these values look like redacted placeholders; real
    # credentials are presumably required for the search to succeed.
    consumer_key = 'XXXXXX'
    consumer_secret = 'XXXXXX'
    access_token = 'XXXXXX'
    access_token_secret = 'XXXXXX'

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    tweets = tweepy.API(auth).search(q=search_query, count=number_tweets)

    # Tweets that scrub down to an empty string are left out of the text list
    # (but are still present in the raw results returned alongside it).
    scrubbed = (scrub_tweet(status.text) for status in tweets)
    text = [cleaned for cleaned in scrubbed if cleaned]
    return text, tweets
# Patterns and tables used by scrub_tweet, built once at import time.
# A link ends at the first whitespace; both http and https are covered.
_URL_RE = re.compile(r'https?://\S+')
# Retweet marker ("rt @user:") -- "rt" must be a word of its own so that
# words such as "smart" or "party" are left alone.
_RETWEET_PREFIX_RE = re.compile(r'\brt\s+@\w+:?')
# Non-empty parenthesised text.
_PARENTHETICAL_RE = re.compile(r'\(.+?\)')
# @mentions and #hashtags, wherever they sit in the tweet (including the end).
_MENTION_OR_HASHTAG_RE = re.compile(r'[@#]\S*')
# The Twitter API HTML-escapes '&', '<' and '>' in tweet text; the escaped
# forms are dropped so fragments like "amp" do not leak through as words.
_HTML_ENTITIES = ('&gt;', '&lt;', '&amp;')
# Every character that is turned into a space.
_CHARS_TO_SPACE = str.maketrans(dict.fromkeys(
    '!"#$%&()*+,./:;<=>?@[\\]^_`{|}~'  # ASCII punctuation (apostrophe is kept)
    '0123456789'
    '-\u2013\u2014'                     # hyphen, en dash, em dash
    '\u2018\u2019\u201c\u201d'          # curly single and double quotes
    '\u2022\u2026',                     # bullet point, ellipsis
    ' ',
))


def scrub_tweet(tweet):
    """
    Scrubs a single tweet down to lower-case words separated by single spaces

    URLs, retweet markers, parenthesised text, @mentions and #hashtags are
    removed; punctuation (except the apostrophe), digits, dashes and
    typographic quotes become spaces; runs of whitespace (including
    newlines) are collapsed.
    :param tweet: tweet (String)
    :return: scrubed_tweet: the scrubbed tweet (String, possibly empty)
    """
    tweet = tweet.lower()
    tweet = _URL_RE.sub('', tweet)
    tweet = _RETWEET_PREFIX_RE.sub('', tweet)
    tweet = _PARENTHETICAL_RE.sub('', tweet)
    tweet = _MENTION_OR_HASHTAG_RE.sub('', tweet)
    for entity in _HTML_ENTITIES:
        tweet = tweet.replace(entity, ' ')
    tweet = tweet.translate(_CHARS_TO_SPACE)
    # split() with no argument splits on any whitespace, so this both trims
    # the ends and folds newlines/tabs/multiple spaces into single spaces.
    scrubed_tweet = ' '.join(tweet.split())
    return scrubed_tweet
def get_tweet_sentiment(tweet):
    '''
    Classify the sentiment of the passed tweet text from the polarity
    reported by TextBlob: 1 (positive), 0 (neutral) or -1 (negative)
    '''
    # polarity of the TextBlob built from the tweet text
    polarity = TextBlob(tweet).sentiment.polarity
    if polarity > 0:
        return 1  # positive
    if polarity == 0:
        return 0  # neutral
    return -1  # negative
def get_sentiments(search_query, number_tweets):
    """
    Fetches tweets for a query and tallies their sentiment
    :param search_query: search query used to find tweets (String)
    :param number_tweets: number of tweets to get (Integer)
    :return: tuple of
             - [positive, neutral, negative, found]: number of scrubbed
               tweets per sentiment and their total (List of Integers)
             - zipped_content: zip of (user name, message) pairs, one per
               raw tweet; under Python 3 this is a one-shot iterator
    """
    texts, tweets = get_tweets(search_query, number_tweets)

    users = []
    messages = []
    for tweet in tweets:
        # Strip only the leading "RT @user:" marker. A greedy 'RT @.*:'
        # would run on to the LAST colon of the line and eat the message
        # up to the "https:" of any link it contains. The text after the
        # marker (including its leading space) is kept unchanged.
        messages.append(re.sub(r'^RT @\w+:', '', tweet.text, count=1))
        users.append(tweet.user.name)

    # The tally runs over the scrubbed texts, which exclude tweets that
    # scrubbed down to nothing, so `found` can be smaller than the number
    # of (user, message) pairs.
    sentiments = [get_tweet_sentiment(text) for text in texts]
    positive = sum(1 for sentiment in sentiments if sentiment > 0)
    neutral = sum(1 for sentiment in sentiments if sentiment == 0)
    negative = len(sentiments) - positive - neutral
    found = positive + neutral + negative

    zipped_content = zip(users, messages)
    return [positive, neutral, negative, found], zipped_content