Collections and Comprehensions (pt.2)

Friends you may know

# Adjacency list of the friendship graph: maps each user id to the list of
# ids of that user's direct friends.
friendships = {
    0: [1, 2],
    1: [0, 2, 3],
    2: [0, 1, 3],
    3: [1, 2, 4],
    4: [3, 5],
    5: [4, 6, 7],
    6: [5, 8],
    7: [5, 8],
    8: [6, 7, 9],
    9: [8],
}
def foaf_ids_bad(user):
    """Return the ids of the friends of each of the user's friends.

    "foaf" is short for "friend of a friend". The result is a flat list built
    by concatenating the friend lists of every direct friend, so it can
    contain repeated ids, the user's own id, and ids of the user's direct
    friends (hence "bad").

    `user` is a mapping with an "id" key that is looked up in `friendships`.
    """
    collected = []
    for direct_friend in friendships[user["id"]]:
        collected.extend(friendships[direct_friend])
    return collected
# Walk through the computation for Hero (user id 0).
# Hero's direct friends are stored under the first key of the friendships dict;
# he has two friends, with ids 1 and 2.
friendships[0] # [1, 2]
# foaf_ids_bad then loops over *each* of those friends and collects their friends.
friendships[1] # [0, 2, 3]
friendships[2] # [0, 1, 3]
# Concatenating the two lists above gives the expected result. It includes
# Hero himself (0), his direct friends (1 and 2), and user 3 twice.
# NOTE(review): `users` is not defined in this part -- presumably the user list from part 1.
assert foaf_ids_bad(users[0]) == [0, 2, 3, 0, 1, 3]

Can we count mutual friends?

from collections import Counter


def friends_of_friends(user):
    """Count mutual friends between the user and each friend-of-a-friend.

    `user` is a mapping with an "id" key that is looked up in `friendships`.

    Returns a Counter mapping each friend-of-a-friend id to the number of the
    user's direct friends who are also friends with that id. The user's own
    id and the ids of the user's direct friends are excluded.
    """
    user_id = user["id"]
    # Build the membership set once instead of scanning the list per candidate.
    direct_friends = set(friendships[user_id])
    return Counter(
        foaf_id
        for friend_id in friendships[user_id]      # for each of my friends,
        for foaf_id in friendships[friend_id]      # find their friends
        if foaf_id != user_id                      # who aren't me
        and foaf_id not in direct_friends          # and aren't my friends
    )
# Let's look at Hero (user 0).
# He has two friends in common with Chi (user 3): both of Hero's friends,
# 1 and 2, are also friends with 3.
# Chi is neither Hero himself nor one of his direct friends, so he is counted.
friends_of_friends(users[0]) # Counter({3: 2})
# (user_id, interest) pairs, grouped by user id in ascending order.
# Fixed the misspelled entry "programming langauges" -> "programming languages",
# which would otherwise skew any word counts derived from this list.
interests = [
    (0, "Hadoop"), (0, "Big Data"), (0, "HBase"), (0, "Java"),
    (0, "Spark"), (0, "Storm"), (0, "Cassandra"),
    (1, "NoSQL"), (1, "MongoDB"), (1, "Cassandra"), (1, "HBase"),
    (1, "Postgres"),
    (2, "Python"), (2, "scikit-learn"), (2, "scipy"),
    (2, "numpy"), (2, "statsmodels"), (2, "pandas"),
    (3, "R"), (3, "Python"),
    (3, "statistics"), (3, "regression"), (3, "probability"),
    (4, "machine learning"), (4, "regression"), (4, "decision trees"),
    (4, "libsvm"),
    (5, "Python"), (5, "R"), (5, "Java"), (5, "C++"),
    (5, "Haskell"), (5, "programming languages"),
    (6, "statistics"), (6, "probability"), (6, "mathematics"), (6, "theory"),
    (7, "machine learning"), (7, "scikit-learn"), (7, "Mahout"),
    (7, "neural networks"),
    (8, "neural networks"), (8, "deep learning"),
    (8, "Big Data"), (8, "artificial intelligence"),
    (9, "Hadoop"), (9, "Java"), (9, "MapReduce"), (9, "Big Data"),
]
def data_scientists_who_like(target_interest):
    """Return the ids of all users who list `target_interest` as an interest.

    Scans every (user_id, interest) pair in `interests` and keeps the ids
    whose interest equals `target_interest` exactly, in list order.
    """
    matching_ids = []
    for uid, topic in interests:
        if topic == target_interest:
            matching_ids.append(uid)
    return matching_ids
# Let's see the ids of all users who like "statistics".
data_scientists_who_like("statistics") # [3, 6]
def num_user_with_interest_in(target_interest):
    """Return how many (user_id, interest) pairs in `interests` match `target_interest`.

    The comparison is an exact string equality; the result is 0 when no pair matches.
    """
    return sum(1 for _, topic in interests if topic == target_interest)
from collections import defaultdict

# Two lookup indexes built in a single pass over `interests`:
#   user_ids_by_interest: interest -> ids of the users who listed it
#   interests_by_user_id: user id  -> interests listed by that user
# Both preserve the order in which the pairs appear in `interests`.
user_ids_by_interest = defaultdict(list)
interests_by_user_id = defaultdict(list)
for user_id, interest in interests:
    user_ids_by_interest[interest].append(user_id)
    interests_by_user_id[user_id].append(interest)

# three users are interested in Python
assert user_ids_by_interest["Python"] == [2, 3, 5]
# check all of Hero's (user 0) interests
assert interests_by_user_id[0] == ['Hadoop', 'Big Data', 'HBase', 'Java', 'Spark', 'Storm', 'Cassandra']
def most_common_interests_with(user):
    """Count, per other user, how many interests they share with `user`.

    `user` is a mapping with an "id" key. For every interest listed under
    that id in `interests_by_user_id`, each other user found under that
    interest in `user_ids_by_interest` gets one count. The user's own id is
    never counted.

    Returns a Counter mapping other-user id -> number of shared interests.
    """
    own_id = user["id"]
    tally = Counter()
    for topic in interests_by_user_id[own_id]:
        for other_id in user_ids_by_interest[topic]:
            if other_id != own_id:
                tally[other_id] += 1
    return tally
# Let's check who shares the most interests with Hero (user 0):
# user 9 shares three, user 1 shares two, and users 8 and 5 share one each.
most_common_interests_with(users[0]) # Counter({9: 3, 1: 2, 8: 1, 5: 1})
# Frequency of each lowercase, whitespace-separated word across all interest
# strings (the user id of each pair is ignored).
words_and_counts = Counter(
    token
    for _, phrase in interests
    for token in phrase.lower().split()
)

Salaries and Experience Data

# (salary, tenure) pairs; tenure is the second element of each pair.
salaries_and_tenures = [
    (83000, 8.7), (88000, 8.1),
    (48000, 0.7), (76000, 6),
    (69000, 6.5), (76000, 7.5),
    (60000, 2.5), (83000, 10),
    (48000, 1.9), (63000, 4.2),
]

# Group salaries by exact tenure value: tenure -> list of salaries.
salary_by_tenure = defaultdict(list)
for salary, tenure in salaries_and_tenures:
    salary_by_tenure[tenure].append(salary)

# Average salary for each exact tenure value. No two entries above share a
# tenure, so every "average" here is just that single entry's salary.
average_salary_by_tenure = {
    tenure: sum(salaries) / len(salaries)
    for tenure, salaries in salary_by_tenure.items()
}
def tenure_bucket(tenure: float) -> str:
    """Return the label of the tenure bucket that `tenure` falls into.

    Boundaries are half-open: values below 2 map to "less than two", values
    from 2 up to (but not including) 5 map to "between two and five", and
    everything else -- including exactly 5 -- maps to "more than five".
    """
    if tenure < 2:
        return "less than two"
    if tenure < 5:
        return "between two and five"
    return "more than five"
# Group salaries by tenure bucket: bucket label -> list of salaries.
salary_by_tenure_bucket = defaultdict(list)
for salary, tenure in salaries_and_tenures:
    bucket = tenure_bucket(tenure)
    salary_by_tenure_bucket[bucket].append(salary)

# Finally, calculate the average salary within each bucket.
# The comprehension variable is named `bucket_name` so that it does not
# shadow the `tenure_bucket` function.
average_salary_by_bucket = {
    bucket_name: sum(salaries) / len(salaries)
    for bucket_name, salaries in salary_by_tenure_bucket.items()
}

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store