楼主: ReneeBK
1497 6

[GitHub]Practical Data Science Cookbook - Second Edition [推广有奖]

  • 1关注
  • 62粉丝

VIP

已卖:4897份资源

学术权威

14%

还不是VIP/贵宾

-

TA的文库  其他...

R资源总汇

Panel Data Analysis

Experimental Design

威望
1
论坛币
49635 个
通用积分
55.7537
学术水平
370 点
热心指数
273 点
信用等级
335 点
经验
57805 点
帖子
4005
精华
21
在线时间
582 小时
注册时间
2005-5-8
最后登录
2023-11-26

楼主
ReneeBK 发表于 2017-7-7 04:17:44 |AI写论文

+2 论坛币
k人 参与回答

经管之家送您一份

应届毕业生专属福利!

求职就业群
赵安豆老师微信:zhaoandou666

经管之家联合CDA

送您一个全额奖学金名额~ !

感谢您参与论坛问题回答

经管之家送您两个论坛币!

+2 论坛币


本帖隐藏的内容

Practical-Data-Science-Cookbook-Second-Edition-master.zip (55.02 MB, 需要: 1 个论坛币)


二维码

扫码加我 拉你入群

请注明:姓名-公司-职位

以便审核进群资格,未注明则拒绝

关键词:Data Science Practical Cookbook Edition Science

已有 1 人评分经验 学术水平 热心指数 信用等级 收起 理由
nuomin + 60 + 1 + 1 + 1 奖励积极上传好的资料

总评分: 经验 + 60  学术水平 + 1  热心指数 + 1  信用等级 + 1   查看全部评分

本帖被以下文库推荐

沙发
ReneeBK(未真实交易用户) 发表于 2017-7-7 04:21:31
  1. '''
  2. Understanding the Twitter API v1.1
  3. '''
  4. from twython import Twython

  # Placeholder credentials: replace each 'INSERT HERE' before running.
  API_KEY = 'INSERT HERE'
  API_SECRET = 'INSERT HERE'

  ACCESS_TOKEN = 'INSERT HERE'
  ACCESS_TOKEN_SECRET = 'INSERT HERE'

  # Client object used by the calls below.
  twitter = Twython(API_KEY, API_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

  # Testing our new twitter interface.
  # print(...) with a single argument produces the same output whether print
  # is a statement (Python 2) or a function (Python 3).
  print(twitter.get_user_timeline())

  # The method is spelled get_lastfunction_header (no underscore between
  # "last" and "function"), matching the other calls in this listing.
  print(twitter.get_lastfunction_header('x-rate-limit-remaining'))


  13. #from the How it Works section:
  def twitter_oauth_login():
      """Create a Twython client from the four credential placeholders.

      Returns:
          The object produced by ``Twython(...)`` when called with the API
          key, API secret, access token and access token secret, in that
          order.
      """
      credentials = (
          'INSERT HERE',  # API key
          'INSERT HERE',  # API secret
          'INSERT HERE',  # access token
          'INSERT HERE',  # access token secret
      )
      return Twython(*credentials)


  21. '''
  22. Determining your Twitter followers and friends
  23. '''

  twitter = twitter_oauth_login()

  # Each call is made with count=5000; the 'ids' entry of the response is the
  # list of user IDs that the rest of this section works with.
  friends_ids = twitter.get_friends_ids(count=5000)
  friends_ids = friends_ids['ids']

  followers_ids = twitter.get_followers_ids(count=5000)
  followers_ids = followers_ids['ids']

  # Formatted explicitly so the output is "<friends> <followers>" whether
  # print is a statement (Python 2) or a function (Python 3).
  print('%d %d' % (len(friends_ids), len(followers_ids)))


  friends_set = set(friends_ids)
  followers_set = set(followers_ids)

  print('Number of Twitter users who either are our friend or follow you (union):')
  print(len(friends_set.union(followers_set)))

  # Two equivalent ways of computing the same union size. Outside an
  # interactive session their values are simply discarded.
  len(friends_set | followers_set)
  len(set(friends_ids + followers_ids))

  print('Number of Twitter users who follow you and are your friend (intersection):')
  print(len(friends_set & followers_set))

  print("Number of Twitter users you follow that don't follow you (set difference):")
  print(len(friends_set - followers_set))

  print("Number of Twitter users who follow you that you don't follow (set difference):")
  print(len(followers_set - friends_set))


  42. '''
  43. Pulling Twitter User Profiles
  44. '''


  def pull_users_profiles(ids):
      """Fetch profiles for ``ids`` by calling ``twitter.lookup_user`` per slice.

      The IDs are sent in consecutive slices of at most 100. After every
      request the 'x-rate-limit-remaining' header of that call is printed.

      Args:
          ids: sequence of user IDs; must support ``len()`` and slicing.

      Returns:
          A list holding the results of every ``lookup_user`` call,
          concatenated in request order (empty when ``ids`` is empty).
      """
      collected = []
      for offset in range(0, len(ids), 100):
          collected.extend(twitter.lookup_user(user_id=ids[offset:offset + 100]))
          print(twitter.get_lastfunction_header('x-rate-limit-remaining'))
      return collected

  friends_profiles = pull_users_profiles(friends_ids)
  followers_profiles = pull_users_profiles(followers_ids)

  # Raises KeyError if any profile lacks 'screen_name'; the variants further
  # down avoid that.
  friends_screen_names = [p['screen_name'] for p in friends_profiles]

  # print(...) with one argument behaves the same on Python 2 and Python 3.
  print(friends_screen_names)

  # From the "There's more" section: two ways to tolerate profiles that have
  # no 'screen_name' key.

  # 1) Skip such profiles entirely.
  friends_screen_names = [p['screen_name'] for p in friends_profiles if 'screen_name' in p]

  # 2) Keep one entry per profile, using {} where the key is missing.
  friends_screen_names = [p.get('screen_name', {}) for p in friends_profiles]




  59. '''
  60. Making requests without running afoul of Twitter's rate limit
  61. '''

  62. import time
  63. import math

  rate_limit_window = 15 * 60  # 900 seconds


  def pull_users_profiles_limit_aware(ids):
      """Look up user profiles in slices of 10, pausing after every request.

      After each ``twitter.lookup_user`` call the function reads the
      'x-rate-limit-remaining' header and sleeps for the time left in the
      locally tracked window divided by the number of calls left, so the
      remaining calls are spread over the rest of the window.

      Args:
          ids: sequence of user IDs; must support ``len()`` and slicing.

      Returns:
          A list holding the results of every ``lookup_user`` call,
          concatenated in request order (empty when ``ids`` is empty).
      """
      users = []
      # NOTE(review): the window is timed from this function's start, which
      # need not line up with the server's own window -- confirm if exact
      # alignment matters.
      start_time = time.time()
      # Users are looked up in slices of 10 IDs per request.
      for i in range(0, len(ids), 10):
          batch = ids[i:i + 10]
          users += twitter.lookup_user(user_id=batch)
          header = twitter.get_lastfunction_header('x-rate-limit-remaining')
          # A missing header is treated like an exhausted quota instead of
          # crashing on float(None).
          calls_left = float(header) if header is not None else 0.0
          # Clamp at zero: once the local window has elapsed the difference is
          # negative, and time.sleep() rejects negative durations.
          time_remaining_in_window = max(
              0.0, rate_limit_window - (time.time() - start_time))
          if calls_left > 0:
              sleep_duration = math.ceil(time_remaining_in_window / calls_left)
          else:
              # No calls left: dividing would raise ZeroDivisionError, so wait
              # out whatever remains of the window instead.
              sleep_duration = math.ceil(time_remaining_in_window)
          print('Sleeping for: ' + str(sleep_duration) + ' seconds; ' + str(calls_left) + ' API calls remaining')
          time.sleep(sleep_duration)

      return users

  # Fetch both profile lists through the rate-limit-aware helper; each call
  # sleeps between its requests, so this can take a while for long ID lists.
  78. friends_profiles = pull_users_profiles_limit_aware(friends_ids)
  79. followers_profiles = pull_users_profiles_limit_aware(followers_ids)




  80. '''
  81. Storing JSON data to disk
  82. '''

  83. import json
  def save_json(filename, data):
      """Serialize ``data`` as JSON into ``filename``.

      An existing file at that path is overwritten.

      Args:
          filename: path of the file to write.
          data: any object ``json.dump`` can serialize.
      """
      # Text mode on purpose: json.dump writes str, which a binary-mode ('wb')
      # handle rejects with TypeError on Python 3.
      with open(filename, 'w') as outfile:
          json.dump(data, outfile)

  def load_json(filename):
      """Parse the JSON document stored in ``filename`` and return the result.

      Args:
          filename: path of the file to read.

      Returns:
          Whatever ``json.load`` produces for the file's contents.
      """
      with open(filename) as source:
          return json.load(source)

  # Round trip: write the friends' profiles to disk, read them back, and
  # print the first reloaded entry (raises IndexError if the list is empty).
  91. fname  = 'test_friends_profiles.json'
  92. save_json(fname, friends_profiles)

  93. test_reload = load_json(fname)
  94. print(test_reload[0])

  95. '''
  96. Storing user profiles in MongoDB using PyMongo
  97. '''

  98. import pymongo

  host_string = "mongodb://localhost"
  port = 27017
  mongo_client = pymongo.MongoClient(host_string, port)

  # Get a reference to the MongoDB database 'test'.
  mongo_db = mongo_client['test']

  # Get a reference to the 'user_profiles' collection in the 'test' database.
  user_profiles_collection = mongo_db['user_profiles']

  # Collection.insert() was deprecated in PyMongo 3.0 and removed in 4.0;
  # insert_many() is its replacement for a list of documents. insert_many()
  # refuses an empty list, so empty profile lists are skipped.
  if friends_profiles:
      user_profiles_collection.insert_many(friends_profiles)
  if followers_profiles:
      user_profiles_collection.insert_many(followers_profiles)


  108. #from How it works section
  def save_json_data_to_mongo(data, mongo_db,
                              mongo_db_collection,
                              host_string="localhost",
                              port=27017):
      """Insert ``data`` into a MongoDB collection and return the new id(s).

      Args:
          data: a single document (dict) or an iterable of documents.
          mongo_db: name of the database to write to.
          mongo_db_collection: name of the collection inside that database.
          host_string: host passed to ``pymongo.MongoClient``.
          port: port passed to ``pymongo.MongoClient``.

      Returns:
          For a single document, its inserted id; for an iterable of
          documents, the list of inserted ids (``[]`` when it is empty).
      """
      mongo_client = pymongo.MongoClient(host_string, port)
      try:
          collection = mongo_client[mongo_db][mongo_db_collection]
          # Collection.insert() was removed in PyMongo 4.0; insert_one() and
          # insert_many() together cover what it used to accept.
          if isinstance(data, dict):
              return collection.insert_one(data).inserted_id
          documents = list(data)
          if not documents:
              # insert_many() rejects an empty list; nothing to store.
              return []
          return collection.insert_many(documents).inserted_ids
      finally:
          # The client is created per call, so release its connections here.
          mongo_client.close()


  118. '''
  119. Exploring geographic information available in profiles
  120. '''


  fname = 'test_friends_profiles.json'
  # Keep the reloaded profiles: the call's result used to be discarded, so
  # everything below ran on whatever friends_profiles already held in memory.
  friends_profiles = load_json(fname)


  geo_enabled = [p['geo_enabled'] for p in friends_profiles]
  # True == 1 in Python, so count(1) also counts boolean True entries.
  print(geo_enabled.count(1))


  location = [p['location'] for p in friends_profiles]
  # Number of profiles whose location is the empty string.
  print(location.count(''))


  print(set(location))

  time_zone = [p['time_zone'] for p in friends_profiles]
  # Number of profiles whose time_zone is None.
  print(time_zone.count(None))
  print(set(time_zone))

  # Only profiles that carry a 'status' whose 'geo' entry is not None.
  status_geo = [p['status']['geo'] for p in friends_profiles if ('status' in p and p['status']['geo'] is not None)]
  if status_geo:
      print(status_geo[0])
  print(len(status_geo))



  134. '''
  135. Plotting geo spatial data in Python
  136. '''

  # Two parallel lists: the 'geo' entry and the screen name of every friend
  # profile that has a 'status' with a non-None 'geo' and a 'screen_name'.
  status_geo, status_geo_screen_names = [], []
  for fp in friends_profiles:
      # Guard clause: skip profiles missing any of the three requirements.
      if 'status' not in fp or fp['status']['geo'] is None or 'screen_name' not in fp:
          continue
      status_geo.append(fp['status']['geo'])
      status_geo_screen_names.append(fp['screen_name'])



  import folium
  try:
      from itertools import izip
  except ImportError:
      # Python 3 has no itertools.izip; the built-in zip is already lazy.
      izip = zip

  # Let Folium determine the scale.
  # Named friends_map so the built-in map() is not shadowed.
  friends_map = folium.Map(location=[48, -102], zoom_start=3)

  # One marker per geo-tagged status, labelled with the screen name.
  # Map.simple_marker() no longer exists in folium; Marker(...).add_to(map)
  # is the replacement.
  for sg, sn in izip(status_geo, status_geo_screen_names):
      folium.Marker(location=sg['coordinates'], popup=str(sn)).add_to(friends_map)

  # Map.create_map(path=...) was likewise replaced by Map.save(path).
  friends_map.save('us_states.html')
复制代码

藤椅
hjtoh(未真实交易用户) 发表于 2017-7-7 06:11:45 来自手机
ReneeBK 发表于 2017-7-7 04:17
**** 本内容被作者隐藏 ****
必须学会!

板凳
fengyg(真实交易用户) 企业认证  发表于 2017-7-7 06:48:30
kankan

报纸
cloudoversea(真实交易用户) 发表于 2017-7-7 07:07:55
看看      

地板
lionli(未真实交易用户) 发表于 2017-7-12 12:08:13
Thanks for sharing

7
xiexie1111(真实交易用户) 发表于 2017-8-12 00:38:41
Thanks for sharing

您需要登录后才可以回帖 登录 | 我要注册

本版微信群
加好友,备注jltj
拉您入交流群
GMT+8, 2026-1-4 07:11