main.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. # -*- coding: UTF-8 -*-
  2. import time
  3. import requests
  4. import json
  5. from retry import retry
  6. import xlwt
  7. from os import path
  8. from sys import stdout
  9. from concurrent.futures import ThreadPoolExecutor
  # Public names exported by a star-import of this module.
  __all__ = [
      "get_dict",
      "get_json",
  ]

  # Names of the per-repository collector functions; get_repo() applies them
  # in this order and merges their results.
  function_list = [
      "get_cnt",
      "get_pr",
      "get_contributors",
  ]

  # Directory that holds config.json and receives the generated output files.
  # An empty string leaves the file names relative, i.e. they are resolved
  # against the current working directory.
  PATH = ""

  # Column headers of the "repositories" sheet.
  head1 = [
      "name",
      "starred",
      "watching",
      "fork",
      "issue",
      "pull_request",
      "contributor",
  ]

  # Column headers of the "contributor list" sheet.
  head2 = [
      "name",
      "contributions",
  ]
  # Read the runtime configuration from config.json at import time.
  try:
      with open(path.join(PATH, "config.json"), "r", encoding="utf-8") as f:
          dic = json.load(f)
      # Meaning of the configuration options:
      USER = dic["user"]  # target GitHub user
      TOKEN = dic["token"]  # GitHub access token, used to raise the API rate limit
      PARALLEL = dic["parallel_threads"]  # maximum number of worker threads
      BLACKLIST = dic["black_list"]  # repositories whose contributors get filtered
      WHITELIST = dic["white_list"]  # contributors still counted in filtered repositories
      # Shared worker pool used to query the repositories concurrently.
      pool = ThreadPoolExecutor(max_workers=PARALLEL)
  except Exception as err:
      print("There are some errors while getting configure information!\n")
      raise err
  # Positional arguments of retry(): exceptions, tries, delay, max_delay
  # (NOTE(review): confirm the order against the installed `retry` package).
  @retry(Exception, 5, 2, 8)
  def get_info(url):
      """
      Perform an authenticated GET request against the GitHub API.

      :param url: API URL to request
      :return: decoded JSON payload (dict or list); an empty list when the
          response carries no body
      :raises requests.HTTPError: when the server answers with an error status
          (raised again once the retries are used up)
      """
      headers = {"Authorization": "Bearer " + TOKEN}
      # A timeout keeps a stalled connection from blocking a worker forever.
      response = requests.get(url=url, headers=headers, timeout=30)
      # Turn HTTP errors (rate limit, not found, ...) into exceptions so that
      # the retry decorator kicks in and callers never mistake an error
      # payload for real data.
      response.raise_for_status()
      # Some endpoints answer "204 No Content" (e.g. the contributors of an
      # empty repository); there is nothing to decode in that case.
      if response.status_code == 204 or not response.text.strip():
          return []
      return json.loads(response.text)
  def get_repo(repo_dict):
      """
      Collect all statistics of a single repository.

      :param repo_dict: repository dictionary as returned by the GitHub API
      :return: dict with "name", "description" and every key produced by the
          collector functions named in function_list
      """
      result = {"name": str(repo_dict.get("name")), "description": repo_dict.get("description")}
      for func_name in function_list:
          # Look the collector up by name instead of building source code for
          # eval(): same dispatch, no code execution from strings.
          collector = globals()[func_name]
          result.update(collector(repo_dict))
      return result
  def get_cnt(repo_dict):
      """
      Extract the plain counters of a repository.

      :param repo_dict: repository dictionary as returned by the GitHub API
      :return: dict with the keys "starred", "watching", "fork" and "issue";
          a counter missing from repo_dict is reported as None
      """
      # (output key, key in the API payload)
      field_map = (
          ("starred", "stargazers_count"),
          ("watching", "watchers_count"),
          ("fork", "forks_count"),
          ("issue", "open_issues_count"),
      )
      return {label: repo_dict.get(api_key) for label, api_key in field_map}
  def get_pr(repo_dict):
      """
      Count the pull requests listed for a repository.

      :param repo_dict: repository dictionary; must contain "full_name"
      :return: dict with the single key "pull_request"
      """
      # NOTE(review): the count is the length of one API response; if the
      # endpoint paginates, larger repositories are presumably under-counted.
      pulls_url = "".join((r"https://api.github.com/repos/", repo_dict["full_name"], "/pulls"))
      pulls = get_info(pulls_url)
      return {"pull_request": len(pulls)}
  def get_contributors(repo_dict):
      """
      Collect the contributors of a repository.

      For a repository that is named in BLACKLIST or carries a "parent" entry,
      only contributors listed in WHITELIST are kept.

      :param repo_dict: repository dictionary; must contain "contributors_url"
      :return: dict with "contributor_list" (name / id / contributions records)
          and "contributor" (number of records)
      """
      # NOTE(review): "parent" may be absent from the payload of the
      # list-repositories endpoint; confirm that forks are detected as intended.
      def is_excluded(entry):
          # Black/white list rule: restricted repositories only keep
          # whitelisted logins.
          if repo_dict["name"] in BLACKLIST or repo_dict.get("parent"):
              return entry["login"] not in WHITELIST
          return False

      kept = [
          {
              "name": entry["login"],
              "id": entry["id"],
              "contributions": entry["contributions"],
          }
          for entry in get_info(repo_dict["contributors_url"])
          if not is_excluded(entry)
      ]
      return {"contributor_list": kept, "contributor": len(kept)}
  def sum_up(dic):
      """
      Add a "total" entry that aggregates all repositories.

      Counters are summed; contributors are merged by name with their
      contributions added up, then ordered by contributions, highest first.

      :param dic: dict with a "repositories" list; it is updated in place
      :return: the same dict, now carrying the "total" entry
      """
      counter_keys = ("starred", "watching", "fork", "issue", "pull_request", "contributor")
      totals = dict.fromkeys(counter_keys, 0)
      merged = {}  # contributor name -> merged record, in order of first appearance

      for repo in dic["repositories"]:
          for key in counter_keys:
              totals[key] += repo[key]
          for person in repo["contributor_list"]:
              known = merged.get(person["name"])
              if known is None:
                  # Copy so that the per-repository record stays untouched.
                  merged[person["name"]] = person.copy()
              else:
                  known["contributions"] += person["contributions"]

      # The summed per-repository count is replaced by the number of distinct
      # contributors.
      totals["contributor"] = len(merged)
      # sorted() is stable, so equal contributions keep first-appearance order.
      totals["contributor_list"] = sorted(
          merged.values(), key=lambda record: record["contributions"], reverse=True
      )
      dic["total"] = totals
      return dic
  def get_dict():
      """
      Fetch the statistics of every repository of USER.

      Repositories are processed concurrently on the shared pool. A repository
      whose worker raised is left out of the result and its exception is
      reported on stdout instead of aborting the whole run.

      :return: dict with "repositories" (sorted by lower-cased name) and the
          "total" entry added by sum_up
      :raises ValueError: when the repository listing is not a JSON list
      """
      from concurrent.futures import as_completed

      # Fetch the repository listing of the user.
      # NOTE(review): a single request is made; if the endpoint paginates,
      # only the first page of repositories is processed.
      root_dict = get_info(r"https://api.github.com/users/" + USER + r"/repos")
      if not isinstance(root_dict, list):
          # An error payload is a dict; iterating it would hand bare strings
          # to the workers and fail with an obscure error.
          raise ValueError("Unexpected response while listing repositories: %r" % (root_dict,))

      # Submit one job per repository, slightly staggered.
      futures = []
      for repo in root_dict:
          futures.append(pool.submit(get_repo, repo))
          time.sleep(0.05)

      # Collect results as the jobs finish. Future.exception() is checked
      # first because Future.result() would re-raise the worker's exception.
      repositories = []
      wrong_list = []
      remaining = len(futures)
      for future in as_completed(futures):
          error = future.exception()
          if error is None:
              repositories.append(future.result())
          else:
              wrong_list.append(error)
          remaining -= 1
          stdout.write('\r %d threads left. . .' % (remaining))

      # Report how the jobs went.
      stdout.write('\r Done!During the process,%d exceptions have been raised. . . ' % (len(wrong_list)))
      stdout.flush()
      if wrong_list:
          for error in wrong_list:
              stdout.write(str(error) + "\n")
          stdout.flush()

      # Order the repositories alphabetically, ignoring case.
      repositories.sort(key=lambda a: a["name"].lower())
      return sum_up({"repositories": repositories})
  def get_json(dic=None):
      """
      Serialize statistics to JSON text.

      :param dic: data to serialize; when omitted (None) the data is fetched
          with get_dict(). An explicitly passed empty container is serialized
          as given and does not trigger a fetch.
      :return: JSON text, indented by 4 spaces, non-ASCII characters kept as is
      """
      if dic is None:
          dic = get_dict()
      return json.dumps(dic, sort_keys=False, indent=4, separators=(',', ':'), ensure_ascii=False)
  def wt_json(text):
      """
      Write the JSON text to github_info.json inside PATH.

      :param text: text to store; the file is written as UTF-8 and replaces
          any existing content
      """
      # Both branches of the former "if PATH" check did exactly the same, so a
      # single write suffices; leaving the with-block flushes and closes the file.
      with open(path.join(PATH, "github_info.json"), "w", encoding="utf-8") as f:
          f.write(text)
  def wt_excel(dic):
      """
      Write the statistics to statistics.xls inside PATH.

      The "repositories" sheet holds one row per repository followed by a
      "Total" row; the "contributor list" sheet holds the merged contributors.

      :param dic: dict with "repositories" and "total", as returned by get_dict()
      """
      wb = xlwt.Workbook()
      repos = dic["repositories"]
      totals = dic["total"]

      # Repository data: header row, then one row per repository.
      tb1 = wb.add_sheet("repositories", cell_overwrite_ok=True)
      for col, title in enumerate(head1):
          tb1.write(0, col, title)
      for row, repo in enumerate(repos, start=1):
          for col, title in enumerate(head1):
              tb1.write(row, col, repo[title])

      # Totals go on the row right below the last repository.
      total_row = len(repos) + 1
      for col, title in enumerate(head1):
          if title == "name":
              tb1.write(total_row, col, "Total")
          else:
              tb1.write(total_row, col, totals[title])

      # Contributor list: header row, then one row per contributor.
      tb2 = wb.add_sheet("contributor list", cell_overwrite_ok=True)
      for col, title in enumerate(head2):
          tb2.write(0, col, title)
      for row, person in enumerate(totals["contributor_list"], start=1):
          for col, title in enumerate(head2):
              tb2.write(row, col, person[title])

      wb.save(path.join(PATH, "statistics.xls"))
  # Script entry point: fetch once, then export the same data as JSON and XLS.
  if __name__ == '__main__':
      collected = get_dict()
      json_text = get_json(collected)
      wt_json(json_text)
      wt_excel(collected)