1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
def crawl_shop_info_handler(self, new_shop):
    """
    Fill in a shop's detail info and category map, then persist the record.

    The record is first flagged via ``mark_job_status("shops", new_shop, 1)``.
    ``shopInfo`` and ``shopCategorys`` are each requested only when that key
    is missing from ``new_shop``. Afterwards the record receives an
    ``update_time_stamp`` (milliseconds), ``status`` 2, and is upserted into
    the "shops" collection. ``new_shop`` is modified in place.

    :param new_shop: shop record. "cityInfo" and "_id" are always read;
        "storeId" (and "wid" for the shop-info request) are read when the
        corresponding data still has to be fetched.
    :type new_shop: dict
    :return: None
    :rtype: NoneType
    """
    city_info = new_shop["cityInfo"]

    # NOTE(review): status 1 / 2 presumably mean "in progress" / "done" --
    # confirm against mark_job_status and the consumers of "status".
    self.mark_job_status("shops", new_shop, 1)

    if "shopInfo" not in new_shop:
        shop_info = {
            "storeId": new_shop["storeId"],
            "wid": new_shop["wid"],
            "eleId": new_shop["_id"],
            "cityId": city_info["_id"],
        }
        res = self._get_request_data("shop", city_info, **shop_info)
        shop_data = res.get("data", {}).get("data", {})
        new_shop["shopInfo"] = shop_data.get("shopInfo", {})

    if "shopCategorys" not in new_shop:
        cate_data = {"storeId": new_shop["storeId"]}
        res = self._get_request_data("cates", city_info, **cate_data)
        cate_lists = res.get("data", {}).get("data", [])

        # One entry per (first-level, second-level) category pair, keyed by
        # the two joined ids; each entry gets its own fresh status dict.
        cate_ids = {}
        for cate in cate_lists:
            one_data = self.cate_id_join(cate)
            for two_level in cate.get("detail", []):
                two_data = self.cate_id_join(two_level)
                cate_info = self.cate_joint_symbol.join([one_data, two_data])
                cate_ids[cate_info] = {"status": 0}
        new_shop["shopCategorys"] = cate_ids

    new_shop["update_time_stamp"] = int(time.time() * 1000)
    new_shop["status"] = 2
    self.mongo_db.upsert("shops", new_shop)

    # Count from the record itself: the local category dict only exists when
    # the categories were fetched in this call, so reading it here used to
    # raise UnboundLocalError for shops that already had "shopCategorys".
    self.logger.debug("Get shop:%s info success, cate total: %s"
                      % (new_shop["_id"], len(new_shop["shopCategorys"])))
|