当前位置: 首页 > news >正文

怎么用ps做网站ui织梦如何做网站

怎么用ps做网站ui,织梦如何做网站,微信小程序开发费用,哪家公司网站建设好点。本博文使用基本框架完成搜房网或者其他网站的数据爬取(重点理解 scrapy 框架的构建过程,使用回调函数,完成数据采集和数据处理)。包结构目录如下图所示。主要代码:(sfw.py) # -*- …
本博文使用 Scrapy 的基本框架完成搜房网(房天下,fang.com)或其他网站的数据爬取。重点是理解 Scrapy 框架的构建过程,并使用回调函数完成数据采集和数据处理。
包结构目录如下图所示:

主要代码:
(sfw.py)
# -*- coding: utf-8 -*-
import scrapy
import re
from fang.items import NewHouseItem,ESFHouseItem
class SfwSpider(scrapy.Spider):
    """Crawl new-house and second-hand-house listings from fang.com.

    The crawl starts at the nationwide city index page.  For every city
    link found there, two requests are scheduled: one for the city's
    new-house listing (handled by ``parse_newhouse``) and one for its
    second-hand listing (handled by ``parse_esf``).  The ``(province,
    city)`` pair travels with each request in ``meta["info"]``.
    """

    name = 'sfw'
    allowed_domains = ['fang.com']
    start_urls = ['http://www.fang.com/SoufunFamily.htm']

    @staticmethod
    def _build_city_urls(city_url):
        """Derive the listing URLs for one city from its home-page URL.

        Args:
            city_url: The href of a city link, e.g. ``http://cq.fang.com/``.

        Returns:
            A ``(newhouse_url, esf_url)`` tuple, or ``None`` when the href is
            missing or does not have at least three dot-separated parts.
        """
        if not city_url:
            return None
        url_module = city_url.split(".")
        if len(url_module) < 3:
            return None
        scheme, fang, com = url_module[0], url_module[1], url_module[2]

        # Beijing uses dedicated hosts instead of the "<city>.newhouse" and
        # "<city>.esf" sub-domain scheme used by the other cities.
        # NOTE(review): this is a substring test, so any host that merely
        # starts with "bj" would also match - confirm this is intended.
        if 'http://bj' in scheme:
            return (
                "http://newhouse.fang.com/house/s/?from=db",
                "http://esf.fang.com/?ctm=1.bj.xf_search.head.105",
            )

        # New-house URL: avoid a doubled slash when the last URL part
        # already carries one.
        if "/" in com:
            newhouse_url = scheme + '.' + "newhouse." + fang + "." + com + "house/s/"
        else:
            newhouse_url = scheme + '.' + "newhouse." + fang + "." + com + "/house/s/"
        # Second-hand-house URL.
        esf_url = scheme + '.' + "esf." + fang + "." + com
        return newhouse_url, esf_url

    def parse(self, response):
        """Walk the province/city table and schedule per-city listing requests.

        Args:
            response: The response for the city index page.

        Yields:
            ``scrapy.Request`` objects for each city's new-house and
            second-hand listing pages.
        """
        trs = response.xpath("//div[@class='outCont']//tr")
        province = None
        for tr in trs:
            tds = tr.xpath(".//td[not(@class='font01')]")
            # A usable row needs a province cell and a city cell.
            if len(tds) < 2:
                continue
            province_text = tds[0].xpath(".//text()").get() or ""
            province_text = re.sub(r"\s", "", province_text)
            # A blank province cell means the row continues the previous
            # province, so the last seen value is kept.
            if province_text:
                province = province_text
            # Do not crawl overseas listings.
            if province == '其它':
                continue
            for city_link in tds[1].xpath(".//a"):
                city_name = city_link.xpath(".//text()").get()
                city_url = city_link.xpath(".//@href").get()
                urls = self._build_city_urls(city_url)
                if urls is None:
                    continue
                newhouse_url, esf_url = urls
                yield scrapy.Request(
                    url=newhouse_url,
                    callback=self.parse_newhouse,
                    meta={"info": (province, city_name)},
                )
                yield scrapy.Request(
                    url=esf_url,
                    callback=self.parse_esf,
                    meta={"info": (province, city_name)},
                )

    def parse_newhouse(self, response):
        """Extract new-house items from a listing page and follow pagination.

        Args:
            response: A new-house listing page whose ``meta["info"]`` holds
                the ``(province, city)`` tuple set by the requesting callback.

        Yields:
            One ``NewHouseItem`` per listing, then a request for the next
            page when a "next" link is present.
        """
        # Unpack the tuple attached to the request by the previous callback.
        province, city = response.meta.get('info')
        lis = response.xpath("//div[contains(@class,'nl_con clearfix')]/ul/li[not(@id)]")
        for li in lis:
            name = "".join(li.xpath(".//div[contains(@class,'nlcd_name')]/a/text()").getall())
            name = re.sub(r"\s", "", name)

            house_type_list = li.xpath(".//div[contains(@class,'house_type')]/a/text()").getall()
            house_type_list = [re.sub(r"\s", "", text) for text in house_type_list]
            # Keep only the room-count labels, i.e. those ending in "居".
            rooms = "".join(text for text in house_type_list if text.endswith("居"))

            area = "".join(li.xpath(".//div[contains(@class,'house_type')]/text()").getall())
            area = re.sub(r"\s|-|/", "", area)

            address = "".join(li.xpath(".//div[@class = 'address']/a/@title").getall())

            # The district is the bracketed part of the address link text;
            # it stays empty when no bracketed part is found.
            district_text = "".join(li.xpath(".//div[@class ='address']/a//text()").getall())
            district_match = re.search(r".*\[(.+)\].*", district_text)
            district = district_match.group(1) if district_match else ""

            # The sale status is the first matching span, so one get() suffices.
            sale = li.xpath(".//div[contains(@class,'fangyuan')]/span/text()").get()

            price = "".join(li.xpath(".//div[contains(@class,'nhouse_price')]//text()").getall())
            price = re.sub(r"\s|广告", "", price)

            origin_url_p = "".join(li.xpath(".//div[@class='nlcd_name']/a/@href").getall())
            origin_url = response.urljoin(origin_url_p)

            yield NewHouseItem(
                province=province,
                city=city,
                name=name,
                rooms=rooms,
                address=address,
                area=area,
                district=district,
                price=price,
                sale=sale,
                origin_url=origin_url,
            )

        next_url = response.xpath("//div[@class='page']/a[@class='next']/@href").get()
        if next_url:
            yield scrapy.Request(
                url=response.urljoin(next_url),
                callback=self.parse_newhouse,
                meta={"info": (province, city)},
            )

    def parse_esf(self, response):
        """Extract second-hand-house items from a listing page and paginate.

        Args:
            response: A second-hand listing page whose ``meta["info"]`` holds
                the ``(province, city)`` tuple set by the requesting callback.

        Yields:
            One ``ESFHouseItem`` per listing, then a request for the next
            page when a pagination link is present.
        """
        province, city = response.meta.get('info')
        dls = response.xpath("//dl[contains(@dataflag,'bg')]")
        for dl in dls:
            item = ESFHouseItem(province=province, city=city)

            name = ''.join(dl.xpath(".//dd//p[@class='add_shop']/a/@title").getall())
            item['name'] = re.sub(r"\s", "", name)

            # The summary line mixes several attributes; each fragment is
            # classified by the marker character it contains.
            infos = dl.xpath(".//dd//p[@class='tel_shop']//text()").getall()
            infos = [re.sub(r"\s|\|", '', text) for text in infos]
            for info in infos:
                if not info:
                    continue
                if "厅" in info:
                    item['rooms'] = info
                elif '层' in info:
                    item['floor'] = info
                elif '年' in info:
                    item['year'] = info
                elif '向' in info:
                    item['toward'] = info
                elif '㎡' in info:
                    item['area'] = info

            address = "".join(dl.xpath(".//dd//p[@class='add_shop']//span//text()").getall())
            item['address'] = address

            price = "".join(
                dl.xpath(".//dd[@class='price_right']//span[@class='red']//text()").getall()
            )
            item['price'] = price

            unit = "".join(dl.xpath(".//dd[@class='price_right']//span[2]//text()").getall())
            item['unit'] = unit

            detail_url = "".join(dl.xpath(".//h4[@class='clearfix']/a/@href").getall())
            item['origin_url'] = response.urljoin(detail_url)
            yield item

        next_url = response.xpath("//div[@class='page_al']//p[1]/a/@href").get()
        # Only paginate when a link exists, and pass the info as an ordered
        # tuple: a set would lose the province/city order on unpacking.
        if next_url:
            yield scrapy.Request(
                url=response.urljoin(next_url),
                callback=self.parse_esf,
                meta={"info": (province, city)},
            )

(items.py)

# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class NewHouseItem(scrapy.Item):
    """Fields collected for one new-house listing."""

    province = scrapy.Field()    # province
    city = scrapy.Field()        # city
    name = scrapy.Field()        # name of the residential development
    price = scrapy.Field()       # price text
    rooms = scrapy.Field()       # room-count labels ("X居"), joined into one string
    area = scrapy.Field()        # floor area
    address = scrapy.Field()     # address
    district = scrapy.Field()    # administrative district
    sale = scrapy.Field()        # sale status
    origin_url = scrapy.Field()  # URL of the fang.com detail page
class ESFHouseItem(scrapy.Item):
    """Fields collected for one second-hand-house listing."""

    province = scrapy.Field()    # province
    city = scrapy.Field()        # city
    name = scrapy.Field()        # name of the residential development
    price = scrapy.Field()       # total price
    rooms = scrapy.Field()       # layout, e.g. number of bedrooms and living rooms
    floor = scrapy.Field()       # floor
    toward = scrapy.Field()      # orientation
    year = scrapy.Field()        # year
    area = scrapy.Field()        # floor area
    address = scrapy.Field()     # address
    unit = scrapy.Field()        # unit price
    # A contact-person field ("people") is intentionally left disabled:
    # people = scrapy.Field()
    origin_url = scrapy.Field()  # URL of the fang.com detail page

爬取数据如图所示

http://www.yayakq.cn/news/861458/

相关文章:

  • 企业官网有哪些网站wordpress会员权限
  • 网站定制设计服务需要使用的技术互联网销售怎么做
  • 公司网站需要程序员做吗怎么成立网站
  • 自己的网站怎样做优化wordpress最大负载
  • 苏州专业网站设计手机企业网站开发
  • 如何构思公司网站常见的电子商务平台有哪些
  • 重庆做营销网站建设连云制作企业网站
  • 建设厂招工信息网站王晴儿 网站建设
  • 亦庄建站推广好看的企业网站首页
  • 自适应网站开发框架深圳哪家网站建设服务好
  • 网站建设报价 东莞免费在线网站模板
  • 高端网站建设价格彩票网站注册
  • 朔州公司做网站成都建站推广
  • 怎样做 云知梦 网站wordpress 找回密码 邮件
  • 怎么给钓鱼网站做防红抖音电商具体是做什么的
  • 网站建设中网站需求分析和报告工能论文网站域名的密码
  • 公司域名注册后怎么建设网站网站里的副栏目是什么
  • 网站策划哪里找大连手机模板建站
  • 网站学做糕点的课程大麦网的网站建设
  • 建设银行住房贷款网站文化传媒公司
  • php免费开源建站系统做网站花费
  • 做网站需要提供哪些资料乐从网站开发
  • 建一个商业网站要多少钱做营销网站视频
  • dede 汽车网站微信怎么关闭小程序消息
  • 什么网站可以兼职做效果图做教育app的网站有哪些内容
  • 创业网站推广怎么做想在网上做设计接单有没有网站
  • seo网站管理招聘企业网站的建设公司
  • 电子商务网站开发的视频咸鱼之王小程序
  • 自助微信网站服装定制加盟
  • 南阳做网站收费教学网站