第1.8章 scrapy之完整工程部署

来源:互联网 发布:窗户漏风 知乎 编辑:程序博客网 时间:2024/05/21 17:26

1 scrapy

# --- Build environment for scrapy on CentOS 6 (Python 2.7.12) -----------------
# NOTE(review): this is a reconstructed step-by-step transcript, not a turnkey
# script. Each section assumes you start in the directory that holds the
# downloaded archives — the original omits the intervening `cd ..` steps.

# Install sqlite development headers (needed so Python builds with sqlite3 support).
yum install sqlite-devel

# Upgrade Python to 2.7.12 (note which Python version the system ships first).
tar -zxvf Python-2.7.12.tgz
cd Python-2.7.12
./configure
make all
make install
make clean
make distclean
# Keep the old interpreter around and point /usr/bin/python at the new one.
mv /usr/bin/python /usr/bin/python2.6.6
ln -s /usr/local/bin/python2.7 /usr/bin/python

# Switch yum back to the old interpreter: the Python upgrade breaks yum,
# so edit its shebang from "#!/usr/bin/python" to "#!/usr/bin/python2.6.6".
vi /usr/bin/yum

# Install distribute (the setuptools fork that pip 9 still expects).
wget http://pypi.python.org/packages/source/d/distribute/distribute-0.6.49.tar.gz
tar -zxvf distribute-0.6.49.tar.gz
cd distribute-0.6.49
# NOTE(review): the install step below was missing from the garbled original —
# presumably lost in extraction; without it the distribute section is a no-op.
python setup.py install

# Install pip.
tar -zxvf pip-9.0.1.tar.gz
cd pip-9.0.1
python setup.py install

# Crawler-related packages.
pip install pyquery
pip install demjson
pip install pyasn1
pip install pyasn1-modules
pip install cryptography
pip install certifi
pip install urllib3==1.21.1
pip install chardet
pip install redis
pip install Pillow
pip install sqlalchemy
pip install scrapy
pip install scrapy-splash

# Install Twisted (scrapy's networking engine) from a local archive.
unzip Twisted-17.5.0.zip
cd Twisted-17.5.0
python setup.py install

##################################################
# MySQL server itself is assumed installed already (installation omitted here).
# Install the MySQL-python bindings.
unzip MySQL-python-1.2.5.zip
cd MySQL-python-1.2.5
python setup.py install

##################################################
# Symlink libmysqlclient so the bindings can find it at runtime
# (adjust the path to wherever your MySQL is actually installed).
ln -s /application/mysql56/lib/libmysqlclient.so.18 /usr/lib64/libmysqlclient.so.18
#pip install -U setuptools
#pip install setuptools_scm

2 scrapyd安装

# --- Install scrapyd, write its config, and register it as a service ----------

unzip scrapyd-1.2.zip
cd scrapyd-1.2
python setup.py install

# Global scrapyd configuration (written via heredoc instead of interactive vi
# so the transcript is reproducible).
mkdir -p /etc/scrapyd
cat > /etc/scrapyd/scrapyd.conf <<'EOF'
[scrapyd]
eggs_dir    = eggs
logs_dir    = logs
items_dir   =
jobs_to_keep = 5
dbs_dir     = dbs
max_proc    = 0
max_proc_per_cpu = 4
finished_to_keep = 100
poll_interval = 5.0
bind_address = 0.0.0.0
http_port   = 6800
debug       = off
runner      = scrapyd.runner
application = scrapyd.app.application
launcher    = scrapyd.launcher.Launcher
webroot     = scrapyd.website.Root

[services]
schedule.json     = scrapyd.webservice.Schedule
cancel.json       = scrapyd.webservice.Cancel
addversion.json   = scrapyd.webservice.AddVersion
listprojects.json = scrapyd.webservice.ListProjects
listversions.json = scrapyd.webservice.ListVersions
listspiders.json  = scrapyd.webservice.ListSpiders
delproject.json   = scrapyd.webservice.DeleteProject
delversion.json   = scrapyd.webservice.DeleteVersion
listjobs.json     = scrapyd.webservice.ListJobs
daemonstatus.json = scrapyd.webservice.DaemonStatus
EOF

# SysV init script so scrapyd starts at boot.
mkdir -p /var/scrapyd
cat > /etc/init.d/scrapyd <<'EOF'
#!/bin/bash
# chkconfig:   2345 90 10
# description: scrapyd - service daemon for deploying and running scrapy spiders
PORT=6800
HOME="/var/scrapyd/"
BIN="/usr/local/bin/scrapyd"
# Find the PID of the python process listening on $PORT.
pid=$(netstat -lnopt | grep ":$PORT" | awk '/python/{gsub(/\/python/,"",$7); print $7}')

start() {
  if [ -n "$pid" ]; then
    echo "server already start,pid:$pid"
    return 0
  fi
  cd "$HOME" || return 1
  nohup "$BIN" &
  echo "start at port:$PORT"
}

stop() {
  if [ -z "$pid" ]; then
    echo "not find program on port:$PORT"
    return 0
  fi
  # Try a graceful TERM first; escalate to KILL only if the process survives.
  kill "$pid" 2>/dev/null
  sleep 1
  kill -0 "$pid" 2>/dev/null && kill -9 "$pid"
  echo "kill program use signal 9,pid:$pid"
}

status() {
  if [ -z "$pid" ]; then
    echo "not find program on port:$PORT"
  else
    echo "program is running,pid:$pid"
  fi
}

case "$1" in
  start)
    start
    ;;
  stop)
    stop
    ;;
  status)
    status
    ;;
  *)
    # Reconstructed: the original usage line was garbled across a line break.
    echo "Usage: $0 {start|stop|status}"
    ;;
esac
exit 0
EOF
chmod +x /etc/init.d/scrapyd
chkconfig scrapyd on

# Process supervisor (watchdog daemon for the crawler processes).
pip install supervisor
mkdir -p /etc/supervisor/

3 scrapyd-client

# --- Install scrapyd-client (provides `scrapyd-deploy` for pushing projects) --
mkdir -p /application/pyplugins
cd /application/pyplugins
mkdir scrapyd-client
cd scrapyd-client
wget https://github.com/scrapy/scrapyd-client/archive/master.zip
unzip master.zip
cd scrapyd-client-master
python setup.py install

4 splash安装

# --- Install docker + the splash rendering service on CentOS 6 ----------------
# Docker on CentOS requires a 64-bit OS and a kernel >= 3.8.
# CentOS 6 ships kernel 2.6, so upgrade the kernel first (via ELRepo).
rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
rpm -Uvh http://www.elrepo.org/elrepo-release-6-6.el6.elrepo.noarch.rpm
yum -y --enablerepo=elrepo-kernel install kernel-lt

# Boot into the new kernel: in /etc/grub.conf change "default=1" to "default=0".
vi /etc/grub.conf
reboot

# Install docker from EPEL.
rpm -Uvh http://download.fedoraproject.org/pub/epel/6/i386/epel-release-6-8.noarch.rpm
yum -y install docker-io
# Start the daemon now and enable it at boot.
service docker start
chkconfig docker on

# Load the splash image from a previously saved tarball — the image is large,
# so pulling it from Docker Hub (docker pull scrapinghub/splash) takes very long.
# The tarball was produced earlier with:
#   docker save scrapinghub/splash > /application/download/splash.tar
docker load < /application/splash.tar

# Run splash on port 8050, restarting automatically with the docker daemon.
docker run -d -p 8050:8050 --restart=always --name=splash scrapinghub/splash
原创粉丝点击