Skip to main content
 首页 » 编程设计

Linux C程序操作Mysql 调用PHP采集淘宝商品

2022年07月19日148lidabo

还是继续这个项目。

在上一篇《Linux下利用Shell使PHP并发采集淘宝产品》中,采用shell将对PHP的调用推到后台执行,模拟多线程。

此方法有一致命缺点,只能人工预判每个程序执行时间。如果判断时间少于执行时间,则会生成大量进程,如果判断时间多于执行时间,则会浪费时间资源。

所以,在此我们采用C程序来控制并发数。

整体思路和用shell调用相似,只是把shell控制改成了C。

下面是C程序:

复制代码
 1 #include <stdio.h> 
 2 #include <stdlib.h>   
 3 #include <string.h>   
 4 #include <sys/time.h> 
 5 #include "/usr/local/include/mysql/mysql.h"  6 #define MAX_COLUMN_LEN 32  7 #define THREAD_NUM 20//线程数  8 int threads = 0;  9 pthread_t thread[THREAD_NUM]; 10 pthread_mutex_t mut;//线程锁 11 int count=0,vod_count=0,number = 0; 12 int *goods_id[1000000]; 13 void *thread1(int thread_id) 14 { 15 int sleepsec; 16 while (number < count){; 17 char shell_cmd[50]; 18 printf("number:%d\tthread_id=%d\tid=%s\n", number, thread_id, goods_id[number]); 19 sprintf(shell_cmd, "/usr/local/bin/php /var/www/9384shop/cron/goodsupdate.php %s", goods_id[number]);//生成shell命令 20 system(shell_cmd);//调用shell 21 pthread_mutex_lock(&mut); 22 number++; 23 pthread_mutex_unlock(&mut); 24  } 25  pthread_exit(NULL); 26 } 27 28 void create_thread(void){ 29 int i,temp; 30 for (i = 0; i < THREAD_NUM; i++){ 31 if (thread[i] == 0){ 32 if ((temp = pthread_create(&thread[i], NULL, thread1, i)) != 0){ 33  } 34 else{ 35 threads++; 36  } 37 break; 38  } 39  } 40 sleep(1); 41 } 42 void thread_wait(void) 43 { 44 int i; 45 /*等待线程结束*/ 46 for (i = 0; i < THREAD_NUM; i++){ 47 if (thread[i] != 0) { 48  pthread_join(thread[i], NULL); 49  } 50  } 51 } 52 int main(int argc, char *argv[]){ 53  MYSQL my_connection; 54 MYSQL_RES *result; 55  MYSQL_ROW sql_row; 56 MYSQL_FIELD *fd; 57 char column[MAX_COLUMN_LEN][MAX_COLUMN_LEN]; 58 int res,flag; 59 mysql_init(&my_connection); 60 if (mysql_real_connect(&my_connection, "localhost" 61 , "root", "202.133", "shop", 3306, NULL, 0)){ 62 printf("connected to mysql.\n"); 63 res = mysql_query(&my_connection, "select id from s_goods where is_off_sale=0 order by id desc limit 1000000");//查询 64 printf("select id from s_goods where is_off_sale=0 order by id desc limit 1000000\n"); 65 if (!res){ 66 int i = 0, j; 67 result = mysql_store_result(&my_connection);//保存查询到的数据到result 68 printf("the result number is %lu\n", (unsigned long)mysql_num_rows(result)); 69 count = (unsigned long)mysql_num_rows(result); 70 while (sql_row = mysql_fetch_row(result))//获取具体的数据 71  { 72 goods_id[i] = 
(unsigned long)sql_row[0]; 73 i++; 74  } 75  } 76 mysql_close(&my_connection);//断开连接 77 while (threads < THREAD_NUM) 78  create_thread(); 79  thread_wait(); 80  } 81 else{ 82 mysql_close(&my_connection);//断开连接 83 printf("ERROR:can not connect to mysql\n"); 84  } 85 86 }
复制代码

PHP:

复制代码
  1 <?php 
  2 define("OTHER",true);  3 $host='localhost';  4 $username='root';  5 $password='123456';  6 $db_name='taobao';  7 $s=microtime(1);  8 $id=$argv[1];  9  10  11 $con=mysql_connect($host,$username,$password);  12 mysql_select_db($db_name, $con);  13 $r=mysql_fetch_array(mysql_query('SELECT url,price FROM s_goods where id='.$id),MYSQL_ASSOC);  14 mysql_close($con);  15 $oldprice=$r['price'];  16 $rs=getPrice($r['url']);  17 $t=microtime(1)-$s;  18 $r=array();  19 $r[]=date('Y-m-d H:i:s');  20 $r[]=$id;  21 $r[]=ceil($t*1000)/1000;  22 if($rs=='soldout'){  23 $r[]="OutStock";  24 $con=mysql_connect($host,$username,$password);  25 mysql_select_db($db_name, $con);  26 mysql_query("UPDATE s_goods SET is_off_sale=1 WHERE id=".$id);  27 mysql_close($con);  28 }  29 elseif($rs===false) $r[]= 'FALSE';  30 else{  31 $r[]=$oldprice;  32 $r[]=isset($rs['price'])?$rs['price']:'';  33 $r[]=isset($rs['seller_nick'])?$rs['seller_nick']:'';  34 $r[]=isset($rs['taobao_shop_id'])?$rs['taobao_shop_id']:'';  35 $r[]=isset($rs['shop_name'])?$rs['shop_name']:'';  36 $r[]=isset($rs['sales'])?$rs['sales']:'';  37 $r[]=isset($rs['taobao_cid'])?$rs['taobao_cid']:'';  38 $r[]=isset($rs['merchandis_score'])?$rs['merchandis_score']:'';  39 $r[]=isset($rs['merchandis_total'])?$rs['merchandis_total']:'';  40 $a=array();  41 //$rs['is_off_sale']=0;  42 foreach ($rs as $k=>$v){  43 if(!empty($v)){  44 $a[]="$k='$v'";  45  }  46  }  47 $a[]="update_time='".date('Y-m-d H:i:s')."'";  48 $con=mysql_connect($host,$username,$password);  49 mysql_select_db($db_name, $con);  50 mysql_query("set names utf8");  51 mysql_query("UPDATE s_goods SET ".implode(',',$a)." 
WHERE id=".$id);  52 mysql_close($con);  53 }  54 $h=fopen('/home/staff/www/9384shop/cron/goodsUpdate.log','a+');  55  56 fputcsv($h,$r);  57 fclose($h);  58  59 function getPrice($url){  60 $rs=array();  61 preg_match('/[&|\?]id=(\d+)/',$url,$id);  62 $id=$id[1];  63 $c=curls($url,true);  64 $content = $c['content'];  65 if(empty($content)) exit;  66 $content=mb_convert_encoding($content,"UTF-8","gbk");  67 $lastredirectaddr = $c['lastredirectaddr'];  68 if(preg_match('/noitem\.htm/',$content)||preg_match('/<strong>此宝贝已下架<\/strong>|您查看的商品找不到了|您查看的宝贝不存在,可能已下架或者被转移/',$content)){  69 return 'soldout';  70 }elseif(preg_match("/'reservePrice'\s*:\s*'([\d\.]+?)',/",$content,$price)){  71 $price = (float)$price[1];  72 }elseif(preg_match('/price:([\d\.]+?),/',$content,$price)){  73 $price = (float)$price[1];  74  }  75 if(preg_match('/"sellerNickName"\s*:\s*"(.*?)",/',$content,$nick)){  76 $rs['seller_nick'] = urldecode($nick[1]);  77 }elseif(preg_match('/sellerNick\s*:\s*"(.*?)",/',$content,$nick)){  78 $rs['seller_nick'] = $nick[1];  79  }  80 if(preg_match('/shopId:"(\d+?)",/',$content,$shopid)){  81 $rs['taobao_shop_id']=$shopid[1];  82 }elseif(preg_match('/&shopId=(\d+)&/',$content,$shopid)){  83 $rs['taobao_shop_id']=$shopid[1];  84  }  85 if(preg_match("/'categoryId'\s*:\s*'(\d+?)',/",$content,$cid)){  86 $rs['taobao_cid'] = (float)$cid[1];  87 }elseif(preg_match('/"categoryId"\s*:\s*"(\d+?)",/',$content,$cid)){  88 $rs['taobao_cid'] = (float)$cid[1];  89 }elseif(preg_match("/\scid:'(\d+?)',/",$content,$cid)){  90 $rs['taobao_cid'] = (float)$cid[1];  91  }  92 if(OTHER){  93 if(preg_match('/tmall\.com/',$lastredirectaddr)){  94 if(preg_match('/slogo-shopname.*?>(.*?)<\/a>/',$content,$shopname)){  95 $rs['shop_name']=json_decode('"'.$shopname[1].'"');  96  }  97 if(empty($rs['shop_name'])&&!empty($shopname[1])) $rs['shop_name']=$shopname[1];  98 if(empty($rs['shop_name'])&&!empty($rs['seller_nick'])) $rs['shop_name']=$rs['seller_nick'];  99 
$url2='http://mdskip.taobao.com/core/initItemDetail.htm?itemId='.$id; 100 $tmall_info = curls($url2); 101 preg_match('/"sellCount"\s*:\s*(\d+)/',$tmall_info,$temp); 102 if ($temp[1]!='') $rs['sales']=$temp[1]; 103 $merchandis=curls("http://dsr.rate.tmall.com/list_dsr_info.htm?callback=a&itemId=".$id); 104 if(preg_match('/gradeAvg"\s*:\s*([0-9\.]+)/',$merchandis,$m_t)) 105 $rs['merchandis_score']=$m_t[1]; 106 if(preg_match('/rateTotal"\s*:\s*([0-9]+)/',$merchandis,$m_t2)) 107 $rs['merchandis_total']=$m_t2[1]; 108 }else{ 109 if(preg_match('/shopName\s*:\s*"(.*?)",/',$content,$shopname)){ 110 111 $rs['shop_name']=json_decode('"'.$shopname[1].'"'); 112  } 113 if(empty($rs['shop_name'])&&!empty($rs['seller_nick'])) $rs['shop_name']=$rs['seller_nick']; 114 if(preg_match('/sellerId\s*:\s*"(.*?)"/',$content,$sellerid)||preg_match('/userId\':\'(\d+)\'/',$content,$sellerid)){ 115 $sellerid = $sellerid[1]; 116  } 117 if(preg_match('/sbn=([0-9a-z]+)/',$content,$sbn)) 118 $sbn=$sbn[1]; 119 $url2='http://detailskip.taobao.com/json/ifq.htm?id='.$id.'&sid='.$sellerid.'&sbn='.$sbn.'&q=1&callback=a'; 120 $count_rs = curls($url2); 121 preg_match('/quanity\s*:\s*(\d+)/',$count_rs,$temp); 122 if ($temp[1]!='') $rs['sales']=$temp[1]; 123 $merchandis=curls("http://rate.taobao.com/detail_rate.htm?userNumId=$sellerid&auctionNumId=$id&currentPage=1&rateType=1"); 124 if(preg_match('/merchandisScore"\s*:\s*"([0-9\.]+)/',$merchandis,$m_t)) $rs['merchandis_score']=$m_t[1]; 125 else $rs['merchandis_score']=6; 126 if(preg_match('/merchandisTotal"\s*:\s*([0-9]+)/',$merchandis,$m_t)) $rs['merchandis_total']=$m_t[1]; 127 else $rs['merchandis_total']=0; 128  } 129  } 130 if(!$price){ 131 if(!isset($tmall_info)){ 132 $url2="http://mdskip.taobao.com/core/initItemDetail.htm?itemId=".$id; 133 $tmall_info=curls($url2); 134  } 135 $price_content=json_decode(iconv('gbk','utf-8',preg_replace('/(\d{10,}):/','"${1}":',$tmall_info)),true); 136 
$priceinfo=$price_content['defaultModel']['itemPriceResultDO']['priceInfo']; 137 $price=array(); 138 if(is_array($priceinfo)){ 139 foreach ($priceinfo as $v){ 140 if($v['price']>0) 141 $price[]=$v['price']; 142 if(is_array($v['promotionList'])){ 143 foreach ($v['promotionList'] as $v2){ 144 $p=$v2['extraPromPrice']?$v2['extraPromPrice']:$v2['price']; 145 if($p>0) $price[]=$p; 146  } 147  } 148 if(is_array($v['suggestivePromotionList'])){ 149 foreach ($v['suggestivePromotionList'] as $v2){ 150 $p=$v2['extraPromPrice']?$v2['extraPromPrice']:$v2['price']; 151 if($p>0) $price[]=$p; 152  } 153  } 154  } 155  } 156 $price=count($price)>0?min($price):false; 157  } 158 $rs['price']=$price; 159 if(count($rs)) return $rs; 160 else return false; 161 } 162 function curls($url,$lastredirectaddr=false,$head=false,$times=1){ 163 $ch = curl_init(); 164 curl_setopt($ch, CURLOPT_USERAGENT,'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:26.0) Gecko/20100101 Firefox/26.0'); 165 curl_setopt($ch, CURLOPT_REFERER,'http://www.tmall.com/'); 166 curl_setopt($ch, CURLOPT_FOLLOWLOCATION,1); 167 curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);//设置输出方式, 0为自动输出返回的内容, 1为返回输出的内容,但不自动输出. 168 curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30); //timeout on connect 169 curl_setopt($ch, CURLOPT_TIMEOUT, 30); //timeout on response 170 curl_setopt($ch, CURLOPT_HEADER, $head);//是否输出头信息,0为不输出,非零则输出 171 curl_setopt($ch, CURLOPT_MAXREDIRS, 50 ); 172 curl_setopt($ch, CURLOPT_URL, $url); 173 $count_rs = curl_exec($ch); 174 if($count_rs === false){ 175 echo 'Curl error: ' . curl_error($ch)."\n"; 176 exit; 177  } 178 if($lastredirectaddr) $count_rs=array('content'=>$count_rs,'lastredirectaddr'=>curl_getinfo($ch,CURLINFO_EFFECTIVE_URL)); 179 curl_close($ch); 180 if($count_rs!=''||$count_rs['content']!='') return $count_rs; 181 elseif($times<3) return curls($url,$lastredirectaddr,$head,$times+1); 182 else return false; 183 }
复制代码

程序执行结果:

复制代码
"2014-04-28 12:55:17",36656,0.967,200.00,200.00,力挺服饰专营店,71777969,力挺服饰专营店,0,162201,0.0,0
"2014-04-28 12:55:17",36657,1.018,250.00,250.00,力挺服饰专营店,71777969,力挺服饰专营店,3,50008897,5.0,4
"2014-04-28 12:55:17",36655,1.001,189.00,189.00,兴铭服饰专营店,104640942,兴铭服饰专营店,0,162205,0.0,0
"2014-04-28 12:55:17",36654,0.979,500.00,500.00,力挺服饰专营店,71777969,力挺服饰专营店,1,50008900,5.0,1
"2014-04-28 12:55:17",36653,0.982,150.00,150.00,力挺服饰专营店,71777969,力挺服饰专营店,0,50000697,5.0,2
"2014-04-28 12:55:17",36650,0.874,138.00,138,美品坊,64228914,精致女装美品坊,1,162205,6,0
"2014-04-28 12:55:17",36652,1.008,229.00,229.00,兴铭服饰专营店,104640942,兴铭服饰专营店,0,50011277,0.0,0
"2014-04-28 12:55:17",36647,0.962,259.00,259.00,爱购叁陆陆服饰专营店,102120067,爱购叁陆陆服饰专营店,0,162205,0.0,0
"2014-04-28 12:55:17",36648,1.017,273.42,273.42,力挺服饰专营店,71777969,力挺服饰专营店,0,50000697,0.0,0
"2014-04-28 12:55:17",36645,0.961,646.80,646.80,羽戈旗舰店,100216434,羽戈旗舰店,0,50008779,4.7,29
"2014-04-28 12:55:17",36646,1.011,239.00,239.00,兴铭服饰专营店,104640942,兴铭服饰专营店,0,162205,0.0,0
"2014-04-28 12:55:17",36644,1.009,235.12,235.12,恋尚妮家纺旗舰店,67154794,恋尚妮家纺旗舰店,38,50008779,4.5,795
"2014-04-28 12:55:17",36643,0.968,320.68,320.68,恋尚妮家纺旗舰店,67154794,恋尚妮家纺旗舰店,143,50008779,4.8,2342
"2014-04-28 12:55:17",36641,0.946,19.50,19.50,淘公馆数码专营店,105992505,淘公馆数码专营店,0,50018926,4.6,15708
"2014-04-28 12:55:17",36642,0.985,482.92,482.92,恋尚妮家纺旗舰店,67154794,恋尚妮家纺旗舰店,80,50008779,4.8,493
"2014-04-28 12:55:17",36640,0.968,125.00,128.00,忆红妆旗舰店,64376787,忆红妆旗舰店,8,162702,4.9,345
"2014-04-28 12:55:17",36639,0.988,99.00,99.00,忆红妆旗舰店,64376787,忆红妆旗舰店,12,162702,4.8,115
"2014-04-28 12:55:17",36638,0.976,135.00,148.00,忆红妆旗舰店,64376787,忆红妆旗舰店,1,162702,4.7,18
"2014-04-28 12:55:18",36637,0.964,242.00,245.00,忆红妆旗舰店,64376787,忆红妆旗舰店,22,50005065,4.7,193
"2014-04-28 12:55:18",36636,0.953,412.70,427.50,忆红妆旗舰店,64376787,忆红妆旗舰店,112,162701,4.7,2291
"2014-04-28 12:55:18",36635,0.971,363.00,365.00,忆红妆旗舰店,64376787,忆红妆旗舰店,314,162701,4.8,1982
"2014-04-28 12:55:18",36634,0.973,179.10,175.00,忆红妆旗舰店,64376787,忆红妆旗舰店,0,50005065,4.8,26
"2014-04-28 12:55:18",36633,0.981,334.65,331.00,妹魅旗舰店,104267713,妹魅旗舰店,69,50012010,4.7,887
"2014-04-28 12:55:18",36631,0.943,315.00,315.00,gotrip箱包旗舰店,103732756,gotrip箱包旗舰店,122,50012019,4.8,1073
"2014-04-28 12:55:18",36632,0.989,192.00,192.00,哈妃猫旗舰店,70711288,哈妃猫旗舰店,11577,50012010,4.8,29206
"2014-04-28 12:55:18",36630,0.965,426.00,426.00,chicsouls旗舰店,106083266,chicsouls旗舰店,0,50012028,4.8,16
"2014-04-28 12:55:18",36629,0.953,99.00,99.00,莉娅阁旗舰店,67800337,莉娅阁旗舰店,0,50012027,4.8,97
"2014-04-28 12:55:18",36651,2.126,158.00,158,天天都特价等你,106393691,天天都特价,0,50010526,6,0
"2014-04-28 12:55:18",36628,0.973,2999.00,2999.00,舒适堡鞋类旗舰店,71301827,舒适堡鞋类旗舰店,0,50012027,5.0,19
"2014-04-28 12:55:18",36627,0.98,589.00,598.00,舒适堡鞋类旗舰店,71301827,舒适堡鞋类旗舰店,0,50012027,5.0,4
"2014-04-28 12:55:18",36626,0.972,253.00,253.00,非你不嫁服饰旗舰店,66835425,非你不嫁服饰旗舰店,7,162701,5.0,194
"2014-04-28 12:55:18",36622,0.854,198.00,198,刀1984,65104103,LFMY,1,162201,6,0
"2014-04-28 12:55:18",36625,0.965,235.00,235.00,千禧新娘旗舰店,62369744,千禧新娘旗舰店,287,162701,4.8,608
"2014-04-28 12:55:18",36624,0.98,10.00,10.00,朵品旗舰店,64673740,朵品旗舰店,16,50009032,4.9,680
"2014-04-28 12:55:18",36623,0.973,619.74,187.80,珂尼娅旗舰店,72260130,珂尼娅旗舰店,0,50012010,5.0,4
"2014-04-28 12:55:18",36621,0.977,138.00,138.00,eyesonu服饰旗舰店,63439938,eyesonu服饰旗舰店,23,50008901,4.7,806
"2014-04-28 12:55:19",36619,0.97,178.00,178.00,shezgood旗舰店,57301708,shezgood旗舰店,2,50010850,5.0,29
"2014-04-28 12:55:19",36618,0.992,119.00,119.00,伊莲旗舰店,73373759,伊莲旗舰店,0,50012010,4.7,2353
"2014-04-28 12:55:19",36617,0.967,219.80,219.80,爱伴箱包旗舰店,102234600,爱伴箱包旗舰店,1,50012010,4.7,16
"2014-04-28 12:55:19",36616,0.948,86.00,84.71,姿态服饰专营店,64752277,姿态服饰专营店,2,50012010,3.6,7
"2014-04-28 12:55:19",36620,1.082,99.00,98.90,奈奈爱霓女装旗舰店,57300194,奈奈爱霓女装旗舰店,840,1623,4.8,5593
"2014-04-28 12:55:19",36613,0.995,50.00,50.00,牧缇旗舰店,100328526,牧缇旗舰店,133,50000671,4.8,452
"2014-04-28 12:55:19",36612,0.998,98.01,98.01,lishberry旗舰店,63641040,lishberry旗舰店,0,50000671,4.8,28
"2014-04-28 12:55:19",36611,0.991,498.00,498.00,uncontrollable旗舰店,106009511,uncontrollable旗舰店,1,50010850,4.5,2
"2014-04-28 12:55:19",36610,0.981,99.00,99.00,森露旗舰店,71469682,森露旗舰店,0,50000671,4.7,22
"2014-04-28 12:55:19",36605,0.968,49.00,49.00,桃苡服饰旗舰店,68928805,桃苡服饰旗舰店,0,1623,5.0,3
"2014-04-28 12:55:19",36604,0.954,360.64,360.64,深艺服饰旗舰店,71168332,深艺服饰旗舰店,0,50005065,0.0,0
"2014-04-28 12:55:19",36603,0.955,168.00,168.00,艾芭莉旗舰店,100726318,艾芭莉旗舰店,55,50010850,4.8,1797
"2014-04-28 12:55:19",36601,0.962,78.00,78.00,歌莉韵旗舰店,105012878,歌莉韵旗舰店,169,162103,4.8,36633
"2014-04-28 12:55:19",36600,0.943,64.00,64.00,ieemk旗舰店,103210940,ieemk旗舰店,187,162205,4.7,2220
复制代码

从日志中我们可以看出,1秒钟更新大概是15-20个产品。

采用这种方式既可以控制线程数,又能并发,或许是一个很好的解决方案。

但此方法也有自身的缺点:

  1.因为主要功能是通过PHP来实现的,所以每更新一个产品,操作系统必须新创建一个进程,这大大增加了操作系统的开销。如果直接在C中实现PHP的主要功能,会使程序性能大大提高。

  2.功能耦合性太强,如果要改一个小细节只有重写源码然后编译(比如并发数,查询SQL等等),应采取参数方式来弥补这个缺点。

  3.因为C只给PHP传递了1个ID参数,PHP必须通过查询数据库来获得其它信息,这样就会增加数据库的压力,降低程序的效率。

因为我是初学C,现学现卖,水平有限,所以留待以后改进。

https://www.cnblogs.com/lywy510/p/3696177.html


本文参考链接:https://www.cnblogs.com/xihong2014/p/8027951.html