错误于data.frame(goods_name, goods_text, price, org_price, snumber) :
arguments imply differing number of rows: 5, 3
大家有解决方法吗?
具体程序如下:
library(bitops)
library(RCurl)
library(XML)
start_url = "http://shanghai.lashou.com/cate/dianying"
cust_header =c("User-Agent"="Mozilla/5.0 (Windows NT 6.1; WOW64; rv:26.0) Gecko/20100101 Firefox/26.0","Accept"="text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8","Accept-Language"="en-us","Connection"="keep-alive"

pagesource <- getURL(start_url,httpheader=cust_header,.encoding="utf-8"

parseTotalPage <- function(pagesource)
{
doc <- htmlParse(pagesource)
as.numeric(sapply(getNodeSet(doc, '//div[@class="page"]/a[last()-1]/text()'), xmlValue))
}
parseContent <- function(pagesource)
{
doc <- htmlParse(pagesource)
goods_name <- sapply(getNodeSet(doc, '//div[contains(@class,"goods"
]//a[@class="goods-name"]//text()'), xmlValue)goods_text <- sapply(getNodeSet(doc, '//div[contains(@class,"goods"
]//a[@class="goods-text"]//text()'), xmlValue)price <- sapply(getNodeSet(doc, '//div[contains(@class,"goods"
]//span[@class="price"]/text()'), xmlValue)org_price <- sapply(getNodeSet(doc, '//div[contains(@class,"goods"
]//span[@class="money"]/del/text()'), xmlValue)snumber <- sapply(getNodeSet(doc, '//div[contains(@class,"goods"
]//span[@class="number"]/i/text()'), xmlValue)result <- data.frame(goods_name, goods_text, price, org_price, snumber)
}
total_page <- parseTotalPage(pagesource)
pageresults <- parseContent(pagesource)
page = 1
total_page-1)url_list = ""
url_list= paste0("http://shanghai.lashou.com/cate/dianying/page",page +1)
for (url in url_list)
{
pagesource <- getURL(url,httpheader=cust_header,.encoding="utf-8"

pageresult <- parseContent(pagesource)
pageresults <- rbind(pageresults,pageresult)
}
write.table(pageresults,"d://lashoumove.txt"



雷达卡


京公网安备 11010802022788号







