Вы извлекаете только один элемент потому, что в исходной разметке страницы присутствует только первый элемент. Остальные элементы браузер формирует на стороне клиента с помощью JavaScript — из данных JSON внутри функции jQuery. То, что вы видите в браузере, — это не то, что JSoup разбирает из исходного кода страницы. Добро пожаловать в новый динамический мир веб-программирования!
С учетом сказанного, если вам все еще нужно извлечь данные JSON страницы, подумайте о том, чтобы использовать JSoup для извлечения содержимого тегов <script>,
а затем вы сможете получить данные JSON (через узлы DataNode этих тегов) из переменной var aUpdatedDevelopments.
Приведенный ниже код записывает содержимое первых трех непустых тегов <script> в текстовый файл и выводит его на экран:
// Dump the inline contents of the page's leading <script> tags: each script
// body is echoed to stdout and appended to BerkeleyGroupScrape.txt in the
// current working directory.

// Resolve the current working directory
String currentDir = new File("").getAbsolutePath();
// Stop once more than this many script data nodes have been written
// (same threshold as the original "i > 2" check).
final int lastAllowedCount = 2;
// Open the text file; try-with-resources flushes and closes the writer even
// when the HTTP request or a write throws, so the file handle is never leaked.
try (FileWriter writer = new FileWriter(currentDir + "/BerkeleyGroupScrape.txt")) {
    // Connect to the HTML page and fetch its content (timeout argument is 10*1000)
    Document doc = Jsoup.connect("http://www.berkeleygroup.co.uk/search-results").timeout(10*1000).get();
    Elements scriptTags = doc.getElementsByTag("script");
    int i = 0;
    for (Element tag : scriptTags) {
        // The counter never decreases, so once the limit is passed no later
        // tag can be processed: leave the loop instead of skipping each one.
        if (i > lastAllowedCount) { break; }
        // Script tags without inline content yield no data nodes and are
        // therefore not counted towards the limit.
        for (DataNode node : tag.dataNodes()) {
            System.out.println(node.getWholeData());
            writer.append(node.getWholeData());
            i++;
        }
    }
}
OUTPUT
var __stormJs ='t1.stormiq.com/dcv4/jslib/6525_E9642FE4_B222_4C82_B6CA_7F4D662CB825.js';
var device_width=screen.width;var device_height=screen.height;var dpr=1;
if(window.devicePixelRatio!==undefined)
{dpr=window.devicePixelRatio;}if(device_width>device_height)
{ai_width=device_width;}else{ai_width=device_height;}var data =
{dpr:dpr,resolution:ai_width};$.ajax({url:"/api.cfc?method=setClientVariables
",type:"POST",data:data,success:function(d,st,xhr){},error:function(xhr,st,e)
{}});
<!--
$(document).ready(function($){
var aUpdatedDevelopments = '{"ROWCOUNT":56,"COLUMNS":
["ICON","LAT","LONGI","BESTLINKTEXT","FRIENDLYURL","DEVINTROTEXT","DEVHEADING
","DEVSTATUS","PRICERANGE","DEVIMAGE","DEVLOGO","DIVISION"],"DATA":{"ICON":
["\/media\/logo\/6\/r\/one-tower-bridge-map-logo-image-black.png","\/media
\/logo\/d\/t\/berkeley-homes-woodberry-down-map-logo.png","\/media\/logo
\/7\/j\/st-james-heritage-walk-development-logo-map-v2.png","\/media\/logo
\/i\/k\/st-george-sovereign-court-map-logo.png","\/media\/logo\/j\/k
\/st-george-one-blackfriars-map-logo.png","\/media\/logo\/s\/o\/berkeley-
250-city-road-marketing-location-map-logo-v3.png","\/media\/logo\/r\/9
\/berkeley-taplow-riverside-map-logo-v2.png","\/media\/logo\/i\/3\/st-james-
smithfield-map-logo.png","\/media\/logo\/o\/s\/berkeley-royal-wells-park-map-
logo.png","\/media\/logo\/b\/7\/st-george-battersea-reach-map-logo.png","
\/media\/logo\/d\/5\/berkeley-vista-logo-map.png","\/media\/logo\/8\/4
\/berkeley-woodhurst-park-map-logo.png","\/media\/logo\/d\/5\/berkeley-
queenshurst-map-search-logo-v2_33.png","\/media\/logo\/j\/g\/st-george-
kew-bridge-map-logo.png","\/media\/logo\/j\/a\/berkeley-green-park-village-
map-logo-updated-v2.png","\/media\/logo\/h\/0\/st-james-fiennes-park-logo-
map.png","\/media\/logo\/t\/c\/st-james-albert-embankment-corniche-
map-search.png","\/media\/logo\/f\/a\/st-george-dickens-yard-map-logo.png","
\/media\/logo\/i\/8\/st-edward-375-kensington-high-street-map-logo.png","
\/media\/logo\/i\/8\/st-james-riverlight-map-logo1.png","\/media\/logo\/i\/8
\/berkeley-homes-the-ashmiles-map-logo.png","\/media\/logo\/d\/b\/berkeley-
kennet-island-map-logo.png","\/media\/logo\/1\/1\/berkeley-oakgrove-
map-logo.png","\/media\/logo\/d\/8\/st-george-chelsea-creek-map-logo.png","
\/media\/logo\/j\/b\/berkeley-homes-holborough-lakes-map-logo.png","\/media
\/logo\/b\/q\/berkeley-victory-pier-development-logo-on-white-map.png","
\/media\/logo\/4\/8\/berkeley-homes-kidbrooke-village-map-logo.png","\/media
\/logo\/l\/h\/st-george-london-dock-map-search-icon.png","\/media\/logo\/2\/9
\/berkeley-homes-abell-and-cleland-map-logo.png","\/media\/logo\/e\/0
\/berkeley-wye-dene-map-logo.png","\/media\/logo\/i\/k\/st-edward-kensington-
row-map-logo.png","\/media\/logo\/f\/q\/st-george-beaufort-park-map-
logo.png","\/media\/logo\/0\/d\/berkeley-homes-highwood-map-logo.png","
\/media\/logo\/7\/o\/berkeley-walnut-grove-map-logo.png","\/media\/logo\/7\/c
\/st-james-hurlingham-walk-map-logo.png","\/media\/logo\/3\/b\/berkeley-
homes-edenbrook-map-logo.png","\/media\/logo\/2\/j\/berkeley-ryewood-
map-logo.png","\/media\/logo\/j\/j\/berkeley-marine-wharf-map-logo.png","
\/media\/logo\/j\/0\/berkeley-brunswick-square-map-logo-v2.png","\/media
\/logo\/p\/b\/st-edward-stanmore-place-map-logo.png","\/media\/logo\/g\/k
\/berkeley-homes-wimbledon-hill-park-map-logo.png","\/media\/logo\/8\/0
\/Berkeley-south-quay-plaza-map-search-logo.png","\/media\/logo\/l\/i
\/st-james-kew-bridge-west-map-logo.png","\/media\/logo\/3\/0\/st-james-
southall-gasworks-map-logo.png","\/media\/logo\/h\/c\/st-james-dumont-
map-logo1.png","\/me
...
JSON Pretty Print (отформатированное значение переменной aUpdatedDevelopments из вывода выше)
{
"ROWCOUNT": 56,
"COLUMNS": [
"ICON",
"LAT",
"LONGI",
"BESTLINKTEXT",
"FRIENDLYURL",
"DEVINTROTEXT",
"DEVHEADING",
"DEVSTATUS",
"PRICERANGE",
"DEVIMAGE",
"DEVLOGO",
"DIVISION"
],
"DATA": {
"ICON": [
"\/media\/logo\/6\/r\/one-tower-bridge-map-logo-image-black.png",
"\/media\/logo\/d\/t\/berkeley-homes-woodberry-down-map-logo.png",
"\/media\/logo\/7\/j\/st-james-heritage-walk-development-logo-map-v2.png",
"\/media\/logo\/i\/k\/st-george-sovereign-court-map-logo.png",
"\/media\/logo\/j\/k\/st-george-one-blackfriars-map-logo.png",
"\/media\/logo\/s\/o\/berkeley-250-city-road-marketing-location-map-logo-v3.png",
"\/media\/logo\/r\/9\/berkeley-taplow-riverside-map-logo-v2.png",
"\/media\/logo\/i\/3\/st-james-smithfield-map-logo.png",
"\/media\/logo\/o\/s\/berkeley-royal-wells-park-map-logo.png",
...
"LAT": [
51.5038122197,
51.5707487583,
51.4884271,
51.494341,
51.507878,
51.528207,
51.5292685,
51.5879838,
51.1355008763,
51.4647143,
51.4802827,
51.427478,
51.414314,
51.488669,
51.4251047,
51.514982,
...
"BESTLINKTEXT": [
"One Tower Bridge",
"Woodberry Down",
"Heritage Walk",
"Sovereign Court",
"One Blackfriars",
"250 City Road",
"Taplow Riverside",
"Smithfield Square",
"Royal Wells Park",
"Battersea Reach",
"Vista, Chelsea Bridge",
"Woodhurst Park",
"Queenshurst",
"Kew Bridge",
"Green Park Village",
"Fiennes Park",
"The Corniche",
"Dickens Yard",
"375 Kensington High Street",
...
"FRIENDLYURL": [
"\/new-homes\/london\/tower-bridge\/one-tower-bridge",
"\/new-homes\/london\/finsbury-park\/woodberry-down",
"\/new-homes\/london\/kew-bridge\/heritage-walk",
"\/new-homes\/london\/hammersmith\/sovereign-court",
"\/new-homes\/london\/southwark\/one-blackfriars",
"\/new-homes\/london\/islington\/250-city-road",
"\/new-homes\/buckinghamshire\/taplow\/taplow-riverside",
"\/new-homes\/london\/hornsey\/smithfield-square",
"\/new-homes\/kent\/royal-tunbridge-wells\/royal-wells-park",
"\/new-homes\/london\/battersea\/battersea-reach",
"\/new-homes\/london\/battersea\/vista-chelsea-bridge",
"\/new-homes\/berkshire\/warfield\/woodhurst-park",
"\/new-homes\/london\/kingston\/queenshurst",
"\/new-homes\/london\/kew-bridge\/kew-bridge",
"\/new-homes\/berkshire\/reading\/green-park-village",
"\/new-homes\/berkshire\/maidenhead\/fiennes-park",
"\/new-homes\/london\/albert-embankment\/the-corniche",
"\/new-homes\/london\/ealing\/dickens-yard",
"\/new-homes\/london\/kensington\/375-kensington-high-street",
"\/new-homes\/london\/vauxhall\/riverlight",
"\/new-homes\/west-sussex\/barns-green\/the-ashmiles",
"\/new-homes\/berkshire\/reading\/kennet-island",
Я нахожу 62 элемента. (используя BS для python, но это совершенно то же самое) – njzk2
Печатаются ли все 62 элемента? – Alex
И да, все 62 печатаются – njzk2