#!/usr/bin/env php5 Komplettliste') === false)): $line = fgets($fh); $cnt++; endwhile; $line = ''; while ((! feof($fh)) && (strpos($line, '') === false)): $line = fgets($fh); $cnt++; endwhile; $line = ''; while ((! feof($fh)) && (strpos($line, '
') === false)): $line = fgets($fh); $cnt++; endwhile; echo "Current line offset in HTML: $cnt\n"; $stateInTr = 0; $stateOutTr = 1; $state = $stateOutTr; $pois = array(); while ((! feof($fh)) && (false === strpos($line, '
'))) { $line = fgets($fh, 50000); $lineNumber++; if ($state == $stateOutTr) { if (preg_match('//i', $line, $matches)) { $state = $stateInTr; $poi = array(); $poi['status'] = colorToStatus($matches[1]); } } else { if (preg_match('/([^<]+)<\/td>/i', $line, $matches)) { $poi['name'] = $matches[1]; } elseif ((false !== strpos($line, 'Google Maps')) && preg_match('/\&q=([^\&]+)\&/', $line, $matches)) { $poi['q'] = $matches[1]; } elseif (preg_match_all('/mlat=([0-9\.]+)\&mlon=([0-9\.]+)\&/', $line, $matches, PREG_SET_ORDER)) { $poi['coords'] = array(); for ($i = 0; $i < count($matches); $i++) { $poi['coords'][] = array($matches[$i][1], $matches[$i][2]); // lat + lon } } elseif (false !== strpos($line, '')) { $countAll++; // is poi already known? no? get it: if (! isset($data['streets'][$poi['name']])) { $data['streets'][$poi['name']] = $poi; $countNew++; } else // merge into existing data: { $countUpdated++; $data['streets'][$poi['name']]['status'] = $poi['status']; // statuswechsel übernehmen if (isset($poi['coords'])) { $data['streets'][$poi['name']]['coords'] = $poi['coords']; } if (isset($poi['q'])) { $data['streets'][$poi['name']]['q'] = $poi['q']; } } $state = $stateOutTr; } } } fclose($fh); echo 'Updated streetlist from url ' . $statusUrl . 
"\n"; $data['last_update_status'] = strftime('%F %T %s'); storeData($datafile, $data); echo "Streets in url: $countAll, new: $countNew, updated: $countUpdated\n"; echo "Done.\n\n"; } // Gecode incomplete streets if (isset($opts['G'])) { $countAll = 0; $countHadCoords = 0; $countUpdated = 0; $countFailed = 0; $countGeocoding = 0; $countWhitelisted = 0; $data = readData($datafile); foreach ($data['streets'] as $streetname => $street) { $countAll++; if (isset($street['coords']) && count($street['coords'])) { $countHadCoords++; } else { $street['coords'] = array(); // try geocoder: if (isset($street['q'])) { $countGeocoding++; $coords = getCoords($street['q'], 500000); if ($coords !== null) { $countUpdated++; $street['coords'][] = array($coords->lat, $coords->lon); $data['streets'][ $streetname ] = $street; // update echo $countGeocoding . ': Successful geocoded: ' . $streetname . ' ; searched for "' . $street['q'] . '".' . "\n"; } else { echo $countGeocoding . ': Failed geocoding: ' . $streetname . ' ; searched for "' . $street['q'] . '".' . "\n"; $countFailed++; } } } } $data['last_update_geocoder'] = strftime('%F %T %s'); storeData($datafile, $data); echo "Updated: $countUpdated, failed: $countFailed.\n"; echo "Count all: $countAll, completeness: " . ((100*($countUpdated + $countHadCoords))/$countAll) . "%\n"; echo "Count whitelisted: $countWhitelisted\n"; echo "\n"; echo "Done.\n\n"; } // Export layertext: if (isset($opts['t']) && isset($opts['o'])) { $offsetLat = 51; // lon $offsetLon = 8; // lat if (isset($opts['O'])) { $tmp = explode(",", $opts['O']); if (count($tmp) == 2) { $offsetLat = $tmp[0]; $offsetLon = $tmp[1]; } } $countOrphans = 0; $countWhitelisted = 0; $outfile = $opts['o']; $data = readData($datafile); $fh = fopen($outfile, 'w'); if ($fh === false) { throw new Exception('Could not create output file "' . $outfile . 
'".'); } fwrite($fh, "lat\tlon\ttitle\tdescription\ticonSize\ticonOffset\ticon\n"); $dt = strftime("%d.%m.%Y %H:%M Uhr"); foreach ($data['streets'] as $streetname => $street) { // Idea: group without-coords around city if (isset($street['coords']) && count($street['coords'])) { if (in_array($street['name'], $osmWhitelist)) { $countWhitelisted++; continue; } $title = str_replace(array("\n", "\t", "\r"), '', $street['name']); $description = $street['status'] . '
q=' . $street['q']. '
updated: ' . $dt; $icon = false; switch($street['status']) { case 'not_in_osm': $icon = 'http://www.openlayers.org/dev/img/marker.png'; break; case 'in_osm_only': $icon = 'http://www.openlayers.org/dev/img/marker-gold.png'; break; default: continue; // skip others } if ($icon !== false) { fwrite($fh, sprintf("%f\t%f\t%s\t%s\t%s\t%s\t%s\n", $street['coords'][0][0], $street['coords'][0][1], $title, $description, "21,25", "-10,-25", $icon)); } } else { $countOrphans++; $title = str_replace(array("\n", "\t", "\r"), '', $street['name']) . ' (#' . $countOrphans . ')'; $description = 'Gibt es diese Straße?'; $icon = 'http://www.openlayers.org/dev/img/drag-rectangle-on.png'; $spiral = getSpiralCoords($countOrphans, $offsetLat, $offsetLon); fwrite($fh, sprintf("%f\t%f\t%s\t%s\t%s\t%s\t%s\n", $spiral->lat, $spiral->lon, 'Ohne Koordinaten: ' . $title, $description, "21,25", "-10,-25", $icon)); } } fclose($fh); echo "Written to tonew file '$outfile'.\n"; echo "countOrphans: $countOrphans, countWhitelisted: $countWhitelisted\n"; echo "Done.\n\n"; } // Show info about status: if (isset($opts['i'])) { $data = readData($datafile); $countAll = 0; $countHadCoords = 0; $countColor = array(); echo 'Last status update: ' . $data['last_update_status'] . "\n"; echo 'Last status geocoder: ' . $data['last_update_geocoder'] . "\n"; echo 'Num streets in the database: ' . count($data['streets']) . "\n"; foreach ($data['streets'] as $streetname => $street) { $key = strlen($street['status']) > 0 ? $street['status'] : 'unknown'; if (! 
isset($countColor[$key])) {
            $countColor[$key] = 0;
        }
        $countColor[$key]++;
    }
    echo "Status of streets:\n";
    var_dump($countColor);
    echo "Done.\n\n";
}

// Dump out all data:
if (isset($opts['x'])) {
    $data = readData($datafile);
    echo var_export($data, 1);
}

exit(0);

// ====================================================================
// functions:

/**
 * Geocodes a search string through the Google Maps "geo" HTTP endpoint.
 *
 * see http://code.google.com/support/bin/answer.py?answer=80200&topic=11364
 *
 * The query is appended to the request URL exactly as given (it is not
 * passed through urlencode()). The caller in this script hands in the value
 * of a "q=" parameter that was cut out of a link, so it is presumably
 * already URL-encoded -- confirm before encoding it here.
 *
 * @param string $q                      Search term, appended verbatim to the URL.
 * @param int    $sleepDelayMicroSeconds Pause before the request, in microseconds
 *                                       (acts as a simple rate limit).
 *
 * @return stdClass|null Object with string properties "lat" and "lon", or
 *                       null if the request failed or no pair was found.
 */
function getCoords($q, $sleepDelayMicroSeconds=1000000)
{
    // Wait before every request so consecutive calls are throttled.
    usleep($sleepDelayMicroSeconds);

    $request_url = 'http://maps.google.de/maps/geo?output=xml&q=' . $q;
    $xmlCode = curlDoRequest($request_url);

    // curlDoRequest() yields false on transport errors; nothing to parse then.
    if (! is_string($xmlCode)) {
        return null;
    }

    // NOTE(review): this matches the first "number,number" pair anywhere in
    // the response. It was presumably meant to be anchored to the element
    // that carries the coordinates -- confirm against the upstream script.
    if (preg_match('/([0-9\.\-]+),([0-9\.\-]+)/', $xmlCode, $matches)) {
        $out = new stdClass();
        // The first captured number is used as longitude, the second as latitude.
        $out->lat = $matches[2];
        $out->lon = $matches[1];
        return $out;
    }

    return null;
}

/**
 * Performs an HTTP GET with cURL and returns the response body.
 *
 * Connect and transfer timeouts are set so that one stalled connection
 * cannot block a whole batch run.
 *
 * @param string $url Absolute URL to fetch.
 *
 * @return string|false Response body, or false if the handle could not be
 *                      created or the transfer failed.
 */
function curlDoRequest($url)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.11) Gecko/2009060308 Ubuntu/9.04 (jaunty) Firefox/3.0.11");
    curl_setopt($ch, CURLOPT_NOBODY, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Give up instead of hanging forever on an unresponsive server.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    $headers = array(
        "Content-type: text/xml;charset=\"utf-8\"",
        "Accept: text/xml",
        "Cache-Control: no-cache",
        "Pragma: no-cache",
    );
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

    $response = curl_exec($ch);
    curl_close($ch);

    return $response;
}

/**
 * Serializes $data and writes it to $fname, replacing any previous content.
 *
 * @param string $fname Path of the data file.
 * @param mixed  $data  Value to persist (this script stores an array).
 *
 * @return void
 *
 * @throws Exception If the file could not be written.
 */
function storeData($fname, $data)
{
    // A failed write used to go unnoticed and silently drop all updates.
    if (file_put_contents($fname, serialize($data)) === false) {
        throw new Exception('Could not write datafile "' . $fname . '".');
    }
}

/**
 * Loads the data array from $fname, creating an empty data file first if
 * none exists yet.
 *
 * The file is decoded with unserialize(), so it must only ever contain data
 * written by storeData(); never point this at untrusted input.
 *
 * @param string $fname Path of the data file.
 *
 * @return array The stored data; a freshly created file yields
 *               array('streets' => array()).
 *
 * @throws Exception If the file is missing/unreadable or its content cannot
 *                   be decoded into an array.
 */
function readData($fname)
{
    if (! file_exists($fname)) {
        storeData($fname, array('streets' => array())); // create empty data
    }

    if (! is_readable($fname)) {
        throw new Exception('datafile "' . $fname . '" not found.');
    }

    $raw = file_get_contents($fname);
    $data = ($raw === false) ? false : unserialize($raw);

    // Every caller indexes the result as an array, so anything else is
    // treated the same as undecodable content.
    if (! is_array($data)) {
        throw new Exception('datafile "' . $fname . '" is broken.');
    }

    return $data;
}

/**
 * Maps a color name to the status keyword stored for a street.
 *
 * @param string $color Color name; 'red' and 'yellow' are recognised.
 *
 * @return string 'not_in_osm' for 'red', 'in_osm_only' for 'yellow',
 *                'ok' for everything else.
 */
function colorToStatus($color)
{
    switch ($color) {
        case 'red':
            return 'not_in_osm';
        case 'yellow':
            return 'in_osm_only';
    }

    return 'ok';
}

/**
 * To place unknown places...
 *
 * Computes the $t-th point on a spiral around ($lat0, $lon0): the angle is
 * $t * 0.2 (radians) and the radius is 0.01 + 0.001 * $t, so consecutive
 * values of $t move around the center and slowly outwards.
 *
 * @param int|float $t    Running number of the point.
 * @param float     $lat0 Latitude offset (center of the spiral).
 * @param float     $lon0 Longitude offset (center of the spiral).
 *
 * @return stdClass Object with float properties "lat" and "lon".
 */
function getSpiralCoords($t, $lat0, $lon0) // num, latOffset, lonOffset
{
    $a = 0.2;   // angular step per point
    $b = 0.01;  // starting radius
    $c = 0.001; // radius growth per point

    $radius = $b + ($c * $t);

    $o = new stdClass();
    $o->lat = $lat0 + sin($t * $a) * $radius;
    $o->lon = $lon0 + cos($t * $a) * $radius;

    return $o; // lat + lon
}