需要统计各城市小区中淘宝用户的覆盖率。数据部门似乎没有统计过这项数据(也可能是我没找到),所以先用 jsoup 把各城市的小区名称抓取下来。
感谢 haozu.com 无偿提供数据。下面是一段一次性的简陋代码:自己去云梯取数据实在麻烦,剩下的工作就交给数据分析的同事帮忙吧。
1 import org.jsoup.Jsoup; 2 import org.jsoup.nodes.Document; 3 import org.jsoup.nodes.Element; 4 import org.jsoup.select.Elements; 5 6 import java.io.FileOutputStream; 7 import java.io.IOException; 8 import java.io.PrintWriter; 9 import java.util.HashMap;10 import java.util.HashSet;11 import java.util.Map;12 import java.util.Set;13 14 /**15 * Created with IntelliJ IDEA.16 * User: zhangbin17 * Date: 13-6-2018 * Time: 下午10:1119 * To change this template use File | Settings | File Templates.20 */21 public class Main {22 public static void main(String[] args){23 String[] cities = new String[]{"beijing","tianjin","dalian","sjz","heb","sy","ty","cc","shanghai","hangzhou","nanjing","jinan","qd"24 ,"xz","shenzhen","guangzhou","cs","haikou","xm","chengdu","chongqing","wuhan","zhengzhou","xa","lz","ly","gy"25 };26 String[] citynames = new String[]{"北京","天津","大连","石家庄","哈尔滨","沈阳","太原","长春","上海","杭州","南京","济南","青岛","徐州","深圳","广州","长沙","海口","厦门"27 ,"成都","重庆","武汉","郑州","西安","兰州","洛阳","贵阳"28 };29 String middle = "haozu.com/community/";30 String head = "http://";31 String url = "";32 Map> blocksMap = new HashMap >();33 for(int i =0 ;i blocks = new HashSet ();38 for(int j=1;j<=10;j++){39 try {40 Document doc = Jsoup.connect(url+"p"+j).get();41 Elements eles = doc.getElementsByClass("clist_name");42 for(Element ele : eles){43 Element tmp = ele.getElementsByTag("a").get(0);44 String block = tmp.text();45 int index = block.indexOf('(');46 if(index != -1){47 block = block.substring(0,index);48 }49 index = block.indexOf('(');50 if(index != -1){51 block = block.substring(0,index);52 }53 blocks.add(block);54 }55 } catch (IOException e) {56 System.out.println("error");57 }58 }59 blocksMap.put(cityName,blocks);60 }61 String lineSep = System.getProperty("line.separator");62 try {63 FileOutputStream fos = new FileOutputStream("/home/zhangbin/CityBlocks.data");64 PrintWriter pw = new PrintWriter(fos);65 for(Map.Entry > entry : blocksMap.entrySet()){66 Set set = entry.getValue();67 
pw.write(lineSep+lineSep+entry.getKey()+lineSep+lineSep);68 for(String tmp : set){69 pw.write(tmp+lineSep);70 }71 pw.flush();72 73 }74 pw.close();75 fos.close();76 } catch (Exception e) {77 //ignore78 }79 80 }81 }