// Download the recipe landing page and print the `title` attribute read from the
// `a` elements inside every `li` of the home-page slider.
val doc = Jsoup.connect("https://home.meishichina.com/recipe.html").get()
val elements = doc.select("div.wrap div.w.clear div.home_index_slider.mt10 div#home_index_slider ul li")
elements.forEach { element ->
    val title = element.select("a").attr("title")
    println(title)
}
/**
 * Deserializes the given HTML text into an object of the specified class.
 *
 * The text is first turned into a document with `Jsoup.parse(html)` and then handed to
 * the `Document`-based `parse` overload, which does the actual work.
 *
 * @param T the type of the desired object
 * @param html the string from which the object is to be deserialized
 * @param clazz the class of T
 * @return an object of type T built from the string
 */
fun <T : Any> parse(html: String, clazz: Class<T>): T = parse(Jsoup.parse(html), clazz)
/**
 * Deserializes the given document into an object of the specified class.
 *
 * A new instance is created through the public no-argument constructor of [clazz].
 * When `getRootNode` yields a node for this class, every declared field of [clazz] is
 * then filled in through `getFieldValue`; otherwise the freshly created instance is
 * returned untouched.
 *
 * @param T the type of the desired object
 * @param document the document from which the object is to be deserialized
 * @param clazz the class of T
 * @return an object of type T built from the document
 * @throws KsoupException if [clazz] has no public no-argument constructor, or if
 * creating the instance fails for any other reason
 */
fun <T : Any> parse(document: Document, clazz: Class<T>): T {
    // Resolved before instantiation, matching the original evaluation order.
    val rootNode = getRootNode(document, clazz)

    val obj: T = try {
        clazz.getConstructor().newInstance()
    } catch (e: NoSuchMethodException) {
        throw KsoupException("No-args constructor for class $clazz does not exist.", e)
    } catch (e: Exception) {
        throw KsoupException(e)
    }

    // Without a root node there is nothing to extract from; keep the defaults.
    if (rootNode != null) {
        for (field in clazz.declaredFields) {
            getFieldValue(rootNode, obj, field)
        }
    }
    return obj
}
再看一下具体的字段解析,同样不复杂:根据字段的类型,进行对应数据的解析。
/**
 * Parses HTML to assign a value to the specified object field.
 *
 * The field is made accessible, its current value is read and passed to the extractor
 * as the default, and the extractor's result is written back into the field. The
 * extractor is chosen from the field's declared type.
 *
 * @param node the element
 * @param obj the object
 * @param field the target field
 * @throws KsoupException if the declared type of [field] is not one of the handled types
 */
internal fun getFieldValue(node: Element, obj: Any, field: Field) {
    field.isAccessible = true
    val defVal = field[obj]

    // NOTE(review): `Int::class.java`, `Long::class.java`, etc. denote the primitive
    // classes, so fields declared with nullable (boxed) types fall through to `else`.
    // Likewise `List::class.java` only matches a field whose declared type is exactly
    // `java.util.List`. Confirm that this is intended.
    field[obj] = when (field.type) {
        Int::class.java -> IntTypeExtractor.extract(node, field, defVal as Int?, this)
        Long::class.java -> LongTypeExtractor.extract(node, field, defVal as Long?, this)
        Float::class.java -> FloatTypeExtractor.extract(node, field, defVal as Float?, this)
        String::class.java -> StringTypeExtractor.extract(node, field, defVal as String?, this)
        Double::class.java -> DoubleTypeExtractor.extract(node, field, defVal as Double?, this)
        Boolean::class.java -> BooleanTypeExtractor.extract(node, field, defVal as Boolean?, this)
        List::class.java -> ArrayTypeExtractor.extract(node, field, defVal as ArrayList<*>?, this)
        else -> throw KsoupException("Type ${field.type} is not supported.")
    }
}