XPath 进阶用法 - 开发者社区

xpath进阶用法

import requests
from lxml import etree
# Fetch the demo page and parse it into an lxml element tree; every query
# below runs against `tree`.  The timeout keeps a stalled connection from
# hanging the script indefinitely, and raise_for_status() prevents the
# queries from silently running against an HTTP error page.
html = requests.get('http://quotes.toscrape.com/', timeout=10)
html.raise_for_status()
tree = etree.HTML(html.text)

# NOTE: each xpath() call below is a bare expression statement, so its result
# is only displayed in an interactive session.  Wrap a call in print() (or
# assign it) to inspect the result when running this file as a script.

# Parent step via "..": text of every <a class="tag"> that shares a parent
# with a <meta class="keywords"> element.
tree.xpath("//meta[@class='keywords']/../a[@class='tag']/text()")

# Same query as above, written with the explicit parent:: axis.
tree.xpath("//meta[@class='keywords']/parent::*/a[@class='tag']/text()")

# starts-with(): text of every <a> whose href attribute begins with "/tag".
tree.xpath("//a[starts-with(@href,'/tag')]/text()")

# contains(): text of every <span> whose text contains "know".  In XPath 1.0
# the node-set text() is converted to the string value of its first node, so
# only the span's first text node is tested.
tree.xpath("//span[contains(text(),'know')]/text()")

# Every href attribute value anywhere in the document.
tree.xpath("//@href")

# Union operator "|": combine the results of several location paths.
tree.xpath("//span[contains(text(),'know')]/text() | //span[contains(text(),'world')]/text()")

# child:: axis: text of the <span> children of every <div class="quote">.
tree.xpath("//div[@class='quote']/child::span/text()")

# child::*: text of every child element of every <div class="quote">.
tree.xpath("//div[@class='quote']/child::*/text()")

# attribute::*: all attribute values of every <div class="quote">.
tree.xpath("//div[@class='quote']/attribute::*")

# attribute::href: the href value of every <a class="tag">.
tree.xpath("//a[@class='tag']/attribute::href")

# ancestor:: axis: class attribute values of every ancestor element of the
# <meta class="keywords"> elements (ancestors without a class yield nothing).
tree.xpath("//meta[@class='keywords']/ancestor::*/@class")

# ancestor-or-self:: axis: direct text nodes of the <meta class="keywords">
# elements themselves and of all their ancestors.
tree.xpath("//meta[@class='keywords']/ancestor-or-self::*/text()")

# descendant:: axis: href of every <a> nested at any depth inside a
# <div class="tags">.
tree.xpath("//div[@class='tags']/descendant::a/@href")

# "and": text of <span> elements that have both class="text" and
# itemprop="text".
tree.xpath("//span[@class='text' and @itemprop='text']/text()")

# "or": class attribute of every <div> whose class is exactly "quote" or
# exactly "tags".
tree.xpath("//div[@class='quote' or @class='tags']/@class")

# not(): class attribute values of <span> elements whose class is not "text".
# Spans without any class attribute satisfy the predicate but contribute no
# value to the result.
tree.xpath("//span[not(@class='text')]/@class")

# following:: axis: text of every <a> that appears in the document after the
# end of a <meta class="keywords"> element.
tree.xpath("//meta[@class='keywords']/following::a/text()")

# The original snippet is cut off here: it announces a query selecting the
# text() of every element that precedes <body>, but the expression itself is
# missing.  Presumably tree.xpath("//body/preceding::*/text()") -- TODO confirm.

推荐文章

坚韧的啤酒 · 表达式和函数 - Azure Data Factory & Azure Synapse | Microsoft Learn

1 月前

坏坏的黑框眼镜 · Niagara Launcher 🔹 fresh & clean 的使用1年的感受 - 少数派

9 月前

文武双全的刺猬 · 猎豹加速器官方版-相关推荐-应用宝官网

1 年前

任性的抽屉 · 2017哈佛峰会在杭二中举行 500多位学生哈佛40位明星授课人-新闻中心-温州网

1 年前

有胆有识的帽子 · 再关一家！美国第一共和银行被正式接管--国际--人民网

1 年前

1 年前