c# - Search text between different tags using XPath -
I am using HtmlAgilityPack and I need to find a phrase in an HTML document. For example, I have this document:
<!DOCTYPE html> <html> <body> <h1>AAA Heading is Arafrey</h1> <p>This is my <b>first</b> paragraph.</p> <p>My second paragraph</p> <p>My third paragraph</p> </body> </html>
I need to wrap "<p>This is my <b>first</b> paragraph.</p>" in a <span> tag.
For this, I need to search all occurrences by text only (no HTML tags), for example:
This is my first paragraph.
In other words, I want the phrase "This is my first paragraph."
to match <p>This is my <b>first</b> paragraph.</p>
The problem is, I do not know how to do XPath queries for this particular task. Any help would be welcome. Thanks
Edit: the code has been updated so the HTML is still valid after the span replacement.
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using HtmlAgilityPack;

namespace Test
{
    /// <summary>
    /// Finds a plain-text phrase that may span several HTML elements and wraps
    /// the nodes that contain it in a single <c>span</c> element.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads a sample document, locates the keyword in the concatenated text
        /// of all text nodes, wraps the matching sibling nodes in a <c>span</c>,
        /// and serializes the document back into <c>markup</c>.
        /// </summary>
        static void Main(string[] args)
        {
            var markup = @"<!DOCTYPE html>
<html>
<body>
<h1>AAA Heading is available</h1>
<p>This is my <b>first</b> paragraph.</p>
<p>My second paragraph.</p>
<p>My third paragraph.</p>
</body>
</html>";

            var doc = new HtmlDocument();
            doc.LoadHtml(markup);

            // map[k] is the text node that contributed character k of the
            // concatenated text held in builder.
            var map = new List<HtmlNode>();
            var nodes = doc.DocumentNode.SelectNodes("//text()");
            var builder = new StringBuilder(markup.Length);
            for (var j = 0; j < nodes.Count; j++)
            {
                var node = nodes[j];
                builder.Append(node.InnerHtml);
                for (var i = 0; i < node.InnerHtml.Length; i++)
                {
                    map.Add(node);
                }
            }

            var keyword = "This is my first paragraph.";
            int index = builder.ToString().IndexOf(keyword, StringComparison.Ordinal);
            if (index >= 0)
            {
                var firstNode = map[index];
                var lastNode = map[index + keyword.Length - 1];
                var ancestor = Ancestor(firstNode, lastNode);
                if (ancestor != null)
                {
                    // Walk both ends up until each is a direct child of the
                    // common ancestor, so only whole sibling nodes are moved.
                    while (firstNode != null && Level(firstNode) - Level(ancestor) > 1)
                    {
                        firstNode = firstNode.ParentNode;
                    }

                    while (lastNode != null && Level(lastNode) - Level(ancestor) > 1)
                    {
                        lastNode = lastNode.ParentNode;
                    }

                    // NOTE(review): when the keyword lies inside one text node,
                    // Ancestor returns that node itself, so firstNode is not one
                    // of ancestor's children and IndexOf below presumably yields
                    // -1 - confirm and handle if that case matters.
                    if (firstNode != null && lastNode != null && ancestor == Ancestor(firstNode, lastNode))
                    {
                        var span = doc.CreateElement("span");
                        ancestor.ChildNodes.Insert(ancestor.ChildNodes.IndexOf(firstNode), span);

                        // Indices are read after the insert, so they already
                        // account for the new span sitting before firstNode.
                        int start = ancestor.ChildNodes.IndexOf(firstNode);
                        int end = ancestor.ChildNodes.IndexOf(lastNode);
                        for (var i = start; i <= end; i++)
                        {
                            // Always take the node at 'start': each removal
                            // shifts the following siblings down by one.
                            var node = ancestor.ChildNodes[start];
                            ancestor.ChildNodes.Remove(start);
                            span.ChildNodes.Append(node);
                        }
                    }
                }
            }

            using (var writer = new StringWriter())
            {
                doc.Save(writer);
                markup = writer.ToString();
            }
        }

        /// <summary>
        /// Returns the first node on <paramref name="b"/>'s parent chain
        /// (starting with <paramref name="b"/> itself) that also appears on
        /// <paramref name="a"/>'s parent chain, or <c>null</c> if there is none.
        /// </summary>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="a"/> or <paramref name="b"/> is <c>null</c>.
        /// </exception>
        public static HtmlNode Ancestor(HtmlNode a, HtmlNode b)
        {
            if (a == null)
            {
                throw new ArgumentNullException("a");
            }

            if (b == null)
            {
                throw new ArgumentNullException("b");
            }

            var parents = new List<HtmlNode>();
            while (a != null)
            {
                parents.Add(a);
                a = a.ParentNode;
            }

            while (b != null)
            {
                if (parents.Contains(b))
                {
                    return b;
                }

                b = b.ParentNode;
            }

            return null;
        }

        /// <summary>
        /// Counts the nodes on the parent chain from <paramref name="node"/> up
        /// to the top, including <paramref name="node"/> itself; returns 0 for
        /// <c>null</c>.
        /// </summary>
        public static int Level(HtmlNode node)
        {
            int level = 0;
            while (node != null)
            {
                level++;
                node = node.ParentNode;
            }

            return level;
        }
    }
}
Comments
Post a Comment