2010-05-21 4 views
3

У меня есть объект Element, принадлежащий HTMLDocument, и я хочу получить его содержимое в виде строки. Элемент в строку в HTMLDocument

Я хочу получить такой результат:

Christina Toth, Pharm. D.

=======================

Пожалуйста, посмотрите код ниже.

/**
 * Parses the sample HTML into an {@link HTMLDocument} and prints, for every
 * {@code div}, {@code h2} and {@code h3} element, a line of the form
 * {@code "<tag>: <text>"} where the text is the concatenated document text
 * covered by the element's direct children.
 *
 * @param args ignored
 * @throws Exception if reading, parsing or text extraction fails
 */
public static void main(String args[]) throws Exception {
    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();

    // Decode explicitly instead of relying on the platform default charset.
    // NOTE(review): assumes Nullsoft.getInputStream() yields UTF-8 bytes, as the
    // getInputStream() shown alongside this method does - confirm.
    try (BufferedReader br = new BufferedReader(
            new InputStreamReader(Nullsoft.getInputStream(), "UTF-8"))) {
        HTMLEditorKit.Parser parser = new ParserDelegator();
        HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
        parser.parse(br, callback, true);
        // The reader buffers parsed content; flush() must be the last call on it
        // so that pending changes are actually inserted into the document.
        callback.flush();
    }

    // Walk every element of the document and report the tags of interest.
    ElementIterator iterator = new ElementIterator(htmlDoc);
    Element element;
    while ((element = iterator.next()) != null) {
        Object name = element.getAttributes().getAttribute(StyleConstants.NameAttribute);
        if (name == HTML.Tag.DIV || name == HTML.Tag.H2 || name == HTML.Tag.H3) {
            StringBuilder text = new StringBuilder();
            int count = element.getElementCount();
            for (int i = 0; i < count; i++) {
                Element child = element.getElement(i);
                int startOffset = child.getStartOffset();
                int length = child.getEndOffset() - startOffset;
                text.append(htmlDoc.getText(startOffset, length));
            }
            System.out.println(name + ": " + text);
        }
    }
    System.exit(0);
}

/**
 * Supplies the sample HTML page as an in-memory byte stream.
 *
 * @return a new stream over the UTF-8 encoding of the sample markup; never {@code null}
 */
public static InputStream getInputStream() {
    String text = "<html>\n" +
        "<head>\n" +
        "<title>pg_0001</title>\n" +
        "\n" +
        "<style type=\"text/css\">\n" +
        ".ft3{font-style:normal;font-weight:bold;font-size:11px;font-family:Helvetica;color:#000000;}\n" +
        "</style>\n" +
        "</head>\n" +
        "<body vlink=\"#FFFFFF\" link=\"#FFFFFF\" bgcolor=\"#ffffff\">\n" +
        "\n" +
        "\n" +
        "<div style=\"position:absolute;top:597;left:252\"><nobr><span class=\"ft3\">Christina Toth, Pharm. D.</span></nobr></div>\n" +
        "\n" +
        "\n" +
        "</body>\n" +
        "</html>";
    // The Charset overload of getBytes cannot throw, so there is no failure
    // path that would print a stack trace and hand a null stream to the caller.
    return new ByteArrayInputStream(text.getBytes(java.nio.charset.StandardCharsets.UTF_8));
}

ответ

6

Попробуйте вместо этого следующий код.

Отредактировано: теперь используется метод read() класса HTMLEditorKit.

import java.io.StringReader;
import javax.swing.text.AttributeSet;
import javax.swing.text.BadLocationException;
import javax.swing.text.Element;
import javax.swing.text.ElementIterator;
import javax.swing.text.StyleConstants;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;

/**
 * Demonstrates extracting the plain text of every {@code <div>} in an HTML
 * string using Swing's {@link HTMLEditorKit} and {@link HTMLDocument}.
 */
public class NewMain {

    /** Sample page whose single {@code <div>} wraps the text to extract. */
    private static final String SAMPLE_HTML
        = "<html>\n"
        + "<head>\n"
        + "<title>pg_0001</title>\n"
        + "\n"
        + "<style type=\"text/css\">\n"
        + ".ft3{font-style:normal;font-weight:bold;font-size:11px;"
        + "font-family:Helvetica;color:#000000;}\n"
        + "</style>\n"
        + "</head>\n"
        + "<body vlink=\"#FFFFFF\" link=\"#FFFFFF\" bgcolor=\"#ffffff\">\n"
        + "\n"
        + "\n"
        + "<div style=\"position:absolute;top:597;left:252\"><nobr><span "
        + "class=\"ft3\">Christina Toth, Pharm. D.</span></nobr></div>\n"
        + "\n"
        + "\n"
        + "</body>\n"
        + "</html>";

    /**
     * Loads the sample page into an {@link HTMLDocument} and prints one
     * {@code "div: <text>"} line to standard output per {@code div} element.
     *
     * @param args ignored
     * @throws Exception if the markup cannot be read or the text cannot be extracted
     */
    public static void main(String[] args) throws Exception {
        HTMLEditorKit htmlKit = new HTMLEditorKit();
        HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
        htmlKit.read(new StringReader(SAMPLE_HTML), htmlDoc, 0);

        // Visit every element and report those tagged as DIV.
        ElementIterator iterator = new ElementIterator(htmlDoc);
        Element element;
        while ((element = iterator.next()) != null) {
            AttributeSet attributes = element.getAttributes();
            Object name = attributes.getAttribute(StyleConstants.NameAttribute);
            if (name == HTML.Tag.DIV) {
                System.out.println(name + ": " + childText(htmlDoc, element));
            }
        }
    }

    /**
     * Concatenates the document text covered by each direct child of {@code parent}.
     *
     * @param doc    document the element belongs to
     * @param parent element whose children's text is collected
     * @return the children's text in child order; empty if there are no children
     * @throws BadLocationException if a child's offsets are not valid in {@code doc}
     */
    private static String childText(HTMLDocument doc, Element parent)
            throws BadLocationException {
        // Local, single-threaded buffer: StringBuilder avoids StringBuffer's locking.
        StringBuilder collected = new StringBuilder();
        int count = parent.getElementCount();
        for (int i = 0; i < count; i++) {
            Element child = parent.getElement(i);
            int startOffset = child.getStartOffset();
            int length = child.getEndOffset() - startOffset;
            collected.append(doc.getText(startOffset, length));
        }
        return collected.toString();
    }
}

Вывод в консоли:

div: Christina Toth, Pharm. D.
Смежные вопросы