Jsoup Java HTML Parser Example

Jsoup is an open-source, Java-based HTML parser that we can use to parse HTML and extract useful information. Jsoup stands for Java HTML parser. The jsoup API can fetch HTML from a URL, or parse it from an HTML string or an HTML file, and it provides methods for extracting and manipulating that data using DOM traversal, CSS selectors, and jQuery-like methods.

1. MainActivity.java

package com.tutorialsee;

import java.io.IOException;
import java.io.InputStream;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import com.tutorialsee.R;
import android.os.AsyncTask;
import android.os.Bundle;
import android.app.Activity;
import android.app.ProgressDialog;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.view.View;
import android.view.Window;
import android.view.WindowManager;
import android.view.View.OnClickListener;
import android.widget.Button;
import android.widget.ImageView;
import android.widget.TextView;

/**
 * Demo screen that downloads a web page with jsoup and shows parts of it.
 *
 * <p>Each of the four buttons in {@code activity_main.xml} starts one
 * {@link AsyncTask} that fetches the page at {@code Constant.URL} (declared
 * outside this file) and displays, respectively, the document title, the
 * {@code description} meta tag, the {@code Keywords} meta tag, or the first
 * image found inside the element with id {@code search_box}.
 */
public class MainActivity extends Activity {

    /** Progress dialog currently owned by this activity, or {@code null} if none. */
    ProgressDialog mProgressDialog;

    /**
     * Switches the window to full screen without a title bar, inflates the
     * layout and wires each button to the task that fills its view.
     *
     * @param savedInstanceState state passed through to {@link Activity#onCreate}
     */
    @Override
    public void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        requestWindowFeature(Window.FEATURE_NO_TITLE);
        getWindow().setFlags(WindowManager.LayoutParams.FLAG_FULLSCREEN,
                WindowManager.LayoutParams.FLAG_FULLSCREEN);
        setContentView(R.layout.activity_main);

        // Locate the buttons declared in activity_main.xml
        Button titleButton = (Button) findViewById(R.id.titlebutton);
        Button descButton = (Button) findViewById(R.id.descbutton);
        Button logoButton = (Button) findViewById(R.id.logobutton);
        Button keywordsButton = (Button) findViewById(R.id.descbuttonsdgfd);

        keywordsButton.setOnClickListener(new OnClickListener() {
            @Override
            public void onClick(View view) {
                // Execute Keywords AsyncTask
                new Keywords().execute();
            }
        });

        titleButton.setOnClickListener(new OnClickListener() {
            @Override
            public void onClick(View view) {
                // Execute Title AsyncTask
                new Title().execute();
            }
        });

        descButton.setOnClickListener(new OnClickListener() {
            @Override
            public void onClick(View view) {
                // Execute Description AsyncTask
                new Description().execute();
            }
        });

        logoButton.setOnClickListener(new OnClickListener() {
            @Override
            public void onClick(View view) {
                // Execute Logo AsyncTask
                new Logo().execute();
            }
        });
    }

    /**
     * Dismisses any progress dialog that is still on screen before the
     * activity goes away, so a task that finishes later does not try to
     * dismiss a dialog whose window no longer exists.
     */
    @Override
    protected void onDestroy() {
        dismissProgressDialog();
        super.onDestroy();
    }

    /** Replaces any existing progress dialog with a fresh "Loading..." dialog and shows it. */
    private void showProgressDialog() {
        dismissProgressDialog();
        mProgressDialog = new ProgressDialog(MainActivity.this);
        mProgressDialog.setTitle("Android Basic JSoup Tutorial");
        mProgressDialog.setMessage("Loading...");
        mProgressDialog.setIndeterminate(false);
        mProgressDialog.show();
    }

    /** Dismisses the current progress dialog if there is one; safe to call repeatedly. */
    private void dismissProgressDialog() {
        if (mProgressDialog == null) {
            return;
        }
        if (mProgressDialog.isShowing()) {
            mProgressDialog.dismiss();
        }
        mProgressDialog = null;
    }

    /** Fetches the page and shows its {@code <title>} in the {@code titletxt} view. */
    private class Title extends AsyncTask<Void, Void, Void> {
        /** Document title; stays {@code null} if the download failed. */
        String title;

        @Override
        protected void onPreExecute() {
            super.onPreExecute();
            showProgressDialog();
        }

        @Override
        protected Void doInBackground(Void... params) {
            try {
                // Connect to the web site
                Document document = Jsoup.connect(Constant.URL).get();
                // Get the html document title
                title = document.title();
            } catch (IOException e) {
                // Best effort: the view is simply left empty on failure.
                e.printStackTrace();
            }
            return null;
        }

        @Override
        protected void onPostExecute(Void result) {
            // Set title into TextView
            TextView titleView = (TextView) findViewById(R.id.titletxt);
            titleView.setText(title);
            dismissProgressDialog();
        }
    }

    /** Fetches the page and shows its {@code Keywords} meta content in the {@code desctxtsd} view. */
    private class Keywords extends AsyncTask<Void, Void, Void> {
        /** Value of the meta tag's {@code content} attribute; {@code null} if the download failed. */
        String content;

        @Override
        protected void onPreExecute() {
            super.onPreExecute();
            showProgressDialog();
        }

        @Override
        protected Void doInBackground(Void... params) {
            try {
                // Connect to the web site
                Document document = Jsoup.connect(Constant.URL).get();
                // Using Elements to get the meta data
                Elements metaKeywords = document.select("meta[name=Keywords]");
                // Locate the content attribute
                content = metaKeywords.attr("content");
            } catch (IOException e) {
                // Best effort: the view is simply left empty on failure.
                e.printStackTrace();
            }
            return null;
        }

        @Override
        protected void onPostExecute(Void result) {
            // Set keywords into TextView
            TextView keywordsView = (TextView) findViewById(R.id.desctxtsd);
            keywordsView.setText(content);
            dismissProgressDialog();
        }
    }

    /** Fetches the page and shows its {@code description} meta content in the {@code desctxt} view. */
    private class Description extends AsyncTask<Void, Void, Void> {
        /** Value of the meta tag's {@code content} attribute; {@code null} if the download failed. */
        String desc;

        @Override
        protected void onPreExecute() {
            super.onPreExecute();
            showProgressDialog();
        }

        @Override
        protected Void doInBackground(Void... params) {
            try {
                // Connect to the web site
                Document document = Jsoup.connect(Constant.URL).get();
                // Using Elements to get the meta data
                Elements description = document.select("meta[name=description]");
                // Locate the content attribute
                desc = description.attr("content");
            } catch (IOException e) {
                // Best effort: the view is simply left empty on failure.
                e.printStackTrace();
            }
            return null;
        }

        @Override
        protected void onPostExecute(Void result) {
            // Set description into TextView
            TextView descView = (TextView) findViewById(R.id.desctxt);
            descView.setText(desc);
            dismissProgressDialog();
        }
    }

    /** Fetches the page, downloads the image inside {@code search_box} and shows it in the {@code logo} view. */
    private class Logo extends AsyncTask<Void, Void, Void> {
        /** Decoded image; stays {@code null} if nothing could be downloaded or decoded. */
        Bitmap bitmap;

        @Override
        protected void onPreExecute() {
            super.onPreExecute();
            showProgressDialog();
        }

        @Override
        protected Void doInBackground(Void... params) {
            try {
                // Connect to the web site
                Document document = Jsoup.connect(Constant.URL).get();
                // Using Elements to get the image inside the search_box div
                Elements img = document.select("div[id=search_box] img[src]");
                // "abs:" makes jsoup resolve a relative src against the page URL,
                // so java.net.URL receives an absolute address.
                String imgSrc = img.attr("abs:src");
                if (imgSrc.length() == 0) {
                    // No image matched, or its src could not be made absolute.
                    return null;
                }
                // Download image from URL
                InputStream input = new java.net.URL(imgSrc).openStream();
                try {
                    // Decode Bitmap
                    bitmap = BitmapFactory.decodeStream(input);
                } finally {
                    // Always release the connection's stream, even if decoding fails.
                    input.close();
                }
            } catch (IOException e) {
                // Best effort: the image view is cleared on failure.
                e.printStackTrace();
            }
            return null;
        }

        @Override
        protected void onPostExecute(Void result) {
            // Set downloaded image into ImageView
            ImageView logoView = (ImageView) findViewById(R.id.logo);
            logoView.setImageBitmap(bitmap);
            dismissProgressDialog();
        }
    }
}

2. activity_main.xml

<!-- Layout for MainActivity: three text/button pairs stacked from the top,
     plus a logo image sitting above a button pinned to the bottom edge. -->
<RelativeLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent" >

    <!-- Receives the page title (looked up as R.id.titletxt) -->
    <TextView
        android:id="@+id/titletxt"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:gravity="center" />

    <!-- Starts the title download -->
    <Button
        android:id="@+id/titlebutton"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:layout_below="@+id/titletxt"
        android:background="#4CAF50"
        android:text="@string/Title"
        android:textColor="#ffffff" />

    <!-- Receives the description meta tag content (R.id.desctxt) -->
    <TextView
        android:id="@+id/desctxt"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:layout_below="@+id/titlebutton"
        android:layout_centerInParent="true"
        android:gravity="center" />

    <!-- Starts the description download -->
    <Button
        android:id="@+id/descbutton"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:layout_below="@+id/desctxt"
        android:background="#4CAF50"
        android:text="@string/Description"
        android:textColor="#ffffff" />

    <!-- Receives the keywords meta tag content (R.id.desctxtsd) -->
    <TextView
        android:id="@+id/desctxtsd"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:layout_below="@+id/descbutton"
        android:layout_centerInParent="true"
        android:gravity="center" />

    <!-- Starts the keywords download -->
    <Button
        android:id="@+id/descbuttonsdgfd"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:layout_below="@+id/desctxtsd"
        android:background="#4CAF50"
        android:text="Website Keywords"
        android:textColor="#ffffff" />

    <!-- Starts the logo download; pinned to the bottom of the screen -->
    <Button
        android:id="@+id/logobutton"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:layout_alignParentBottom="true"
        android:background="#4CAF50"
        android:text="@string/Logo"
        android:textColor="#ffffff" />

    <!-- Receives the downloaded logo bitmap (R.id.logo) -->
    <ImageView
        android:id="@+id/logo"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:layout_above="@+id/logobutton"
        android:layout_centerHorizontal="true"
        android:layout_marginBottom="27dp" />

</RelativeLayout>