+1 vote
by (2.0k points)
Create a Node.js Web Scraping Project for the Web Computing Practical

1 Answer

+1 vote
by (2.0k points)
edited by
Best answer
  1. First, make sure you have Node.js installed on your computer.

  2. Create a new directory for your project and navigate to it in your terminal.

  3. Initialize a Node.js project:
    npm init -y
  4. Install the required libraries:
    npm install axios cheerio body-parser express
  5. Create a JavaScript file named scraper.js with the following content:


const express = require('express');
const bodyParser = require('body-parser');
const fs = require('fs');
const axios = require('axios');
const cheerio = require('cheerio');

const app = express();
// Hosting platforms typically pass the port to bind through the PORT
// environment variable; fall back to 3000 for local runs.
const port = Number(process.env.PORT) || 3000;

// Parse URL-encoded form bodies.
app.use(bodyParser.urlencoded({ extended: true }));
// Serve index.html and script.js from ./public (resolved against the directory
// the server is started from) so the scraper page is reachable at "/".
app.use(express.static('public'));

/**
 * Scrape a web page: download its HTML, collect the `src` attribute of every
 * <img> element, and save the inner HTML of <body> to `scraped_website.html`
 * in the current working directory.
 *
 * Failures (malformed URL, unsupported protocol, network error, write error)
 * are logged and reported as `{ success: false }` instead of being thrown.
 *
 * @param {string} url - Absolute http(s) URL of the page to scrape.
 * @returns {Promise<{success: boolean, content?: string, images?: string[]}>}
 *   On success: the body HTML and the list of image `src` values.
 */
async function scrapeWebsite(url) {
  try {
    // The URL is supplied by the client, so refuse anything that is not plain
    // http(s). `new URL` throws on malformed input, which the catch below
    // turns into a failure result.
    // NOTE(review): this does not stop requests to internal/private hosts.
    const { protocol } = new URL(url);
    if (protocol !== 'http:' && protocol !== 'https:') {
      throw new Error(`Unsupported protocol: ${protocol}`);
    }

    // Fetch the HTML content of the provided URL
    const response = await axios.get(url);
    const html = response.data;

    // Load the HTML content into Cheerio for parsing
    const $ = cheerio.load(html);

    // Use .html() rather than .text() so the markup (including <img> tags) is kept
    const websiteContent = $('body').html();

    // Collect the src attribute of every image on the page
    const imageUrls = $('img').map((index, element) => $(element).attr('src')).get();

    // Keep a copy of the scraped markup on disk without blocking the event loop
    await fs.promises.writeFile('scraped_website.html', websiteContent);

    return { success: true, content: websiteContent, images: imageUrls };
  } catch (error) {
    console.error('Error scraping website:', error);
    return { success: false };
  }
}

/**
 * GET /scrape?url=<page URL>
 *
 * Responds with the JSON result of scrapeWebsite(url), or with
 * `{ success: false }` when the `url` query parameter is missing, empty,
 * or not a single string (e.g. repeated `url=` parameters).
 */
app.get('/scrape', async (req, res) => {
  const url = req.query.url;
  if (typeof url !== 'string' || url === '') {
    res.json({ success: false });
    return;
  }

  const result = await scrapeWebsite(url);
  res.json(result);
});

// Start the HTTP server and log the port once it is accepting connections.
app.listen(port, () => {
  console.log(`Server is running on port ${port}`);
});

Now create a directory named public and, inside it, the files index.html and script.js.



<!DOCTYPE html>
<!-- Scraper page: a URL input and button, plus placeholders that script.js
     fills with the status message, the scraped content, and the image count. -->
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Web Scraper</title>
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css">
</head>
<body class="bg-gray-100">
    <div class="container mx-auto p-8 bg-white rounded-lg shadow-md">
        <h1 class="text-3xl font-semibold mb-6">Web Scraper</h1>
        <!-- URL entry row -->
        <div class="flex mb-4">
            <input type="text" id="url" placeholder="https://example.com" class="flex-grow border rounded-md p-2 mr-2">
            <button id="scrapeButton" class="bg-blue-500 text-white py-2 px-4 rounded-md transition duration-300 hover:bg-blue-600">Scrape Website</button>
        </div>
        <!-- Output areas populated by script.js -->
        <p id="statusMessage" class="text-red-500 mb-4"></p>
        <div id="scrapedContent" class="bg-gray-200 p-4 rounded-lg"></div>
        <p id="imageCount" class="text-green-500 font-semibold mt-4"></p>
    </div>
    <script src="script.js"></script>
</body>
</html>



// Wires up the scraper page: when the button is clicked, send the entered URL
// to the backend's /scrape endpoint and render the result.
document.addEventListener('DOMContentLoaded', () => {
    const scrapeButton = document.getElementById('scrapeButton');
    const urlInput = document.getElementById('url');
    const statusMessage = document.getElementById('statusMessage');
    const scrapedContent = document.getElementById('scrapedContent');
    const imageCountElement = document.getElementById('imageCount');

    scrapeButton.addEventListener('click', async () => {
      const url = urlInput.value.trim();
      if (!url) {
        statusMessage.textContent = 'Please enter a URL.';
        return;
      }

      try {
        // Send the URL to the backend for scraping. It is encoded so that its
        // own "?", "&" and "#" characters stay part of the `url` query value.
        const response = await fetch(`/scrape?url=${encodeURIComponent(url)}`);
        const data = await response.json();

        if (data.success) {
          statusMessage.textContent = 'Website scraped successfully.';
          // SECURITY NOTE: data.content is third-party HTML. Assigning it to
          // innerHTML can run inline event handlers (e.g. onerror); sanitize it
          // or render it in a sandboxed iframe before using this beyond a demo.
          scrapedContent.innerHTML = data.content;
          // Display image count
          imageCountElement.textContent = `Images found: ${data.images.length}`;
        } else {
          statusMessage.textContent = 'Error scraping website.';
        }
      } catch (error) {
        // Network failure or a non-JSON response from the backend.
        console.error('Error scraping website:', error);
        statusMessage.textContent = 'Error scraping website.';
      }
    });
});


Now run the project from the terminal:

node scraper.js 


You can deploy it on Render (render.com).


Github Link : https://github.com/Ajinkgupta/Node-Scraper

Deployed Link : https://nodescraper.onrender.com/

Related questions

0 votes
0 answers 27 views
asked Oct 22, 2023 by anonymous
0 votes
1 answer 51 views
0 votes
1 answer 68 views
0 votes
1 answer 40 views

Doubtly is an online community for engineering students, offering:

  • Free viva questions PDFs
  • Previous year question papers (PYQs)
  • Academic doubt solutions
  • Expert-guided solutions

Get the pro version for free by logging in!

5.7k questions

5.1k answers


498 users