Split by text

Document splitting, like page extraction, is useful for separating a document into smaller portions. This PDF4me feature lets you distribute just the relevant parts of a PDF document more quickly.

The PDF Splitting API provides a robust solution for segmenting PDF documents based on either input text or specified coordinates. Designed to streamline document management and enhance workflow efficiency, this API offers flexible options for breaking down PDF files into smaller, more manageable segments.

Code Samples

Try the API in the language you prefer

  • C#
  • Java
  • JavaScript
  • PHP
  • Python
  • Ruby
using System;
using System.IO;
using System.Threading.Tasks;
using Pdf4me.ClientApi;
using Pdf4me.ClientApi.Models;
using Pdf4me.ClientApi.Models.Split;
using Pdf4me.Common;

namespace Pdf4meSplitByTextExample
{
    /// <summary>
    /// Console sample that sends a PDF to the PDF4me split-by-text operation
    /// and saves every document returned by the service.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads <c>my_pdf.pdf</c> from the application base directory, submits a
        /// split-by-text request and writes each resulting document to
        /// <c>splitByText_pdf{N}_result.pdf</c> (N starting at 1) using a relative path.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        /// <returns>A task that completes when all results have been written.</returns>
        static async Task Main(string[] args)
        {
            // Placeholder: replace with your own PDF4me API key.
            string pdf4meApiKey = "YOUR_API_KEY";
            string filePath = Path.Combine(AppContext.BaseDirectory, "my_pdf.pdf");

            Pdf4meClient pdf4meClient = new Pdf4meClient(pdf4meApiKey);

            // Read PDF file as byte array
            byte[] fileBytes = File.ReadAllBytes(filePath);

            // Create the split request.
            // Text is left empty and a rectangle (X1, Y1)-(X2, Y2) is supplied instead.
            // NOTE(review): units/origin of the coordinates and the exact meaning of
            // FileNaming / SplitTextPage are defined by the PDF4me API - confirm there.
            SplitByTextReq splitRequest = new SplitByTextReq
            {
                Document = new Document
                {
                    DocData = fileBytes,
                    Name = "mypdf.pdf"
                },
                SplitByTextAction = new SplitByTextAction
                {
                    Text = "",
                    FileNaming = "pagenum",
                    SplitTextPage = SplitTextPage.after,
                    X1 = 10,
                    Y1 = 700,
                    X2 = 300,
                    Y2 = 800,
                    CombinePagesWithSameConsecutiveText = false
                }
            };

            try
            {
                // Call the PDF4me API to split the PDF by text coordinates
                SplitResponse splitResponse = await pdf4meClient.Pdf4me.Instance.SplitClient.SplitByTextAsync(splitRequest);

                if (splitResponse == null || splitResponse.Documents == null || splitResponse.Documents.Count == 0)
                {
                    // Make the "nothing to save" outcome visible instead of exiting silently.
                    Console.WriteLine("The split operation returned no documents.");
                    return;
                }

                // Save every returned part; a split can yield more than two documents.
                for (int i = 0; i < splitResponse.Documents.Count; i++)
                {
                    string outputFileName = $"splitByText_pdf{i + 1}_result.pdf";
                    File.WriteAllBytes(outputFileName, splitResponse.Documents[i].DocData);
                }
            }
            catch (Pdf4meException ex)
            {
                // Errors reported by the PDF4me client.
                Console.WriteLine($"Pdf4me Exception: {ex.Message}");
            }
            catch (Exception ex)
            {
                // Anything else, e.g. file-system failures while writing results.
                Console.WriteLine($"An error occurred: {ex.Message}");
            }
        }
    }
}
import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";
import pdf4me from 'pdf4me';

// Placeholder: replace with your own PDF4me API key.
const apiKey = 'YOUR API KEY';
const pdf4meClient = pdf4me.createClient(apiKey);

// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Load the input PDF (expected next to this script) and encode it as base64.
const inputPdfPath = path.join(__dirname, 'my_pdf.pdf');
const inputPdfBase64 = fs.readFileSync(inputPdfPath).toString("base64");

// Split settings: text is left empty and a rectangle (x1, y1)-(x2, y2) is given.
// NOTE(review): coordinate units/origin and the meaning of fileNaming /
// splitTextPage are defined by the PDF4me API - confirm there.
const splitByTextAction = {
  text: "",
  fileNaming: "pagenum",
  splitTextPage: "after",
  x1: 10,
  y1: 700,
  x2: 300,
  y2: 800,
  combinePagesWithSameConsecutiveText: false,
};

// The request object: the document payload plus the split action.
const splitReq = {
  document: { docData: inputPdfBase64 },
  splitByTextAction,
};

/**
 * Sends `splitReq` to PDF4me's split-by-text operation and writes every
 * returned document next to this script as `splitByText_pdf{N}_result.pdf`
 * (N starting at 1). Errors are logged to the console rather than rethrown.
 */
const splitByTextCoordinates = async () => {
  try {
    const splitRes = await pdf4meClient.splitByText(splitReq);
    const documents = splitRes?.documents ?? [];

    if (documents.length === 0) {
      // Make the "nothing to save" outcome visible instead of exiting silently.
      console.warn("The split operation returned no documents.");
      return;
    }

    // Save every returned part; a split can yield more than two documents.
    for (let i = 0; i < documents.length; i++) {
      // docData is decoded from base64 before being written to disk.
      const pdfBytes = Buffer.from(documents[i].docData, "base64");
      fs.writeFileSync(
        path.join(__dirname, `splitByText_pdf${i + 1}_result.pdf`),
        pdfBytes
      );
    }
  } catch (error) {
    console.error(error);
  }
};
splitByTextCoordinates();

Important Links

Swagger - Split by text