Extract Resources

Extract text, images, and other resources (fonts, outlines, XMP metadata) from a PDF document.

  • C#
  • Java
  • JavaScript
  • PHP
  • Python
  • Ruby
// Build the extraction request: the PDF to inspect plus the resources to pull out of it.
var request = new ExtractResources
{
    // Source document: raw file bytes and the file name sent along with them.
    Document = new Document
    {
        DocData = File.ReadAllBytes("myPdf.pdf"),
        Name = "myPdf.pdf",
    },
    // Extraction options: every available flag is switched on in this sample.
    ExtractResourcesAction = new ExtractResourcesAction
    {
        ExtractFonts = true,
        ExtractImages = true,
        Outlines = true,
        XmpMetadata = true,
        ListFonts = true,
        ListImages = true,
    },
};

// Start the asynchronous extraction call, then wait synchronously for its result.
var extractionTask = Pdf4me.Instance.ExtractClient.ExtractResourcesAsync(request);
var result = extractionTask.GetAwaiter().GetResult();

// Serialize the response to JSON and store it next to the application.
var resultJson = JsonConvert.SerializeObject(result);
File.WriteAllText("extractResources_result.json", resultJson);
// Create the pdf4me client with your API key.
const pdf4meClient = pdf4me.createClient('YOUR API KEY');

// Read the input PDF from this script's directory and base64-encode it for the request.
const inputPath = path.join(__dirname, 'myPdf.pdf');
const docData = fs.readFileSync(inputPath).toString('base64');

// Request payload: the document plus the extraction options (all switched on here).
const extractResourcesReq = {
  document: { docData },
  extractResourcesAction: {
    extractFonts: true,
    extractImages: true,
    listFonts: true,
    listImages: true,
    outlines: true,
    xmpMetadata: true,
  },
};

// The response is written as pretty-printed JSON next to this script.
const outputPath = path.join(__dirname, 'extractResources_result.json');

pdf4meClient
  .extractResources(extractResourcesReq)
  .then((extractResourcesRes) => {
    const resultJson = JSON.stringify(extractResourcesRes, null, 2);
    fs.writeFileSync(outputPath, resultJson);
  })
  .catch((error) => {
    // Log the failure (request or file write) and exit with a non-zero status.
    console.log(error);
    process.exit(1);
  });
import os

# set up the extract client on top of an existing pdf4me client
extract_client = ExtractClient(pdf4me_client)

# create the extract request: the source document plus the extraction options
extract_resources = ExtractResources(
    # document
    document=Document(
        doc_data=FileReader().get_file_data('PDF_10pages.pdf')
    ),
    # action (1 = enabled, 0 = disabled; image listing is switched off in this sample)
    extract_resources_action=ExtractResourcesAction(
        extract_fonts=1,
        extract_images=1,
        list_fonts=1,
        list_images=0,
        outlines=1,
        xmp_metadata=1
    )
)

# extraction
res = extract_client.extract_resources(extract_resources=extract_resources)

# writing it to disk
# os.path.join avoids the invalid '\e' escape sequence of a hand-written
# backslash path and picks the correct separator on every platform.
result_path = os.path.join(testfolder, 'extractResources_result.json')
with open(result_path, 'w') as f:
    # NOTE(review): assumes `res` is JSON-serializable as returned - confirm against the SDK
    json.dump(res, f)
// Source document: file name plus the file contents loaded through the client helper.
$document = [
    'name' => 'PDF_10pages.pdf',
    'docData' => $pdf4meclient->getFileData('PDF_10pages.pdf'),
];

// Extraction options (1 = enabled, 0 = disabled); outlines are switched off in this sample.
$extractResourcesAction = [
    'outlines' => 0,
    'xmpMetadata' => 1,
    'listFonts' => 1,
    'extractFonts' => 1,
    'extractImages' => 1,
    'listImages' => 1,
];

// Assemble the request payload.
// NOTE(review): the 'ExtractResourcesAction' key is capitalized while 'document' is not -
// confirm the expected key casing against the API before changing either.
$create_extract_resource = [
    'document' => $document,
    'ExtractResourcesAction' => $extractResourcesAction,
];

// Send the request and print the extracted resource information.
$res = $pdf4meclient->pdf4me()->extractResources($create_extract_resource);

echo $res['pdfResources'];