Extract Resources
Extract text, images, and other resources (fonts, outlines, XMP metadata) from a PDF document.
- C#
- Java
- JavaScript
- PHP
- Python
- Ruby
// Source document: the raw bytes of the PDF together with its file name.
var sourceDocument = new Document
{
    DocData = File.ReadAllBytes("myPdf.pdf"),
    Name = "myPdf.pdf",
};

// Extraction options: every flag is switched on in this sample.
var extractAction = new ExtractResourcesAction
{
    ExtractFonts = true,
    ExtractImages = true,
    Outlines = true,
    XmpMetadata = true,
    ListFonts = true,
    ListImages = true,
};

// Request object combining the document and the extraction options.
var req = new ExtractResources
{
    Document = sourceDocument,
    ExtractResourcesAction = extractAction,
};

// Start the asynchronous extraction and block until its result is available.
var extractTask = Pdf4me.Instance.ExtractClient.ExtractResourcesAsync(req);
var res = extractTask.GetAwaiter().GetResult();

// Serialize the extraction result to JSON and save it to a file.
var resultJson = JsonConvert.SerializeObject(res);
File.WriteAllText("extractResources_result.json", resultJson);
// Create the pdf4me client with your API key.
const pdf4meClient = pdf4me.createClient('YOUR API KEY');

// Read the source PDF (located next to this script) and base64-encode it.
const pdfPath = path.join(__dirname, 'myPdf.pdf');
const pdfBase64 = fs.readFileSync(pdfPath).toString('base64');

// Request object: the encoded document plus the flags selecting what to
// extract and list; every flag is switched on in this sample.
const extractResourcesReq = {
  document: { docData: pdfBase64 },
  extractResourcesAction: {
    extractFonts: true,
    extractImages: true,
    listFonts: true,
    listImages: true,
    outlines: true,
    xmpMetadata: true,
  },
};

// Write the response to disk as JSON indented with two spaces.
const saveResult = (extractResourcesRes) => {
  const outputPath = path.join(__dirname, 'extractResources_result.json');
  fs.writeFileSync(outputPath, JSON.stringify(extractResourcesRes, null, 2));
};

// Log the error and terminate the process with exit code 1.
const abortOnError = (error) => {
  console.log(error);
  process.exit(1);
};

// Run the extraction; failures in the request or in saving end up in abortOnError.
pdf4meClient
  .extractResources(extractResourcesReq)
  .then(saveResult)
  .catch(abortOnError);
import os

# Set up the extract client on top of the existing pdf4me client.
extract_client = ExtractClient(pdf4me_client)

# Build the extraction request: the input document plus the action flags.
extract_resources = ExtractResources(
    # document: file content loaded through FileReader
    document=Document(
        doc_data=FileReader().get_file_data('PDF_10pages.pdf')
    ),
    # action: flags are passed as 1 (on) / 0 (off); note that list_images
    # is the only flag switched off in this sample
    extract_resources_action=ExtractResourcesAction(
        extract_fonts=1,
        extract_images=1,
        list_fonts=1,
        list_images=0,
        outlines=1,
        xmp_metadata=1
    )
)

# Run the extraction.
res = extract_client.extract_resources(extract_resources=extract_resources)

# Write the result to disk. The path is built with os.path.join instead of
# concatenating '\extractResources_result.json': '\e' is an invalid escape
# sequence in a normal string literal, and a hard-coded backslash only works
# as a path separator on Windows.
result_path = os.path.join(testfolder, 'extractResources_result.json')
with open(result_path, 'w') as f:
    json.dump(res, f)
// Source document: its name plus the file data obtained from the client's getFileData helper.
$sourceDocument = [
    'name' => 'PDF_10pages.pdf',
    'docData' => $pdf4meclient->getFileData('PDF_10pages.pdf'),
];

// Extraction flags, passed as 1 (on) / 0 (off); outlines is the only flag
// switched off in this sample.
$extractAction = [
    'outlines' => 0,
    'xmpMetadata' => 1,
    'listFonts' => 1,
    'extractFonts' => 1,
    'extractImages' => 1,
    'listImages' => 1,
];

// Request payload combining the document and the extraction flags.
$create_extract_resource = [
    'document' => $sourceDocument,
    'ExtractResourcesAction' => $extractAction,
];

// Run the extraction and print the "pdfResources" entry of the response.
$res = $pdf4meclient->pdf4me()->extractResources($create_extract_resource);
echo $res['pdfResources'];