วิธีอ่านตาราง PDF ใน Java

บทช่วยสอนนี้ให้รายละเอียดเกี่ยวกับวิธี อ่านตาราง PDF ใน Java และเข้าถึงข้อความของแต่ละเซลล์ภายในตารางที่ต้องการ คุณจะมีสิทธิ์ควบคุมอย่างเต็มที่ในการอ้างถึงตารางเฉพาะในหน้าเป้าหมายของ PDF และแยกวิเคราะห์แถวและเซลล์ทั้งหมดเพื่อดึงข้อมูล ในการเขียน โปรแกรมอ่านตาราง PDF ใน Java นี้ไม่จำเป็นต้องใช้เครื่องมือหรือซอฟต์แวร์ของบุคคลที่สาม

ขั้นตอนในการอ่านตาราง PDF ใน Java

กำหนดค่าแอปพลิเคชันตัวอ่านตาราง PDF ของคุณเพื่อเพิ่ม Aspose.PDF จากที่เก็บ Maven
โหลดไฟล์ PDF ตัวอย่างที่มีตารางโดยใช้วัตถุคลาส Document
สร้างอินสแตนซ์และเริ่มต้นวัตถุ TableAbsorber เพื่อดึงตาราง PDF ทั้งหมดจากหน้า PDF ที่เลือก
วนซ้ำทุกแถวในตารางที่ต้องการ
ทำซ้ำผ่านเซลล์ทั้งหมดในแถวที่ต้องการและดึงข้อความทั้งหมดจากแต่ละเซลล์
แสดงข้อความที่ดึงมาจากเซลล์

ขั้นตอนเหล่านี้อธิบายวิธีใช้ Java แยกตารางจาก PDF พร้อมกับข้อมูลเกี่ยวกับไลบรารีที่จำเป็นซึ่งจะเพิ่มลงในโปรเจ็กต์ นอกจากนี้ยังระบุลำดับของการดำเนินการเพื่อให้งานสำเร็จ เช่น โหลด PDF ก่อน จากนั้นจึงเข้าถึงหน้าใดหน้าหนึ่ง และดึงตารางที่ต้องการ สุดท้าย แยกวิเคราะห์แถวและเซลล์ทั้งหมดเพื่อรับข้อมูล

รหัสเพื่ออ่านตาราง PDF ใน Java

	import com.aspose.pdf.License;
	import com.aspose.pdf.AbsorbedCell;
	import com.aspose.pdf.AbsorbedRow;
	import com.aspose.pdf.AbsorbedTable;
	import com.aspose.pdf.Document;
	import com.aspose.pdf.TableAbsorber;
	import com.aspose.pdf.TextFragmentCollection;

	public class ReadPDFTableInJava {

	public static void main(String[] args) throws Exception { // main function for reading PDF table data in ReadPDFTableInJava

	// For avoiding the trial version limitation, load the Aspose.PDF license prior to reading table data
	License licenseForHtmlToPdf = new License();
	licenseForHtmlToPdf.setLicense("Aspose.Pdf.lic");

	// Load a source PDF document which contains a table in it
	Document pdfDocument = new Document("PdfWithTable.pdf");

	// Instantiate the TableAbsorber object for PDF tables extraction
	TableAbsorber tableAbsorber = new TableAbsorber();

	// visit the table collection in the input PDF
	tableAbsorber.visit(pdfDocument.getPages().get_Item(1));

	// Access the desired table from the tables collection
	AbsorbedTable absorbedTable = tableAbsorber.getTableList().get(0);

	// Parse all the rows and get each row using the AbsorbedRow
	for (AbsorbedRow pdfTableRow : absorbedTable.getRowList())
	{
	// Access each cell in the cells collection using AbsorbedCell
	for (AbsorbedCell pdfTableCell : pdfTableRow.getCellList())
	{
	// Access each text fragment from the cell
	TextFragmentCollection textFragmentCollection = pdfTableCell.getTextFragments();

	// Access each text fragment from the fragments collection
	for (com.aspose.pdf.TextFragment textFragment : textFragmentCollection)
	{
	// Display the table cell text
	System.out.println(textFragment.getText());
	}
	}
	}

	System.out.println("Done");
	}
	}

view raw How to Read PDF Table in Java.java hosted with ❤ by GitHub

หากต้องการ แยกตารางออกจาก PDF Java มีโค้ดที่นี่ซึ่งใช้คลาส TableAbsorber และ AbsorbedTable เพื่อจัดการตารางใน PDF นอกจากนี้ยังใช้คลาส AbsorbedRow และ AbsorbedCell เพื่อจัดการแถวและคอลัมน์ก่อนที่จะใช้คลาส TextFragment เพื่อดึงข้อมูลเซลล์ นอกจากนี้ ยังมีคลาสตัวดูดซับอื่นๆ อีกมากมายที่พร้อมใช้งานสำหรับองค์ประกอบต่างๆ ในเอกสาร เช่น ฟอนต์ ย่อหน้า ข้อความ และส่วนของข้อความ

บทความนี้อธิบายว่าการใช้การแยกตาราง Java PDF สามารถทำได้ในไม่กี่ขั้นตอน หากคุณต้องการเรียนรู้วิธีอ่านข้อความและรูปภาพจากไฟล์ PDF โปรดดูบทความใน วิธีอ่านไฟล์ PDF ใน Java

Aspose ฐานความรู้

ค้นหาคำตอบโดย API

วิธีอ่านตาราง PDF ใน Java

ขั้นตอนในการอ่านตาราง PDF ใน Java

รหัสเพื่ออ่านตาราง PDF ใน Java