Я пытаюсь прочитать файл docx с помощью PHP. Обычный текст читается успешно, но математические уравнения из документа Word я не получаю. Я новичок в PHP и не знаю, как их прочитать; пожалуйста, предложите какие-нибудь идеи. Ниже функция, с помощью которой я пытался прочитать документ:
/**
 * Reads the configured .docx file and echoes its plain text, with line
 * breaks converted to <br> tags.
 */
function index()
{
    $document = 'file_path';
    $text_output = $this->read_docx($document);

    // read_docx() returns false when the archive cannot be opened; nothing to print then.
    if ($text_output === false) {
        return;
    }

    echo nl2br($text_output);
}

/**
 * Extracts the text of the main document part (word/document.xml) of a .docx file.
 *
 * Table-cell boundaries are turned into spaces and paragraph ends into "\r\n"
 * before all XML tags are stripped.
 *
 * @param string $filename Path to the .docx archive.
 *
 * @return string|false The extracted text (an empty string when the archive has
 *                      no word/document.xml), or false when the archive cannot
 *                      be opened.
 */
private function read_docx($filename)
{
    // ZipArchive replaces the procedural zip_* API, which is deprecated as of PHP 8.0.
    $zip = new ZipArchive();
    if ($zip->open($filename) !== true) {
        return false;
    }

    // Fetch the part by name instead of iterating over (and opening) every entry.
    $content = $zip->getFromName('word/document.xml');
    $zip->close();

    if ($content === false) {
        $content = '';
    }

    // Order matters: the cell-boundary pattern contains the paragraph-end pattern.
    $content = str_replace('</w:r></w:p></w:tc><w:tc>', " ", $content);
    $content = str_replace('</w:r></w:p>', "\r\n", $content);

    return strip_tags($content);
}
Это пример математического уравнения в файле docx, которое я пытаюсь прочитать и отобразить на HTML-странице. Спасибо.
Я полностью изучил этот материал: https://msdn.microsoft.com/en-us/library/aa982683(v=office.12).aspx#Office2007ManipulatingXMLDocs_exploring и разбираю XML с помощью класса XMLReader в PHP:
$document = 'url';

/**
 * Extracts the text runs and embedded images of a .docx file in document order.
 *
 * The relationships part (word/_rels/document.xml.rels) is read first to map
 * relationship ids to targets; word/document.xml is then walked with XMLReader.
 * Both parts are looked up by name, so the order of entries in the archive
 * does not matter.
 *
 * @param string $filename Path to the .docx archive.
 *
 * @return array List of array('text' => string) and array('image' => string)
 *               items, where the image value is the base64-encoded file content.
 *               Empty when the archive or its main document part cannot be read.
 */
function readZippedImages($filename)
{
    $final_arr = array();

    $zip = new ZipArchive();
    if ($zip->open($filename) !== true) {
        return $final_arr;
    }

    $relations_xml = $zip->getFromName('word/_rels/document.xml.rels');
    $document_xml = $zip->getFromName('word/document.xml');
    $zip->close();

    if ($document_xml === false || $document_xml === '') {
        return $final_arr;
    }

    // Relationship id => target path, used to resolve r:embed references.
    $repo = array();
    if ($relations_xml !== false && $relations_xml !== '') {
        $reader_relation = new XMLReader();
        $reader_relation->xml($relations_xml);
        while ($reader_relation->read()) {
            if ($reader_relation->nodeType == XMLReader::ELEMENT
                && $reader_relation->localName == 'Relationship'
            ) {
                $repo[$reader_relation->getAttribute("Id")] = $reader_relation->getAttribute("Target");
            }
        }
        $reader_relation->close();
    }

    $reader = new XMLReader();
    $reader->xml($document_xml);
    while ($reader->read()) {
        $node_loc = $reader->localName;

        // Every text node of the document becomes one "text" item.
        if ($node_loc == '#text') {
            $final_arr[] = array("text" => $reader->value);
            continue;
        }

        // Images are referenced by <a:blip r:embed="rIdN">. Only the start tag is
        // handled so a non-empty blip element is not processed a second time on
        // its end tag.
        if ($reader->nodeType == XMLReader::ELEMENT && $node_loc == 'blip') {
            $attri_r = $reader->getAttribute("r:embed");
            if ($attri_r === null || !isset($repo[$attri_r])) {
                continue; // linked or unresolved image: nothing to embed
            }

            $image_stream = showimage($filename, $repo[$attri_r]);
            if ($image_stream !== false) {
                $final_arr[] = array("image" => $image_stream);
            }
        }
    }
    $reader->close();

    return $final_arr;
}

/**
 * Returns one image stored in the .docx archive as a base64 string.
 *
 * @param string $zip_file_original Path to the .docx archive.
 * @param string $file_name_image   Relationship target of the image, e.g. "media/image1.png".
 *
 * @return string|false Base64-encoded image content, or false (after echoing a
 *                      short message) when the archive or the image cannot be read.
 */
function showimage($zip_file_original, $file_name_image)
{
    // Targets are normally relative to word/; a leading slash marks a path
    // that is already relative to the package root.
    if (strncmp($file_name_image, '/', 1) === 0) {
        $file_name_image = ltrim($file_name_image, '/');
    } else {
        $file_name_image = 'word/' . $file_name_image;
    }

    $z_show = new ZipArchive();
    if ($z_show->open($zip_file_original) !== true) {
        echo "File not found.";
        return false;
    }

    $image = $z_show->getFromName($file_name_image);
    $z_show->close();

    if ($image === false) {
        echo "Could not load image.";
        return false;
    }

    // No HTTP headers are sent here: the caller receives a base64 string, not a raw image response.
    return base64_encode($image);
}

$final_arr = readZippedImages($document);
$document = 'url';

/**
 * Extracts the text runs and embedded images of a .docx file in document order.
 *
 * The relationships part (word/_rels/document.xml.rels) is read first to map
 * relationship ids to targets; word/document.xml is then walked with XMLReader.
 * Both parts are looked up by name, so the order of entries in the archive
 * does not matter.
 *
 * @param string $filename Path to the .docx archive.
 *
 * @return array List of array('text' => string) and array('image' => string)
 *               items, where the image value is the base64-encoded file content.
 *               Empty when the archive or its main document part cannot be read.
 */
function readZippedImages($filename)
{
    $final_arr = array();

    $zip = new ZipArchive();
    if ($zip->open($filename) !== true) {
        return $final_arr;
    }

    $relations_xml = $zip->getFromName('word/_rels/document.xml.rels');
    $document_xml = $zip->getFromName('word/document.xml');
    $zip->close();

    if ($document_xml === false || $document_xml === '') {
        return $final_arr;
    }

    // Relationship id => target path, used to resolve r:embed references.
    $repo = array();
    if ($relations_xml !== false && $relations_xml !== '') {
        $reader_relation = new XMLReader();
        $reader_relation->xml($relations_xml);
        while ($reader_relation->read()) {
            if ($reader_relation->nodeType == XMLReader::ELEMENT
                && $reader_relation->localName == 'Relationship'
            ) {
                $repo[$reader_relation->getAttribute("Id")] = $reader_relation->getAttribute("Target");
            }
        }
        $reader_relation->close();
    }

    $reader = new XMLReader();
    $reader->xml($document_xml);
    while ($reader->read()) {
        $node_loc = $reader->localName;

        // Every text node of the document becomes one "text" item.
        if ($node_loc == '#text') {
            $final_arr[] = array("text" => $reader->value);
            continue;
        }

        // Images are referenced by <a:blip r:embed="rIdN">. Only the start tag is
        // handled so a non-empty blip element is not processed a second time on
        // its end tag.
        if ($reader->nodeType == XMLReader::ELEMENT && $node_loc == 'blip') {
            $attri_r = $reader->getAttribute("r:embed");
            if ($attri_r === null || !isset($repo[$attri_r])) {
                continue; // linked or unresolved image: nothing to embed
            }

            $image_stream = showimage($filename, $repo[$attri_r]);
            if ($image_stream !== false) {
                $final_arr[] = array("image" => $image_stream);
            }
        }
    }
    $reader->close();

    return $final_arr;
}

/**
 * Returns one image stored in the .docx archive as a base64 string.
 *
 * @param string $zip_file_original Path to the .docx archive.
 * @param string $file_name_image   Relationship target of the image, e.g. "media/image1.png".
 *
 * @return string|false Base64-encoded image content, or false (after echoing a
 *                      short message) when the archive or the image cannot be read.
 */
function showimage($zip_file_original, $file_name_image)
{
    // Targets are normally relative to word/; a leading slash marks a path
    // that is already relative to the package root.
    if (strncmp($file_name_image, '/', 1) === 0) {
        $file_name_image = ltrim($file_name_image, '/');
    } else {
        $file_name_image = 'word/' . $file_name_image;
    }

    $z_show = new ZipArchive();
    if ($z_show->open($zip_file_original) !== true) {
        echo "File not found.";
        return false;
    }

    $image = $z_show->getFromName($file_name_image);
    $z_show->close();

    if ($image === false) {
        echo "Could not load image.";
        return false;
    }

    // No HTTP headers are sent here: the caller receives a base64 string, not a raw image response.
    return base64_encode($image);
}

$final_arr = readZippedImages($document);
$document = 'url';

/**
 * Extracts the text runs and embedded images of a .docx file in document order.
 *
 * The relationships part (word/_rels/document.xml.rels) is read first to map
 * relationship ids to targets; word/document.xml is then walked with XMLReader.
 * Both parts are looked up by name, so the order of entries in the archive
 * does not matter.
 *
 * @param string $filename Path to the .docx archive.
 *
 * @return array List of array('text' => string) and array('image' => string)
 *               items, where the image value is the base64-encoded file content.
 *               Empty when the archive or its main document part cannot be read.
 */
function readZippedImages($filename)
{
    $final_arr = array();

    $zip = new ZipArchive();
    if ($zip->open($filename) !== true) {
        return $final_arr;
    }

    $relations_xml = $zip->getFromName('word/_rels/document.xml.rels');
    $document_xml = $zip->getFromName('word/document.xml');
    $zip->close();

    if ($document_xml === false || $document_xml === '') {
        return $final_arr;
    }

    // Relationship id => target path, used to resolve r:embed references.
    $repo = array();
    if ($relations_xml !== false && $relations_xml !== '') {
        $reader_relation = new XMLReader();
        $reader_relation->xml($relations_xml);
        while ($reader_relation->read()) {
            if ($reader_relation->nodeType == XMLReader::ELEMENT
                && $reader_relation->localName == 'Relationship'
            ) {
                $repo[$reader_relation->getAttribute("Id")] = $reader_relation->getAttribute("Target");
            }
        }
        $reader_relation->close();
    }

    $reader = new XMLReader();
    $reader->xml($document_xml);
    while ($reader->read()) {
        $node_loc = $reader->localName;

        // Every text node of the document becomes one "text" item.
        if ($node_loc == '#text') {
            $final_arr[] = array("text" => $reader->value);
            continue;
        }

        // Images are referenced by <a:blip r:embed="rIdN">. Only the start tag is
        // handled so a non-empty blip element is not processed a second time on
        // its end tag.
        if ($reader->nodeType == XMLReader::ELEMENT && $node_loc == 'blip') {
            $attri_r = $reader->getAttribute("r:embed");
            if ($attri_r === null || !isset($repo[$attri_r])) {
                continue; // linked or unresolved image: nothing to embed
            }

            $image_stream = showimage($filename, $repo[$attri_r]);
            if ($image_stream !== false) {
                $final_arr[] = array("image" => $image_stream);
            }
        }
    }
    $reader->close();

    return $final_arr;
}

/**
 * Returns one image stored in the .docx archive as a base64 string.
 *
 * @param string $zip_file_original Path to the .docx archive.
 * @param string $file_name_image   Relationship target of the image, e.g. "media/image1.png".
 *
 * @return string|false Base64-encoded image content, or false (after echoing a
 *                      short message) when the archive or the image cannot be read.
 */
function showimage($zip_file_original, $file_name_image)
{
    // Targets are normally relative to word/; a leading slash marks a path
    // that is already relative to the package root.
    if (strncmp($file_name_image, '/', 1) === 0) {
        $file_name_image = ltrim($file_name_image, '/');
    } else {
        $file_name_image = 'word/' . $file_name_image;
    }

    $z_show = new ZipArchive();
    if ($z_show->open($zip_file_original) !== true) {
        echo "File not found.";
        return false;
    }

    $image = $z_show->getFromName($file_name_image);
    $z_show->close();

    if ($image === false) {
        echo "Could not load image.";
        return false;
    }

    // No HTTP headers are sent here: the caller receives a base64 string, not a raw image response.
    return base64_encode($image);
}

$final_arr = readZippedImages($document);
Выведите $final_arr — вы получите весь текст и все изображения документа.
Прежде всего, разбирать XML с помощью регулярных выражений — очень плохая идея. Вместо этого используйте XML-парсер PHP, который предназначен именно для таких задач.
Вам необходимо прочитать спецификацию Open XML (стандарт, используемый Microsoft Office), чтобы узнать о внутренней структуре данных, которую Microsoft использует для хранения этих математических уравнений.