暫無描述

dompdf.cls.php 31KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101
  1. <?php
  2. /**
  3. * @package dompdf
  4. * @link http://dompdf.github.com/
  5. * @author Benj Carson <benjcarson@digitaljunkies.ca>
  6. * @author Fabien Ménager <fabien.menager@gmail.com>
  7. * @license http://www.gnu.org/copyleft/lesser.html GNU Lesser General Public License
  8. */
  9. /**
  10. * DOMPDF - PHP5 HTML to PDF renderer
  11. *
  12. * DOMPDF loads HTML and does its best to render it as a PDF. It gets its
  13. * name from the new DomDocument PHP5 extension. Source HTML is first
  14. * parsed by a DomDocument object. DOMPDF takes the resulting DOM tree and
  15. * attaches a {@link Frame} object to each node. {@link Frame} objects store
  16. * positioning and layout information and each has a reference to a {@link
  17. * Style} object.
  18. *
  19. * Style information is loaded and parsed (see {@link Stylesheet}) and is
  20. * applied to the frames in the tree by using XPath. CSS selectors are
  21. * converted into XPath queries, and the computed {@link Style} objects are
  22. * applied to the {@link Frame}s.
  23. *
  24. * {@link Frame}s are then decorated (in the design pattern sense of the
  25. * word) based on their CSS display property ({@link
  26. * http://www.w3.org/TR/CSS21/visuren.html#propdef-display}).
  27. * Frame_Decorators augment the basic {@link Frame} class by adding
  28. * additional properties and methods specific to the particular type of
  29. * {@link Frame}. For example, in the CSS layout model, block frames
  30. * (display: block;) contain line boxes that are usually filled with text or
  31. * other inline frames. The Block_Frame_Decorator therefore adds a $lines
  32. * property as well as methods to add {@link Frame}s to lines and to add
  33. * additional lines. {@link Frame}s also are attached to specific
  34. * Positioner and {@link Frame_Reflower} objects that contain the
  35. * positioning and layout algorithm for a specific type of frame,
  36. * respectively. This is an application of the Strategy pattern.
  37. *
  38. * Layout, or reflow, proceeds recursively (post-order) starting at the root
  39. * of the document. Space constraints (containing block width & height) are
  40. * pushed down, and resolved positions and sizes bubble up. Thus, every
  41. * {@link Frame} in the document tree is traversed once (except for tables
  42. * which use a two-pass layout algorithm). If you are interested in the
  43. * details, see the reflow() method of the Reflower classes.
  44. *
  45. * Rendering is relatively straightforward once layout is complete. {@link
  46. * Frame}s are rendered using an adapted {@link Cpdf} class, originally
  47. * written by Wayne Munro, http://www.ros.co.nz/pdf/. (Some performance
  48. * related changes have been made to the original {@link Cpdf} class, and
  49. * the {@link CPDF_Adapter} class provides a simple, stateless interface to
  50. * PDF generation.) PDFLib support has now also been added, via the {@link
  51. * PDFLib_Adapter}.
  52. *
  53. *
  54. * @package dompdf
  55. */
  56. class DOMPDF {
  57. /**
  58. * DomDocument representing the HTML document
  59. *
  60. * @var DOMDocument
  61. */
  62. protected $_xml;
  63. /**
  64. * Frame_Tree derived from the DOM tree
  65. *
  66. * @var Frame_Tree
  67. */
  68. protected $_tree;
  69. /**
  70. * Stylesheet for the document
  71. *
  72. * @var Stylesheet
  73. */
  74. protected $_css;
  75. /**
  76. * Actual PDF renderer
  77. *
  78. * @var Canvas
  79. */
  80. protected $_pdf;
  81. /**
  82. * Desired paper size ('letter', 'legal', 'A4', etc.)
  83. *
  84. * @var string
  85. */
  86. protected $_paper_size;
  87. /**
  88. * Paper orientation ('portrait' or 'landscape')
  89. *
  90. * @var string
  91. */
  92. protected $_paper_orientation;
  93. /**
  94. * Callbacks on new page and new element
  95. *
  96. * @var array
  97. */
  98. protected $_callbacks;
  99. /**
  100. * Experimental caching capability
  101. *
  102. * @var string
  103. */
  104. private $_cache_id;
  105. /**
  106. * Base hostname
  107. *
  108. * Used for relative paths/urls
  109. * @var string
  110. */
  111. protected $_base_host;
  112. /**
  113. * Absolute base path
  114. *
  115. * Used for relative paths/urls
  116. * @var string
  117. */
  118. protected $_base_path;
  119. /**
  120. * Protocol used to request file (file://, http://, etc)
  121. *
  122. * @var string
  123. */
  124. protected $_protocol;
  125. /**
  126. * HTTP context created with stream_context_create()
  127. * Will be used for file_get_contents
  128. *
  129. * @var resource
  130. */
  131. protected $_http_context;
  132. /**
  133. * Timestamp of the script start time
  134. *
  135. * @var int
  136. */
  137. private $_start_time = null;
  138. /**
  139. * The system's locale
  140. *
  141. * @var string
  142. */
  143. private $_system_locale = null;
  144. /**
  145. * Tells if the system's locale is the C standard one
  146. *
  147. * @var bool
  148. */
  149. private $_locale_standard = false;
  150. /**
  151. * The default view of the PDF in the viewer
  152. *
  153. * @var string
  154. */
  155. private $_default_view = "Fit";
  156. /**
  157. * The default view options of the PDF in the viewer
  158. *
  159. * @var array
  160. */
  161. private $_default_view_options = array();
  162. /**
  163. * Tells whether the DOM document is in quirksmode (experimental)
  164. *
  165. * @var bool
  166. */
  167. private $_quirksmode = false;
  168. /**
  169. * Protocol whitelist
  170. *
  171. * Protocols and PHP wrappers allowed in URLs. Full support is not
  172. * guaranteed for the protocols/wrappers contained in this array.
  173. *
  174. * @var array
  175. */
  176. private $_allowed_protocols = array(null, "", "file://", "http://", "https://");
  177. /**
  178. * Local file extension whitelist
  179. *
  180. * File extensions supported by dompdf for local files.
  181. *
  182. * @var array
  183. */
  184. private $_allowed_local_file_extensions = array("htm", "html");
  185. /**
  186. * The list of built-in fonts
  187. *
  188. * @var array
  189. */
  190. public static $native_fonts = array(
  191. "courier", "courier-bold", "courier-oblique", "courier-boldoblique",
  192. "helvetica", "helvetica-bold", "helvetica-oblique", "helvetica-boldoblique",
  193. "times-roman", "times-bold", "times-italic", "times-bolditalic",
  194. "symbol", "zapfdinbats"
  195. );
  196. private $_options = array(
  197. // Directories
  198. "temp_dir" => DOMPDF_TEMP_DIR,
  199. "font_dir" => DOMPDF_FONT_DIR,
  200. "font_cache" => DOMPDF_FONT_CACHE,
  201. "chroot" => DOMPDF_CHROOT,
  202. "log_output_file" => DOMPDF_LOG_OUTPUT_FILE,
  203. // Rendering
  204. "default_media_type" => DOMPDF_DEFAULT_MEDIA_TYPE,
  205. "default_paper_size" => DOMPDF_DEFAULT_PAPER_SIZE,
  206. "default_font" => DOMPDF_DEFAULT_FONT,
  207. "dpi" => DOMPDF_DPI,
  208. "font_height_ratio" => DOMPDF_FONT_HEIGHT_RATIO,
  209. // Features
  210. "enable_unicode" => DOMPDF_UNICODE_ENABLED,
  211. "enable_php" => DOMPDF_ENABLE_PHP,
  212. "enable_remote" => DOMPDF_ENABLE_REMOTE,
  213. "enable_css_float" => DOMPDF_ENABLE_CSS_FLOAT,
  214. "enable_javascript" => DOMPDF_ENABLE_JAVASCRIPT,
  215. "enable_html5_parser" => DOMPDF_ENABLE_HTML5PARSER,
  216. "enable_font_subsetting" => DOMPDF_ENABLE_FONTSUBSETTING,
  217. // Debug
  218. "debug_png" => DEBUGPNG,
  219. "debug_keep_temp" => DEBUGKEEPTEMP,
  220. "debug_css" => DEBUGCSS,
  221. "debug_layout" => DEBUG_LAYOUT,
  222. "debug_layout_lines" => DEBUG_LAYOUT_LINES,
  223. "debug_layout_blocks" => DEBUG_LAYOUT_BLOCKS,
  224. "debug_layout_inline" => DEBUG_LAYOUT_INLINE,
  225. "debug_layout_padding_box" => DEBUG_LAYOUT_PADDINGBOX,
  226. // Admin
  227. "admin_username" => DOMPDF_ADMIN_USERNAME,
  228. "admin_password" => DOMPDF_ADMIN_PASSWORD,
  229. );
  /**
   * Class constructor
   *
   * Creates the document stylesheet and resets paper settings, base URL
   * parts, HTTP context, callbacks and cache id to their defaults. The
   * numeric locale is switched to "C" for the duration of the set-up and
   * restored before returning.
   */
  function __construct() {
    // sprintf('%.1f') is locale aware: any result other than '1.0' means the
    // current LC_NUMERIC locale does not use '.' as decimal separator.
    $this->_locale_standard = sprintf('%.1f', 1.0) == '1.0';

    $this->save_locale();

    $this->_messages = array();
    $this->_css = new Stylesheet($this);
    $this->_pdf = null;
    $this->_paper_size = DOMPDF_DEFAULT_PAPER_SIZE;
    $this->_paper_orientation = "portrait";

    // $_protocol is the declared property read by get_protocol() and
    // load_html_file(); initialise it like the other base URL parts so it is
    // a string from the start instead of null.
    $this->_protocol = "";
    // Legacy, undeclared property; kept so existing readers keep working.
    $this->_base_protocol = "";
    $this->_base_host = "";
    $this->_base_path = "";

    $this->_http_context = null;
    $this->_callbacks = array();
    $this->_cache_id = null;

    $this->restore_locale();
  }
  /**
   * Class destructor
   *
   * Passes this instance to the clear_object() helper.
   */
  public function __destruct() {
    clear_object($this);
  }
  /**
   * Returns the value of a dompdf option
   *
   * @param string $key name of the option
   *
   * @return mixed the stored option value
   * @throws DOMPDF_Exception when $key is not a known option
   */
  public function get_option($key) {
    // array_key_exists() rather than isset(): an option holding null is
    // still a known option.
    if ( array_key_exists($key, $this->_options) ) {
      return $this->_options[$key];
    }

    throw new DOMPDF_Exception("Option '$key' doesn't exist");
  }
  /**
   * Overrides the value of an existing dompdf option
   *
   * @param string $key   name of the option
   * @param mixed  $value new value
   *
   * @throws DOMPDF_Exception when $key is not a known option
   */
  public function set_option($key, $value) {
    $is_known = array_key_exists($key, $this->_options);

    if ( $is_known === false ) {
      throw new DOMPDF_Exception("Option '$key' doesn't exist");
    }

    $this->_options[$key] = $value;
  }
  /**
   * Sets several options at once
   *
   * Each entry is forwarded to {@link set_option()}, so an unknown key
   * raises its exception and stops the loop at that entry.
   *
   * @param array $options option name => value pairs
   */
  public function set_options(array $options) {
    foreach (array_keys($options) as $name) {
      $this->set_option($name, $options[$name]);
    }
  }
  /**
   * Remembers the current LC_NUMERIC locale and switches it to "C"
   *
   * Nothing happens when the constructor already found the numeric locale
   * to be the standard one.
   */
  private function save_locale() {
    if ( !$this->_locale_standard ) {
      // "0" queries the current setting without changing it
      $this->_system_locale = setlocale(LC_NUMERIC, "0");
      setlocale(LC_NUMERIC, "C");
    }
  }
  /**
   * Puts back the LC_NUMERIC locale remembered by save_locale()
   *
   * Nothing happens when the numeric locale is the standard one.
   */
  private function restore_locale() {
    if ( !$this->_locale_standard ) {
      setlocale(LC_NUMERIC, $this->_system_locale);
    }
  }
  /**
   * Gives access to the {@link Frame_Tree} built from the loaded document
   *
   * @return Frame_Tree
   */
  public function get_tree() {
    return $this->_tree;
  }
  /**
   * Stores the protocol used to resolve relative paths/urls
   * FIXME validate these
   *
   * @param string $proto protocol prefix such as "file://" or "http://"
   */
  public function set_protocol($proto) {
    $this->_protocol = $proto;
  }
  /**
   * Stores the base hostname used for relative paths/urls
   *
   * @param string $host
   */
  public function set_host($host) {
    $this->_base_host = $host;
  }
  /**
   * Stores the base path used for relative paths/urls
   *
   * @param string $path
   */
  public function set_base_path($path) {
    $this->_base_path = $path;
  }
  /**
   * Stores the stream context handed to file_get_contents()
   *
   * @param resource $http_context context created with stream_context_create()
   */
  public function set_http_context($http_context) {
    $this->_http_context = $http_context;
  }
  /**
   * Stores the view the PDF viewer should open the document with
   *
   * @param string $default_view The default document view
   * @param array  $options      The view's options
   */
  public function set_default_view($default_view, $options) {
    $this->_default_view_options = $options;
    $this->_default_view = $default_view;
  }
  /**
   * Gives the protocol currently used to resolve relative paths/urls
   *
   * @return string
   */
  public function get_protocol() {
    return $this->_protocol;
  }
  /**
   * Gives the base hostname used for relative paths/urls
   *
   * @return string
   */
  public function get_host() {
    return $this->_base_host;
  }
  /**
   * Gives the base path used for relative paths/urls
   *
   * @return string
   */
  public function get_base_path() {
    return $this->_base_path;
  }
  /**
   * Gives the stream context used when fetching files
   *
   * @return resource
   */
  public function get_http_context() {
    return $this->_http_context;
  }
  /**
   * Gives the Canvas instance (e.g. CPDF_Adapter, GD_Adapter) doing the
   * actual rendering; null until render() has created it
   *
   * @return Canvas
   */
  public function get_canvas() {
    return $this->_pdf;
  }
  /**
   * Gives the registered callbacks, grouped by event name
   *
   * @return array
   */
  public function get_callbacks() {
    return $this->_callbacks;
  }
  /**
   * Gives the document's stylesheet
   *
   * @return Stylesheet
   */
  public function get_css() {
    return $this->_css;
  }
  /**
   * Gives the DOM document the HTML was parsed into
   *
   * @return DOMDocument
   */
  public function get_dom() {
    return $this->_xml;
  }
  /**
   * Loads an HTML file
   * Parse errors are stored in the global array _dompdf_warnings.
   *
   * When no protocol, host and base path have been set yet, they are derived
   * from $file. Remote files require the "enable_remote" option. Local files
   * must resolve to a path inside the "chroot" directory and carry a
   * whitelisted extension. The fetched contents are handed to
   * {@link load_html()} together with the charset announced in an HTTP
   * Content-Type header, if any.
   *
   * @param string $file a filename or url to load
   *
   * @throws DOMPDF_Exception when the protocol is not allowed, a remote file
   *                          is requested while remote access is disabled,
   *                          a local file is rejected, or the file cannot
   *                          be read
   */
  function load_html_file($file) {
    $this->save_locale();

    try {
      if ( !$this->_protocol && !$this->_base_host && !$this->_base_path ) {
        list($this->_protocol, $this->_base_host, $this->_base_path) = explode_url($file);
      }

      if ( !in_array($this->_protocol, $this->_allowed_protocols) ) {
        throw new DOMPDF_Exception("Permission denied on $file. The communication protocol is not supported.");
      }

      $is_local = ($this->_protocol == "" || $this->_protocol === "file://");

      if ( !$this->get_option("enable_remote") && !$is_local ) {
        throw new DOMPDF_Exception("Remote file requested, but DOMPDF_ENABLE_REMOTE is false.");
      }

      if ( $is_local ) {
        // Get the full path to $file, returns false if the file doesn't exist
        $realfile = realpath($file);

        // Compare against the chroot *directory*: a bare prefix test would
        // also accept siblings such as "/var/www-private" for "/var/www".
        $chroot = rtrim($this->get_option("chroot"), "/\\");
        $inside_chroot = $realfile !== false &&
          ($realfile === $chroot || strpos($realfile, $chroot . DIRECTORY_SEPARATOR) === 0);

        // A missing file is reported with the same message as a file outside
        // the chroot, so callers cannot probe for files outside of it.
        if ( !$inside_chroot ) {
          throw new DOMPDF_Exception("Permission denied on $file. The file could not be found under the directory specified by DOMPDF_CHROOT.");
        }

        // File systems may be case-insensitive; "PAGE.HTML" is as valid as "page.html"
        $ext = strtolower(pathinfo($realfile, PATHINFO_EXTENSION));
        if ( !in_array($ext, $this->_allowed_local_file_extensions) ) {
          throw new DOMPDF_Exception("Permission denied on $file.");
        }

        $file = $realfile;
      }

      // Second argument is the boolean use_include_path flag
      $contents = file_get_contents($file, false, $this->_http_context);

      if ( $contents === false ) {
        throw new DOMPDF_Exception("File '$file' not found.");
      }

      $encoding = null;

      // $http_response_header is populated by PHP in this scope after an HTTP request
      // See http://the-stickman.com/web-development/php/getting-http-response-headers-when-using-file_get_contents/
      if ( isset($http_response_header) ) {
        foreach($http_response_header as $_header) {
          if ( preg_match("@Content-Type:\s*[\w/]+;\s*?charset=([^\s]+)@i", $_header, $matches) ) {
            $encoding = strtoupper($matches[1]);
            break;
          }
        }
      }
    }
    catch (Exception $e) {
      // Do not leave the process in the "C" numeric locale on failure
      $this->restore_locale();
      throw $e;
    }

    $this->restore_locale();

    $this->load_html($contents, $encoding);
  }
  /**
   * Loads an HTML string
   * Parse errors are stored in the global array _dompdf_warnings.
   *
   * The text is converted to UTF-8 when mbstring does not detect it as such
   * (using a charset found in a meta tag when there is one), a leading UTF-8
   * BOM is dropped, and the result is parsed either with the HTML5 parser or
   * with DOMDocument, depending on the "enable_html5_parser" option. The
   * resulting document, its quirks mode flag and a new {@link Frame_Tree}
   * are stored on the instance.
   *
   * @todo use the $encoding variable
   *
   * @param string $str      HTML text to load
   * @param string $encoding Not used yet; the value passed in is overwritten
   *                         by the detection below
   */
  function load_html($str, $encoding = null) {
    $this->save_locale();

    // FIXME: Determine character encoding, switch to UTF8, update meta tag. Need better http/file stream encoding detection, currently relies on text or meta tag.
    mb_detect_order('auto');

    if ( mb_detect_encoding($str) !== 'UTF-8' ) {
      $metatags = array(
        '@<meta\s+http-equiv="Content-Type"\s+content="(?:[\w/]+)(?:;\s*?charset=([^\s"]+))?@i',
        '@<meta\s+content="(?:[\w/]+)(?:;\s*?charset=([^\s"]+))"?\s+http-equiv="Content-Type"@i',
        '@<meta [^>]*charset\s*=\s*["\']?\s*([^"\' ]+)@i',
      );

      // Stop at the first pattern that matches; $matches[1] then holds the charset
      foreach($metatags as $metatag) {
        if ( preg_match($metatag, $str, $matches) ) break;
      }

      if ( mb_detect_encoding($str) == '' ) {
        if ( isset($matches[1]) ) {
          $encoding = strtoupper($matches[1]);
        }
        else {
          $encoding = 'UTF-8';
        }
      }
      else {
        if ( isset($matches[1]) ) {
          $encoding = strtoupper($matches[1]);
        }
        else {
          $encoding = 'auto';
        }
      }

      if ( $encoding !== 'UTF-8' ) {
        $str = mb_convert_encoding($str, 'UTF-8', $encoding);
      }

      // Make the document announce the encoding it now really has
      if ( isset($matches[1]) ) {
        $str = preg_replace('/charset=([^\s"]+)/i', 'charset=UTF-8', $str);
      }
      else {
        $str = str_replace('<head>', '<head><meta http-equiv="Content-Type" content="text/html;charset=UTF-8">', $str);
      }
    }
    else {
      $encoding = 'UTF-8';
    }

    // remove BOM mark from UTF-8, it's treated as document text by DOMDocument
    // FIXME: roll this into the encoding detection using UTF-8/16/32 BOM (http://us2.php.net/manual/en/function.mb-detect-encoding.php#91051)?
    if ( substr($str, 0, 3) == chr(0xEF).chr(0xBB).chr(0xBF) ) {
      $str = substr($str, 3);
    }

    // if the document contains non utf-8 with a utf-8 meta tag chars and was
    // detected as utf-8 by mbstring, problems could happen.
    // http://devzone.zend.com/article/8855
    if ( $encoding !== 'UTF-8' ) {
      $re = '/<meta ([^>]*)((?:charset=[^"\' ]+)([^>]*)|(?:charset=["\'][^"\' ]+["\']))([^>]*)>/i';
      $str = preg_replace($re, '<meta $1$3>', $str);
    }

    // Store parsing warnings as messages
    set_error_handler("record_warnings");

    try {
      // @todo Take the quirksmode into account
      // http://hsivonen.iki.fi/doctype/
      // https://developer.mozilla.org/en/mozilla's_quirks_mode
      $quirksmode = false;

      if ( $this->get_option("enable_html5_parser") ) {
        $tokenizer = new HTML5_Tokenizer($str);
        $tokenizer->parse();
        $doc = $tokenizer->save();

        // Remove #text children nodes in nodes that shouldn't have
        $tag_names = array("html", "table", "tbody", "thead", "tfoot", "tr");
        foreach($tag_names as $tag_name) {
          $nodes = $doc->getElementsByTagName($tag_name);

          foreach($nodes as $node) {
            self::remove_text_nodes($node);
          }
        }

        $quirksmode = ($tokenizer->getTree()->getQuirksMode() > HTML5_TreeBuilder::NO_QUIRKS);
      }
      else {
        // loadHTML assumes ISO-8859-1 unless otherwise specified, but there are
        // bugs in how DOMDocument determines the actual encoding. Converting to
        // HTML-ENTITIES prior to import appears to resolve the issue.
        // http://devzone.zend.com/1538/php-dom-xml-extension-encoding-processing/ (see #4)
        // http://stackoverflow.com/a/11310258/264628
        $doc = new DOMDocument();
        $doc->preserveWhiteSpace = true;
        $doc->loadHTML( mb_convert_encoding( $str , 'HTML-ENTITIES' , 'UTF-8' ) );

        // If some text is before the doctype, we are in quirksmode
        if ( preg_match("/^(.+)<!doctype/i", ltrim($str), $matches) ) {
          $quirksmode = true;
        }
        // If no doctype is provided, we are in quirksmode
        elseif ( !preg_match("/^<!doctype/i", ltrim($str), $matches) ) {
          $quirksmode = true;
        }
        else {
          $doctype = $doc->doctype;

          // The parser did not produce a doctype node: same as "no doctype"
          if ( !$doctype ) {
            $quirksmode = true;
          }
          // HTML5 <!DOCTYPE html>
          // This must be exclusive with the XHTML test below: an HTML5
          // doctype has an empty public id and would otherwise always be
          // flipped back to quirksmode.
          elseif ( !$doctype->publicId && !$doctype->systemId ) {
            $quirksmode = false;
          }
          // not XHTML
          elseif ( !preg_match("/xhtml/i", $doctype->publicId) ) {
            $quirksmode = true;
          }
        }
      }

      $this->_xml = $doc;
      $this->_quirksmode = $quirksmode;

      $this->_tree = new Frame_Tree($this->_xml);
    }
    catch (Exception $e) {
      // Leave neither the warning recorder nor the "C" locale installed
      restore_error_handler();
      $this->restore_locale();
      throw $e;
    }

    restore_error_handler();

    $this->restore_locale();
  }
  /**
   * Removes the direct #text children of a node
   *
   * Only immediate children are examined; text nested deeper is left alone.
   *
   * @param DOMNode $node the node to strip
   */
  public static function remove_text_nodes(DOMNode $node) {
    // Collect first, remove afterwards: the child list is live and must not
    // be modified while it is being walked.
    $text_nodes = array();

    foreach ($node->childNodes as $candidate) {
      if ( $candidate->nodeName === "#text" ) {
        $text_nodes[] = $candidate;
      }
    }

    foreach ($text_nodes as $text_node) {
      $node->removeChild($text_node);
    }
  }
  /**
   * Builds the {@link Frame_Tree}, loads any CSS and applies the styles to
   * the {@link Frame_Tree}
   *
   * The default stylesheet is loaded first, then every <link> and <style>
   * element in document order. A <base href> element, when present,
   * replaces the protocol, host and base path used to resolve stylesheet
   * urls.
   */
  protected function _process_html() {
    $this->_tree->build_tree();

    $this->_css->load_css_file(Stylesheet::DEFAULT_STYLESHEET, Stylesheet::ORIG_UA);

    $acceptedmedia = Stylesheet::$ACCEPTED_GENERIC_MEDIA_TYPES;
    $acceptedmedia[] = $this->get_option("default_media_type");

    // <base href="" />
    $base_nodes = $this->_xml->getElementsByTagName("base");
    if ( $base_nodes->length && ($href = $base_nodes->item(0)->getAttribute("href")) ) {
      list($this->_protocol, $this->_base_host, $this->_base_path) = explode_url($href);
    }

    // Set the base path of the Stylesheet to that of the file being processed
    $this->_css->set_protocol($this->_protocol);
    $this->_css->set_host($this->_base_host);
    $this->_css->set_base_path($this->_base_path);

    // Get all the stylesheets so that they are processed in document order
    $xpath = new DOMXPath($this->_xml);
    $stylesheets = $xpath->query("//*[name() = 'link' or name() = 'style']");

    foreach($stylesheets as $tag) {
      switch (strtolower($tag->nodeName)) {
        // load <link rel="STYLESHEET" ... /> tags
        case "link":
          if ( stripos($tag->getAttribute("rel"), "stylesheet") !== false || // may be "appendix stylesheet"
               mb_strtolower($tag->getAttribute("type")) === "text/css" ) {
            //Check if the css file is for an accepted media type
            //media not given then always valid
            $formedialist = preg_split("/[\s\n,]/", $tag->getAttribute("media"),-1, PREG_SPLIT_NO_EMPTY);
            if ( count($formedialist) > 0 ) {
              $accept = false;
              foreach ( $formedialist as $type ) {
                if ( in_array(mb_strtolower(trim($type)), $acceptedmedia) ) {
                  $accept = true;
                  break;
                }
              }

              if ( !$accept ) {
                //found at least one mediatype, but none of the accepted ones
                //Skip this css file.
                // "continue 2": a bare continue would target the switch
                continue 2;
              }
            }

            $url = $tag->getAttribute("href");
            $url = build_url($this->_protocol, $this->_base_host, $this->_base_path, $url);

            $this->_css->load_css_file($url, Stylesheet::ORIG_AUTHOR);
          }
          break;

        // load <style> tags
        case "style":
          // Accept all <style> tags by default (note this is contrary to W3C
          // HTML 4.0 spec:
          // http://www.w3.org/TR/REC-html40/present/styles.html#adef-media
          // which states that the default media type is 'screen'
          if ( $tag->hasAttributes() &&
               ($media = $tag->getAttribute("media")) &&
               !in_array($media, $acceptedmedia) ) {
            // Skip to the next stylesheet element
            continue 2;
          }

          $css = "";
          if ( $tag->hasChildNodes() ) {
            $child = $tag->firstChild;
            while ( $child ) {
              $css .= $child->nodeValue; // Handle <style><!-- blah --></style>
              $child = $child->nextSibling;
            }
          }
          else {
            $css = $tag->nodeValue;
          }

          $this->_css->load_css($css);
          break;
      }
    }
  }
  /**
   * Chooses the paper size and orientation used for rendering
   *
   * @param string|array $size        'letter', 'legal', 'A4', etc. {@link CPDF_Adapter::$PAPER_SIZES};
   *                                  render() also passes an array(0, 0, width, height)
   * @param string       $orientation 'portrait' or 'landscape'
   */
  public function set_paper($size, $orientation = "portrait") {
    $this->_paper_orientation = $orientation;
    $this->_paper_size = $size;
  }
  /**
   * Turns on the experimental caching capability by storing a cache id
   *
   * @access private
   * @param string $cache_id identifier to remember
   */
  public function enable_caching($cache_id) {
    $this->_cache_id = $cache_id;
  }
  /**
   * Registers callbacks for events like rendering of pages and elements.
   *
   * $callbacks is a list of arrays, each with an 'event' entry ('begin_page',
   * 'end_page', 'begin_frame' or 'end_frame') and an 'f' entry holding a
   * function, or object plus method, to be called. The function 'f' must
   * take an array as argument, which contains info about the event.
   *
   * Previously registered callbacks are discarded. Entries that are
   * malformed or whose 'f' is not callable are ignored. Nothing happens when
   * $callbacks is not an array.
   *
   * @param array $callbacks the set of callbacks to set
   */
  public function set_callbacks($callbacks) {
    if ( !is_array($callbacks) ) {
      return;
    }

    $this->_callbacks = array();

    foreach ($callbacks as $entry) {
      if ( !is_array($entry) || !isset($entry['event'], $entry['f']) ) {
        continue;
      }

      $event_name = $entry['event'];
      $handler = $entry['f'];

      if ( is_callable($handler) && is_string($event_name) ) {
        $this->_callbacks[$event_name][] = $handler;
      }
    }
  }
  /**
   * Tells whether the loaded document was flagged as quirks mode
   *
   * @return boolean true if quirks mode is active
   */
  public function get_quirksmode() {
    return $this->_quirksmode;
  }
  /**
   * Parses a comma separated "view, option, option..." string and applies it
   *
   * The first item must be one of the known view names; the remaining items
   * are passed on as the view's options through {@link set_default_view()}.
   *
   * @param string $value the string to parse
   *
   * @return bool true when the view name was recognised and applied,
   *              false otherwise (nothing is changed in that case)
   */
  public function parse_default_view($value) {
    $valid = array("XYZ", "Fit", "FitH", "FitV", "FitR", "FitB", "FitBH", "FitBV");

    $parts = preg_split("/\s*,\s*/", trim($value));
    $view_name = array_shift($parts);

    if ( in_array($view_name, $valid) ) {
      $this->set_default_view($view_name, $parts);
      return true;
    }

    return false;
  }
  /**
   * Renders the HTML to PDF
   *
   * Processes the document's CSS, applies @page rules, creates the canvas,
   * optionally registers the characters needed for font subsetting,
   * decorates every frame, copies title/meta information into the PDF and
   * finally reflows the root frame.
   *
   * When the "log_output_file" option is set, the start time is recorded
   * and an output buffer is opened here; it is not closed in this method
   * (NOTE(review): presumably consumed by write_log() — confirm).
   */
  function render() {
    $this->save_locale();

    $log_output_file = $this->get_option("log_output_file");
    if ( $log_output_file ) {
      if ( !file_exists($log_output_file) && is_writable(dirname($log_output_file)) ) {
        touch($log_output_file);
      }

      $this->_start_time = microtime(true);
      ob_start();
    }

    $this->_process_html();

    $this->_css->apply_styles($this->_tree);

    // @page style rules : size, margins
    $page_styles = $this->_css->get_page_styles();

    $base_page_style = $page_styles["base"];
    unset($page_styles["base"]);

    foreach($page_styles as $_page_style) {
      $_page_style->inherit($base_page_style);
    }

    if ( is_array($base_page_style->size) ) {
      $this->set_paper(array(0, 0, $base_page_style->size[0], $base_page_style->size[1]));
    }

    $this->_pdf = Canvas_Factory::get_instance($this, $this->_paper_size, $this->_paper_orientation);
    Font_Metrics::init($this->_pdf);

    if ( $this->get_option("enable_font_subsetting") && $this->_pdf instanceof CPDF_Adapter ) {
      foreach ($this->_tree->get_frames() as $frame) {
        $style = $frame->get_style();
        $node = $frame->get_node();

        // Handle text nodes
        if ( $node->nodeName === "#text" ) {
          $this->_pdf->register_string_subset($style->font_family, $node->nodeValue);
          continue;
        }

        // Handle generated content (list items)
        if ( $style->display === "list-item" ) {
          $chars = List_Bullet_Renderer::get_counter_chars($style->list_style_type);
          $this->_pdf->register_string_subset($style->font_family, $chars);
          continue;
        }

        // Handle other generated content (pseudo elements)
        // FIXME: This only captures the text of the stylesheet declaration,
        //        not the actual generated content, and forces all possible counter
        //        values. See notes in issue #750.
        if ( $node->nodeName == "dompdf_generated" ) {
          // all possible counter values
          foreach (array('decimal', 'upper-alpha', 'lower-alpha', 'lower-greek') as $counter_type) {
            $chars = List_Bullet_Renderer::get_counter_chars($counter_type);
            $this->_pdf->register_string_subset($style->font_family, $chars);
          }

          // the text of the stylesheet declaration
          $this->_pdf->register_string_subset($style->font_family, $style->content);
          continue;
        }
      }
    }

    $root = null;

    foreach ($this->_tree->get_frames() as $frame) {
      // Set up the root frame: the first frame only triggers decoration of
      // the tree's root, every later frame is decorated against it.
      if ( is_null($root) ) {
        $root = Frame_Factory::decorate_root( $this->_tree->get_root(), $this );
        continue;
      }

      // Create the appropriate decorators, reflowers & positioners.
      Frame_Factory::decorate_frame($frame, $this, $root);
    }

    // Add meta information
    $title = $this->_xml->getElementsByTagName("title");
    if ( $title->length ) {
      $this->_pdf->add_info("Title", trim($title->item(0)->nodeValue));
    }

    $metas = $this->_xml->getElementsByTagName("meta");
    $labels = array(
      "author" => "Author",
      "keywords" => "Keywords",
      "description" => "Subject",
    );
    foreach($metas as $meta) {
      $name = mb_strtolower($meta->getAttribute("name"));
      $value = trim($meta->getAttribute("content"));

      if ( isset($labels[$name]) ) {
        $this->_pdf->add_info($labels[$name], $value);
        continue;
      }

      if ( $name === "dompdf.view" && $this->parse_default_view($value) ) {
        $this->_pdf->set_default_view($this->_default_view, $this->_default_view_options);
      }
    }

    $root->set_containing_block(0, 0, $this->_pdf->get_width(), $this->_pdf->get_height());
    $root->set_renderer(new Renderer($this));

    // This is where the magic happens:
    $root->reflow();

    // Clean up cached images
    Image_Cache::clear();

    global $_dompdf_warnings, $_dompdf_show_warnings;
    if ( $_dompdf_show_warnings ) {
      echo '<b>DOMPDF Warnings</b><br><pre>';
      // The global may not have been initialised when nothing was recorded
      foreach ((array) $_dompdf_warnings as $msg) {
        echo $msg . "\n";
      }

      // Only call get_cpdf() on canvases that provide it; other Canvas
      // implementations would otherwise trigger a fatal error here.
      $canvas = $this->get_canvas();
      if ( method_exists($canvas, "get_cpdf") ) {
        echo $canvas->get_cpdf()->messages;
      }

      echo '</pre>';
      flush();
    }

    $this->restore_locale();
  }
  /**
   * Adds a meta information entry to the PDF.
   *
   * The call is forwarded to the canvas held in $this->_pdf. When no canvas
   * is set (the property is null) the request is silently ignored.
   *
   * @param string $label name of the info field (the render step uses
   *                      labels such as "Title" or "Author")
   * @param string $value value to store under that label
   */
  function add_info($label, $value) {
    $canvas = $this->_pdf;

    // Nothing to annotate without a canvas.
    if ( is_null($canvas) ) {
      return;
    }

    $canvas->add_info($label, $value);
  }
  /**
   * Writes a statistics line and the captured output buffer to the log file.
   *
   * Does nothing when the "log_output_file" option is empty or the file it
   * names is not writable. Otherwise the file's contents are replaced by an
   * HTML fragment holding the frame counter, the memory usage (labelled KB),
   * the time elapsed since $this->_start_time (labelled ms) and the
   * quirks-mode flag, followed by the contents of the active output buffer.
   *
   * Note: ob_get_clean() also turns the active output buffer off.
   *
   * @return void
   */
  private function write_log() {
    $log_file = $this->get_option("log_output_file");
    if ( !$log_file || !is_writable($log_file) ) {
      return;
    }

    $frames = Frame::$ID_COUNTER;
    $memory = DOMPDF_memory_usage() / 1024;
    $time   = (microtime(true) - $this->_start_time) * 1000; // seconds -> milliseconds

    $out = sprintf(
      "<span style='color: #000' title='Frames'>%6d</span>".
      "<span style='color: #009' title='Memory'>%10.2f KB</span>".
      "<span style='color: #900' title='Time'>%10.2f ms</span>".
      "<span title='Quirksmode'> ".
      ($this->_quirksmode ? "<span style='color: #d00'> ON</span>" : "<span style='color: #0d0'>OFF</span>").
      "</span><br />", $frames, $memory, $time);

    // ob_get_clean() returns false when no output buffer is active; in that
    // case there is simply nothing to append.
    $buffered = ob_get_clean();
    if ( $buffered !== false ) {
      $out .= $buffered;
    }

    // Write to the exact path validated above instead of looking the option
    // up a second time, so the writability check and the write always agree.
    file_put_contents($log_file, $out);
  }
  /**
   * Streams the PDF to the client.
   *
   * By default the client is presented with a download dialog. The
   * $options array tunes the output; recognised keys are:
   *
   * 'Accept-Ranges' => 1 or 0 - the header is only sent when this is set
   *    to 1. It is off by default because it has caused problems in
   *    practice, even though it is meant to solve them.
   *
   * 'compress' => 1 or 0 - apply content stream compression, on (1) by
   *    default.
   *
   * 'Attachment' => 1 or 0 - if 1, force the browser to open a download
   *    dialog, on (1) by default.
   *
   * The log is written first; if no canvas is set, nothing is streamed.
   * The whole operation is wrapped in a save_locale()/restore_locale() pair.
   *
   * @param string $filename the name of the streamed file
   * @param array  $options  header options (see above)
   */
  function stream($filename, $options = null) {
    $this->save_locale();
    $this->write_log();

    $canvas = $this->_pdf;
    if ( $canvas !== null ) {
      $canvas->stream($filename, $options);
    }

    $this->restore_locale();
  }
  /**
   * Returns the PDF as a string.
   *
   * Unlike stream(), nothing is sent to the client; the document is handed
   * back to the caller. The $options array controls the output; recognised
   * keys are:
   *
   * 'compress' => 1 or 0 - apply content stream compression, this is
   *    on (1) by default
   *
   * The log is written first. When no canvas is set, null is returned.
   *
   * @param array $options options (see above)
   *
   * @return string|null the PDF data, or null when there is no canvas
   */
  function output($options = null) {
    $this->save_locale();
    $this->write_log();

    $output = null;
    if ( !is_null($this->_pdf) ) {
      $output = $this->_pdf->output($options);
    }

    // Always balance the save_locale() call above, including when there is
    // no canvas: returning before this point would skip restore_locale(),
    // which stream() never does.
    $this->restore_locale();

    return $output;
  }
  /**
   * Returns the underlying HTML document as a string.
   *
   * The markup is whatever saveHTML() on the document object held in
   * $this->_xml produces.
   *
   * @return string
   */
  function output_html() {
    $html = $this->_xml->saveHTML();

    return $html;
  }
  944. }