Changeset 489014
- Timestamp:
- 01/12/2012 09:22:49 PM (14 years ago)
- File:
-
- 1 edited
-
latex-everything/trunk/html-to-latex.php (modified) (4 diffs)
Legend:
- Unmodified
- Added
- Removed
-
latex-everything/trunk/html-to-latex.php
r489013 r489014 18 18 'code' => Array ( 'handler' => 'environment', 'tex' => 'verbatim' ), 19 19 'dd' => Array ( 'handler' => 'other', 'tex' => Array( '', 20 "\n" ) ),20 "\n" ) ), 21 21 'dl' => Array ( 'handler' => 'environment', 'tex' => 'description' ), 22 22 'dt' => Array ( 'handler' => 'other', 'tex' => Array( '\item', 23 ']' ) ),23 ']' ) ), 24 24 'em' => Array ( 'handler' => 'command', 'tex' => 'emph' ), 25 25 'h1' => Array ( 'handler' => 'command', 'tex' => 'section*' ), … … 31 31 'hr' => Array ( 'handler' => 'single', 'tex' => '\hline' ), 32 32 'i' => Array ( 'handler' => 'command', 'tex' => 'emph' ), 33 //'img' => Array ( 'handler' => 'image', 'tex' => 'includegraphics' ),33 //'img' => Array ( 'handler' => 'image', 'tex' => 'includegraphics' ), 34 34 'li' => Array ( 'handler' => 'single', 'tex' => '\item' ), 35 35 'ol' => Array ( 'handler' => 'environment', 'tex' => 'enumerate' ), … … 38 38 'script' => Array ( 'handler' => 'ignore', 'tex' => '' ), 39 39 'strong' => Array ( 'handler' => 'command', 'tex' => 'textbf' ), 40 //'table' => Array ( 'handler' => 'table', 'tex' => 'table' ),41 //'td' => Array ( 'handler' => 'table', 'tex' => 'tr' ),40 'table' => Array ( 'handler' => 'table', 'tex' => 'table' ), 41 'td' => Array ( 'handler' => 'table', 'tex' => 'tr' ), 42 42 'title' => Array ( 'handler' => 'command', 'tex' => 'title' ), 43 //'tr' => Array ( 'handler' => 'table', 'tex' => 'td' ),43 'tr' => Array ( 'handler' => 'table', 'tex' => 'td' ), 44 44 'ul' => Array ( 'handler' => 'environment', 'tex' => 'itemize' ), 45 45 ); … … 126 126 } 127 127 128 function _table_handler( $element, $tex ) { 129 $output = ''; 130 if ( $tex == 'table' ) { 131 $output = $this->_create_latex_table( $element ); 132 } else { 133 // It's a tr or td. Create_latex_table does all of the work, 134 // so we just output the texified content. 135 $output = $this->_texify( $element ); 136 } 137 return $output; 138 } 139 140 function _create_latex_table( $table ) { 141 $output = ''; 142 143 // Find the size of the table 144 $rows = $table->getElementsByTagName('tr'); 145 $row_count = $rows->length; 146 147 $column_count = 0; 148 foreach( $rows as $row ) { 149 $columns = $this->_get_tr_columns( $row ); 150 if( $columns->length > $column_count ) 151 $column_count = $columns->length; 152 } 153 154 // Create column alignments for every column based on the first row 155 $column_alignments = ''; 156 if( $row = $rows->item(0) ) { 157 $columns = $this->_get_tr_columns( $row ); 158 for( $c = 0; $c < $column_count; ++$c ) { 159 $column = $columns->item( $c ); 160 if( $column ) { 161 $align = $column->getAttribute( 'align' ); 162 if( $align == 'right' ) 163 $align = 'r'; 164 else if( $align == 'center' ) 165 $align = 'c'; 166 else 167 $align = 'l'; 168 } else { // No colums at this index on the first row, so repeat alignments 169 $align = substr( $column_alignments, -1 ) or $align = 'l'; 170 } 171 $column_alignments .= $align; 172 } 173 } 174 175 $output .= "\n\n\\begin{tabular}{{$column_alignments}}\n"; 176 $output .= "\\hline\n"; 177 178 for( $r = 0; $r < $row_count; ++$r ) { 179 $row = $rows->item( $r ); 180 $columns = $this->_get_tr_columns( $row ); 181 for ( $c = 0; $c < $column_count; ++$c ) { 182 $column = $columns->item( $c ); 183 // Write the contents 184 if ( $column ) 185 $output .= "{$this->_texify( $column )} "; 186 // Add punctuation between columns when not the last one 187 if( $c < $column_count - 1 ) 188 $output .= "& "; 189 } 190 // Add punctuation at the end of the row 191 $output .= "\\\\\n"; 192 // Add lines under header rows 193 if ( $column && $column->tagName == 'th' ) 194 $output .= "\\hline\n"; 195 } 196 197 $output .= "\\hline\n"; 198 $output .= "\\end{tabular}\n\n"; 199 200 return $output; 201 } 202 203 /* Returns the column from a <tr> element, regardless of whether 204 * they're <td> or <th> elements 205 */ 206 function _get_tr_columns( $row ) { 207 $columns = $row->getElementsByTagName('th'); 208 if ($columns->length == 0) 209 $columns = $row->getElementsByTagName('td'); 210 return $columns; 211 } 212 213 /* HTML: <img src="bar.png"> 214 * Latex: \includegraphic{bar.png} 215 */ 128 216 /* 129 function _table_handler( $element, $tex ) { 130 $output = ''; 131 if ( $tex == 'table' ) { 132 $output = _create_latex_table( $element ); 133 } else { 134 // It's a tr or td. Create_latex_table does all of the work, 135 // so we just output the texified content. 136 $output = $this->_texify( $element ); 137 } 138 return $output; 139 } 140 */ 141 142 /* HTML: <img src="bar.png"> 143 * Latex: \includegraphic{bar.png} 144 */ 145 /*function _image_handler( $element, $tex ) { 146 $source = _locate_image( $element->getAttribute( 'src' ) ); 147 $alt = $element->getAttribute( 'alt' ); 148 149 if ( $source ) { 150 return "\\{$tex}{{$soruce}}"; 151 } else { 152 // Image couldn't be found 153 return $alt; 154 } 155 156 } 157 */ 158 159 // Run on html text nodes before output 160 function quote_expansion_filter ( $text ) { 161 $text = preg_replace( '/([^\s\[\{\)~])"/', "$1''", $text ); 162 $text = preg_replace( '/"/', '``', $text ); 163 return $text; 164 } 165 166 // Run on html text nodes before output 167 function urlify_filter ( $text ) { 168 // Wraps urls in \url{} 169 // Lovingly stolen from http://daringfireball.net/2010/07/improved_regex_for_matching_urls 170 $pattern = '/(?i)\b((?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))/'; 171 return preg_replace( $pattern, 172 '\url{$0}', 173 $text ); 174 } 217 function _image_handler( $element, $tex ) { 218 $source = $this->_locate_image( $element->getAttribute( 'src' ) ); 219 $alt = $element->getAttribute( 'alt' ); 220 221 if ( $source ) { 222 return "\\{$tex}{{$soruce}}"; 223 } else { 224 // Image couldn't be found 225 return $alt; 226 } 227 } 228 */ 229 230 // Run on html text nodes before output 231 function quote_expansion_filter ( $text ) { 232 $text = preg_replace( '/([^\s\[\{\)~])"/', "$1''", $text ); 233 $text = preg_replace( '/"/', '``', $text ); 234 return $text; 235 } 236 237 // Run on html text nodes before output 238 function urlify_filter ( $text ) { 239 // Wraps urls in \url{} 240 // Lovingly stolen from http://daringfireball.net/2010/07/improved_regex_for_matching_urls 241 $pattern = '/(?i)\b((?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))/'; 242 return preg_replace( $pattern, 243 '\url{$0}', 244 $text ); 245 } 175 246 } 176 247
Note: See TracChangeset
for help on using the changeset viewer.