<?php
// Conversions of large documents can be slow: remove the execution time limit.
// 0 is the documented "no limit" value; a negative limit is not documented.
set_time_limit(0);
/**
 * Static helpers converting an ODT (OpenDocument text) file to XML through
 * a chain of sed-like regex scripts and XSLT stylesheets stored next to
 * this file.
 */
class Odt {
/** PDO connection to the scriptores.sqlite database; assigned by ngml(). */
public static $pdo;
/** Error messages accumulated by error_handler(), one per line. */
public static $log;
/**
 * Convert an ODT file through the full pipeline: odtx(), odt_tei.xsl,
 * the tei.sed regex script, then tei_post.xsl.
 *
 * @param string      $odt      Path of the ODT file to convert.
 * @param string|null $destFile Optional destination path; when given, its
 *                              base name without extension is passed to
 *                              odt_tei.xsl as the "filename" parameter and
 *                              the result is written to that path.
 * @return mixed The transformed XML as returned by xsl().
 */
public static function tei($odt, $destFile=null) {
    $here = dirname(__FILE__);
    $xml = self::odtx($odt, $destFile);

    $params = array();
    if ($destFile) {
        $params['filename'] = preg_replace('/\.[^\.]*$/', '', basename($destFile));
    }
    $xml = self::xsl($xml, $here.'/odt_tei.xsl', $params);

    // Regex clean-up between the two XSLT passes.
    list($search, $replace) = self::sed_preg(file_get_contents($here.'/tei.sed'));
    $xml = preg_replace($search, $replace, $xml);

    $xml = self::xsl($xml, $here.'/tei_post.xsl');
    if ($destFile) {
        file_put_contents($destFile, $xml);
    }
    return $xml;
}
/**
 * Run tei() on the ODT file, then apply tei_corr.xsl to the result.
 *
 * @param string $odt Path of the ODT file to convert.
 * @return mixed Output of the tei_corr.xsl transformation.
 */
public static function corr($odt) {
    $tei = self::tei($odt);
    $stylesheet = dirname(__FILE__).'/tei_corr.xsl';
    return self::xsl($tei, $stylesheet);
}
/**
 * Run tei() on the ODT file, then apply tei_philo3.xsl to the result.
 *
 * @param string $odt Path of the ODT file to convert.
 * @return mixed Output of the tei_philo3.xsl transformation.
 */
public static function philo3($odt) {
    $tei = self::tei($odt);
    $stylesheet = dirname(__FILE__).'/tei_philo3.xsl';
    return self::xsl($tei, $stylesheet);
}
/**
 * Extract the XML of an ODT archive as one document.
 *
 * meta.xml, styles.xml and content.xml are read from the archive,
 * concatenated in that order, cleaned with the regex script odtx.sed and
 * wrapped in a single <office:document> root element.
 *
 * @param string      $odt      Path of the ODT (zip) file.
 * @param string|null $destFile Kept for backward compatibility and currently
 *                              unused. The original code only listed the
 *                              archive's "Pictures/" entries for this
 *                              destination and printed them as debug output;
 *                              nothing was ever extracted.
 * @return string|false|null The wrapped XML; false when the archive cannot
 *                           be opened; null when the zip extension is missing.
 *                           In both failure cases an HTML error paragraph is
 *                           echoed.
 */
public static function odtx($odt, $destFile=null) {
    if (!extension_loaded("zip")) {
        echo '<p class="error">Cette fonction nécessite l\'extension PHP zip.</p>';
        return;
    }
    $zip = new ZipArchive();
    // ZipArchive::open() returns true on success and an integer error code
    // (always truthy) on failure, so only a strict comparison detects failure.
    if ($zip->open($odt) !== true) {
        echo '<p class="error">'.$odt.' non trouvé.</p>';
        return false;
    }
    $xml = '';
    foreach (array('meta.xml', 'styles.xml', 'content.xml') as $part) {
        // getFromName() yields false for a missing entry, which appends nothing.
        $xml .= $zip->getFromName($part);
    }
    $zip->close();

    $preg = self::sed_preg(file_get_contents(dirname(__FILE__).'/odtx.sed'));
    $xml = preg_replace($preg[0], $preg[1], $xml);

    return '<?xml version="1.0" encoding="UTF-8"?>'."\n"
        .'<office:document xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0">'."\n"
        .$xml."\n</office:document>";
}
/**
 * Translate a sed-like script into arguments for preg_replace().
 *
 * Only substitution commands are understood: a line starting with "s",
 * followed by a delimiter character, the pattern, the delimiter, and the
 * replacement (for example "s/pattern/replacement/"). Anything after a third
 * delimiter (sed flags) is ignored. Each pattern is re-delimited with its
 * own delimiter and given the "u" modifier; sed back-references (\1) in the
 * replacement are rewritten as $1. Blank lines, lines not starting with "s"
 * and malformed substitutions are skipped.
 *
 * Limitation: the line is split on every occurrence of the delimiter, so an
 * escaped delimiter inside the pattern is not supported.
 *
 * @param string $script Content of the sed script.
 * @return array Two parallel lists: array($patterns, $replacements).
 */
public static function sed_preg($script) {
    $search = array();
    $replace = array();
    foreach (explode("\n", $script) as $l) {
        // Tolerate CRLF scripts: a trailing "\r" must not leak into the replacement.
        $l = rtrim($l, "\r");
        if (trim($l) === '') continue;
        // A command needs at least "s" plus a delimiter character.
        if (strlen($l) < 2 || $l[0] != 's') continue;
        $delim = $l[1];
        $parts = explode($delim, $l);
        // Expect at least: "s", pattern, replacement.
        if (count($parts) < 3) continue;
        $search[] = $delim.$parts[1].$delim.'u';
        $replace[] = preg_replace('/\\\\([0-9]+)/', '\\$$1', $parts[2]);
    }
    return array($search, $replace);
}
/**
 * Run tei() on the ODT file, then apply the tei_html.xsl stylesheet.
 *
 * The stylesheet is taken from ../../transform/ relative to this file when
 * it exists there, otherwise it is loaded from a remote URL.
 *
 * @param string $odt Path of the ODT file to convert.
 * @return mixed Output of the tei_html.xsl transformation.
 */
public static function html($odt) {
    $tei = self::tei($odt);
    $stylesheet = dirname(__FILE__)."/../../transform/tei_html.xsl";
    if (!file_exists($stylesheet)) {
        // No local copy: fall back to the remote stylesheet.
        $stylesheet = "http://subversion.cru.fr/diple/trunk/transform/tei_html.xsl";
    }
    return self::xsl($tei, $stylesheet);
}
/**
 * Named conversion that currently returns the output of tei() unchanged.
 *
 * @param string $odt Path of the ODT file to convert.
 * @return mixed Whatever tei() returns for this file.
 */
public static function actesroyaux($odt) {
    return self::tei($odt);
}
/**
 * Convert an ODT file and post-process the XML with an ordered list of
 * regex substitutions that add dictionary markup (abbr, usg, domain,
 * foreign, ref, xr, entry/orth, date, etym, bibl, quote, gram...).
 *
 * When scriptores.sqlite exists next to this file, each <name> run that is
 * not already inside a <sigle> is passed to key(), which tries to resolve
 * it against the "siglae" table. Diagnostics written by key() are sent to
 * the output stream, wrapped in an XML comment.
 *
 * Side effects: overwrites the static properties $keyStop, $pdo, $keyLike,
 * $keyEl and $keyOut, and prints to the output stream.
 *
 * @param string $odt Path of the ODT file to convert.
 * @return mixed The post-processed XML.
 */
public static function ngml($odt) {
    // NOTE(review): the original called self::norm(), which this class does
    // not define (fatal error on every call). tei() is used instead because
    // every sibling conversion starts from it -- confirm this is the
    // intended input for the patterns below.
    $xml = self::tei($odt);
    // The substitutions are applied in order; later patterns rely on the
    // markup produced by earlier ones.
    $preg = array(
        '@ type="N.G.M.L."@' => '',
        '@</?(ref|bg_)[^>]*>@' => '',
        '@<num>id.</num>@' => '<name>id.</name>',
        '@ <hi>@' => '<hi> ',
        '@(?<=[ ])(cf\.|f\.|m\.|n\.|v\.|v\. lect\.|var\. lect\.|s\.? ?v\.|sc\.)@' => '<abbr>$1</abbr>',
        // The last alternative originally read spéc\." with a stray double
        // quote, so a plain "spéc." was never tagged.
        '@(?<=[ ])(\(?)(absol\.|au fig\.|au plur\.|au propre|au sing\.|avec gén\.|avec inf\.|dans l\'expr\.|dans les expr\.|en général|en part\.|fautif pour|par ext\.|par métaph\.|par méton\.|péj\.|sens abstrait|sens actif|sens concret|sens passif|spéc\.)(\)?)@' => '<usg>$1$2$3</usg>',
        '@(?<!>)\([a-zéèêâ]+\.\)@u' => '<domain>$0</domain>',
        '@(?<=[ ])(adj\.|adv\.|comp\.|indécl\.|sive|superl\.)@' => '<foreign>$1</foreign>',
        '@(?<=<hi>)([ ]*)<(abbr|domain|foreign|usg)>([^<]+)</\2>([ ]*)@' => '</hi>$1<$2>$3</$2>$4<hi>',
        '@<hi>([ ]*)<(abbr|usg|foreign)>([^<]+)</\2>([ ]*)@' => '$1<$2>$3</$2>$4<hi>',
        '@([ ]*)<(abbr|usg|foreign)>([^<]+)</\2>([ ]*)</hi>@' => '</hi>$1<$2>$3</$2>$4',
        '@<hi>([, :;]+)@u' => '$1<hi>',
        '@<hi>[\(\) :]*</hi>@u' => '',
        '@\(?DuC\)?@' => '<ref>$0</ref>',
        '@FEW *<num>[^<]+</num>( *[0-9pb\.\-]+)?@' => '<ref>$0</ref>',
        '@(?<!TLL )<abbr>(v\.|s\.v\.)</abbr> ([^\]\[: <]+)@u' => '<xr rend="$1">$2</xr>',
        '@(</xr> *<hi>et</hi> )([^<]+)@u' => '$1<xr>$2</xr>',
        '@</xr>( *<hi>[IV]+</hi>)@' => '$1</xr>',
        '@([\.\]\( ]+)</xr>@u' => '</xr>$1',
        '@<p>\s*<ident>([1-9]\. )?([a-z][a-zç\(\)]+)</ident>@u' => '<p><orth>$1$2</orth>',
        // Entry wrapping: close an entry before </body> and before each new
        // headword, then drop the stray close right after <body>.
        '@</body>@' => " </entry>\n</body>",
        '@<p><orth>@' => " </entry>\n<entry>\n<p><orth>",
        '@<body>\n </entry>@' => "<body>\n",
        '@<ident>(.|II|III|IV|VI|VII|VIII)</ident>@u' => "\n".'<n>$1</n>',
        '@<ident>([^<]+)</ident>@u' => '<orth>$1</orth>',
        '@(: )([a-z]*?)( :)@' => '$1<orth>$2</orth>$3',
        '@(<hi>forme</hi> )([a-z]*?)( :)@' => '$1<orth>$2</orth>$3',
        '@[\[\(]([ac]\. )?[0-9-—–]+[\]\)]@u' => '<date>$0</date>',
        '@ [ac]\. [0-9]{3,4}@' => ' <date>$0</date>',
        '@(?<!>)\[.*?\](?!</date>)@' => '<etym>$0</etym>',
        '@<etym>(\[[0-9\.p ]+\])</etym>@u' => '$1',
        '@(\([^(]*?)<etym>(\[.*?\])</etym>@' => '$1$2',
        '@</phr>([a-z]{1})@' => '$1</phr>',
        '@ ib\.@' => ' <name>ib.</name>',
        '@[\[\(]\?[\]\)]@' => '⟨?⟩',
        '@(<name>.+?)(?=:|<bg|<hi>|<def>|<etym>|<n>|<name>|<note|<usg|<xr|\]</etym>|\][^<]|</p>)@' => '<bibl>$1</bibl>',
        '@(\([^\)\<\>]+)</bibl>( *\))@' => '$1$2</bibl>',
        '@([ ]+)</bibl>@u' => '</bibl>$1',
        '@</bibl>\.@' => '.</bibl>',
        '@</bibl>\]@' => ']</bibl>',
        '@<name>(Carta|Chron.)</name> <date>[^<]+</date>( <ref>\(DuC\)</ref>)?@' => '<sigle>$0</sigle>',
        '@</bibl> : *(...*?)(?=:[ ][^a-z«‑]|.<abbr>cf.|<bibl>|<etym>|\]</etym>| <hi|<n>|<note|<phr>|</p>|<usg|<xr|\r|\n)@' => '</bibl> : <quote>$1</quote>',
        '@<quote><bibl>(.+?)</quote>@' => '<bibl>$1',
        '@<quote><xr(.+?)</quote>@' => '<xr$1',
        '@</orth>([^>]+<abbr>[fmn]\.</abbr>)@' => '</orth><gram>$1</gram>',
        '@</orth>([ ,][^\<\>]+)@' => '</orth><gram>$1</gram>',
        '@\[</gram>@' => '</gram>[',
        '@<gram>([ ,]+)@' => '$1<gram>',
        '@ +</gram>@' => '</gram> ',
    );
    $xml = preg_replace(array_keys($preg), array_values($preg), $xml);

    $base = dirname(__FILE__).'/scriptores.sqlite';
    if (file_exists($base)) {
        // Keys for which key() stops immediately without querying the database.
        self::$keyStop = array_fill_keys(array(
            "Arist. eth.",
            "Aug. c. acad.",
            "Aug. civ.",
            "Aug. cons. evang.",
            "Aug. enchir.",
            "Aug. in evang. Ioh.",
            "Aug. in psalm.",
            "Aug. retract.",
            "Aug. serm.",
            "Aug. tract.",
            "Aug. vera relig.",
            "Beda hex.",
            "Beda homil. evang.",
            "Beda metr.",
            "Beda tabern.",
            "Beda temp. rat.",
            "Cassiod. in psalm.",
            "Char. gramm.",
            "Cic. inv.",
            "Cic. Lael.",
            "Diosc.",
            "Fest.",
            "Fulg. myth.",
            "Hier. c. Ioh.",
            "Hier. epist.",
            "Hier. in Dan.",
            "Hier. in Is.",
            "Hier. in Matth.",
            "Hier. in psalm.",
            "Hier. nom. hebr.",
            "Hier. onom. num.",
            "Hier. pref. Vulg. Ezech.",
            "Hier. vita Pauli",
            "Hor. ars",
            "Hor. carm. I",
            "Hor. sat.",
            "Hyg. fab.",
            "Hygin. astron.",
            "Isid. diff.",
            "Isid. etym.",
            "Isid. reg. monach.",
            "Mart.",
            "Mart. Cap.",
            "Mart. Cap. I",
            "Mart. epigr.",
            "Max. Conf.",
            "Max. Taur.",
            "Plaut. Aul.",
            "Plaut. Capt.",
            "Plin. epist.",
            "Prisc.",
            "Prisc. gramm.",
            "Prisc. gramm. II",
            "Rufin. hist.",
            "Rufin. Orig. in gen.",
            "Rufin. Orig. in Rom. pref. Rufin.",
            "Rufin. patr.",
            "Sen. epist.",
            "Serv. ecl.",
            "Sidon. epist.",
            "Suet. Nero",
            "Tert. orat.",
            "Varro",
            "Vell.",
            "Ven. Fort. carm.",
            "Ven. Fort. vita Radeg.",
            "Verg. Aen.",
            "Verg. georg.",
            "Vet. Lat. Eph.",
            "Vet. Lat. exod.",
            "Vet. Lat. Sirach",
        ), "");
        self::$pdo = new PDO('sqlite:'.$base);
        self::$keyLike = self::$pdo->prepare('SELECT sigle FROM siglae WHERE norm LIKE ?');
        self::$keyEl = "sigle";
        self::$keyOut = fopen("php://output", "w");
        // key() writes its diagnostics to $keyOut: keep them inside a comment.
        print "<!-- ";
        $xml = preg_replace_callback(
            '@(?<!<sigle>)<name>.*?(?=</bibl>| <| s.v.| p\.| col\.| t\.| f\.| P [0-9<]| [0-9]| p[0-9]| \(éd.)@',
            array(__CLASS__, 'key'),
            $xml
        );
        print "-->";
        if (is_resource(self::$keyOut)) fclose(self::$keyOut);
    }
    return $xml;
}
/** Name of the element that key() wraps around a matched value; ngml() sets it to "sigle". */
static $keyEl="name"
;
/** Declared but not used anywhere in this class. */
static $keyExact;
/** Prepared statement "SELECT sigle FROM siglae WHERE norm LIKE ?", set by ngml() and executed by key(). */
static $keyLike;
/** Map whose keys make key() stop at once without a database query; filled by ngml(). */
static $keyStop=array();
/** Stream where key() writes its diagnostics when it is an open resource; ngml() opens php://output. */
static $keyOut;
/** Running number prefixed to each diagnostic line written by key(). */
static $keyCount=1
;
/**
 * preg_replace_callback() handler: wrap a matched run of XML in the
 * self::$keyEl element, with a key="" attribute resolved from the database.
 *
 * The match is stripped of tags and punctuation to build a candidate key,
 * which is looked up with the prepared statement self::$keyLike (exact
 * LIKE first, then as a prefix with the trailing dots removed):
 *  - key present in self::$keyStop: kept as is, no query;
 *  - exactly one row: the key becomes the value found;
 *  - several rows: the key gets a "*" suffix and the candidates are logged;
 *  - no row: the last space-separated word is moved out of the wrapped
 *    value (it is re-appended after the closing tag) and the lookup is
 *    retried, at most 10 times. When no word can be removed any more, or
 *    removing one would cut a <name> element open, the whole match is
 *    wrapped with key="?" and the failure is logged.
 *
 * Diagnostics go to self::$keyOut when it is an open stream.
 *
 * @param array $matches Regex matches; only $matches[0] is used.
 * @return string The wrapped value followed by the words moved out of it.
 */
public static function key($matches) {
    // Clean-up applied to the raw XML to obtain a lookup key.
    $preg = array(
        '@<[^>]+>@' => '',
        '@[\*\(\)\?⟨⟩]@u' => '',
        '@^ib\..*@' => '',
        '@[ :\(]+$@u' => '',
        '@\s+@' => ' ',
    );
    $regex = array_keys($preg);
    $replace = array_values($preg);
    $value = $matches[0];
    $after = "";
    // Initialised so that it is defined even if the loop body never runs.
    $key = "";
    $i = 10;
    while ($value && $i) {
        $key = preg_replace($regex, $replace, $value);
        if (!$key) break;
        // Stop-listed keys are emitted unchanged. (The original had an
        // unreachable `$key=""` after this break; it has been removed.)
        if (isset(self::$keyStop[$key])) break;

        self::$keyLike->execute(array($key));
        $values = self::$keyLike->fetchAll(PDO::FETCH_COLUMN);
        if (!count($values)) {
            // Nothing exact: retry as a prefix.
            self::$keyLike->execute(array(rtrim($key, '.').'%'));
            $values = self::$keyLike->fetchAll(PDO::FETCH_COLUMN);
        }
        $count = count($values);
        if ($count == 1) {
            $key = $values[0];
            break;
        }
        if ($count > 1) {
            // Ambiguous: mark the key and log the candidates.
            $key = $key."*";
            if (is_resource(self::$keyOut)) {
                fwrite(self::$keyOut, "\n".self::$keyCount++.' "'.preg_replace($regex, $replace, $matches[0]).'" : ');
                // implode() takes the separator first; the legacy reversed
                // order was removed in PHP 8.0.
                if ($count < 10) fwrite(self::$keyOut, implode(', ', $values));
                else fwrite(self::$keyOut, $key." (".$count.')');
            }
            break;
        }

        // Not found: retry without the last word.
        $i--;
        $pos = strrpos($value, ' ');
        $head = $pos ? substr($value, 0, $pos) : '';
        $cutsName = strrpos($head, '<name>') !== false && !strrpos($head, '</name>');
        if (!$head || $cutsName) {
            // Give up: wrap the whole match and flag it as unresolved.
            $value = $matches[0];
            $after = "";
            if (is_resource(self::$keyOut)) {
                fwrite(self::$keyOut, "\n".self::$keyCount++.' "'.preg_replace($regex, $replace, $matches[0]).'" : ?');
            }
            $key = "?";
            break;
        }
        $after = " ".substr($value, $pos + 1).$after;
        $value = $head;
    }
    $att = "";
    if ($key) $att = ' key="'.$key.'"';
    return '<'.self::$keyEl.$att.'>'.$value.'</'.self::$keyEl.'>'.$after;
}
/**
 * Apply an XSLT stylesheet to an XML string.
 *
 * @param string     $xml      XML source to transform.
 * @param string     $xsl_file Path or URL of the stylesheet.
 * @param array|null $params   Optional stylesheet parameters (name => value),
 *                             set in the empty namespace.
 * @return string|false|null The result of XSLTProcessor::transformToXML();
 *                           false when $xml is empty or not a string, or when
 *                           the stylesheet cannot be loaded.
 */
public static function xsl($xml, $xsl_file, $params=null) {
    // A failed upstream step hands over null/false/''; DOMDocument::loadXML()
    // rejects an empty source (it throws on PHP 8), so stop here instead.
    if (!is_string($xml) || $xml === '') return false;
    $dom = new DOMDocument("1.0", "UTF-8");
    $dom->loadXML($xml);
    $xsl = new DOMDocument();
    // Without a stylesheet there is nothing to apply.
    if (!$xsl->load($xsl_file)) return false;
    $proc = new XSLTProcessor();
    $proc->importStyleSheet($xsl);
    if (is_array($params)) {
        foreach ($params as $key => $value) {
            $proc->setParameter('', $key, $value);
        }
    }
    return $proc->transformToXML($dom);
}
/**
 * Error handler (see set_error_handler()) that appends each message to
 * self::$log, one per line, instead of displaying it.
 *
 * Only $errstr is used. The trailing parameters are optional because
 * PHP 8 no longer passes $errcontext to error handlers; with five required
 * parameters every handled error raised an ArgumentCountError.
 *
 * @param int        $errno      Error level (unused).
 * @param string     $errstr     Error message.
 * @param string     $errfile    File where the error was raised (unused).
 * @param int        $errline    Line where the error was raised (unused).
 * @param array|null $errcontext Symbol table, PHP < 8 only (unused).
 * @return void
 */
public static function error_handler($errno, $errstr, $errfile='', $errline=0, $errcontext=null) {
    self::$log .= $errstr."\n";
}
}
// Dispatcher: runs only when this file is the entry script. When the file is
// merely included, nothing below is executed and only the class is declared.
if (basename($_SERVER['SCRIPT_FILENAME']) == basename(__FILE__)) {
    // The requested format is used as the name of the Odt method to call, so
    // it is restricted to the conversion methods. Otherwise a request could
    // invoke any static method of the class (key, xsl, error_handler...).
    $odtFormats = array('tei', 'corr', 'philo3', 'odtx', 'html', 'actesroyaux', 'ngml');

    if (php_sapi_name() == "cli") {
        // Command line: php -f Odt.php "glob" format?
        array_shift($_SERVER['argv']);
        if (!count($_SERVER['argv'])) exit('
usage : php -f Odt.php src.odt format? dest/?
src.odt : glob patterns are allowed, but in quotes, to not be expanded by shell "folder/*.odt"
format? : optional dest format, default tei, others may be odtx, html, ngml
'
        );
        $glob = array_shift($_SERVER['argv']);
        $format = array_shift($_SERVER['argv']);
        if (!$format) $format = "tei";
        if (!in_array(strtolower($format), $odtFormats, true)) {
            fwrite(STDERR, "Unknown format: $format\n");
            exit(1);
        }
        // The destination extension is the format name, except tei => .xml
        $ext = ".$format";
        if ($ext == '.tei') $ext = ".xml";
        // glob() may return false on error.
        $sources = glob($glob);
        if (!$sources) $sources = array();
        foreach ($sources as $odt) {
            $dest = dirname($odt).'/'.basename($odt, ".odt").$ext;
            print "$odt > $dest\n";
            file_put_contents($dest, call_user_func(array("Odt", $format), $odt));
        }
    }
    else {
        // Web: convert the first uploaded file.
        if (!count($_FILES)) exit;
        $tmp = reset($_FILES);
        if (empty($tmp['tmp_name']) || !is_string($tmp['tmp_name'])) exit;
        $file = $tmp['tmp_name'];

        $format = isset($_REQUEST['format']) ? $_REQUEST['format'] : "tei";
        if (!is_string($format) || !in_array(strtolower($format), $odtFormats, true)) {
            header('HTTP/1.1 400 Bad Request');
            header("Content-Type: text/plain; charset=UTF-8");
            exit('Unknown format');
        }

        if (isset($_REQUEST['download'])) {
            header("Content-Type: text/xml");
            // The file name comes from the client: drop the extension, then
            // neutralise characters that could break the quoted header value.
            $name = isset($tmp['name']) ? (string) $tmp['name'] : '';
            $dot = strrpos($name, '.');
            if ($dot !== false) $name = substr($name, 0, $dot);
            $name = preg_replace('/[\x00-\x1F\x7F"\\\\\/]+/', '_', $name);
            if ($name === '' || $name === null) $name = "odt_tei";
            header('Content-Disposition: attachment; filename="'.$name.'.xml"');
        }
        else if ($format == 'html') {
            header("Content-Type: text/html; charset=UTF-8");
        }
        else {
            header("Content-Type: text/plain; charset=UTF-8");
        }

        $xml = call_user_func(array("Odt", $format), $file);

        // Reload the result to indent it; parse problems are collected in
        // Odt::$log instead of being displayed.
        $dom = new DOMDocument("1.0", "UTF-8");
        $dom->formatOutput = true;
        $dom->preserveWhiteSpace = false;
        $dom->recover = true;
        if (is_string($xml) && $xml !== '') {
            set_error_handler(array('Odt', "error_handler"), E_ALL);
            // LIBXML_NOENT is deliberately not set: it enables entity
            // substitution, which lets XML derived from an uploaded file
            // read local files through external entities (XXE).
            $dom->loadXML($xml, LIBXML_NONET | LIBXML_NSCLEAN | LIBXML_NOCDATA | LIBXML_COMPACT | LIBXML_PARSEHUGE);
            restore_error_handler();
        }
        else {
            // loadXML() rejects an empty source; report it in the log instead.
            Odt::$log .= "Conversion produced no output.\n";
        }
        echo $dom->saveXML();
        if (Odt::$log) {
            echo "<!--\n", Odt::$log, '-->';
        }
    }
}
?>