dwww Home | Show directory contents | Find package

#!/usr/bin/perl

use strict;

use Getopt::Long;

# Package globals shared by all subs of this script.  They are declared with
# `our` (the obsolete `use vars` pragma is no longer needed) so that they stay
# visible under `use strict`.
our (
    $infile, $outfile,          # input / output file names (set by parseopts)
    $code,                      # LaTeX master template with @@@[...] placeholders
    $tmpfile,                   # base name of all temporary files
    $fontencs,                  # comma-separated font encodings passed to autofe
    $commands,                  # extra LaTeX code (array ref, later joined to a string)
    $r2lmode,                   # right-to-left mode: 'none', 'reorder' or 'mark'
    $combine,                   # LaTeX code enabling combining characters (or '')
    $charset,                   # input character set handed to iconv
    $tolerant,                  # true if --tolerant was given
    $tolerant_unknownchar,      # replacement LaTeX code given to --tolerant
    $default_fontencs, $default_charset, $default_r2lmode,
    @options, @packages,        # @packages: values of --package
    $title,                     # document title (defaults to the input file name)
    $cleanup, $view, $escape, $print,
    $perhapsnofile,             # true if --view or --print was given
    $fribidi_cmd,               # name of the fribidi executable
);

$tmpfile = "u2ps-tmp";
$default_fontencs = 'local,X2,LGR,T1';
$default_charset = 'utf-8';
$default_r2lmode = 'mark';
$fribidi_cmd = "fribidi";


# Return the argument unchanged if it is defined, otherwise the empty string.
# The ($) prototype is kept so that the sub can be used like a unary operator
# (e.g. `!define $outfile`).
sub define($) {
    my ($value) = @_;
    return defined $value ? $value : '';
}

# Print help and exit with status 0.
#
# Parameter: the help topic.  An empty string prints the option summary;
# an option name (with or without leading "--") prints the detailed help
# for that option.  Unknown topics print a short notice.
#
# This sub never returns: it always ends with `exit 0`.
sub showhelp($) {
    my $topic = shift;
    # Accept both "--help=r2l" and "--help=--r2l".
    $topic =~ s/^--//;
    if ($topic eq '') {
        print
'Converts an UTF-8 plain text to postscript using LaTeX and the ucs package.
Usage: u2ps [options] [input file] [output file]
Available options:

    --charset      Select input character set
    --combine      Enables combining characters
    --commands     Specify additional LaTeX-code to execute
    --escape       Allow inline LaTeX code
    --fontencs     Specify fontencodings to use
    --fontsize     Specify font size
    --fontstyle    Specify font style
    --options      Unicode options
    --package      Load package
    --print        Print file
    --r2l          Enables right-to-left code
    --tolerant     Accept files with broken input encoding
    --title        Set document title
    --view         View file using gv
    --help         This page, --help=<option> gives detailed help.

Defaults for input and output file are stdin resp. stdout.
';
    } elsif ($topic eq 'escape') {
        print 'Option --escape

Allow execution of inline LaTeX code. The code must be of the form
    ^[{<code>}
where ^[ is U+001B (ESC) and <code> a piece of LaTeX code, properly
nested with respect to braces.

Example: a ^[{\bgroup\Huge}Huge^[{\egroup} word
';
    } elsif ($topic eq 'print') {
        print 'Option --print[=<printer>]

Print output (using lpr). If no output file is explicitly given, do not
create an output file.

If a printer is given, print with lpr -P<printer>.
';
    } elsif ($topic eq 'view') {
        print 'Option --view

View output (using gv). If no output file is explicitly given, do not
create an output file.
';
    } elsif ($topic eq 'options') {
        print 'Option --options=<option>:

These are the Unicode options to be activated. They are passed as
options to the package ucs.

You can specify a priority by saying e.g. cjkjis=123. Options without
specified priority get one less than the preceding option, the first
getting 99 (one less than the priority for the option \'document\').
';
    } elsif ($topic eq 'package') {
        print "Option --package <string>:

Specifies a package to load. The following syntaxes for <string> are
allowed:

    packagename
    {packagename}
    [options]{packagename}

This option can be given several times to load different packages.
";
    } elsif ($topic eq 'title') {
        print "Option --title <string> (default: name of input file):

Specifies a title for the document. It is stored in the structured
comments of the resulting postscript file.

In future, the title may be also used in other contexts like headings
etc.
";
    } elsif ($topic eq 'fontencs') {
        print "Option --fontencs <string> (default: $default_fontencs):".'

The keywords given to this option are directly passed to the package autofe.

This package takes the same options as fontenc, i.e. you have to give a list
of all needed fontencodings, whereby the last should be the fontencoding
used mainly in the document.

If some ASCII characters are rendered incorrectly, try using a font
encoding beginning with T (e.g. T1, T2A, ...) as the last
fontencodingand perhaps additionally the option \'local\'.

You may give the fontencodings as a comma-separated list or by 
multiple calls of --fontencs.

Some usefull fontencodings are:
    T1     Most latin scripts
    T2A    Cyrillic scripts like russian
    T2B    Other cyrillic scripts
    T2C    Other cyrillic scripts
    T3     IPA
    T5     Vietnamese

The following fontencodings should be combined with local and T1
(e.g. local,X2,T1):

    LGR    Greek
    LHE    Hebrew
    TS1    Some symbols
    X2     All supported cyrillic scripts

If you are unsure, try running without any additional fontencodings and
read the hints in the error messages.
';
    } elsif ($topic eq 'commands') {
        print 'Option --commands <string>:

The commands given to --commands will be executed as LaTeX-code before
rendering the input file.

You may call --commands several times.
';
    } elsif ($topic eq 'fontstyle') {
        print 'Option --fontstyle <string>

This specifies the style of the fonts which are used to render the document.
Available arguments are:

    bold        (short: bf)
    italic      (short: it)
    sansserif   (short: sf)
    slanted     (short: sl)
    smallcaps   (short: sc)
    typewriter  (short: tt)

You may call --fontstyle several times to combine styles. No warning
will be uttered, if you choose a disallowed combination like 
italic and slanted.
';
    } elsif ($topic eq 'fontsize') {
        # This branch used to test 'fontstyle' a second time and was
        # therefore unreachable; the size list also said "smallcaps"
        # where the accepted keyword is "small".
        print 'Option --fontsize <string>

This specifies the size of the fonts which are used to render the document.
Available arguments are:

    tiny scriptsize footnotesize small normal large Large LARGE huge Huge

The semantics are as with LaTeX
';
    } elsif ($topic eq 'r2l') {
        print "Option --r2l[=<mode>] (default <mode>: $default_r2lmode):".'

This enables preprocessing the input via the Unicode bidi algorithm to
support right-to-left text. <mode> can be one of:

    reorder    Reorders the characters of each line into visual order
    mark       Inserts LROs/RLOs/PDFs into the text to indicate the directions

To use \'reorder\', the program fribidi must be installed and in the
path. Combining accents work incorrectly with this mode. Line breaks
done by LaTeX may show improper right-to-left behaviour.

If you use \'mark\', you need to have rlmark in your path. Further
e-LaTeX is used instead of LaTeX.

Each line of the input is processed separately.

If this option is not given, right-to-left text will be displayed in
the wrong direction without warning.
';
    } elsif ($topic eq 'combine') {
        print 'Option --combine:

This enables combining characters. You have to use this option, if
your text contains non-precomposed characters, i.e. characters with
accents, which are given as a sequence of code positions.

Warning: Enabling this option will disable kerning and ligatures.

If you are unsure, wether you need this option, try without. You will
get the message
! Package ucs Error: Please activate option \'combine\'.
if this option is needed.
';
    } elsif ($topic eq 'help') {
        print 'Option --help[=<option>]:

Gives a summary of options or, with some option name
(e.g. --help=r2l), an explanation of that option.
';
    } elsif ($topic eq 'tolerant') {
        print 'Option --tolerant[=<replacement>]

Be tolerant with respect to broken input encodings. This passes the
switch -c to iconv(1). Additionally the option warnunknown is passed
to LaTeX, i.e. the document is rendered even if unknown characters
appear.

If <replacement> is given, this LaTeX-code is executed in place of
unknown characters. #1 gets replaced by the code position of the
character (as a TeX-number, prefix by \number if you want to render it).
';
    } elsif ($topic eq 'charset') {
        # The first line must be double-quoted so that the default
        # character set is interpolated instead of printed literally.
        print "Option --charset=<string> (default: $default_charset):".'

Selects the character set of the input. Everything accepted by
iconv(1) can be specified. Run iconv -l for a list.
';
    } else {
        print "No help provided for given topic\n";
    }
    exit 0;
}

# Parse the command line (options via Getopt::Long, then up to two file
# names from @ARGV) and store the result in the script's global settings:
# $view, $print, $perhapsnofile, $infile, $outfile, $fontencs, $commands,
# $tolerant, $tolerant_unknownchar, $r2lmode, $combine, $charset, @packages,
# $title, $cleanup and $escape.
#
# Dies on bad options, too many file arguments, an unreadable input file,
# an unknown font size/style or an unknown right-to-left mode.
# Calls showhelp() (which exits) if --help was given.
sub parseopts() {
    my %opt;
    Getopt::Long::Configure(qw/bundling no_ignore_case/);
    unless (GetOptions(\%opt,qw/
                       fontencs=s@
                       title=s
                       commands=s@
                       fontstyle=s@
                       fontsize=s
                       r2l:s
                       combine
                       help|h:s
                       tolerant:s
                       charset=s
                       options=s@
                       package=s@
                       cleanup!
                       view!
                       print:s
                       escape!
                       /)) {
        die "Bad command line options";
    }
    $view = $opt{view};
    $print = $opt{print};
    # With --view or --print the output file is optional.
    $perhapsnofile = $view || defined $print;
    showhelp($opt{help}) if defined $opt{help};
    if ($#ARGV >= 2) {
        die "Give input and output file, not more";
    }
    $infile = $ARGV[0];
    $outfile = $ARGV[1];
    $outfile = "$tmpfile.ps" if ($perhapsnofile && !define $outfile);
    if (defined $infile && !defined $outfile) {
        # Derive the output name from the input name: replace the last
        # extension (if any) by ".ps".
        $outfile = $infile;
        $outfile =~ s/\.[^.]+$//;
        $outfile .= ".ps";
    } elsif (!defined $infile) {
        $infile = "/dev/stdin";
        $outfile = "/dev/stdout" if !defined $outfile;
    }
    if (!-r $infile) {
        die "Input file $infile is not readable";
    }
    $fontencs = join ',', @{$opt{fontencs}} if defined $opt{fontencs};
    $fontencs = $default_fontencs unless defined $fontencs;
    $commands = $opt{commands};
    $commands = [] unless defined $commands;

    my %fontsizes =
        ('large' => '\large',
         'Large' => '\Large',
         'LARGE' => '\LARGE',      # listed in --help=fontsize
         'tiny' => '\tiny',
         'scriptsize' => '\scriptsize',
         'footnotesize' => '\footnotesize',
         'normal' => '',
         'normalfont' => '',
         'small' => '\small',
         'huge' => '\huge',
         'Huge' => '\Huge');
    my %fontstyles =
        ('typewriter' => '\ttfamily',
         'tt' => '\ttfamily',
         'bold' => '\bfseries',
         'bf' => '\bfseries',
         'italic' => '\itshape',
         'it' => '\itshape',
         'slanted' => '\slshape',
         'sl' => '\slshape',
         'sansserif' => '\sffamily',
         'sf' => '\sffamily',
         'smallcaps' => '\scshape',
         'sc' => '\scshape',
         );
    if (defined $opt{fontsize}) {
        die "Unknown fontsize '$opt{fontsize}'"
            unless exists $fontsizes{$opt{fontsize}};
        push @$commands, $fontsizes{$opt{fontsize}};
    }
    for my $fs (@{ $opt{fontstyle} || [] }) {
        die "Unknown fontstyle '$fs'"
            unless exists $fontstyles{$fs};
        push @$commands, $fontstyles{$fs};
    }
    $tolerant = 1 if defined $opt{tolerant};
    $tolerant_unknownchar = $opt{tolerant} if define($opt{tolerant}) ne '';
    if ($tolerant_unknownchar) {
        push @$commands, '\def\unicode@invalidglyph#1{'.
            $tolerant_unknownchar.'}';
    }
    # NOTE(review): $default_r2lmode is 'mark' and --help=r2l prints it as
    # the default mode, yet a bare --r2l selects 'reorder' here.  Behaviour
    # is left unchanged; confirm which of the two is intended.
    $r2lmode = 'reorder' if defined $opt{r2l} && $opt{r2l} eq '';
    $r2lmode = $opt{r2l} unless defined $r2lmode;
    $r2lmode = 'none' unless defined $r2lmode;
    # Reject unknown modes now instead of failing later with an
    # "Internal error" after temporary files have been written.
    die "Unknown r2l mode '$r2lmode'"
        unless $r2lmode =~ /^(?:none|reorder|mark)$/;
    $combine = '';
    $combine = '\SetUnicodeOption{combine}' if $opt{combine};
    $charset = $default_charset;
    $charset = $opt{charset} if defined $opt{charset};
    my $options = $opt{options};
    $options = [] unless defined $options;
    push @$options, 'warnunknown' if defined $tolerant;
    $options = join ',',@$options;
    # Options without explicit priority get one less than the preceding
    # option, starting at 99.
    my $oprio = 99;
    for my $o (split ',', $options) {
        # Skip empty entries (e.g. from "a,,b"); they would otherwise
        # produce \SetUnicodeOption with an undefined option name.
        next if $o eq '';
        my ($on,$prio) = ($o =~ /^(.+?)(?:=([0-9]+))?$/);
        $prio = $oprio unless defined $prio;
        push @$commands, "\\SetUnicodeOption[$prio]{$on}";
        $oprio = $prio-1;
    }
    @packages = @{$opt{package}} if $opt{package};
    $title = $infile;
    $title = $opt{title} if defined $opt{title};
    # From here on $commands is a single string of LaTeX code.
    $commands = join "\n", @$commands;
    $cleanup = $opt{cleanup};
    $cleanup = 1 unless defined $cleanup;
    $escape = $opt{escape};
}

# LaTeX master document used to render the input text.
#
# The @@@[...] markers are placeholders that makecode() substitutes before
# the document is written to "$tmpfile.tex":
#   @@@[FONTENCS]  value of $fontencs (options of the autofe package)
#   @@@[PACKAGES]  one \usepackage line per entry of @packages
#   @@@[COMBINE]   value of $combine
#   @@@[FILE]      "$tmpfile.txt", the converted input text
#   @@@[SWITCHES]  '\escapetrue' if $escape is set, otherwise empty
#   @@@[CMDS]      value of $commands
#
# The template is a single-quoted Perl string, so backslashes are literal
# except that each "\\" pair stands for one backslash.
$code = '
\documentclass{article}
\usepackage{ucs}
\usepackage[utf8x]{inputenc}
\let\oldgimel\gimel
\usepackage[@@@[FONTENCS]]{autofe}
\let\gimel\oldgimel
%\usepackage{verbatim}
@@@[PACKAGES]
\usepackage{geometry}

\nofiles

\makeatletter

\toks255\expandafter{\try@load@fontshape}
\edef\try@load@fontshape{\begingroup\noexpand\uni@resetcatcodes\the\toks255\endgroup}

\newdimen\parfirst
\newdimen\parsecond
\newif\ifescape

\def\setchar#1#2{%
  \catcode\count255=13\relax
  \begingroup
    \uccode`\~#1 %SPACE
    \uppercase{%
  \endgroup
    \edef~}%
  {#2}}

\def\escape{\unicodecombine\begingroup\uni@resetcatcodes\@escape}
\def\@escape#1{\endgroup#1}

\def\loadfile{%
  \bgroup
  \count255=1
  \loop\ifnum\count255<128\relax
    \setchar{\count255}{\noexpand\unichar{\number\count255}}
    \advance\count255by1\relax
  \repeat
  \setchar{13}{\noexpand\unicodecombine
    \noexpand\noindent
    \noexpand\null
    \noexpand\\\\%
    \noexpand\leavevmode}
  \setchar{32}{\noexpand\unicodecombine\noexpand\unichar{32}}
  \setchar{12}{\noexpand\unicodecombine\noexpand\newpage}
  \setchar{27}{\noexpand\ifescape\noexpand\expandafter\noexpand\escape\noexpand\else\noexpand\unichar{27}\noexpand\fi}
  @@@[COMBINE]%
  \input{@@@[FILE]}%
  \unicodecombine
  \egroup
}

\def\parindentstuff{%
  \let\temp@a\undefined
  \ifnum\parfirst=0\else\let\temp@a1\fi
  \ifnum\parsecond=0\else\let\temp@a1\fi
  \ifx\temp@a1%
    \parindent=\parfirst\advance\parindent by-\parsecond
    \everypar{%
      \@tempdima\linewidth\advance\@tempdima by-\parsecond
      \parshape\@ne\parsecond\@tempdima}
  \fi
}

\DeclareUnicodeCharacter{9}{\space\space\space\space\space\space\space\space}

\begin{document}

\frenchspacing
\parindent=0pt
\thispagestyle{empty}
\raggedright
@@@[SWITCHES]
@@@[CMDS]
\parindentstuff
\loadfile

\end{document}
';

# Fill in the placeholders of the LaTeX template in $code and write the
# result to "$tmpfile.tex".
#
# Reads the globals $code, $tmpfile, $fontencs, $commands, $combine,
# @packages and $escape.  Dies if the file cannot be opened, written or
# closed.
sub makecode() {
    my $tex = $code;
    $tex =~ s/\Q@@@[FILE]/$tmpfile.txt/;
    $tex =~ s/\Q@@@[FONTENCS]/$fontencs/;
    $tex =~ s/\Q@@@[CMDS]/$commands/;
    $tex =~ s/\Q@@@[COMBINE]/$combine/;
    # Each --package value may be "name", "{name}" or "[options]{name}".
    my $packages = join "\n", map {
        my ($opts,$pkg) = /^(?:\[(.*?)\])?\{?(.*?)\}?$/;
        $opts = defined $opts ? "[$opts]" : '';
        "\\usepackage${opts}"."{$pkg}";
    } @packages;
    $tex =~ s/\Q@@@[PACKAGES]/$packages/;
    my $switches = '';
    $switches .= '\escapetrue' if $escape;
    $tex =~ s/\Q@@@[SWITCHES]/$switches/;
    # Three-argument open with a lexical handle: the file name can never be
    # misread as an open mode, and the handle cannot leak.
    open my $master, '>', "$tmpfile.tex" or
        die "Could not open $tmpfile.tex for writing: $!";
    print {$master} $tex or
        die "Could not write to $tmpfile.tex";
    # Buffered write errors only surface at close time.
    close $master or
        die "Could not write to $tmpfile.tex: $!";
}

# Run "$fribidi_cmd <option>" with empty input and all output discarded.
#
# Parameter: a string of command line options to try.
# Returns true if the command exited with status 0, false otherwise.
# Dies if the command could not be started at all.
sub _fribidi_call($) {
    my $option = shift;
    # Use the portable ">/dev/null 2>&1".  The bash-only "&>/dev/null" is
    # parsed by a POSIX sh as "run in background, then redirect nothing",
    # which makes every probe look successful.
    my $rc = system("$fribidi_cmd $option </dev/null >/dev/null 2>&1");
    die "$fribidi_cmd could not be started: $!" if $rc == -1;
    # Any non-zero wait status (exit code or signal) counts as failure.
    return $rc == 0;
}

# Build the fribidi command line used for the 'reorder' right-to-left mode.
#
# First tries all wanted options at once in "--option" form.  If that fails,
# probes whether the single-dash form is accepted, then tests every option
# separately, warns about and drops the unsupported ones, and rebuilds the
# option string from the remaining ones.
#
# Returns the command as a string: "$fribidi_cmd <options>".
sub fribidi_call() {
    # A hash-ref value marks a flag without argument.
    my %opts = (charset => 'UTF-8',
                nopad => {},
                nobreak => {},
                clean => {},
                swapnsm => {},
                );
    # Format one option with the given prefix ("-" or "--").
    my $format = sub {
        my ($prefix, $name) = @_;
        return $prefix.$name.((ref $opts{$name}) ? '' : " $opts{$name}");
    };
    # Keys are sorted so that the generated command line is deterministic.
    my $opts = join '', map { ' '.$format->('--', $_) } sort keys %opts;
    unless (_fribidi_call($opts)) {
        my $prefix = '--';
        _fribidi_call('-charset UTF-8') and $prefix = '-';
        for my $name (sort keys %opts) {
            unless (_fribidi_call($format->($prefix, $name))) {
                warn "WARNING: $fribidi_cmd does not support option $prefix$name";
                delete $opts{$name};
            }
        }
        # Rebuild with the same prefix the options were probed with;
        # previously "--" was used here unconditionally.
        $opts = join '', map { ' '.$format->($prefix, $_) } sort keys %opts;
    }
    my $cmd = "$fribidi_cmd $opts";
    return $cmd;
}

# Copy the input file to "$tmpfile.txt" through a shell pipeline that
# optionally converts the character set (iconv) and applies the selected
# right-to-left preprocessing (fribidi or rlmark).
#
# Reads the globals $infile, $charset, $tolerant, $r2lmode and $tmpfile.
# Dies if the pipeline fails or if $r2lmode has an unknown value.
sub copyfile() {
    my @stages = ("cat <''\Q$infile\E");

    # Character set conversion; with --tolerant iconv also runs for UTF-8
    # input so that invalid sequences get dropped.
    if ($tolerant || $charset ne 'utf-8') {
        my $flag = $tolerant ? '-c' : '';
        push @stages, "iconv $flag -f ''\Q$charset\E -t utf-8";
    }

    # Right-to-left preprocessing.
    if ($r2lmode eq 'reorder') {
        push @stages, fribidi_call();
    } elsif ($r2lmode eq 'mark') {
        push @stages, "rlmark";
    } elsif ($r2lmode ne 'none') {
        die "Internal error: Unknown R2L mode '$r2lmode'";
    }

    my $pipeline = join(' | ', @stages) . " >\Q$tmpfile.txt\E";
    system($pipeline) and
        die "Could not copy/convert file using\n$pipeline";
}

# Produce the PostScript output: write the LaTeX master file, convert the
# input text, run (e)latex on "$tmpfile.tex" and pipe the dvips output
# through sed (which patches the %%Title and %%Creator comments) into
# $outfile.
#
# Uses elatex instead of latex when $r2lmode is 'mark'.
# Dies if latex or the dvips pipeline reports an error.
sub latex() {
    makecode();
    copyfile();
    my $latex = ($r2lmode eq 'mark') ? 'elatex' : 'latex';
    system("$latex -interaction nonstopmode ''\Q$tmpfile.tex\E >/dev/stderr")
        and die "$latex returned an error";

    # Escape the title: double backslashes and turn everything outside
    # printable ASCII / Latin-1 into octal escapes.
    my $t = $title;
    $t =~ s/\\/\\\\/g;
    $t =~ s/([^\x20-\x7e\xa0-\xff])/sprintf "\\%03o", ord($1)/ge;

    # quotemeta protects the characters special to sed, but the title is
    # embedded in a single-quoted shell word where a backslash cannot
    # escape a quote.  Replace each (escaped) quote by '\'' so a title such
    # as "it's.txt" can neither break the command nor inject shell code.
    my $sed_title = quotemeta $t;
    $sed_title =~ s/\\'/'\\''/g;

    system("dvips -o /dev/stdout ''\Q$tmpfile.dvi\E | ".
           "sed -e 's/^%%Title: .*\\.dvi\$/%%Title: $sed_title/' ".
           "    -e 's/^%%Creator: /%%Creator: u2ps and /' ".
           ">''\Q$outfile\E") and
        die "dvips returned an error";
}

# Show $outfile with gv.  Dies if gv cannot be run or exits with an error.
sub view() {
    # List-form system() bypasses the shell, so the file name needs no
    # quoting at all (backslash-escaping is not safe for every character,
    # e.g. an escaped newline is removed by the shell).
    my @cmd = ('gv', '-antialias', '-watch', $outfile);
    system(@cmd) and
        die "Could not run '@cmd'";
}

# Print $outfile with lpr, using $title as the job name.  If $print holds a
# non-empty printer name it is passed with -P.  Dies if lpr cannot be run
# or exits with an error.
sub printfile() {
    my @cmd = ('lpr');
    # The printer name used to be interpolated unquoted into a shell
    # command; as a separate list element it is passed verbatim.
    push @cmd, '-P', $print unless $print eq '';
    push @cmd, '-J', $title, $outfile;
    system(@cmd) and
        die "Could not run '@cmd'";
}

# Remove the temporary files "$tmpfile.dvi", ".log", ".tex" and ".txt",
# plus "$tmpfile.ps" when that file is the (temporary) output file.
#
# Files that do not exist are skipped.  Dies if an existing file cannot be
# removed.
sub cleanup() {
    my @suffixes = qw/dvi log tex txt/;
    push @suffixes, 'ps' if $outfile eq "$tmpfile.ps";
    # unlink() instead of "rm $tmpfile.{...}": brace expansion is a bash
    # extension and is not performed by a POSIX /bin/sh.
    for my $suffix (@suffixes) {
        my $file = "$tmpfile.$suffix";
        next unless -e $file || -l $file;
        unlink $file or
            die "Could not clean up $file: $!";
    }
}

# Program entry point: parse the command line, build the PostScript file,
# then optionally view it, print it and remove the temporary files.
sub main() {
    parseopts();
    latex();
    view() if $view;
    printfile() if defined $print;
    cleanup() if $cleanup;
}

# Plain call syntax; "&main;" would bypass the prototype and pass along
# the caller's @_.
main();

Generated by dwww version 1.15 on Thu May 23 06:40:53 CEST 2024.