Man Linux: Main Page and Category List

NAME

       pavuk  -  HTTP,  HTTP  over SSL, FTP, FTP over SSL and Gopher recursive
       document retrieval program

SYNOPSIS

       pavuk [-mode {normal | resumeregets | singlepage | singlereget | sync |
       dontstore | ftpdir | mirror}] [-X] [-runX] [-bg/-nobg] [-prefs/-noprefs]
       [-h]  [-v]  [-progress/-noprogress]  [-stime/-nostime]  [-xmaxlog  $nr]
       [-logfile  $file]  [-slogfile  $file] [-auth_file $file] [-msgcat $dir]
       [-language      $str]      [-gui_font      $font]     [-quiet/-verbose]
       [-read_css/-noread_css]  [-cdir  $dir]  [-scndir $dir] [-scenario $str]
       [-dumpscn  $filename]  [-lmax  $nr]  [-dmax  $nr]  [-leave_level   $nr]
       [-maxsize  $nr]  [-minsize $nr] [-asite $list] [-dsite $list] [-adomain
       $list] [-ddomain $list] [-asfx $list] [-dsfx  $list]  [-aprefix  $list]
       [-dprefix  $list]  [-amimet $list]  [-dmimet $list] [-pattern $pattern]
       [-url_pattern $pattern]  [-rpattern  $regexp]  [-url_rpattern  $regexp]
       [-skip_pattern  $pattern]  [-skip_url_pattern $pattern] [-skip_rpattern
       $regexp] [-skip_url_rpattern $regexp] [-newer_than $time]  [-older_than
       $time]         [-schedule        $time]        [-reschedule        $nr]
       [-dont_leave_site/-leave_site]             [-dont_leave_dir/-leave_dir]
       [-http_proxy   $site[:$port]]  [-ftp_proxy  $site[:$port]]  [-ssl_proxy
       $site[:$port]]              [-gopher_proxy               $site[:$port]]
       [-ftp_httpgw/-noftp_httpgw]         [-ftp_dirtyproxy/-noftp_dirtyproxy]
       [-gopher_httpgw/-nogopher_httpgw]     [-noFTP/-FTP]     [-noHTTP/-HTTP]
       [-noSSL/-SSL]   [-noGopher/-Gopher]  [-FTPdir/-noFTPdir]  [-noCGI/-CGI]
       [-FTPlist/-noFTPlist]   [-FTPhtml/-noFTPhtml]   [-noRelocate/-Relocate]
       [-force_reget/-noforce_reget]                         [-nocache/-cache]
       [-check_size/-nocheck_size]      [-noRobots/-Robots]      [-noEnc/-Enc]
       [-auth_name      $user]      [-auth_passwd     $pass]     [-auth_scheme
       1/2/3/4/user/Basic/Digest/NTLM]
       [-auth_reuse_nonce/-no_auth_reuse_nonce]    [-http_proxy_user    $user]
       [-http_proxy_pass               $pass]                [-http_proxy_auth
       1/2/3/4/user/Basic/Digest/NTLM]
       [-auth_reuse_proxy_nonce/-no_auth_reuse_proxy_nonce]     [-ssl_key_file
       $file]  [-ssl_cert_file  $file] [-ssl_cert_passwd $pass] [-from $email]
       [-send_from/-nosend_from]               [-identity                $str]
       [-auto_referer/-noauto_referer]  [-referer/-noreferer]  [-alang  $list]
       [-acharset $list] [-retry $nr] [-nregets $nr] [-nredirs $nr] [-rollback
       $nr]  [-sleep  $nr]  [-timeout  $nr]  [-preserve_time/-nopreserve_time]
       [-preserve_perm/-nopreserve_perm] [-preserve_slinks/-nopreserve_slinks]
       [-bufsize  $nr]  [-maxrate  $nr]  [-minrate $nr] [-user_condition $str]
       [-cookie_file           $file]            [-cookie_send/-nocookie_send]
       [-cookie_recv/-nocookie_recv]         [-cookie_update/-nocookie_update]
       [-cookies_max $nr] [-disabled_cookie_domains $list]  [-disable_html_tag
       $TAG,[$ATTRIB][;...]]      [-enable_html_tag      $TAG,[$ATTRIB][;...]]
       [-tr_del_chr $str]  [-tr_str_str  $str1  $str2]  [-tr_chr_chr  $chrset1
       $chrset2] [-index_name $str] [-store_index/-nostore_index] [-store_name
       $str]   [-debug/-nodebug]   [-debug_level   $level]   [-browser   $str]
       [-urls_file $file] [-file_quota $nr] [-trans_quota $nr] [-fs_quota $nr]
       [-enable_js/-disable_js]       [-fnrules        $t        $m        $r]
       [-store_info/-nostore_info]             [-all_to_local/-noall_to_local]
       [-sel_to_local/-nosel_to_local]       [-all_to_remote/-noall_to_remote]
       [-url_strategie  $strategie]  [-remove_adv/-noremove_adv] [-adv_re $RE]
       [-check_bg/-nocheck_bg]  [-send_if_range/-nosend_if_range]  [-sched_cmd
       $str]  [-unique_log/-nounique_log]  [-post_cmd  $str] [-ssl_version $v]
       [-unique_sslid/-nounique_sslid] [-aip_pattern $re]  [-dip_pattern  $re]
       [-use_http11/-nouse_http11]    [-local_ip    $addr]   [-request   $req]
       [-formdata $req] [-httpad  $str]  [-nthreads  $nr]  [-immesg/-noimmesg]
       [-dumpfd    $nr]    [-dump_urlfd   $nr]   [-unique_name/-nounique_name]
       [-leave_site_enter_dir/-dont_leave_site_enter_dir]   [-max_time    $nr]
       [-del_after/-nodel_after]                   [-singlepage/-nosinglepage]
       [-dump_after/-nodump_after]           [-dump_response/-nodump_response]
       [-auth_ntlm_domain  $str]  [-auth_proxy_ntlm_domain  $str] [-js_pattern
       $re]   [-follow_cmd    $str]    [-retrieve_symlink/-noretrieve_symlink]
       [-js_transform   $p   $t   $h   $a]   [-js_transform2   $p  $t  $h  $a]
       [-ftp_proxy_user         $str]          [-ftp_proxy_pass          $str]
       [-limit_inlines/-dont_limit_inlines]      [-ftp_list_options      $str]
       [-fix_wuftpd_list/-nofix_wuftpd_list]     [-post_update/-nopost_update]
       [-info_dir  $dir]  [-mozcache_dir  $dir]  [-aport $list] [-dport $list]
       [-hack_add_index/-nohack_add_index]       [-default_prefix        $str]
       [-rsleep/-norsleep]     [-ftp_login_handshake     $host     $handshake]
       [-js_script_file      $file]       [-dont_touch_url_pattern       $pat]
       [-dont_touch_url_rpattern    $pat]    [-dont_touch_tag_rpattern   $pat]
       [-tag_pattern $tag $attrib  $url]  [-tag_rpattern  $tag  $attrib  $url]
       [-nss_cert_dir                                                    $dir]
       [-nss_accept_unknown_cert/-nonss_accept_unknown_cert]
       [-nss_domestic_policy/-nss_export_policy]    [-[no]verify]   [-tlogfile
       $file] [-trelative {object | program}] [-transparent_proxy FQDN[:port]]
       [-transparent_ssl_proxy FQDN[:port]] [-sdemo] [-noencode] [URLs]

       pavuk -mode {normal | singlepage | singlereget} [-base_level $nr]

       pavuk      -mode      sync      [-ddays     $nr]     [-subdir     $dir]
       [-remove_old/-noremove_old]

       pavuk -mode resumeregets [-subdir $dir]

       pavuk -mode linkupdate [-X]  [-h]  [-v]  [-cdir  $dir]  [-subdir  $dir]
       [-scndir $dir] [-scenario $str]

       pavuk -mode reminder [-remind_cmd $str]

       pavuk   -mode   mirror   [-subdir   $dir]   [-remove_old/-noremove_old]
       [-remove_before_store/-noremove_before_store]
       [-always_mdtm/-noalways_mdtm]

DESCRIPTION

       This  manual  page  describes  how  to  use pavuk. Pavuk can be used to
       mirror contents of internet/intranet servers and to maintain copies  in
       a local tree of documents.  Pavuk stores retrieved documents in locally
       mapped disk space. The structure of the local tree is the same  as  the
       one on the remote server. Each supported service (protocol) has its own
       subdirectory in the local tree.  Each referenced  server  has  its  own
       subdirectory in these protocol subdirectories; its name is the host name
       followed by the port number on which the service resides, delimited by
       the _ character (for example host_port).  This layout can be changed:
       with the option -fnrules you can change the default layout of the local
       document tree, without losing link consistency.
       With pavuk it is possible to have up-to-date copies of remote documents
       in the local disk space.
       As   of   version   0.3pl2,  pavuk  can  automatically  restart  broken
       connections, and reget partial content from an FTP server  (which  must
       support  the REST command), from a properly configured HTTP/1.1 server,
       or from a HTTP/1.0 server which supports Ranges.
       As of version 0.6 it is possible to handle configurations via so called
       scenarios.   The best way to create such a configuration file is to use
       the X Window interface and simply save the created  configuration.  The
       other way is to use the -dumpscn switch.
       As of version 0.7pl1 it is possible to store authentication information
       in an authinfo file, which pavuk can then parse and use.
       As  of  version  0.8pl4  pavuk  can  fetch documents for use in a local
       proxy/cache server without storing them to local documents tree.
       As of version 0.9pl4 pavuk supports SOCKS (4/5) proxies if you have the
       required libraries.
       As  of  version  0.9pl12 pavuk can preserve permissions of remote files
       and symbolic links, so it can be used for powerful FTP mirroring.
       Pavuk supports SSL connections to FTP servers, if you  specify  ftps://
       URL instead of ftp://.
       Pavuk can automatically handle file names containing characters that are
       unsafe for the filesystem.  This is currently implemented only for  the
       Win32 platform and is hard coded.
       Pavuk  can  now  use  the HTTP/1.1 protocol for communication with HTTP
       servers.  It can use persistent connections, so one TCP connection  can
       be used to transfer several documents without closing it.  This feature
       saves network bandwidth and also speeds up network communication.
       Pavuk can do configurable POST requests to HTTP servers and also
       supports file uploading via HTTP POST requests.
       Pavuk can automatically fill in HTML forms it finds, if the user supplies
       data for their fields beforehand with the option -formdata.
       Pavuk can run a configurable number of concurrent downloading threads
       when compiled with multithreading support.

Format of supported URLs

       HTTP
       http://[[user][:password]@]host[:port][/document]
       [[user][:password]@]host[:port][/document]

       HTTPS
       https://[[user][:password]@]host[:port][/document]
       ssl[.domain][:port][/document]

       FTP
       ftp://[[user][:password]@]host[:port][/relative_path][;type=x]
       ftp://[[user][:password]@]host[:port][//absolute_path][;type=x]
       ftp[.domain][:port][/document][;type=x]

       FTPS
       ftps://[[user][:password]@]host[:port][/relative_path][;type=x]
       ftps://[[user][:password]@]host[:port][//absolute_path][;type=x]
       ftps[.domain][:port][/document][;type=x]

       Gopher
       gopher://host[:port][/type[document]]
       gopher[.domain][:port][/type[document]]

Default mapping of URLs to local filenames

       HTTP
       http://[[user][:password]@]host[:port][/document][?query]
       to
       http/host_port/[document][?query]

       HTTPS
       https://[[user][:password]@]host[:port][/document][?query]
       to
       https/host_port/[document][?query]

       FTP
       ftp://[[user][:password]@]host[:port][/path]
       to
       ftp/host_port/[path]

       FTPS
       ftps://[[user][:password]@]host[:port][/path]
       to
       ftps/host_port/[path]

       Gopher
       gopher://host[:port][/type[document]]
       to
       gopher/host_port/[type[document]]

       NOTE: Pavuk will use the string with which it queries the target server
       as  the  name  of  the results file. This file name may, in some cases,
       contain punctuations such as $,?,=,& etc. Such  punctuation  can  cause
       problems  when  you  are  trying  to  browse downloaded files with your
       browser or you are  trying  to  process  downloaded  files  with  shell
       scripts  or  view  files with file management utilities which reference
       the name of the results file.  If you believe that this may be  causing
       problems  for  you, then you can remove all punctuation from the result
       file name with the option: -tr_del_chr [:punct:] or with other  options
       for adjusting filenames.

OPTIONS

        All options are case insensitive.

List of options chapters

       Mode
       Help
       Indicate/Logging/Interface options
       Netli options
       Special start
       Scenario/Task options
       Directory options
       Preserve options
       Proxy options
       Proxy Authentification
       Protocol/Download Option
       Authentification
       Site/Domain/Port Limitation Options
       Limitation Document properties
       Limitation Document name
       Limitation Protocol Option
       Other Limitation Options
       Javascript support
       Cookie
       HTML rewriting engine tuning options
       Filename/URL Conversion Option
       Other Options

Mode

       -mode {normal, linkupdate, sync, singlepage, singlereget, resumeregets,
       dontstore, reminder, ftpdir, mirror}
              Set operation mode.
              normal - retrieves recursive documents
              linkupdate - update remote URLs in local HTML documents to local
              URLs if these URLs exist in the local tree
              sync - synchronize remote documents with local tree (if a  local
              copy  of  a  document  is  older  than  remote,  the document is
              retrieved again, otherwise nothing happens)
              singlepage - URL is retrieved as one page with all  its  inline
              objects  (picture,  sound  ...).  This mode is now obsoleted by
              the -singlepage option.
              resumeregets - pavuk scans the local tree for  files  that  were
              not  retrieved  fully and retrieves them again (uses partial get
              if possible)
              singlereget - get URL until it is retrieved in full
              dontstore - transfer page from server, but don’t store it to the
              local  tree.   This mode is suitable for fetching pages that are
              held in a local proxy/cache server.
              reminder - used to inform the user about changed documents
              ftpdir - used to list the contents of FTP directories

              default operation mode is normal mode.

Help

       -h     Print long verbose help message

       -v     Show version information and configuration at compilation time.

Indicate/Logging/Interface options

       -quiet Don’t show any messages on the screen.

       -verbose
              Force to show output messages on the screen (default)

       -progress/-noprogress
              Show  retrieving progress while running in the terminal (default
              is progress off)

       -stime/-nostime
              Show the start and end time of the transfer. (by  default  this
              information isn’t shown)

       -xmaxlog $nr
              Maximum  number  of  log  lines  in  the  Log  widget.  0  means
              unlimited.  This option is available only when compiled with the
              GTK+ GUI. (default value is 0)

       -logfile $file
              File where all produced messages are stored.

       -unique_log/-nounique_log
              When  logfile  as  specified with the option -logfile is already
              used by another process, try to generate new unique name for the
              log file. (default is this option turned off)

       -slogfile $file
              File  to  store  short  logs  in. This file contains one line of
              information per processed document.  This is meant  to  be  used
              in   connection   with  any  sort  of  script  to  produce  some
              statistics,  for  validating  links  on  your  website,  or  for
              generating  simple  sitemaps.   Multiple pavuk processes can use
              this file concurrently, without overwriting each other’s entries.
              Record structure:

              - PID of pavuk process
              - TIME current time
              - COUNTER in the format current/total number of URLs
              - STATUS contains the type of the error: FATAL, ERR,
                WARN or OK
              - ERRCODE is the number code of the error
                (see errcode.h in pavuk sources)
              - URL of the document
              - PARENTURL first parent document of this URL
                (when it doesn’t have parent - [none])
              - FILENAME is the name of the local file the
                document is saved under
              - SIZE size of requested document if known
              - DOWNLOAD_TIME time taken to download this
                document in the format seconds.milliseconds
              - HTTPRESP contains the first line of the HTTP server
                response

       -language $str
              Native language that pavuk should use for communication with its
              user (works only when  there  is  a  message  catalog  for  that
              language) GNU gettext support (for message internationalization)
              must also be compiled in. Default language is  taken  from  your
              NLS environment variables.

       -gui_font $font
              Font  used  in  the GUI interface. To list available X fonts use
              the xlsfonts  command.   This  option  is  available  only  when
              compiled with GTK+ GUI support.

Netli options

       -[no]read_css
              Enable or disable fetching objects mentioned in style sheets.

       -[no]verify
              Enable or disable verifying server CERTS in SSL mode.

       -tlogfile $file
              Turn on Netli logging with output to specified file.

       -trelative {object | program}
              Make  Netli timings relative to the start of the first object or
              the program.

       -transparent_proxy FQDN[:port]
              When processing URL, send the original, but send it  to  the  IP
              address at FQDN

       -transparent_ssl_proxy FQDN[:port]
              When processing HTTPS URL, send the original, but send it to the
              IP address at FQDN

       -sdemo Output in sdemo compatible format. This is only used  by  sdemo.
              (For  now  it  simply  means  output  ’-1’ rather than ’*’  when
              measurements are invalid.)

       -noencode
              Do not escape characters that are "unsafe" in URLS.

Special start

       -X     Start program with X Window interface (if compiled with  support
              for  GTK+).  By default pavuk starts without the GUI and behaves
              as a regular command-line tool.

       -runX  When used together with the -X option, pavuk  starts  processing
              of  URLs  immediately  after the GUI window is launched. Without
              the -X option given, this option doesn’t have any  effect.  Only
              available when compiled with GTK+ support.

       -bg/-nobg
              This  option allows pavuk to detach from its terminal and run in
              background mode.  Pavuk will then not output any messages to the
              terminal.  If you want to see messages, you  have  to  use  the
              -logfile option to specify a file where messages will be written.
              By default pavuk executes in the foreground.

       -check_bg/-nocheck_bg
              Normally,  programs  sent into the background after being run in
              foreground continue to output messages to the terminal.  If this
              option is activated, pavuk checks if it is running as background
              job and will not write any messages  to  the  terminal  in  this
              case.  After  it  becomes  a foreground job again, it will start
              writing messages to terminal in the normal way.  This option  is
              available  only when your system supports retrieving of terminal
              info via tc*() functions.

       -prefs/-noprefs
              When you turn this option on, pavuk will preserve  all  settings
              when  exiting,  and when you run pavuk with GUI interface again,
              all settings will be restored.  The settings will be  stored  in
              the ~/.pavuk_prefs file.  By default pavuk won’t restore its
              options when started.  This option is available only when compiled
              with GTK+.

       -schedule $time
              Execute  pavuk at the time specified as parameter. The Format of
              the $time parameter is YYYY.MM.DD.hh.mm.  You  need  a  properly
              configured  scheduling  with  the  at command on your system for
              using this option.   If  default  configuration  (at  -f  %f  %t
              %d.%m.%Y)  of  scheduling command won’t work on your system, try
              to adjust it with -sched_cmd option.

       -reschedule $nr
              Execute pavuk periodically with  $nr  hours  period.   You  need
              properly  configured  scheduling  with  the  at  command on your
              system for using this option.

       -sched_cmd $str
              Command  to  use  for  scheduling.  Pavuk  explicitly   supports
              scheduling  with at.  $str should contain regular characters and
              macros, escaped by the % character.  Supported macros are:
                 %f
                  - for script filename
                 %t
                  - for time (in format HH:MM)
                  - all macros as supported by the strftime() function

       -urls_file $file
              If you use this option, pavuk will read URLs from  $file  before
              it  starts  processing.  In this file, each URL needs to be on a
              separate line. After the last URL, a single dot . followed by  a
              LF  (line-feed)  character  denotes  the  end.  Pavuk will start
              processing right after all URLs have been  read.   If  $file  is
              given as the - character, standard input will be read.

       -store_info/-nostore_info
              This  option  causes  pavuk  to  store  information  about  each
              document into a separate file in the .pavuk_info directory. This
              file  is  used to store the original URL from which the document
              was downloaded. For files that are downloaded via HTTP or  HTTPS
              protocols,  the  whole  HTTP  response header is stored there. I
              recommend to use this option when you  are  using  options  that
              change  the  default  layout of the local document tree, because
              this info file helps pavuk to map the local filename to the URL.
              This  option  is  also  very useful when different URLs have the
              same filename in the local tree. When this occurs, pavuk detects
              this  using  info  files, and it will prefix the local name with
              numbers.  By default, storing of this extra information is
              disabled.

       -info_dir $dir
              With this option you can set the location of a separate directory
              for storing the info files created when the -store_info option is
              used.  This is useful when you don’t want to mix the info files
              with regular document files in the destination directory.  The
              structure of the info files is preserved; they are just stored in
              a different directory.

       -request $req
              With this option  you  can  specify  extended  information  for
              starting  URLs.  With this option you can specify query data for
              POST or GET.  Current syntax of this option is:  URL:["]$url["]
              [METHOD:["]{GET|POST}["]]                 [ENCODING:["]{u|m}["]]
              [FIELD:["]variable=value["]]      [FILE:["]variable=filename["]]
              [LNAME:["]local_filename["]]

              - URL: specifies request URL
              - METHOD: specifies request method for URL and is
                one of GET or POST.
              - ENCODING: specifies encoding for request body data.
                  m is for multipart/form-data encoding
                  u is for application/x-www-form-urlencoded
                  encoding
              - FIELD: specifies field of request data in format
                  variable=value. For encoding of special characters
                  in variable and value you can use same encoding
                  as is used in application/x-www-form-urlencoded
                  encoding.
              - FILE: specifies special field of query, which is
                  used to specify file for POST based file upload.
              - LNAME: specifies localname for this request
       When  you  need  to  use  inside the FIELD: and FILE: fields of request
       specification special characters, you should use the application/x-www-
       form-urlencoded   encoding   of   characters.  It  means  all  nonASCII
       characters,  quote  character  ("),  space  character  (  ),  ampersand
       character  (&), percent character (%) and equal character (=) should be
       encoded in form %xx where xx is  hexadecimal  representation  of  ASCII
       value  of  character. So for example % character should be encoded like
       %25.

       -formdata $req
              This option gives you the chance to specify  contents  for  HTML
              forms found while traversing the document tree.
               Current  syntax  of this option is same as for -request option,
              but  ENCODING:  and  METHOD:  are  meaningless  in  this  option
              semantics.
               In URL: you have to specify HTML form action URL, which will be
              matched against action URLs found in processed  HTML  documents.
              If  pavuk  finds  action  URL  which  matches  that  supplied in
              -formdata option, pavuk will construct GET or POST request  from
              data  supplied in this option and from default form field values
              supplied in HTML document. Values supplied on  commandline  take
              precedence over those supplied in the HTML file.

       -nthreads $nr
              By  means  of  this  option  you can specify how many concurrent
              threads will download documents.  By default pavuk  executes  3
              concurrent  downloading  threads.  This option is available only
              when pavuk is compiled to support multithreading.

       -immesg/-noimmesg
              Pavuk’s default behavior when running multiple downloading
              threads is to buffer all output messages in a memory buffer and
              flush the buffered data only when a thread finishes processing
              one document.  With this option you can change this behavior to
              see the messages immediately when they are produced.  It is only
              useful when you want to debug some special cases in a
              multithreading environment.  This option is available only when
              pavuk is compiled to support multithreading.

       -dumpfd $nr
              For scripting it is sometimes useful to be able to download a
              document directly to a pipe or variable instead of storing it to
              a regular file.  In such a case you can use this option to dump
              the data, for example to stdout ($nr = 1).

       -dump_after/-nodump_after
              While using the -dumpfd option in multithreaded  pavuk,  it  is
              necessary to dump each document in one piece, because documents
              downloaded in multiple threads can otherwise overlap.  This
              option is also useful when you want to dump a document after
              pavuk adjusts links inside HTML documents.

       -dump_response/-nodump_response
              This option has an effect only when used with the -dumpfd option.
              It is used to dump HTTP response headers.

       -dump_urlfd $nr
              When you use this option, pavuk will output all URLs  found  in
              HTML documents to file descriptor $nr.  You can use this option
              to extract and convert all URLs to absolute.

Scenario/Task options

       -scenario $str
              Name  of scenario to load and/or run. Scenarios are files with a
              structure similar to the .pavukrc file.  Scenarios contain saved
              configurations.   You  can  use  it  for  periodical  mirroring.
              Parameters from scenarios specified at the command line  can  be
              overwritten  by command line parameters.  To be able to use this
              option, you need to specify scenario base directory with  option
              -scndir.

       -dumpscn $filename
              Store   actual   configuration  into  scenario  file  with  name
              $filename.  This is  useful  to  quickly  create  pre-configured
              scenarios for manual editing.

Directory options

       -msgcat $dir
              Directory  which contains the message catalog for pavuk.  If you
              do not have permission to store a pavuk message catalog  in  the
              system  directory, you should simply create similar structure of
              directories in your home directory as it is on your system.

              For example:

              Your native language is  German,  and  your  home  directory  is
              /home/jano.

              You     should     at     first     create     the     directory
              /home/jano/locales/de/LC_MESSAGES/, then put the German pavuk.mo
              there  and  set  -msgcat  to  /home/jano/locales/.   If you have
              properly set locale  environment  values,  you  will  see  pavuk
              speaking  German.   This  option  is  available  only  when  you
              compiled    in    support    for    GNU     gettext     messages
              internationalization.

       -cdir $dir
              Directory where all retrieved  documents  are  stored.  If  not
              specified, the current  directory  is  used.  If  the  specified
              directory doesn’t exist, it will be created.

       -scndir $dir
              Directory in which your scenarios are stored.  You must use this
              option when you are loading or storing scenario files.

Preserve options

       -preserve_time/-nopreserve_time
              Store downloaded document with same modification time as on  the
              remote  site.  Modification  time  will  be  set  only when such
              information is available (some FTP servers do  not  support  the
              MDTM  command,  and  some  documents on HTTP servers are created
              online so pavuk can’t retrieve the  modification  time  of  this
              document).  By default the modification time of documents  isn’t
              preserved.

       -preserve_perm/-nopreserve_perm
              Store downloaded document with the same permissions  as  on  the
              remote  site.   This  option  has effect only when downloading a
              file through FTP protocol and assumes that the  -ftplist  option
              is used. By default permissions are not preserved.

       -preserve_slinks/-nopreserve_slinks
              Set  symbolic  links to point exactly to same location as on the
              remote server; don’t do any relocations.  This option has effect
              only when downloading file through FTP protocol and assumes that
              the -ftplist option is used.  By default symbolic links are  not
              preserved, and are retrieved as regular documents with the  full
              contents of the linked file.

              For example, assume that on the FTP server ftp.xx.org there is a
              symbolic  link  /pub/pavuk/pavuk-current.tgz,  which  points  to
              /tmp/pub/pavuk-0.9pl11.tgz.  Pavuk  will  create  symbolic  link
              ftp/ftp.xx.org_21/pub/pavuk/pavuk-current.tgz
              If the option -preserve_slinks is used, this symbolic link will
              point to /tmp/pub/pavuk-0.9pl11.tgz
              If the option -preserve_slinks is not used, this symbolic link
              will point to
               ../../tmp/pub/pavuk-0.9pl11.tgz

       -retrieve_symlink/-noretrieve_symlink
              Retrieve  files  behind  symbolic  links  instead of replicating
              symlinks in local tree.

Proxy options

       -http_proxy $site[:$port]
              If this parameter is used, then  all  HTTP  requests  are  going
              through  this  proxy server. This is useful if your site resides
              behind a firewall, or if you want to  use  a  HTTP  proxy  cache
              server.  The  default  port number is 8080.  Pavuk allows you to
              specify  multiple  HTTP  proxies  (using  multiple   -http_proxy
              options)  and  it  will  rotate proxies with roundrobin priority
              disabling proxies with errors.

       -nocache/-cache
              Use this option whenever you want to get the  document  directly
              from the site and not from your HTTP proxy cache server. Default
              pavuk allows transfer of document copies from cache.

       -ftp_proxy $site[:$port]
              If this parameter is used,  then  all  FTP  requests  are  going
              through  this  proxy  server.   This  is  useful  when your site
              resides behind a firewall, or if you want to use FTP proxy cache
              server.   The  default  port number is 22.  Pavuk supports three
              different types of proxies for FTP, see the options -ftp_httpgw,
              -ftp_dirtyproxy.  If none of the mentioned options is used, then
              pavuk assumes a regular FTP proxy with USER user@host connecting
              to remote FTP server.

       -ftp_httpgw/-noftp_httpgw
              The  specified FTP proxy is a HTTP gateway for the FTP protocol.
              Default FTP proxy is regular FTP proxy.

       -ftp_dirtyproxy/-noftp_dirtyproxy
              The specified FTP proxy is a HTTP proxy which supports a CONNECT
              request  (pavuk  should  use full FTP protocol, except of active
              data connections).  Default FTP proxy is regular FTP proxy.   If
              both    -ftp_dirtyproxy    and    -ftp_httpgw   are   specified,
              -ftp_dirtyproxy is preferred.

       -gopher_proxy $site[:$port]
              Gopher gateway or proxy/cache server.

       -gopher_httpgw/-nogopher_httpgw
              The specified Gopher proxy server is a HTTP gateway  for  Gopher
              protocol.   When  -gopher_proxy  is  set and this -gopher_httpgw
              option isn’t used, pavuk is using  proxy  as  HTTP  tunnel  with
              CONNECT request to open connections to Gopher servers.

       -ssl_proxy $site[:$port]
              SSL  proxy  (tunneling) server [as that in CERN httpd + patch or
              in Squid] with enabled CONNECT request (at least on  port  443).
              This  option  is  available  only when compiled with SSL support
              (you need the  SSleay  or  OpenSSL  libraries  with  development
              headers)

Proxy Authentification

       -http_proxy_user $user
              Username for HTTP proxy authentification.

       -http_proxy_pass $pass
              Password for HTTP proxy authentification.

       -http_proxy_auth {1/2/3/4/user/Basic/Digest/NTLM}
              Authentification scheme for proxy access. Similar meaning as the
              -auth_scheme option (see help for this option for more details).
              Default is 2 (Basic scheme).

       -auth_proxy_ntlm_domain $str
              NT or LM domain used for authorization against HTTP proxy server
              when NTLM authentification scheme is required.  This  option  is
              available only when compiled with OpenSSL or libdes libraries.

       -auth_reuse_proxy_nonce/-noauth_reuse_proxy_nonce
              When  using HTTP Proxy Digest access authentification scheme use
              first received nonce value in multiple following requests.

       -ftp_proxy_user $user
              Username for FTP proxy authentification.

       -ftp_proxy_pass $pass
              Password for FTP proxy authentification.

Protocol/Download Options

       -ftp_passive
              Uses passive ftp when downloading via ftp.

       -ftp_active
              Uses active ftp when downloading via ftp.

       -active_ftp_port_range $min:$max
              This option permits to specify the ports used  for  active  ftp.
              This  permits  easier  firewall configuration since the range of
              ports can be restricted.

              Pavuk will randomly choose a number from  within  the  specified
              range until an open port is found. Should no open ports be found
              within the given range, pavuk will default to a  normal  kernel-
              assigned port, and a message (debug level net) is output.

              The port range selected must be in the non-privileged range (eg.
              greater than or equal to 1024); it is STRONGLY RECOMMENDED  that
              the  chosen  range  be  large enough to handle many simultaneous
              active  connections  (for  example,   49152-65534,   the   IANA-
              registered ephemeral port range).

       -always_mdtm/-noalways_mdtm
              Force   pavuk  to  always  use  "MDTM"  to  determine  the  file
              modification time and never uses cached  times  determined  when
              listing the remote files.

       -remove_before_store/-noremove_before_store
              Force  unlink’ing  of  files  before  new content is stored to a
              file. This is helpful if the local files are hardlinked to  some
              other  directory  and after mirroring the hardlinks are checked.
              All "broken" hardlinks indicate a file update.

       -retry $nr
              Set the number  of  attempts  to  transfer  processed  document.
              The default is 1, which means pavuk will retry once to get
              documents which failed on the first attempt.

       -nregets $nr
              Set the number of allowed regets on a single document,  after  a
              broken transfer.  Default value for this option is 2.

       -nredirs $nr
              Set the number of allowed HTTP redirects (use this to prevent
              redirect loops).  The default value for this option is 5, which
              conforms to the HTTP specification.

       -force_reget/-noforce_reget
              Force  reget’ing  of  the whole document after a broken transfer
              when the server doesn’t support retrieving of  partial  content.
              Pavuk  default behavior is to stop getting documents which don’t
              allow restarting of transfer from specified position.

       -timeout $nr
              Timeout for stalled connections in minutes. This value  is  also
              used  for  connection  timeouts. For sub-minute timeouts you can
              use floating point numbers.  Default timeout is 0, and that means
              timeout checking is disabled.

       -noRobots/-Robots
              This switch suppresses the use of the robots.txt standard, which
              is used to restrict access of Web robots to  some  locations  on
              the  web server. Default is allowed checking of robots.txt files
              on  HTTP  servers.  Enable  this  option  always  when  you  are
              downloading  huge sets of pages with unpredictable layout.  This
              prevents you from upsetting server administrators :-).

       -noEnc/-Enc
              This switch suppresses using of  gzip  or  compress  or  deflate
              encoding in transfer. I don’t know if some servers are broken or
              what, but they are propagating that MIME  type  application/gzip
              or application/compress as encoded. Turn this option off when
              you don’t have libz support compiled in and also lack the gzip
              program which is used to decode documents encoded this way.  By
              default, decoding of downloaded documents is disabled.

       -check_size/-nocheck_size
              The option -nocheck_size should be used if  you  are  trying  to
              download  pages  from a HTTP server which sends a wrong Content-
              Length: field in the MIME header  of  response.   Default  pavuk
              behavior  is  to check this field and complain when something is
              wrong.

       -maxrate $nr
              If you don’t want to give all your transfer bandwidth to  pavuk,
              use  this  option  to  set  pavuk’s  maximum transfer rate. This
              option accepts a floating point number to specify  the  transfer
              rate in kB/s. If you want to get optimal settings, you also have
              to play with the size of the read buffer (option -bufsize),
              because pavuk does flow control only at the application level.
              By default pavuk uses the full bandwidth.

       -minrate $nr
              If you hate slow transfer rates, this option allows you to break
              transfers  with  slow  speed.  You  can set the minimum transfer
              rate, and if the connection gets slower than the given rate, the
              transfer  will be stopped. The minimum transfer rate is given in
              kB/s.  At default pavuk doesn’t check this limit.

       -bufsize $nr
              This option is used to specify  the  size  of  the  read  buffer
              (default  size:  32kB).  If you have a very fast connection, you
              may increase the size  of  the  buffer  to  get  a  better  read
              performance.  If you need to decrease the transfer rate, you may
              need to decrease the size of the  buffer  and  set  the  maximum
              transfer  rate with the -maxrate option. This option accepts the
              size of the buffer in kB.

       -fs_quota $nr
              If you are running pavuk on a multiuser system, you may need  to
              avoid  filling up your file system. This option lets you specify
              how much space must remain free. If pavuk detects an underrun of
              the free space, it will stop downloading files. Specify this
              quota in kB. The default value is 0, which means this quota is
              not checked.

       -file_quota $nr
              This  option is useful when you want to limit downloading of big
              files, but want to download at  least  $nr  kilobytes  from  big
              files.   A big file will be transferred, and when it reaches the
              specified size, transfer  will  break.  Such  document  will  be
              processed  as properly downloaded, so be careful when using this
              option.   At  default  pavuk  is  transferring  full   size   of
              documents.

       -trans_quota $nr
              If you are aware that your selection should address a big amount
              of data, you  can  use  this  option  to  limit  the  amount  of
              transferred data.  Default is by size unlimited transfer.

       -max_time $nr
              Set  maximum  amount  of  time  for  program  run. After time is
              exceeded, pavuk will stop  downloading.  Time  is  specified  in
              minutes.  Default  value  is 0, and it means downloading time is
              not limited.

       -url_strategy $strategy
              This option allows you to specify a downloading order  for  URLs
              in  document tree.  This option accepts the following strings as
              parameters :

              level - will order URLs as it loads it from HTML files (default)
              leveli - as previous, but inline objects URLs come first
              pre  -  will  insert  URLs  from  actual HTML document at start,
              before other
              prei - as previous, but inline objects URLs come first

       -send_if_range/-nosend_if_range
              Send If-Range: header in HTTP request. I found  out,  that  some
              HTTP  servers  (greetings,  MS  :-)) are sending different ETag:
              fields in different responses for the same, unchanged  document.
              This  causes  problems  when  pavuk attempts to reget a document
              from such a server: pavuk will remember the old ETag value and
              use it in following requests for this document.  If the server
              checks it with the new ETag value and it differs, it will refuse
              to  send  only part of the document, and start the download from
              scratch.

       -ssl_version $v
              Set required SSL protocol version for SSL communication.  $v  is
              one of ssl2, ssl23, ssl3 or tls1.  This option is available only
              when compiled with SSL support.  Default is ssl23.

       -unique_sslid/-nounique_sslid
              This option can be used if you want to use a unique SSL  ID  for
              all  SSL  sessions.  Default pavuk behavior is to negotiate each
              time new  session  ID  for  each  connection.   This  option  is
              available only when compiled with SSL support.

       -use_http11/-nouse_http11
              This  option  is  used  to  switch between HTTP/1.0 and HTTP/1.1
              protocol used with HTTP servers. Currently HTTP/1.1 is not the
              default because its implementation is very fresh and not 100%
              tested. Even so, using HTTP/1.1 is highly recommended, because
              it is faster than HTTP/1.0 and uses less network bandwidth for
              initiating connections. In a future version I will make
              HTTP/1.1 the default.

       -local_ip $addr
              You  can  use this option when you want to use specified network
              interface for communication with other  hosts.  This  option  is
              suitable  for  multihomed hosts with several network interfaces.
              Address should be entered as regular IP address or as host name.

       -identity $str
              This  option  allows you to specify content of User-Agent: field
              of HTTP request.  This is usable, when scripts on remote  server
              returns  different  document on same URL for different browsers,
              or if some HTTP server refuse to serve document for  Web  robots
              like   pavuk.   Default   pavuk   sends   in  User-Agent:  field
              pavuk/$VERSION string.

       -auto_referer/-noauto_referer
              This option forces pavuk to send HTTP Referer: header field with
              starting  URLs.   Content  of this field will be self URL. Using
              this option is required, when remote server checks the  Referer:
              field.  By default pavuk won’t send the Referer: field with starting
              URLs.

       -referer/-noreferer
              This option allows to enable and  disable  the  transmission  of
              HTTP  Referer:  header  field.  At  default pavuk sends Referer:
              field.

       -httpad $str
              In some cases you  may  want  to  add  user  defined  fields  to
              HTTP/HTTPS  requests.   This option is exactly for this purpose.
              In $str you can directly specify content of  additional  header.
              If  you  specify  only  raw  header,  it  will  be used only for
              starting requests. When you want to use this  header  with  each
              request while crawling, prefix the header with + character.

       -del_after/-nodel_after
              This  option allows you to delete FILES from REMOTE server, when
              download is properly finished. At default is this option off.

       -FTPlist/-noFTPlist
              When option -FTPlist will be used, pavuk will  retrieve  content
              of FTP directories with FTP command LIST instead of NLST. So the
              same listing will be retrieved as with  "ls  -l"  UNIX  command.
              This  option  is required if you need to preserve permissions of
              remote files or you need  to  preserve  symbolic  links.   Pavuk
              supports  wide  listing  on FTP servers with regular BSD or SYSV
              style "ls -l"  directory  listing,  on  FTP  servers  with  EPFL
              listing format, VMS style listing, DOS/Windows style listing and
              Novell listing format.  Default pavuk behavior is to use NLST for
              FTP directory listings.

       -ftp_list_options $str
              Some FTP servers require to supply extra options to LIST or NLST
              FTP commands to show all files and directories properly. But  be
              sure  not  to use any extra options which can reformat output of
              the listing. Useful is especially  -a  option  which  force  FTP
              server  to  show  also dot files and directories and with broken
              WuFTP servers it also helps to produce full  directory  listings
              not just files.

       -fix_wuftpd/-nofix_wuftpd
              This option is the result of several attempts to get the
              -remove_old option working properly with WuFTPd servers when the
              -ftplist option is used. The problem is that the FTP command
              LIST on WuFTPd doesn’t complain when trying to list a
              nonexistent directory, and indicates success in the FTP response
              code.  When you activate this option, pavuk uses an extra FTP
              command (STAT -d dir) to check whether the directory really
              exists. Don’t use this option until you are sure that you really
              need it!

Authentification

       -auth_file $file
              File where you  have  stored  authentification  information  for
              access  to  some  service. For file structure see below in FILES
              section.

       -auth_name $user
              If  you   are   using   this   parameter,   program   is   doing
              authentification  with  each  HTTP  access to document. Use this
              only if you know that only one HTTP server could be accessed  or
              use   -asite   option   to   specify   site  to  which  you  use
              authentification. Else your auth parameters will be sent to each
              accessed HTTP server.

       -auth_passwd $passwd
              Value of this parameter is used as password for authentification

       -auth_scheme {1/2/3/4/user/Basic/Digest/NTLM}
              This parameter specifies used authentification scheme.
              1 or user means user authentification scheme is used as  defined
              in  HTTP/1.0  or  HTTP/1.1.   Password  and  user  name are sent
              unencoded.
              2 or Basic  means  Basic  authentification  scheme  is  used  as
              defined  in  HTTP/1.0.   Password  and user name are sent BASE64
              encoded.
              3 or Digest means Digest access authentification scheme based on
              MD5 checksums as defined in RFC2069.
              4  or NTLM means NTLM proprietary access authentification scheme
              used by Microsoft IIS or  Proxy  servers.   When  you  use  this
              scheme,  you  must  also  specify  NT  or  LM domain with option
              -auth_ntlm_domain. This scheme is supported only  when  compiled
              with OpenSSL or libdes libraries.

       -auth_ntlm_domain $str
              NT or LM domain used for authorization against HTTP server when
              NTLM  authentification  scheme  is  required.  This  option   is
              available only when compiled with OpenSSL or libdes libraries.

       -auth_reuse_nonce/-noauth_reuse_nonce
              While using HTTP Digest access authentification scheme use first
              received nonce value in more following requests.  Default  pavuk
              negotiates nonce for each request.

       -ssl_key_file $file
              File with public key for SSL certificate (learn more from SSLeay
              or OpenSSL documentation) This option  is  available  only  when
              compiled  with SSL support (you need SSleay or OpenSSL libraries
              and development headers)

       -ssl_cert_file $file
              Certificate file in  PEM  format  (learn  more  from  SSLeay  or
              OpenSSL  documentation)  This  option  is  available  only  when
              compiled with SSL support (you need SSleay or OpenSSL  libraries
              and development headers)

       -ssl_cer_passwd $str
              Password used to generate certificate (learn more from SSLeay or
              OpenSSL  documentation)  This  option  is  available  only  when
              compiled  with SSL support (you need SSLeay or OpenSSL libraries
              and development headers)

       -nss_cert_dir $dir
              Config  directory  for   NSS   (Netscape   SSL   implementation)
              certificates.   Usually   ~/.netscape   (created   by   Netscape
              communicator/navigator) or profile  directory  below  ~/.mozilla
              (created  by  Mozilla  browser).  The  directory  should contain
              cert7.db and key3.db files.  If you use neither Mozilla nor
              Netscape, you must create these files with the utilities
              distributed with the NSS libraries.  Pavuk opens the certificate
              database read-only.  This option is available only when pavuk is
              compiled with SSL support provided by Netscape NSS SSL
              implementation.

       [-nss_accept_unknown_cert/-nonss_accept_unknown_cert]
              By default pavuk will reject connections to SSL servers whose
              certificate is not stored in local certificate database (set by
              -nss_cert_dir option).  You must explicitly force pavuk to allow
              connection to servers with unknown certificates.  This option is
              available only when pavuk is compiled with SSL support  provided
              by Netscape NSS SSL implementation.

       [-nss_domestic_policy/-nss_export_policy]
              Selects  sets  of  ciphers allowed/disabled by USA export rules.
              This option is available only when pavuk is  compiled  with  SSL
              support provided by Netscape NSS SSL implementation.

       -from $email
              This  parameter  is  used when accessing anonymous FTP server as
              password or is optionally  inserted  into  From  field  in  HTTP
              request.  If  not  specified  pavuk  discovers  this  from  USER
              environment variable and from site hostname.

       -send_from/-nosend_from
              This option is used for enabling or disabling  sending  of  user
              identification,  entered  in -from option, as FTP anonymous user
              password and From: field of HTTP request.  As  default  is  this
              option off.

       -ftp_login_handshake $host $handshake
              When you need to use nonstandard login procedure for some of FTP
              servers, you can use this option to change default  pavuk  login
              procedure.  To  allow more flexibility, you can assign the login
              procedure to some server or to all. When $host is  specified  as
              empty string (""), then the attached login procedure is assigned
              to all FTP servers except those that have their own login
              procedures.  In  the  $handshake parameter you can specify exact
              login procedure specified by FTP commands followed  by  expected
              FTP response codes delimited with backslash (\) characters.
              For  example  this  is  default  login procedure when logging in
              regular ftp server without going through  proxy  server  :  USER
              %u\331\PASS  %p\230.  There  are  two  commands  followed by two
              response codes. After USER command pavuk  expects  FTP  response
              code  331  and  after PASS command pavuk expects from server FTP
              response code 230. In ftp commands you can use following  macros
              which will be replaced by respective values:

               %u - user name used to access FTP server
               %p - password used to access FTP server
               %U - user name used to access FTP proxy server
               %P - password used to access FTP proxy server
               %h - hostname of FTP server
               %s - port number on which FTP server listens

Site/Domain/Port Limitation Options

       -asite $list
              Specify   comma   separated  list  of  allowed  sites  on  which
              referenced documents are stored.

       -dsite $list
              Specify comma separated  list  of  disallowed  sites.   Previous
              parameter  is  opposite  to  this one. If both are used the last
              occurrence of them is used to be valid.

       -adomain $list
              Specify  comma  separated  list  of  allowed  domains  on  which
              referenced documents are stored.

       -ddomain $list
              Specify  comma  separated  list  of disallowed domains. Previous
              parameter is opposite to this one. If both  are  used  the  last
              occurrence of them is used to be valid.

       -aport $list
              In $list, you can write comma separated list of ports from which
              you allow to download documents.

       -dport $list
              This option is opposite option to previous option. It is used to
              specify denied ports. If both -aport and -dport options are used
              the last occurrence of them is used to be valid  and  all  other
              occurrences will be omitted.

Limitation Document properties

       -amimet $list
              List  of  comma  separated  allowed MIME types. You can use with
              this option also wildcard patterns.

       -dmimet $list
              List of comma separated disallowed MIME types. You can use  with
              this  option  also  wildcard  patterns.   Previous  parameter is
              opposite to this one. If both are used the  last  occurrence  of
              them is used to be valid.

       -maxsize $nr
              Maximum  allowed  size of document.  This option is applied only
              when pavuk is able to detect the document size before starting the
              transfer.   Default  value  is  0, and it means this limit isn’t
              applied.

       -minsize $nr
              Minimum allowed size of document.  This option is applied only
              when pavuk is able to detect the document size before starting the
              transfer.  Default value is 0, and it  means  this  limit  isn’t
              applied.

       -newer_than $time
              Allow  only  transfer  of documents with modification time newer
              than  specified  in  parameter  $time.  Format  of   $time   is:
              YYYY.MM.DD.hh:mm.   To  apply  this option pavuk must be able to
              detect modification time of document.

       -older_than $time
              Allow only transfer of documents with  modification  time  older
              than   specified   in  parameter  $time.  Format  of  $time  is:
              YYYY.MM.DD.hh:mm.  To apply this option pavuk must  be  able  to
              detect modification time of document.

       -noCGI/-CGI
              This switch prevents the transfer of dynamically generated
              parametric documents through the CGI interface.  Such documents
              are detected by the occurrence of a ? character inside the URL.
              Default pavuk behavior is to allow transfer of URLs with query
              strings.

       -alang $list
              This allows you to specify an ordered comma separated list of
              preferred natural languages. This option works only with the
              HTTP and HTTPS protocols, using the Accept-Language: MIME field.

       -acharset $list
              This option allows you to enter a comma separated list of
              preferred encodings of transferred documents. This works only
              with HTTP and HTTPS URLs and only if such document encodings are
              available on the destination server.
              example: -acharset iso-8859-2,windows-1250,utf8

Limitation Document name

       -asfx $list
              This  parameter  allows  you  to specify set of suffixes used to
              restrict selection of documents which will be processed.

       -dsfx $list
              Set  of  suffixes  that  are  used  to  specify  restriction  on
              selection of documents.  This one is inverse to previous option.
              They are segregating each other.

       -aprefix $list, -dprefix $list
              These two options allow you to specify a set of allowed or
              disallowed  prefixes  of  documents.  They  are segregating each
              other.

       -pattern $pattern
              This  option  allows  you  to  specify  wildcard   pattern   for
              documents.  All documents are tested if they match this pattern.

       -rpattern $reg_exp
              This  is  equal  option  as  previous,  but  this  uses  regular
              expressions.    Available  only  on  platforms  which  have  any
              supported RE implementation.

       -skip_pattern $pattern
              This option allows you to specify wildcard pattern for documents
              that  should be skipped.  All documents are tested if they match
              this pattern.

       -skip_rpattern $reg_exp
              This  is  equal  option  as  previous,  but  this  uses  regular
              expressions.    Available  only  on  platforms  which  have  any
              supported RE implementation.

       -url_pattern $pattern
              This option allows you to specify wildcard pattern for URLs. All
              URLs are tested if they match this pattern.
              Example:
              -url_pattern   http://\*.idata.sk:\*/~ondrej/\*  .  this  option
              enables all HTTP URLs from domain .idata.sk on all  ports  which
              are located under /~ondrej/.

       -url_rpattern $reg_exp
              This  is  equal  option  as  previous,  but  this  uses  regular
              expressions.   Available  only  on  platforms  which  have   any
              supported RE implementation.

       -skip_url_pattern $pattern
              This option allows you to specify wildcard pattern for URLs that
              should be skipped.  All URLs  are  tested  if  they  match  this
              pattern.

       -skip_url_rpattern $reg_exp
              This  is  equal  option  as  previous,  but  this  uses  regular
              expressions.   Available  only  on  platforms  which  have   any
              supported RE implementation.

       -aip_pattern $re
              This  option allows you to limit set of transferred documents by
              server IP address.  IP  address  can  be  specified  as  regular
              expressions, so it is possible to specify set of IP addresses by
              one expression.  Available only  on  platforms  which  have  any
              supported RE implementation.

       -dip_pattern $re
              This  option  similar to previous option, but is used to specify
              set of disallowed IP addresses.   Available  only  on  platforms
              which have any supported RE implementation.

       -tag_pattern $tag $attrib $url
              More  powerful  version  of -url_pattern option for more precise
              matching of allowed URLs based on HTML tag  name  pattern,  HTML
              tag  attribute  name  pattern and on URL pattern. You can use in
              all three parameters of  this  option  wildcard  patterns,  thus
              something like -tag_pattern '*' '*' url_pattern  is  equal  to
              -url_pattern url_pattern. The $tag and  $attrib  parameters  are
              always  matched  against  uppercase strings. For example if you
              want to let pavuk  follow  only  regular  links,  ignoring  any
              stylesheets, images, etc., use option -tag_pattern A HREF '*'.

       -tag_rpattern $tag $attrib $url
              This   is   variation  on  the  -tag_pattern.  It  uses  regular
              expression patterns in parameters instead of  wildcard  patterns
              used in the previous option.

Limitation Protocol Option

       -noHTTP/-HTTP
              This  switch  suppresses  all  transfers  through HTTP protocol.
              Default is transfer through HTTP enabled.

       -noSSL/-SSL
              This switch suppresses  all  transfers  through  HTTPS  protocol
              (HTTP  protocol  over  SSL).  Default is transfer through HTTPS
              enabled.  This option is available only when compiled  with  SSL
              support  (you  need  SSLeay or OpenSSL libraries and development
              headers).

       -noGopher/-Gopher
              Suppress  all  transfers  through  Gopher   Internet   protocol.
              Default is transfer through Gopher enabled.

       -noFTP/-FTP
              This  switch  prevents  processing  documents located on all FTP
              servers.  Default is transfer through FTP enabled.

       -noFTPS/-FTPS
              This switch prevents processing documents  located  on  all  FTP
              servers  accessed through SSL.  Default is transfer through FTPS
              enabled.  This option is available only when compiled  with  SSL
              support  (you  need  SSLeay or OpenSSL libraries and development
              headers).

       -FTPhtml/-noFTPhtml
              By using of option -FTPhtml you can force pavuk to process  HTML
              files  downloaded  with  FTP  protocol.   At default pavuk won’t
              parse HTML files from FTP servers.

       -FTPdir/-noFTPdir
              Force recursive processing of FTP directories too.   At  default
              is recursive downloading from FTP servers denied.

       -disable_html_tag $TAG,[$ATTRIB][;...]
       -enable_html_tag $TAG,[$ATTRIB][;...]
              Enable  or  disable  processing  of  particular  HTML  tags   or
              attributes.  At default all supported HTML tags are enabled.

              For  example  if you don’t want to process all images you should
              use option -disable_html_tag 'IMG,SRC;INPUT,SRC;BODY,BACKGROUND'.

Other Limitation Options

       -subdir $dir
              Subdirectory of local tree directory, to limit some of the modes
              {sync, resumeregets, linkupdate} in its tree scan.

       -dont_leave_site/-leave_site
              (Don’t) leave starting site. At default pavuk can span host when
              recursing through WWW tree.

       -dont_leave_dir/-leave_dir
              (Don’t)  leave  starting directory. If -dont_leave_dir option is
              used pavuk will stay only in starting directory  (including  its
              own  subdirectories).   At  default  pavuk  can  leave  starting
              directories.

       -leave_site_enter_dir/-dont_leave_site_enter_dir
              If you are downloading WWW tree which spans multiple hosts  with
              huge  trees, you may want to allow downloading of document which
              are in directory hierarchy below directory which we  visited  as
              first    on    each   site.   To   obtain   this,   use   option
              -dont_leave_site_enter_dir. As default pavuk  will  go  also  to
              higher directory levels on that site.

       -lmax $nr
              Set maximum allowed level of tree traverse. Default is set to 0,
              which means that pavuk can traverse ad infinitum.  As of version
              0.8pl1  inline objects of HTML pages are placed at same level as
              parent HTML page.

       -leave_level $nr
              Maximum level of documents outside from site  of  starting  URL.
              Default is set to 0, and 0 means that checking is not applied.

       -site_level $nr
              Maximum  level  of  sites  outside  from  site  of starting URL.
              Default is set to 0, and 0 means that checking is not applied.

       -dmax $nr
              Set maximum allowed number  of  documents  that  are  processed.
              Default  value  is  0.   That  means no restrictions are used in
              number of processed documents.

       -singlepage/-nosinglepage
              Using option -singlepage allows you to transfer just HTML  pages
              with all its inlined objects (pictures, sounds, frame documents,
              ...).  As default is disabled single page transfer. This  option
              makes -mode singlepage option obsolete.

       -limit_inlines/-dont_limit_inlines
              With  this option you can control whether limiting options apply
              also to inline objects (pictures, sounds, ...). This  is  useful
              when  you  want to download specified set of HTML pages with all
              inline objects without any restrictions.

       -user_condition $str
              Script or program name for users own conditions.  You can  write
              any  script  which should with exit value decide if download URL
              or not.  Script gets from pavuk any number of options, with this
              meaning :

                 -url $url - processed URL
                 -parent $url - any number of parent URLs
                 -level $nr - level of this URL from starting URL
                 -size $nr - size of requested URL
                 -date  $datenr - modification time of requested URL in format
                 YYYYMMDDhhmmss

              The exit status 0 of script or program means  that  current  URL
              should be rejected and nonzero exit status means that URL should
              be accepted.
              Warning : use user conditions only if required  because  of  big
              slowdowns caused by forking scripts for each checked URL.

       -follow_cmd $str
              This option allows you to specify script or program which can by
              its exit status decide whether to follow URLs from current  HTML
              document. This script will be called after download of each HTML
              document.   The  script  will  get  following  options  as   its
              parameters:

                 -url $url - URL of current HTML document
                 -infile $file - local file where is stored HTML document

              The  exit  status  0  of  script or program means that URLs from
              current document will be disallowed, other  exit  status  means,
              that pavuk can follow links from current HTML document.

Javascript support

       Support for scripting languages like JavaScript or VBScript in pavuk is
       done in a somewhat hacky way. There is  no  interpreter  for  these
       languages, so not all things will work. The whole support which  pavuk
       has for these scripting languages is based  on  regular  expression
       patterns specified by the user.  Pavuk searches for these patterns  in
       DOM event attributes of HTML tags, in javascript:... URLs,  in  inline
       scripts in HTML documents enclosed between <script></script> tags  and
       in separate javascript files.  Support for scripting languages is only
       available when pavuk is compiled with  a  proper  regular  expression
       library (POSIX/GNU/PCRE).

       -enable_js/-disable_js
              These options  are  used  to  enable  or  disable  processing of
              Javascript parts of HTML documents. You must enable this  option
              to be able to use processing of javascript patterns.

       -js_pattern $re
              With   this  option  you  are  specifying  what  patterns  match
              interested  parts  of  Javascript  for  extracting   URLs.   The
              parameter  must  be RE pattern with exactly one subpattern which
              match exactly  the  URL  part.  For  example  to  match  URL  in
              following type of javascript expressions :
                document.b1.src='pics/button1_pre.jpg'
              you can use this pattern
                "^document.[a-zA-Z0-9_]*.src[ ]*=[ ]*'(.*)'$"

       -js_transform $p $t $h $a
              This  option  is  similar  to  previous,  but you can use custom
              transform rules for the URL parts of patterns and  also  specify
              the exact HTML tag and attribute where to look for this pattern.
              The $p is the pattern to match the interested  part  of  script.
              The  $t  is transform rule for the URL, in this parameter the $x
              parts will be replaced by x-th subpattern of the $p pattern. The
              $h  parameter  is  exact  HTML  tag  or  "*"  when this apply to
              javascript: URLs or DOM event attribs or "" (empty string)  when
              this  apply  to  javascript body of HTML document or separate JS
              file. The $a parameter is exact HTML attrib of tag or ""  (empty
              string) when this rule apply to javascript body.

       -js_transform2 $p $t $h $a
              This  option  is  very  similar  to previous. The meaning of all
              parameters is same, just  the  pattern  $p  can  have  only  one
              substring  which  will be used in the transform rule $t. This is
              required to allow  rewriting  of  URL  parts  of  the  tags  and
              scripts.  This  option  can  also  be  used  to  force  pavuk to
              recognize  HTML  tag/attribute  pairs  which  pavuk  does    not
              support.

Cookie

       -cookie_file $file
              File  where  are  stored  cookie  infos.  This  file  must be in
              Netscape cookie file format (generated with  Netscape  Navigator
              or Communicator ...).

       -cookie_send/-nocookie_send
              Use  collected  cookies  in HTTP/HTTPS requests.  Pavuk will not
              send at default cookies.

       -cookie_recv/-nocookie_recv
              Store received cookies from  HTTP/HTTPS  responses  into  memory
              cookie  cache.   At  default  pavuk  will  not remember received
              cookies.

       -cookie_update/-nocookie_update
              Update cookie file on disk and synchronize it with changes  made
              by  any  concurrent processes.  At default pavuk will not update
              cookie file on disk.

       -cookies_max $nr
              Maximum number of cookies in memory cookie cache.  Default value
              is 0, and that means no restrictions for cookies number.

       -disabled_cookie_domains $list
              Comma-separated list of cookie domains which are  not  permitted
              to send cookies stored into cookie cache.

       -cookie_check/-nocookie_check
              Check when receiving cookie, if cookie domain is equal to domain
              of  server  which  sends this cookie. At default pavuk checks if
              server is setting cookies for its domain, and if it tries to set
              cookie  for  foreign  domain  pavuk will complain about that and
              will reject such cookie.

HTML rewriting engine tuning options

       -noRelocate/-Relocate
              This switch prevents the program to  rewrite  relative  URLs  to
              absolute,  after  HTML  document  is transferred.  Default pavuk
              behavior is to maintain link consistency of HTML  documents.  So
              always  when  HTML document is downloaded pavuk will rewrite all
              URLs to point to local document if it is available and if it  is
              not  available  it will point to remote document. After document
              is  properly  downloaded,  pavuk  will  update  links  in   HTML
              documents, which point to this one.

       -all_to_local/-noall_to_local
              This option forces pavuk to change all URLs inside HTML document
              to local URLs immediately after download of document. Default is
              this option disabled.

       -sel_to_local/-nosel_to_local
              This  option  forces  pavuk to change all URLs, which accomplish
              conditions  for  download,  to  local   inside   HTML   document
              immediately after download of document.  I recommend to use this
              option, when you are sure, that transfer  will  be  without  any
              problems. This option can save a lot of processor time.  Default
              is this option disabled.

       -all_to_remote/-noall_to_remote
              This option forces pavuk to change all URLs inside HTML document
              to  remote URLs immediately after download of document.  Default
              is this option disabled.

       -post_update/-nopost_update
              This option is especially designed to allow in  -fnrules  option
              doing  rules  based on MIME type of document. This option forces
              pavuk to generate local names for  documents  just  after  pavuk
              knows what is the MIME type of document. This have big impact on
              the rewriting engine of links inside HTML documents. This option
              causes  dysfunction  of  other  options for controlling the link
              rewriting engine. Use this option only when you  know  what  you
              are doing :-)

       -dont_touch_url_pattern $pat
              This   option   serves  to  deny  rewriting  and  processing  of
              particular URLs  in  HTML  documents  by  pavuk  HTML  rewriting
              engine.  This  option  accepts wildcard patterns to specify such
              URLs. Matching is done against untouched URLs so when the URL is
              relative,  you  must use pattern which matches the relative URL,
              when it is absolute, you must use absolute URL.

       -dont_touch_url_rpattern $pat
              This option is variation  on  previous  option.  This  one  uses
              regular  expression  patterns  for  matching of URLs instead of
              wildcard patterns used by  -dont_touch_url_pattern  option.  This
              option is available only when pavuk is compiled with support for
              regular expression patterns.

       -dont_touch_tag_rpattern $pat
              This option is variation on previous option,  just  matching  is
              made  on  full  HTML  tag  with included <>. This option accepts
              regular expression patterns. It is available only when pavuk  is
              compiled with support for regular expression patterns.

Filename/URL Conversion Option

       -tr_del_chr $str
              All  characters found in $str will be deleted from local name of
              document.  $str should contain escape sequences similar like  in
              tr command:
              \n - newline
              \r - carriage return
              \t - horizontal tab space
              \0xXX - hexadecimal  ASCII value
              [:upper:] - all uppercase letters
              [:lower:] - all lowercase letters
              [:alpha:] - all letters
              [:alnum:] - all letters and digits
              [:digit:] - all digits
              [:xdigit:] - all hexadecimal digits
              [:space:] - all horizontal and vertical whitespace
              [:blank:] - all horizontal whitespace
              [:cntrl:] - all control characters
              [:print:] - all printable characters including space
              [:nprint:] - all non printable characters
              [:punct:] - all punctuation characters
              [:graph:] - all printable characters excluding space

       -tr_str_str $str1 $str2
              String  $str1  from local name of document will be replaced with
              $str2.

       -tr_chr_chr $chrset1 $chrset2
              Characters from $chrset1 from local name  of  document  will  be
              replaced  with  corresponding character from $chrset2.  $chrset1
              and $chrset2 should have  same  syntax  as  $str  in  -tr_del_chr
              option.

       -store_name $str
              When  you want to change local filename of first file downloaded
              with singlepage mode, you should use this option.

       -index_name $str
              With this option you can change directory index name. As default
              is used _._.html .

       -store_index/-nostore_index
              With  option  -nostore_index  you  can deny storing of directory
              indexes into HTML files.

       -fnrules $t $m $r
              This is a very powerful option! This option is used to  flexible
              change   layout   of  local  document  tree.  It  accepts  three
              parameters. First parameter $t is  used  to  say  what  type  is
              following  pattern.   F  is  used  for  wildcard  pattern  (uses
              fnmatch()) and R is used for regular expression  pattern  (using
              any  supported RE implementation).  Second parameter is matching
              pattern used to select URLs for this rule.  If  URL  match  this
              pattern,  then  local  name  for  this URL is computed following
              rules of third parameter.  And third  parameter  is  local  name
              building  rule.  Pavuk  now  supports  two  kinds  of local name
              building rules. One is simple based only on  simple  macros  and
              other  more  complicated  extended  rule,  which also enables to
              perform several functions.  Recognition between those two  kinds
              of rules is done by looking at first character of rule.  In case
              when first character is ’(’, rule is extended and in  all  other
              cases it is the simple kind of rule.

              Simple  rule  should contain literals or escaped macros.  Macros
              are escaped by % character or by $ character.

              Here is list of recognized macros:

              $x - where x is any positive number. This macro is replaced with
              x-th  substring matched by RE pattern. (If you use this you need
              to understand RE !)
              %i - is replaced with protocol id (http, https, ftp, gopher)
              %p - is replaced with password. (use this only when usable)
              %u - is replaced with username.
              %h - is replaced with host name.
              %m - is replaced with domain name.
              %r - is replaced with port number.
              %d - is replaced with path to document.
              %n - is replaced with document name.
              %b - is replaced with basename of document (without  extension).
              %e - is replaced with extension.
              %s - is replaced with searchstring.
              %M  - is replaced with MIME type of document. When you are using
              this macro, you *must* use  also  -post_update  option  else  it
              won’t work.
              %E - is replaced with default extension assigned to MIME type of
              document. When you are using this macro,  you  *must*  use  also
              -post_update option else it won’t work.
              %x  - where x is positive number. This macro is replaced with x-
              th directory from path to document from beginning.
              %-x - where x is positive number. This macro is replaced with x-
              th directory from path to document from end.

              Here  is  example.  If  you  want  place  document  into  single
              directories by  extension,  you  should  use  following  fnrules
              option:
              -fnrules F '*' '/%e/%n'

              Extended rule always begins with character '('.  It  uses  some
              kind of LISP like syntax.

              Here are base rules for writing extended rules :
              - the local filename for a rule of this kind is the return value
              of the function
              - each function is enclosed inside round braces ()
              - first token right after opening brace is function name
              - each function have nonzero fixed number of parameters
              - each function returns numeric or string value
              -  function  parameters  are  separated  by  any number of space
              characters
              - parameter of function should be string, number, macro or other
              function
              - string is ever quoted with "
              -  each  numeric  parameter  can be in any encoding supported by
              strtod() function (octal, decimal, hexadecimal, ...)
              - there is no implicit conversion from number to string
              - each macro is prefixed by % character  and  is  one  character
              long
              -  each  macro  is  replaced  by  its string representation from
              current URL
              - function parameters are typed strictly
              - toplevel function must return string value

              Extended rule supports full set of %  escaped  macros  supported
              with simple rules, plus two following addition macros :
              %U - URL string
              %o - default localname for URL

              Here is description of all supported functions

              sc - concat two string parameters
                 - accepts two string parameters
                 - returns string value
              ss - substring from string
                 - accepts three parameters.
                   - first is string from which we want to cut subpart
                   -  second  is  number which represents starting position in
                     string
                   - third is  number  which  represents  ending  position  in
                     string
                 - returns string value
              hsh - compute modulo hash value from string with specified base
                 - accepts two parameters
                   - first is string for which we are computing the hash value
                   - second is numeric value for base of modulo hash
                 - returns numeric value
              md5 - compute MD5 checksum for string
                 - accepts one string value
                 - returns string which represents MD5 checksum
              lo - convert all characters inside string to lower case
                 - accepts one string value
                 - returns string value
              up - convert all characters inside string to upper case
                 - accepts one string value
                 - returns string value
              ue - encode unsafe characters in string with same encoding which
              is  used  for  encoding  unsafe  characters  inside URL (%xx) As
              default are encoded all nonascii values when  this  function  is
              used.
                 - accepts two string values
                   - first is string which we want to encode
                   - second is string which contains unsafe characters
                 - return string value
              dc  -  delete  unwanted  characters  from  string  (have similar
              functionality as -tr_del_chr option)
                 - accepts two string values
                   - first is string from which we want delete
                   - second is string which contains  characters  we  want  to
              delete.
                 - returns string value
              tc  -  replace  character  with  other character in string (have
              similar functionality as -tr_chr_chr option)
                 - accepts three string values
                   -  first  is  string  inside  which  we  want  to   replace
              characters
                   - second is set of characters which we want to replace
                   - third is set of characters with which we are replacing
                 - returns string value
              ts  -  replace  some  string inside string with any other string
              (have similar functionality as -tr_str_str option)
                 - accepts three string values
                   - first is string inside which we want to replace string
                   - second is the from string
                   - third is to string
                 - returns string value
              spn - calculate initial length of  string  which  contains  only
              specified  set  of  characters.   (have  same  functionality  as
              strspn() libc function)
                 - accepts two string values
                   - first is input string
                   - second is set of acceptable characters
                 - returns numeric value
              cspn - calculate initial length of string which doesn’t  contain
              specified  set  of  characters.   (have  same  functionality  as
              strcspn() libc function)
                 - accepts two string values
                   - first is input string
                   - second is set of unacceptable characters
                 - returns numeric value
              sl - calculate length of string
                 - accepts one string value
                 - returns numeric value
              ns - convert number to string by format
                 - accepts two parameters
                   - first parameter is format string  same  as  for  printf()
              function
                   - second is number which we want to convert
                 - returns string value
              lc  -  return position of last occurrence of specified character
              inside string
                 - accepts two string parameters
                   - first string which we are searching in
                   - second string contains character for which we are looking
              for
                 - returns numeric value
              + - add two numeric values
                 - accepts two numeric values
                 - returns numeric value
              - - subtract two numeric values
                 - accepts two numeric values
                 - returns numeric value
              % - modulo addition
                 - accepts two numeric values
                 - returns numeric value
              * - multiply two numeric values
                 - accepts two numeric values
                 - returns numeric value
              / - divide two numeric values
                 - accepts two numeric values
                 - returns numeric value
              rmpar - remove parameter from query string
                - accepts two strings
                  - first string is string which we are adjusting
                  -  second  parameter  is  name  of parameter which should be
                    removed
                - returns adjusted string
              getval - get query string parameter value
                - accepts two strings
                  - first string  is  query  string  from  which  to  get  the
                    parameter value (usually %s)
                  -  second  string  is name of parameter for which we want to
                    get the value
                - returns value of the parameter  or  empty  string  when  the
                  parameter doesn’t exist
              sif - logical decision
                - accepts three parameters
                  -  first  is numeric and when it is zero then result of this
                    decision is result of second  parameter,  else  result  is
                    result of third parameter
                  - second parameter is string
                  - third parameter is string
                - returns string result of decision
              ! - logical not
                - accepts one numeric parameter
                - returns negation of parameter
              & - logical and
                - accept two numeric parameters
                - returns logical and of parameters
              | - logical or
                - accept two numeric parameters
                - returns logical or of parameters
              getext - get file extension
                - accept one string (filename or path)
                - return string containing extension of parameter
              seq - compare two strings
                - accepts two strings for comparison
                - returns numeric value 0 - if different 1 - if equal
              jsf - execute JavaScript function
                - accepts one string parameter which holds name of
                  JavaScript function specified in script loaded with
                  -js_script_file option.
                - returns string value equal to return value of
                  JavaScript function
                - this function is available only when pavuk is compiled
                  with support for JavaScript bindings

              For  example,  if you are mirroring very huge number of internet
              sites  into  same  local  directory,  too  many  entries  in one
              directory can cause  performance  problems.  You  may  use  for
              example hsh or md5 functions to generate one additional level of
              hash directories based on hostname with one of following options
              :

              -fnrules F '*' '(sc (ns "%02d/" (hsh %h 100)) %o)'
              -fnrules F '*' '(sc (ss (md5 %h) 0 2) %o)'

       -base_level $nr
              Number of directory levels to omit in local tree.

              For         example         when         downloading         URL
              ftp://ftp.idata.sk/pub/unix/www/pavuk-0.7pl1.tgz  you  enter  at
              command line  -base_level  4  in  local  tree  will  be  created
              www/pavuk-0.7pl1.tgz                                         not
              ftp/ftp.idata.sk_21/pub/unix/www/pavuk-0.7pl1.tgz as normally.

       -default_prefix $str
              Default prefix of mirrored directory. This option is  used  only
              when  you  are trying to synchronize content of remote directory
              which was downloaded using -base_level option. Also you must use
              directory   based   synchronization   method,   not   URL  based
              synchronization method. This is especially useful, when used  in
              conjunction with -remove_old option.

       -remove_adv/-noremove_adv
              This  option is used for turn on/off of removing HTML tags which
              contains advertisement banners.  The  banners  are  not  removed
              from  HTML file, but are commented out.  Such URLs also will not
              be downloaded.  This option  has  effect  only  when  used  with
              option   -adv_re.   Default  is  turned  off.   This  option  is
              available  only  when  your  system  has  support  for  one   of
              supported regular expressions implementation.

       -adv_re $RE
              This  option is used to specify regular expressions for matching
              URLs  of  advertisement  banners.    For   example   :   -adv_re
              http://ad.doubleclick.net/.*   is  used  to match all files from
              server ad.doubleclick.net.  This option is available  only  when
              your    system    has    any   supported   regular   expressions
              implementation.

       -unique_name/-nounique_name
              By default pavuk always attempts to assign a unique local
              filename to each unique URL. If this behavior is not wanted, you
              can use option -nounique_name to disable it.

Other Options

       -sleep $nr
              This option allows you to specify the number of seconds for
              which the program will be suspended between two transfers.
              Useful to avoid overloading the server. Default value for this
              option is 0.

       -rsleep/-norsleep
              When this option is active, pavuk randomizes the sleep time
              between transfers in the interval between zero and the value
              specified with the -sleep option. By default this option is
              inactive.

       -ddays $nr
              If the modification time of a document is more than $nr days
              old, then in sync mode pavuk attempts to retrieve a newer copy
              of the document from the remote server. Default value is 0.

       -remove_old/-noremove_old
              Remove obsolete documents (those which no longer exist on the
              remote site). This option has effect only when used in directory
              based sync mode. When used with URL based sync mode, pavuk will
              not remove any old files which were excluded from the document
              tree and are not referenced in any HTML document. You must also
              use option -subdir, to let pavuk find the files which belong to
              the current mirror. By default pavuk won’t remove any old files.

       -browser $str
              Is used to set your browser command (in the URL tree dialog you
              can use right click to raise a menu, from which you can start
              the browser on the currently selected URL). This option is
              available only when compiled with GTK GUI and with support for
              URL tree preview.

       -debug/-nodebug
              Turns on displaying of debug messages. This option is available
              only when compiled with -DDEBUG. If the -debug option is used,
              pavuk will output verbose information about documents, whole
              protocol level information, locking information and more
              (depending on the -debug_level setup). This option is used just
              as a trigger to enable output of the debug messages selected by
              the -debug_level option. Default is debug mode turned off.

       -debug_level $level
              Set the level of required debug information. $level can be a
              numeric value which represents a binary mask of the requested
              debug levels, or a comma separated list of supported debug
              levels. Currently pavuk supports the following debug levels:
              html - for HTML parser debugging
              protos - to see server side protocol messages
              protoc - to see client side protocol messages
              procs - to see some special procedure calls
              locks - for debugging of documents locking
              net - for debugging some low level network stuff
              misc - for miscellaneous unsorted debug messages
              user - for verbose user level messages
              all - request all currently supported debug levels
              mtlock - locking of resources in multithreading environment
              mtthr - launching/waking/sleeping/stopping of threads in
              multithreaded environment
              protod - for debugging of POST requests
              limits - for debugging limiting options, you will see the reason
              why particular URLs are  rejected  by  pavuk  and  which  option
              caused this.
              ssl - to enable verbose reporting about SSL related things.

       -remind_cmd $str
              This option has effect only when running pavuk in reminder
              mode. Pavuk sends the result of running reminder mode to the
              command specified with this option. The result lists URLs which
              have changed and URLs which have any errors. Default remind
              command is "mailx user@server -s \"pavuk reminder result\"" .

       -nscache_dir $dir
              Path to Netscape browser cache directory. If you specify this
              path, pavuk attempts to find out whether the URL is in this
              cache. If the URL is there, it will be fetched from the cache;
              otherwise pavuk will download it from the net. The cache
              directory index file must be named index.db and must be located
              in the cache directory. To support this feature, pavuk has to
              be linked with BerkeleyDB 1.8x .

       -mozcache_dir $dir
              Path to Mozilla browser cache directory. Same functionality as
              with previous option, just for different browser with different
              cache formats. Pavuk supports both formats of Mozilla browser
              disk cache (the old one for versions <0.9 and the new one used
              in versions >=0.9). The old format cache directory must contain
              the cache directory index database named cache.db. The new
              format cache directory must contain the map file _CACHE_MAP_ and
              three block files _CACHE_001_, _CACHE_002_, _CACHE_003_. To
              support the old Mozilla cache format, pavuk has to be linked
              with BerkeleyDB 1.8x. The new Mozilla cache format doesn’t
              require any external library.

       -post_cmd $str
              Post-processing command, which will be executed after successful
              download of a document. This command may process the document
              in some way. While this command is running, pavuk keeps the
              actual document locked, so there is no chance that some other
              pavuk process will modify the document. This postprocessing
              command will get three additional parameters from pavuk.
                 - local name of document
                 - 1/0 1 if document is HTML document, 0 if not
                 - original URL of this document

       -hack_add_index/-nohack_add_index
              This is a slightly hacky option. It forces pavuk to also add to
              the URL queue the directory indexes of all queued documents.
              This allows pavuk to download more documents from a site than
              it is able to achieve by normal traversal of HTML documents. A
              bit dirty, but useful in some cases.

       -js_script_file $file
              Pavuk optionally has a builtin JavaScript interpreter to allow
              high level customization of some internal procedures. Currently
              you are allowed to customize two things with your own
              JavaScript functions. You can use them to set precise limiting
              options, or you can write your own functions which can be used
              inside rules of the -fnrules option. With this option you can
              load a JavaScript script with functions into pavuk’s internal
              JavaScript interpreter. To learn more about these capabilities
              read the separate document jsbind.txt which comes with the
              pavuk sources in the toplevel directory. This option is
              available only when you have compiled pavuk with support for
              JavaScript bindings.

EXIT STATUS

       As of version 0.9pl29 pavuk has changed the way it indicates status by
       exit codes. In earlier versions exit status 0 meant no error and a
       nonzero exit status was something like the count of failed documents.
       In all versions after 0.9pl29 the following exit codes are defined:

           0 - no error, everything is OK
           1 - error in configuration of pavuk options or
               error in config files
           2 - some error occurred while downloading documents

ENVIRONMENTAL VARIABLES

       USER   variable  is  used  to  construct  email  address  from user and
              hostname

       LC_* or LANG
              used to set internationalized environment

       PAVUKRC_FILE
              with this variable you can specify alternative location for your
              pavukrc configuration file.

REQUIRED EXTERNAL PROGRAMS

       at     is used for scheduling.

       gunzip is used to decode gzip or compress encoded documents.

Bugs

       If you find any, please let me know.

FILES

       @SYSCONFDIR@/pavukrc

       ~/.pavukrc

       ~/.pavuk_prefs

              These  files  are  used as default configuration files.  You may
              specify there some constant values like  your  proxy  server  or
              your   preferred  WWW  browser.  Configuration  options  reflect
              command line options.  Not all parameters are suitable  for  use
              in  default  configuration file.  You should select only some of
              them, which you really need.

              File ~/.pavuk_prefs is special file which contains automatically
              stored  configuration.   This file is used only when running GUI
              interface of pavuk and option -prefs is active.

              First (if present)  parsed  file  is  @SYSCONFDIR@/pavukrc  then
              ~/.pavukrc (if present), then ~/.pavuk_prefs (if present).  Last
              the command line is parsed. The precedence is as follows :

              - highest -
              Entered in user interface
              Entered in command line
              ~/.pavuk_prefs
              ~/.pavukrc
              @SYSCONFDIR@/pavukrc
              - lowest -

              Here is table of config file - command line options pairs.

              MaxLevel:                  --->  -lmax
              MaxDocs:                   --->  -dmax
              MaxSize:                   --->  -maxsize
              MinSize:                   --->  -minsize
              SleepBetween:              --->  -sleep
              MaxRetry:                  --->  -retry
              MaxRegets:                 --->  -nregets
              MaxRedirections:           --->  -nredirs
              CommTimeout:               --->  -timeout
              RegetRollbackAmount:       --->  -rollback
              DocExpiration:             --->  -ddays
              UseCache:                  --->  -nocache
              UseRobots:                 --->  -noRobots
              AllowFTP:                  --->  -noFTP
              AllowHTTP:                 --->  -noHTTP
              AllowSSL:                  --->  -noSSL
              AllowGopher:               --->  -noGopher
              AllowCGI:                  --->  -noCGI
              AllowGZEncoding:           --->  -noEnc
              AllowFTPRecursion:         --->  -FTPdir
              ForceReget:                --->  -force_reget
              Debug:                     --->  -debug
              AllowedSites:              --->  -asite
              DisallowedSites:           --->  -dsite
              AllowedDomains:            --->  -adomain
              DisallowedDomains:         --->  -ddomain
              AllowedPrefixes:           --->  -aprefix
              DisallowedPrefixes:        --->  -dprefix
              AllowedSuffixes:           --->  -asfx
              DisallowedSuffixes:        --->  -dsfx
              AllowedMIMETypes:          --->  -amimet
              DisallowedMIMETypes:       --->  -dmimet
              PreferredLanguages:        --->  -alang
              PreferredCharset:          --->  -acharset
              WorkingDir:                --->  -cdir
              WorkingSubDir:             --->  -subdir
              HTTPAuthorizationScheme:   --->  -auth_scheme
              HTTPAuthorizationName:     --->  -auth_name
              HTTPAuthorizationPassword: --->  -auth_passwd
              AuthReuseDigestNonce:      --->  -auth_reuse_nonce
              SSLCertPassword:           --->  -ssl_cert_passwd
              SSLCertFile:               --->  -ssl_cert_file
              SSLKeyFile:                --->  -ssl_key_file
              EmailAddress:              --->  -from
              MatchPattern:              --->  -pattern
              REMatchPattern:            --->  -rpattern
              SkipMatchPattern:          --->  -skip_pattern
              SkipREMatchPattern:        --->  -skip_rpattern
              URLMatchPattern:           --->  -url_pattern
              URLREMatchPattern:         --->  -url_rpattern
              SkipURLMatchPattern:       --->  -skip_url_pattern
              SkipURLREMatchPattern:     --->  -skip_url_rpattern
              DefaultMode:               --->  -mode
              FTPProxy:                  --->  -ftp_proxy
              HTTPProxy:                 --->  -http_proxy
              SSLProxy:                  --->  -ssl_proxy
              GopherProxy:               --->  -gopher_proxy
              FTPViaHTTPProxy:           --->  -ftp_httpgw
              GopherViaHTTPProxy:        --->  -gopher_httpgw
              HTTPProxyUser:             --->  -http_proxy_user
              HTTPProxyPass:             --->  -http_proxy_pass
              HTTPProxyAuth:             --->  -http_proxy_auth
              AuthReuseProxyDigestNonce: --->  -auth_reuse_proxy_nonce
              Browser:                   --->  -browser
              ScenarioDir:               --->  -scndir
              ShowProgress:              --->  -progress
              XMaxLogSize:               --->  -xmaxlog
              LogFile:                   --->  -logfile
              RemoveOldDocuments:        --->  -remove_old
              AuthFile:                  --->  -auth_file
              BaseLevel:                 --->  -base_level
              FTPDirtyProxy:             --->  -ftp_dirtyproxy
              ActiveFTPData:             --->  -ftp_active/-ftp_passive
              ActiveFTPPortRange:        --->  -active_ftp_port_range
              AlwaysMDTM:                --->  -always_mdtm/-noalways_mdtm
              RemoveBeforeStore:         --->  -(no)remove_before_store
              ShowDownloadTime:          --->  -stime
              NLSMessageCatalogDir:      --->  -msgcat
              Quiet:                     --->  -quiet/-verbose
              NewerThan:                 --->  -newer_than
              OlderThan:                 --->  -older_than
              Reschedule:                --->  -reschedule
              DontLeaveSite:             --->  -dont_leave_site/-leave_site
              DontLeaveDir:              --->  -dont_leave_dir/-leave_dir
              PreserveTime:              --->  -preserve_time/-nopreserve_time
              LeaveLevel:                --->  -leave_level
              GUIFont:                   --->  -gui_font
              UserCondition:             --->  -user_condition
              CookieFile:                --->  -cookie_file
              CookieSend:                --->  -cookie_send/-nocookie_send
              CookieRecv:                --->  -cookie_recv/-nocookie_recv
              CookieUpdate:              --->  -cookie_update/-nocookie_update
              CookiesMax:                --->  -cookies_max
              CookieCheckDomain:         --->  -cookie_check/-nocookie_check
              DisabledCookieDomains:     --->  -disabled_cookie_domains
              DisableHTMLTag:            --->  -disable_html_tag
              EnableHTMLTag:             --->  -enable_html_tag
              TrDeleteChar:              --->  -tr_del_chr
              TrStrToStr:                --->  -tr_str_str
              TrChrToChr:                --->  -tr_chr_chr
              IndexName:                 --->  -index_name
              StoreName:                 --->  -store_name
              PreservePermisions:        --->  -preserve_perm/-nopreserve_perm
              PreserveAbsoluteSymlinks:  --->  -preserve_slinks/-nopreserve_slinks
              FTPListCMD:                --->  -FTPlist/-noFTPlist
              MaxRate:                   --->  -maxrate
              MinRate:                   --->  -minrate
              ReadBufferSize:            --->  -bufsize
              BgMode:                    --->  -bg/-nobg
              CheckSize:                 --->  -check_size/-nocheck_size
              SLogFile:                  --->  -slogfile
              Identity:                  --->  -identity
              SendFromHeader:            --->  -send_from/-nosend_from
              RunX:                      --->  -runX
              FnameRules:                --->  -fnrules
              StoreDocInfoFiles:         --->  -store_info/-nostore_info
              AllLinksToLocal:           --->  -all_to_local/-noall_to_local
              AllLinksToRemote:          --->  -all_to_remote/-noall_to_remote
              SelectedLinksToLocal:      --->  -sel_to_local/-nosel_to_local
              ReminderCMD:               --->  -remind_cmd
              AutoReferer:               --->  -auto_referer/-noauto_referer
              URLsFile:                  --->  -urls_file
              UsePreferences:            --->  -prefs/-noprefs
              FTPhtml:                   --->  -FTPhtml/-noFTPhtml
              StoreDirIndexFile:         --->  -store_index/-nostore_index
              Language:                  --->  -language
              FileSizeQuota:             --->  -file_quota
              TransferQuota:             --->  -trans_quota
              FSQuota:                   --->  -fs_quota
              EnableJS:                  --->  -enable_js/-disable_js
              UrlSchedulingStrategy:     --->  -url_strategy
              NetscapeCacheDir:          --->  -nscache_dir
              RemoveAdvertisement:       --->  -remove_adv/-noremove_adv
              AdvBannerRE:               --->  -adv_re
              CheckIfRunnigAtBackground: --->  -check_bg/-nocheck_bg
              SendIfRange:               --->  -send_if_range/-nosend_if_range
              SchedulingCommand:         --->  -sched_cmd
              UniqueLogName:             --->  -unique_log/-nounique_log
              PostCommand:               --->  -post_cmd
              SSLVersion:                --->  -ssl_version
              UniqueSSLID:               --->  -unique_sslid/-nounique_sslid
              AddHTTPHeader:             --->  -httpad
              StatisticsFile:            --->  -statfile
              WaitOnExit:                --->  -ewait
              AllowedIPAdrressPattern:   --->  -aip_pattern
              DisallowedIPAdrressPattern:--->  -dip_pattern
              SiteLevel:                 --->  -site_level
              UseHTTP11:                 --->  -use_http11
              MaxRunTime:                --->  -max_time
              LocalIP:                   --->  -local_ip
              RequestInfo:               --->  -request
              HashSize:                  --->  -hash_size
              NumberOfThreads:           --->  -nthreads
              ImmediateMessages:         --->  -immesg/-noimmesg
              HTMLFormData:              --->  -formdata
              DumpFD:                    --->  -dumpfd
              DumpUrlFD:                 --->  -dump_urlfd
              DeleteAfterTransfer:       --->  -del_after/-nodel_after
              UniqueDocName:             --->  -unique_name/-nounique_name
              LeaveSiteEnterDirectory:   --->  -leave_site_enter_dir/-dont_leave_site_enter_dir
              SinglePage:                --->  -singlepage/-nosinglepage
              NTLMAuthorizationDomain:   --->  -auth_ntlm_domain
              NTLMProxyAuthorizationDomain:
                                         --->  -auth_proxy_ntlm_domain
              JavascriptPattern:         --->  -js_pattern
              FollowCommand:             --->  -follow_cmd
              RetrieveSymlinks:          --->  -retrieve_symlink/-noretrieve_symlink
              JSTransform:               --->  -js_transform
              JSTransform2:              --->  -js_transform2
              FTPProxyUser:              --->  -ftp_proxy_user
              FTPProxyPassword:          --->  -ftp_proxy_pass
              LimitInlineObjects:        --->  -limit_inlines/-dont_limit_inlines
              FTPListOptions:            --->  -ftp_list_options
              FixWuFTPDBrokenLISTcmd:    --->  -fix_wuftpd_list/-nofix_wuftpd_list
              PostUpdate:                --->  -post_update/-nopost_update
              SeparateInfoDir:           --->  -info_dir
              MozillaCacheDir:           --->  -mozcache_dir
              AllowedPorts:              --->  -aport
              DisallowedPorts:           --->  -dport
              HackAddIndex:              --->  -hack_add_index/-nohack_add_index
              JavaScriptFile:            --->  -js_script_file
              FtpLoginHandshake:         --->  -ftp_login_handshake
              NSSCertDir:                --->  -nss_cert_dir
              NSSAcceptUnknownCert:      --->  -nss_accept_unknown_cert/-nonss_accept_unknown_cert
              NSSDomesticPolicy:         --->  -nss_domestic_policy/-nss_export_policy
              DontTouchUrlREPattern:     --->  -dont_touch_url_rpattern
              DontTouchUrlPattern:       --->  -dont_touch_url_pattern
              DontTouchTagREPattern:     --->  -dont_touch_tag_rpattern
              HTMLTagPattern:            --->  -tag_pattern
              HTMLTagREPattern:          --->  -tag_rpattern
              URL:                       --->  one URL (more lines with URL:
                                               ... means more URL’s)

       A line which begins with ’#’ is a comment.
       TrStrToStr: and TrChrToChr: must contain two quoted strings. All
       parameter names are case insensitive. If any option is missing here,
       try to look inside the config.c source file.

       See pavukrc.sample file for example

       .pavuk_authinfo

              File should contain as many authentication records as you
              need. Records are separated by any number of empty lines.
              Parameter names are case insensitive.

              Structure of record:

              Proto: <proto ID>    ---> identification of protocol
                                        (ftp/http/https/..)
                                   - required field
              Host: <host:[port]>  ---> host name
                                   - required field
              User: <user>         ---> name of user
                                   - optional
              Pass: <password>     ---> password for user
                                   - optional
              Base: <path>         ---> base prefix of document path
                                   - optional
              Realm: <name>        ---> realm for HTTP authorization
                                   - optional
              NTLMDomain: <domain> ---> NT/LM domain for NTLM authorization
                                   - optional
              Type: <type>         ---> HTTP authentication scheme
                                             - 1/user   - user auth scheme
                                             - 2/Basic  - Basic auth scheme (default)
                                             - 3/Digest - Digest auth scheme
                                             - 4/NTLM   - NTLM auth scheme
                                   - optional

       see pavuk_authinfo.sample file for example

       ~/.pavuk_keys
              This is the file in which information about configurable menu
              option shortcuts is stored. This is available only when
              compiled with Gtk+1.2 or higher.

       ~/.pavuk_remind_db
              This file contains information about URLs for running in
              reminder mode. The structure of this file is very simple. Each
              line contains information about one URL. The first entry in the
              line is the last known modification time of the URL (stored in
              time_t format - number of seconds since 1.1.1970 GMT). The
              second entry is the URL.

EXAMPLE COMMAND LINE

       pavuk -mode mirror -nobg -store_info -info_dir
       /mirror/info -nthreads 1 -cdir /mirror/incoming -subdir
       /mirror/incoming -preserve_time -nopreserve_perm
       -nopreserve_slinks -noretrieve_symlink -force_reget
       -noRobots -trans_quota 16384 -maxsize 16777216
       -max_time 28 -nodel_after -remove_before_store -ftpdir
       -ftplist -ftp_list_options -a -dont_leave_site
       -dont_leave_dir -all_to_local -remove_old -nostore_index
       -active_ftp_port_range 57344:65535 -always_mdtm
       -ftp_passive -base_level 2 http://<my_host>/doc/

SEE ALSO

       Look into the ChangeLog file for more information about new features
       in particular versions of pavuk.

AUTHOR

       Main development Ondrejicka Stefan
       Look into CREDITS file of sources for additional information.

AVAILABILITY

       pavuk is available from http://pavuk.sourceforge.net/