Title Thomas D. Craven Papers
Archive GEN
Plugin edu.bc.DigiToolPlugin by Boston College
Base URL http://dcollections.bc.edu
Description
Repository Type DigiTool

Related Items

This collection has no metadata.
<map>
  <!-- Identifier of the plugin this definition belongs to; it is the same
       value shown on the "Plugin" line of the page header
       (edu.bc.DigiToolPlugin). -->
  <entry>
    <string>plugin_identifier</string>
    <string>edu.bc.DigiToolPlugin</string>
  </entry>
  <!-- Default pause time for an archival unit (AU), serialized as a long.
       NOTE(review): the unit is not stated in this file; presumably
       milliseconds (6000 = 6 s). Confirm against the LOCKSS documentation. -->
  <entry>
    <string>au_def_pause_time</string>
    <long>6000</long>
  </entry>
  <!-- Version of this plugin definition. The value is serialized as a string
       ("3"), unlike the numeric au_* settings in this map. -->
  <entry>
    <string>plugin_version</string>
    <string>3</string>
  </entry>
  <!-- Crawl depth for an AU, serialized as an int.
       NOTE(review): presumably the maximum number of link hops followed from
       the start URL; confirm the exact meaning against the LOCKSS docs. -->
  <entry>
    <string>au_crawl_depth</string>
    <int>3</int>
  </entry>
  <!-- Template for the AU display name: a quoted printf-style format string
       followed by its argument list. The single %s takes the volume_name
       config parameter declared under plugin_config_props. -->
  <entry>
    <string>au_name</string>
    <string>&quot;BC Digital Collection %s&quot;, volume_name</string>
  </entry>
  <!-- Human-readable name of the plugin (as opposed to the dotted
       plugin_identifier). -->
  <entry>
    <string>plugin_name</string>
    <string>Boston College DigiTool Plugin</string>
  </entry>
  <!-- Default interval between new-content crawls, serialized as a long.
       NOTE(review): the unit is not stated in this file; if it is
       milliseconds, 7257600000 equals 84 days. Confirm. -->
  <entry>
    <string>au_def_new_content_crawl</string>
    <long>7257600000</long>
  </entry>
  <!-- Free-text notes describing the plugin. The line breaks and trailing
       spaces inside the string are part of the value, so the text is not
       re-indented.
       NOTE(review): the note says all content under Base_URL is fetched,
       while au_crawlrules lists specific patterns; confirm the note still
       describes this plugin accurately. -->
  <entry>
    <string>plugin_notes</string>
    <string>This plugin starts to crawl at Base_URL/lockss/manifest.html 
It fetches all content it encounters whose urls start with Base_URL. 
It is useful for small sites that are to be harvested completely as they are delivered from web servers. Sites that are database/software driven may want to include database dumps and software archives and link to them from the manifest page.</string>
  </entry>
  <!-- Crawl rules, kept as an ordered list. Each item has the form
       "<action code>,<quoted printf-style regex template>[, arguments]";
       every %s is filled from the config parameters listed after the
       template.
       NOTE(review): the action codes are assumed to follow the LOCKSS
       CrawlRules.RE constants (1 = include on match, 4 = exclude when the
       pattern does not match); confirm against the LOCKSS plugin docs. -->
  <entry>
    <string>au_crawlrules</string>
    <list>
      <!-- Scope rule: the pattern is anchored to the start of base_url. -->
      <string>4,&quot;^%s&quot;, base_url</string>
      <!-- Manifest pages. Both patterns are anchored at each end and escape
           the dot, so only the exact manifest URLs match. -->
      <string>1,&quot;^%s/manifest\.html$&quot;, base_url</string>
      <string>1,&quot;^%s/lockss/manifest\.html$&quot;, base_url</string>
      <!-- Anything under /lockss/ whose path starts with the volume name. -->
      <string>1,&quot;^%s/lockss/%s.*$&quot;, base_url, volume_name</string>
      <!-- DeliveryManager metadata requests (GET_XML=1) for a numeric pid. -->
      <string>1,&quot;^%s/webclient/DeliveryManager\?metadata_request=true&amp;GET_XML=1&amp;pid=[0-9]+$&quot;, base_url</string>
      <!-- Not anchored at the start: matches any URL that ends in "/?"
           followed by a query string containing no further "/". -->
      <string>1,&quot;/\?[^/]*$&quot;</string>
    </list>
  </entry>
  <!-- Template for the URL where the crawl starts: base_url followed by
       /lockss/manifest.html, the same location named in plugin_notes. -->
  <entry>
    <string>au_start_url</string>
    <string>&quot;%s/lockss/manifest.html&quot;, base_url</string>
  </entry>
  <!-- Maps result codes to handler classes, one "code=class" item per line.
       Here 404 maps to CacheException$NoRetryDeadLinkException.
       NOTE(review): going by the class name, a 404 is presumably treated as
       a dead link that is not retried; confirm against the LOCKSS source. -->
  <entry>
    <string>plugin_cache_result_list</string>
    <list>
      <string>404=org.lockss.util.urlconn.CacheException$NoRetryDeadLinkException</string>
    </list>
  </entry>
  <!-- Configuration parameters the plugin accepts. These are the names
       referenced as template arguments elsewhere in this map (base_url,
       volume_name). Both are marked definitional.
       NOTE(review): the <type> values are numeric codes whose meaning is not
       visible in this file (presumably 3 = URL and 1 = string, per the
       ConfigParamDescr constants); confirm. -->
  <entry>
    <string>plugin_config_props</string>
    <list>
      <!-- base_url: root URL of the site.
           NOTE(review): the description's example ends with "/", but the
           templates in this map append "/lockss/..." directly to base_url,
           which would produce "//" if the configured value ends with a
           slash. Confirm the expected form. -->
      <org.lockss.daemon.ConfigParamDescr>
        <key>base_url</key>
        <displayName>Base URL</displayName>
        <description>Usually of the form http://&lt;journal-name&gt;.com/</description>
        <type>3</type>
        <size>40</size>
        <definitional>true</definitional>
        <defaultOnly>false</defaultOnly>
      </org.lockss.daemon.ConfigParamDescr>
      <!-- volume_name: used in the au_name template and in the /lockss/
           crawl rule. No <description> element is provided for it. -->
      <org.lockss.daemon.ConfigParamDescr>
        <key>volume_name</key>
        <displayName>Volume Name</displayName>
        <type>1</type>
        <size>20</size>
        <definitional>true</definitional>
        <defaultOnly>false</defaultOnly>
      </org.lockss.daemon.ConfigParamDescr>
    </list>
  </entry>
</map>
Parameter Values Status Copies
(Loading)