'textfield',
'#title' => t('Excluded file extensions'),
'#default_value' => variable_get('apachesolr_attachments_excluded_extensions', $default),
'#size' => 80,
'#maxlength' => 255,
'#description' => t('File extensions that are excluded from indexing. Separate extensions with a space and do not include the leading dot. Extensions are internally mapped to a MIME type, so it is not necessary to put variations that map to the same type (e.g. tif is sufficient for tif and tiff)'),
);
$form['apachesolr_attachments_extract_using'] = array(
'#type' => 'radios',
'#title' => t('Extract using'),
'#options' => array(
'tika' => t('Tika (local java application)'),
'solr' => t('Solr (remote server)'),
),
'#description' => t("Extraction will be faster if run locally using tika."),
'#default_value' => variable_get('apachesolr_attachments_extract_using', 'tika'),
);
$form['apachesolr_attachments_tika_path'] = array(
'#type' => 'textfield',
'#title' => t('Tika directory path'),
'#size' => 80,
'#maxlength' => 100,
'#description' => t("The full path to the tika directory. All library jars must be in the same directory. If on Windows, use forward slashes in the path."),
'#default_value' => variable_get('apachesolr_attachments_tika_path', ''),
);
$form['apachesolr_attachments_tika_jar'] = array(
'#type' => 'textfield',
'#title' => t('Tika jar file'),
'#size' => 20,
'#description' => t("The name of the tika CLI application jar file, e.g. tika-app-1.1.jar."),
'#default_value' => variable_get('apachesolr_attachments_tika_jar', 'tika-app-1.1.jar'),
);
$form = system_settings_form($form);
$form['#validate'][] = 'apachesolr_attachments_settings_validate';
$form['#submit'][] = 'apachesolr_attachments_settings_submit';
return $form;
}
/**
 * Form validation handler for the Apache Solr Attachments settings form.
 *
 * When extraction through the local Tika application is selected, checks that
 * the configured jar file exists inside the configured Tika directory and
 * flags the path field if it does not.
 *
 * @param array $form
 *   The settings form structure (not used here).
 * @param array $form_state
 *   The form state. The submitted values
 *   'apachesolr_attachments_extract_using',
 *   'apachesolr_attachments_tika_path' and 'apachesolr_attachments_tika_jar'
 *   are read.
 *
 * @see apachesolr_attachments_settings()
 */
function apachesolr_attachments_settings_validate($form, &$form_state) {
  $values = $form_state['values'];
  if ($values['apachesolr_attachments_extract_using'] != 'tika') {
    // Remote extraction through Solr needs no local jar.
    return;
  }

  // realpath() returns FALSE when the directory does not exist. Without the
  // explicit check FALSE would be cast to '' and the jar would be looked up in
  // the filesystem root instead of being reported as missing.
  $path = realpath($values['apachesolr_attachments_tika_path']);

  // is_file() rather than file_exists(): an empty jar name would otherwise
  // resolve to the directory itself and pass validation.
  if ($path === FALSE || !is_file($path . '/' . $values['apachesolr_attachments_tika_jar'])) {
    form_set_error('apachesolr_attachments_tika_path', t('Tika jar file not found at this path.'));
  }
}
/**
 * Form submission handler for the Apache Solr Attachments settings form.
 *
 * Removes the stored 'apachesolr_attachments_excluded_mime' variable and
 * reminds the administrator that a re-index may be required.
 *
 * @param array $form
 *   The settings form structure (not used here).
 * @param array $form_state
 *   The form state (not used here).
 *
 * @see apachesolr_attachments_settings()
 */
function apachesolr_attachments_settings_submit($form, &$form_state) {
  // Drop the derived variable so that it gets rebuilt.
  variable_del('apachesolr_attachments_excluded_mime');

  $notice = t('If you changed the allowed file extensions, you may need to delete and re-index all attachments.');
  drupal_set_message($notice);
}
/**
 * Form builder for the Apachesolr Attachments actions form.
 *
 * Builds a collapsible "Actions" fieldset holding the environment id and
 * three submit buttons, each wired to its own submit handler.
 *
 * @param array $form
 *   The incoming form structure; it is discarded and a new one is built.
 * @param array $form_state
 *   The form state (not used here).
 * @param mixed $env_id
 *   Environment identifier stored in the form as a 'value' element.
 *
 * @return array
 *   The form structure.
 */
function apachesolr_attachments_index_action_form($form, &$form_state, $env_id) {
  // Wrapper fieldset; the children are appended below in display order.
  $actions = array(
    '#type' => 'fieldset',
    '#title' => t('Actions'),
    '#collapsible' => TRUE,
  );

  // Carried along with the submission without being rendered.
  $actions['env_id'] = array(
    '#type' => 'value',
    '#value' => $env_id,
  );

  // Button: clear the extraction cache.
  $actions['reset'] = array(
    '#prefix' => '
',
    '#suffix' => '
',
    '#type' => 'submit',
    '#value' => t('Clear the attachment text extraction cache'),
    '#submit' => array('apachesolr_attachments_index_action_form_reset_submit'),
  );

  // Button: delete the attachments from the index.
  $actions['delete'] = array(
    '#prefix' => '',
    '#suffix' => '
',
    '#type' => 'submit',
    '#value' => t('Delete the attachments from the index'),
    '#submit' => array('apachesolr_attachments_index_action_form_delete_submit'),
  );

  // Button: run the tika extraction test.
  $actions['extract'] = array(
    '#prefix' => '',
    '#suffix' => '
',
    '#type' => 'submit',
    '#value' => t('Test your tika extraction'),
    '#submit' => array('apachesolr_attachments_index_action_form_extraction_submit'),
  );

  return array('action' => $actions);
}
/**
 * Submit handler for the Indexer actions form, test button.
 *
 * Redirects to the tika extraction test page, forwarding any 'destination'
 * request parameter as a query argument of the redirect.
 *
 * @param array $form
 *   The form structure (not used here).
 * @param array $form_state
 *   The form state; its 'redirect' entry is set.
 */
function apachesolr_attachments_index_action_form_extraction_submit($form, &$form_state) {
  $destination = array();
  if (isset($_GET['destination'])) {
    // Capture the destination for the redirect query, then remove it from the
    // current request (presumably so it cannot take precedence over the
    // redirect set below -- see drupal_goto()).
    $destination = drupal_get_destination();
    unset($_GET['destination']);
  }

  $form_state['redirect'] = array(
    'admin/config/search/apachesolr/attachments/test',
    array('query' => $destination),
  );
}
/**
 * Submit handler for the Indexer actions form, reset button.
 *
 * Redirects to the 'clear-cache' confirmation page, forwarding any
 * 'destination' request parameter as a query argument of the redirect.
 *
 * @param array $form
 *   The form structure (not used here).
 * @param array $form_state
 *   The form state; its 'redirect' entry is set.
 */
function apachesolr_attachments_index_action_form_reset_submit($form, &$form_state) {
  $destination = array();
  if (isset($_GET['destination'])) {
    // Capture the destination for the redirect query, then remove it from the
    // current request (presumably so it cannot take precedence over the
    // redirect set below -- see drupal_goto()).
    $destination = drupal_get_destination();
    unset($_GET['destination']);
  }

  $form_state['redirect'] = array(
    'admin/config/search/apachesolr/attachments/confirm/clear-cache',
    array('query' => $destination),
  );
}
/**
 * Submit handler for the Indexer actions form, delete button.
 *
 * Redirects to the 'delete' confirmation page, forwarding any 'destination'
 * request parameter as a query argument of the redirect.
 *
 * @param array $form
 *   The form structure (not used here).
 * @param array $form_state
 *   The form state; its 'redirect' entry is set.
 */
function apachesolr_attachments_index_action_form_delete_submit($form, &$form_state) {
  $destination = array();
  if (isset($_GET['destination'])) {
    // Capture the destination for the redirect query, then remove it from the
    // current request (presumably so it cannot take precedence over the
    // redirect set below -- see drupal_goto()).
    $destination = drupal_get_destination();
    unset($_GET['destination']);
  }

  $form_state['redirect'] = array(
    'admin/config/search/apachesolr/attachments/confirm/delete',
    array('query' => $destination),
  );
}
/**
 * Form builder for the index confirmation form.
 *
 * Asks the administrator to confirm either the 'delete' or the 'clear-cache'
 * operation. Any other operation has no confirmation question and results in
 * a "page not found" response.
 *
 * @param array $form
 *   The incoming form structure; it is discarded and a new one is built.
 * @param array $form_state
 *   The form state (not used here).
 * @param string $operation
 *   The operation to confirm: 'delete' or 'clear-cache'.
 *
 * @return array
 *   The confirmation form structure produced by confirm_form().
 *
 * @see apachesolr_attachments_confirm_submit()
 */
function apachesolr_attachments_confirm($form, $form_state, $operation) {
  switch ($operation) {
    case 'delete':
      $text = t('Are you sure you want to delete and re-index the text of all file attachments?');
      break;

    case 'clear-cache':
      $text = t('Are you sure you want to delete the cache of extracted text from file attachments?');
      break;

    default:
      // Previously an unknown operation fell through with $text undefined,
      // rendering a confirmation form without a question. A form builder
      // cannot return MENU_NOT_FOUND, so deliver the 404 directly and stop.
      drupal_not_found();
      drupal_exit();
  }

  $form = array();
  // Passed through to the submit handler, which dispatches on it.
  $form['operation'] = array(
    '#type' => 'value',
    '#value' => $operation,
  );

  return confirm_form($form, $text, 'admin/config/search/apachesolr/attachments', NULL, t('Confirm'), t('Cancel'));
}
/**
 * Form submission handler for the index confirmation form.
 *
 * Dispatches on the confirmed operation, reports the outcome to the user and
 * redirects back to the attachments settings page.
 *
 * @param array $form
 *   The form structure (not used here).
 * @param array $form_state
 *   The form state; the 'operation' value is read and 'redirect' is set.
 *
 * @see apachesolr_attachments_confirm()
 */
function apachesolr_attachments_confirm_submit($form, &$form_state) {
  switch ($form_state['values']['operation']) {
    case 'delete':
      if (apachesolr_attachments_delete_index() && apachesolr_attachments_solr_reindex()) {
        // The '@url' argument was previously passed to t() without any
        // matching placeholder in the string, so the link never appeared.
        $cron_url = url('admin/reports/status/run-cron', array('query' => array('destination' => 'admin/config/search/apachesolr/index')));
        drupal_set_message(t('File text has been deleted from the Apache Solr index. You must now <a href="@url">run cron</a> until all files have been re-indexed.', array('@url' => $cron_url)));
      }
      elseif (module_exists('dblog')) {
        // Only point at the log report when the module providing it is
        // enabled. Failures are reported with the 'error' message type.
        drupal_set_message(t('Could not delete file text from the Apache Solr index. Check <a href="@url">recent log messages</a>.', array('@url' => url('admin/reports/dblog'))), 'error');
      }
      else {
        drupal_set_message(t('Could not delete file text from the Apache Solr index.'), 'error');
      }
      break;

    case 'clear-cache':
      // NOTE(review): the only call made here is
      // apachesolr_attachments_solr_reindex(); confirm that it is what clears
      // the extraction cache the message below refers to.
      apachesolr_attachments_solr_reindex();
      drupal_set_message(t('The local cache of extracted text has been deleted.'));
      break;
  }

  $form_state['redirect'] = 'admin/config/search/apachesolr/attachments';
}
/**
 * Tests whether text extraction succeeds with the current settings.
 *
 * Runs the bundled tests/test-tika.pdf through
 * apachesolr_attachments_get_attachment_text(), reports whether the expected
 * word was found in the result, removes the rows for the test file id from
 * the file indexer table and redirects to the attachments settings page.
 */
function apachesolr_attachments_test_tika_extraction() {
  module_load_include('inc', 'apachesolr_attachments', 'apachesolr_attachments.index');
  $indexer_table = apachesolr_get_indexer_table('file');

  // Build a minimal stand-in file object for the bundled test PDF.
  $file = new stdClass();
  $file->uri = drupal_get_path('module', 'apachesolr_attachments') . '/tests/test-tika.pdf';
  $file->filemime = 'application/pdf';
  $file->fid = 0;

  $text = apachesolr_attachments_get_attachment_text($file);

  // Checking for a single known word is sufficient. strpos() returns 0 when
  // the word is at the very start of the text, so compare against FALSE
  // instead of relying on truthiness; also guard against a non-string result.
  $extracted = is_string($text) && strpos($text, 'extraction') !== FALSE;
  if ($extracted) {
    drupal_set_message(t('Text can be succesfully extracted'));
  }
  else {
    drupal_set_message(t('Text can not be succesfully extracted. Please check your settings'), 'error');
  }

  // Remove any rows recorded for the test file id from the indexer table.
  db_delete($indexer_table)->condition('entity_id', $file->fid)->execute();
  drupal_goto('admin/config/search/apachesolr/attachments');
}
/**
 * Deletes this site's file documents from the Solr index.
 *
 * Issues a delete-by-query for documents with entity_type "file" and this
 * site's hash, commits, and then calls apachesolr_attachments_solr_reindex().
 * An exception raised along the way is logged through watchdog().
 *
 * @return bool
 *   TRUE when every step completed, FALSE when an exception was caught.
 *
 * @see apachesolr_delete_index()
 */
function apachesolr_attachments_delete_index() {
  try {
    $solr = apachesolr_get_solr();

    // Restrict the deletion to file entities belonging to this site.
    $query = "entity_type:file AND hash:" . apachesolr_site_hash();
    $solr->deleteByQuery($query);
    $solr->commit();

    module_load_include('inc', 'apachesolr', 'apachesolr.index');
    apachesolr_attachments_solr_reindex();

    return TRUE;
  }
  catch (Exception $e) {
    // The message is escaped here and passed with NULL variables.
    $message = nl2br(check_plain($e->getMessage()));
    watchdog('Apache Solr Attachments', $message, NULL, WATCHDOG_ERROR);
    return FALSE;
  }
}