From f5e6f8e2d87a37e6f06e16bde7ad6275ff3afc07 Mon Sep 17 00:00:00 2001
From: Ike Hecht
Date: Sun, 21 Oct 2018 13:04:38 -0600
Subject: [PATCH] Allow querying from private wikis

* Code by WikiWorks
---
 DefaultSettings.php                      | 13 ++++
 src/ByHttpRequest/QueryResultFetcher.php | 90 +++++++++++++++++++++++-
 src/ByHttpRequestQueryLookup.php         | 12 +++-
 src/HookRegistry.php                     | 36 ++++++++++
 src/QueryEncoder.php                     |  2 +-
 5 files changed, 148 insertions(+), 5 deletions(-)

diff --git a/DefaultSettings.php b/DefaultSettings.php
index 8952a40..db2f910 100644
--- a/DefaultSettings.php
+++ b/DefaultSettings.php
@@ -39,3 +39,16 @@ class_alias( 'SEQL\ByHttpRequestQueryLookup', 'SMWExternalAskQueryLookup' );
  * into MediaWiki's interwiki table or through this setting.
  */
 $GLOBALS['seqlgExternalRepositoryEndpoints'] = array();
+
+/**
+ * An array that defines credentials to access remote wikis in case they're read-protected
+ * Array keys should be named after interwiki prefixes from "seqlgExternalRepositoryEndpoints"
+ * and contain an array with "username" and "password" keys
+ */
+$GLOBALS['seqlgExternalRepositoryCredentials'] = array();
+
+/**
+ * An array that defines the list of namespaces allowed to execute queries against remote sources.
+ * Keep empty to allow every namespace.
+ */
+$GLOBALS['seqlgExternalQueryEnabledNamespaces'] = array();
diff --git a/src/ByHttpRequest/QueryResultFetcher.php b/src/ByHttpRequest/QueryResultFetcher.php
index 5f7b362..64c6b3f 100644
--- a/src/ByHttpRequest/QueryResultFetcher.php
+++ b/src/ByHttpRequest/QueryResultFetcher.php
@@ -50,6 +50,18 @@ class QueryResultFetcher {
 	 */
 	private $httpResponseCacheLifetime;
 
+	/**
+	 * @var array|false
+	 */
+	private $credentials;
+
+	/**
+	 * Cookie jar file per authenticated endpoint, keyed by endpoint URL
+	 *
+	 * @var string[]
+	 */
+	private static $cookies = array();
+
 	/**
 	 * @since 1.0
 	 *
@@ -57,10 +69,12 @@ class QueryResultFetcher {
 	 * @param QueryResultFactory $queryResultFactory
 	 * @param JsonResponseParser $jsonResponseParser
+	 * @param array|false $credentials
 	 */
-	public function __construct( HttpRequestFactory $httpRequestFactory, QueryResultFactory $queryResultFactory, JsonResponseParser $jsonResponseParser ) {
+	public function __construct( HttpRequestFactory $httpRequestFactory, QueryResultFactory $queryResultFactory, JsonResponseParser $jsonResponseParser, $credentials = false ) {
 		$this->httpRequestFactory = $httpRequestFactory;
 		$this->queryResultFactory = $queryResultFactory;
 		$this->jsonResponseParser = $jsonResponseParser;
+		$this->credentials = $credentials;
 	}
 
 	/**
@@ -99,6 +113,70 @@ public function setHttpResponseCacheLifetime( $httpResponseCacheLifetime ) {
 		$this->httpResponseCacheLifetime = $httpResponseCacheLifetime;
 	}
 
+	/**
+	 * Authenticates query against remote wiki using 'login' api and stores
+	 * the cookie jar to use on other requests to the same endpoint
+	 *
+	 * @param array $credentials with "username" and "password" keys
+	 */
+	public function doAuthenticateRemoteWiki( $credentials ) {
+
+		// Private, unpredictable file in the system temp directory instead of
+		// a guessable name relative to the current working directory
+		$cookiefile = tempnam( sys_get_temp_dir(), 'seql_' );
+
+		$httpRequest = $this->httpRequestFactory->newCurlRequest();
+
+		$httpRequest->setOption( CURLOPT_FOLLOWLOCATION, true );
+
+		$httpRequest->setOption( CURLOPT_RETURNTRANSFER, true );
+		$httpRequest->setOption( CURLOPT_FAILONERROR, true );
+		// NOTE(review): peer verification is disabled although credentials are
+		// sent over this connection; enable it unless self-signed remotes are required
+		$httpRequest->setOption( CURLOPT_SSL_VERIFYPEER, false );
+		$httpRequest->setOption( CURLOPT_COOKIESESSION, true );
+		$httpRequest->setOption( CURLOPT_COOKIEJAR, $cookiefile );
+		$httpRequest->setOption( CURLOPT_COOKIEFILE, $cookiefile );
+
+		$httpRequest->setOption( CURLOPT_URL, $this->httpRequestEndpoint . '?action=query&format=json&meta=tokens&type=login' );
+
+		$response = $httpRequest->execute();
+		$result = json_decode( $response, true );
+
+		if( isset( $result['query']['tokens']['logintoken'] ) ) {
+
+			$token = $result['query']['tokens']['logintoken'];
+
+			$httpRequest->setOption( CURLOPT_FOLLOWLOCATION, true );
+			$httpRequest->setOption( CURLOPT_RETURNTRANSFER, true );
+			$httpRequest->setOption( CURLOPT_FAILONERROR, true );
+			$httpRequest->setOption( CURLOPT_SSL_VERIFYPEER, false );
+			$httpRequest->setOption( CURLOPT_POST, true );
+			$httpRequest->setOption( CURLOPT_URL, $this->httpRequestEndpoint );
+			$httpRequest->setOption( CURLOPT_COOKIEJAR, $cookiefile );
+			$httpRequest->setOption( CURLOPT_COOKIEFILE, $cookiefile );
+
+			$httpRequest->setOption( CURLOPT_POSTFIELDS, http_build_query( array(
+					'action' => 'login',
+					'format' => 'json',
+					'lgname' => $credentials['username'],
+					'lgpassword' => $credentials['password'],
+					'lgtoken' => $token
+				))
+			);
+
+			$response = $httpRequest->execute();
+			$result = json_decode( $response, true );
+
+			if ( isset( $result['login']['lguserid'] ) ) {
+				// Remember the jar per endpoint so each remote wiki logs in on its own
+				self::$cookies[$this->httpRequestEndpoint] = $cookiefile;
+			}
+
+		}
+
+	}
+
 	/**
 	 * @since 1.0
 	 *
@@ -110,6 +188,11 @@ public function fetchQueryResult( Query $query ) {
 
 		$this->doResetPrintRequestsToQuerySource( $query );
 
+		// Log in once per endpoint when credentials are configured for it
+		if( $this->credentials && !isset( self::$cookies[$this->httpRequestEndpoint] ) ) {
+			$this->doAuthenticateRemoteWiki( $this->credentials );
+		}
+
 		list( $result, $isFromCache ) = $this->doMakeHttpRequestFor( $query );
 
 		if ( $result === array() || $result === false || $result === null ) {
@@ -185,6 +268,11 @@ private function doMakeHttpRequestFor( $query ) {
 			'Content-Type: application/json; charset=utf-8'
 		) );
 
+		if( isset( self::$cookies[$this->httpRequestEndpoint] ) ) {
+			$httpRequest->setOption( CURLOPT_COOKIEJAR, self::$cookies[$this->httpRequestEndpoint] );
+			$httpRequest->setOption( CURLOPT_COOKIEFILE, self::$cookies[$this->httpRequestEndpoint] );
+		}
+
 		$response = $httpRequest->execute();
 
 		return array( json_decode( $response, true ), $httpRequest->isFromCache() );
diff --git a/src/ByHttpRequestQueryLookup.php b/src/ByHttpRequestQueryLookup.php
index 01e84fc..5791ae1 100644
--- a/src/ByHttpRequestQueryLookup.php
+++ b/src/ByHttpRequestQueryLookup.php
@@ -52,19 +52,25 @@ public function getQueryResult( Query $query ) {
 			return $this->queryResultFactory->newEmptyQueryResult( $query );
 		}
 
-		return $this->fetchQueryResultFor( $query, $interwiki );
+		$credentials = false;
+		if ( isset( $GLOBALS['seqlgExternalRepositoryCredentials'][ $interwiki->getWikiID() ] ) ) {
+			$credentials = $GLOBALS['seqlgExternalRepositoryCredentials'][ $interwiki->getWikiID() ];
+		}
+
+		return $this->fetchQueryResultFor( $query, $interwiki, $credentials );
 	}
 
 	protected function tryToMatchInterwikiFor( Query $query ) {
 		return Interwiki::fetch( $query->getQuerySource() );
 	}
 
-	protected function fetchQueryResultFor( Query $query, $interwiki ) {
+	protected function fetchQueryResultFor( Query $query, $interwiki, $credentials = false ) {
 
 		$queryResultFetcher = new QueryResultFetcher(
 			new HttpRequestFactory( $this->getCacheFactory()->newMediaWikiCompositeCache( $GLOBALS['seqlgHttpResponseCacheType'] ) ),
 			$this->queryResultFactory,
-			new JsonResponseParser( new DataValueDeserializer( $query->getQuerySource() ) )
+			new JsonResponseParser( new DataValueDeserializer( $query->getQuerySource() ) ),
+			$credentials
 		);
 
 		$queryResultFetcher->setHttpRequestEndpoint( $interwiki->getApi() );
diff --git a/src/HookRegistry.php b/src/HookRegistry.php
index c5591f7..01f5a20 100644
--- a/src/HookRegistry.php
+++ b/src/HookRegistry.php
@@ -67,6 +67,42 @@ private function addCallbackHandlers( $options ) {
 		$this->handlers['InterwikiLoadPrefix'] = function( $prefix, &$interwiki ) use( $dynamicInterwikiPrefixLoader ) {
 			return $dynamicInterwikiPrefixLoader->tryToLoadIwMapForExternalRepository( $prefix, $interwiki );
 		};
+
+		/**
+		 * Prevents ask parser function with "source" parameter defined from
+		 * being executed outside of allowed namespaces. This supports transclusion too.
+		 * An empty "seqlgExternalQueryEnabledNamespaces" list allows every namespace.
+		 *
+		 * @param \Parser $parser
+		 * @param \PPFrame $frame
+		 * @param $args
+		 * @param $override
+		 */
+		$this->handlers['smwAskParserFunction'] = $this->handlers['smwShowParserFunction'] = function( $parser, $frame, $args, &$override ) {
+			if( $frame ) {
+				$params = [];
+				foreach ( $args as $key => $value ) {
+					// Skip the first argument (the query itself) and "?..." printout requests
+					if ( $key === 0 || ( $value !== '' && $value[0] === '?' ) ) {
+						continue;
+					}
+					if ( strpos( $value, '=' ) === false ) {
+						continue;
+					}
+					// Split on the first "=" only so values may contain "="; normalise
+					// the name so "| Source = x" cannot slip past the check below
+					$pair = explode( '=', $value, 2 );
+					$params[strtolower( trim( $pair[0] ) )] = $pair[1];
+				}
+
+				$enabledNamespaces = $GLOBALS['seqlgExternalQueryEnabledNamespaces'];
+
+				if( array_key_exists( 'source', $params ) && !empty( $enabledNamespaces ) && !in_array( $frame->getTitle()->getNamespace(), $enabledNamespaces ) ) {
+					$override = 'Warning: source parameter is not allowed in the namespace!';
+				}
+			}
+		};
+
 	}
 
 }
diff --git a/src/QueryEncoder.php b/src/QueryEncoder.php
index 084b0ec..8437ac1 100644
--- a/src/QueryEncoder.php
+++ b/src/QueryEncoder.php
@@ -60,7 +60,7 @@ private static function doSerializePrintouts( $query ) {
 		foreach ( $query->getExtraPrintouts() as $printout ) {
 			$serialization = $printout->getSerialisation();
 
-			if ( $serialization !== '?#' ) {
+			if ( $serialization !== '?#' && $serialization !== '' ) {
 				// #show adds an extra = at the end which is interpret as
 				// requesting an empty result hence it is removed
 				$printouts[] = substr( $serialization, -1 ) === '=' ? substr( $serialization, 0, -1 ) : $serialization;