PageRenderTime 46ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/core/Segment.php

https://github.com/CodeYellowBV/piwik
PHP | 454 lines | 298 code | 36 blank | 120 comment | 51 complexity | be9c0d07eedf81d061a0cd84efa843a0 MD5 | raw file
Possible License(s): LGPL-3.0, JSON, MIT, GPL-3.0, LGPL-2.1, GPL-2.0, AGPL-1.0, BSD-2-Clause, BSD-3-Clause
  1. <?php
  2. /**
  3. * Piwik - free/libre analytics platform
  4. *
  5. * @link http://piwik.org
  6. * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
  7. *
  8. */
  9. namespace Piwik;
  10. use Exception;
  11. use Piwik\Plugins\API\API;
  12. /**
  13. * Limits the set of visits Piwik uses when aggregating analytics data.
  14. *
  15. * A segment is a condition used to filter visits. They can, for example,
  16. * select visits that have a specific browser or come from a specific
  17. * country, or both.
  18. *
  19. * Individual segment dimensions (such as `browserCode` and `countryCode`)
  20. * are defined by plugins. Read about the {@hook API.getSegmentDimensionMetadata}
  21. * event to learn more.
  22. *
  23. * Plugins that aggregate data stored in Piwik can support segments by
  24. * using this class when generating aggregation SQL queries.
  25. *
  26. * ### Examples
  27. *
  28. * **Basic usage**
  29. *
  30. * $idSites = array(1,2,3);
  31. * $segmentStr = "browserCode==ff;countryCode==CA";
  32. * $segment = new Segment($segmentStr, $idSites);
  33. *
  34. * $query = $segment->getSelectQuery(
  35. * $select = "table.col1, table2.col2",
  36. * $from = array("table", "table2"),
  37. * $where = "table.col3 = ?",
  38. * $bind = array(5),
  39. * $orderBy = "table.col1 DESC",
  40. * $groupBy = "table2.col2"
  41. * );
  42. *
  43. * Db::fetchAll($query['sql'], $query['bind']);
  44. *
  45. * **Creating a _null_ segment**
  46. *
  47. * $idSites = array(1,2,3);
  48. * $segment = new Segment('', $idSites);
  49. * // $segment->getSelectQuery will return a query that selects all visits
  50. *
  51. * @api
  52. */
  53. class Segment
  54. {
  55. /**
  56. * @var SegmentExpression
  57. */
  58. protected $segment = null;
  59. /**
  60. * Truncate the Segments to 8k
  61. */
  62. const SEGMENT_TRUNCATE_LIMIT = 8192;
  63. /**
  64. * Constructor.
  65. *
  66. * @param string $segmentCondition The segment condition, eg, `'browserCode=ff;countryCode=CA'`.
  67. * @param array $idSites The list of sites the segment will be used with. Some segments are
  68. * dependent on the site, such as goal segments.
  69. */
  70. public function __construct($segmentCondition, $idSites)
  71. {
  72. $segmentCondition = trim($segmentCondition);
  73. if (!SettingsPiwik::isSegmentationEnabled()
  74. && !empty($segmentCondition)
  75. ) {
  76. throw new Exception("The Super User has disabled the Segmentation feature.");
  77. }
  78. // First try with url decoded value. If that fails, try with raw value.
  79. // If that also fails, it will throw the exception
  80. try {
  81. $this->initializeSegment(urldecode($segmentCondition), $idSites);
  82. } catch (Exception $e) {
  83. $this->initializeSegment($segmentCondition, $idSites);
  84. }
  85. }
  86. /**
  87. * @param $string
  88. * @param $idSites
  89. * @throws Exception
  90. */
  91. protected function initializeSegment($string, $idSites)
  92. {
  93. // As a preventive measure, we restrict the filter size to a safe limit
  94. $string = substr($string, 0, self::SEGMENT_TRUNCATE_LIMIT);
  95. $this->string = $string;
  96. $this->idSites = $idSites;
  97. $segment = new SegmentExpression($string);
  98. $this->segment = $segment;
  99. // parse segments
  100. $expressions = $segment->parseSubExpressions();
  101. // convert segments name to sql segment
  102. // check that user is allowed to view this segment
  103. // and apply a filter to the value to match if necessary (to map DB fields format)
  104. $cleanedExpressions = array();
  105. foreach ($expressions as $expression) {
  106. $operand = $expression[SegmentExpression::INDEX_OPERAND];
  107. $cleanedExpression = $this->getCleanedExpression($operand);
  108. $expression[SegmentExpression::INDEX_OPERAND] = $cleanedExpression;
  109. $cleanedExpressions[] = $expression;
  110. }
  111. $segment->setSubExpressionsAfterCleanup($cleanedExpressions);
  112. }
  113. /**
  114. * Returns `true` if the segment is empty, `false` if otherwise.
  115. */
  116. public function isEmpty()
  117. {
  118. return empty($this->string);
  119. }
  120. protected $availableSegments = array();
  121. protected function getCleanedExpression($expression)
  122. {
  123. if (empty($this->availableSegments)) {
  124. $this->availableSegments = API::getInstance()->getSegmentsMetadata($this->idSites, $_hideImplementationData = false);
  125. }
  126. $name = $expression[0];
  127. $matchType = $expression[1];
  128. $value = $expression[2];
  129. $sqlName = '';
  130. foreach ($this->availableSegments as $segment) {
  131. if ($segment['segment'] != $name) {
  132. continue;
  133. }
  134. $sqlName = $segment['sqlSegment'];
  135. // check permission
  136. if (isset($segment['permission'])
  137. && $segment['permission'] != 1
  138. ) {
  139. throw new Exception("You do not have enough permission to access the segment " . $name);
  140. }
  141. if($matchType != SegmentExpression::MATCH_IS_NOT_NULL_NOR_EMPTY
  142. && $matchType != SegmentExpression::MATCH_IS_NULL_OR_EMPTY) {
  143. if(isset($segment['sqlFilterValue'])) {
  144. $value = call_user_func($segment['sqlFilterValue'], $value);
  145. }
  146. // apply presentation filter
  147. if (isset($segment['sqlFilter'])) {
  148. $value = call_user_func($segment['sqlFilter'], $value, $segment['sqlSegment'], $matchType, $name);
  149. // sqlFilter-callbacks might return arrays for more complex cases
  150. // e.g. see TableLogAction::getIdActionFromSegment()
  151. if (is_array($value) && isset($value['SQL'])) {
  152. // Special case: returned value is a sub sql expression!
  153. $matchType = SegmentExpression::MATCH_ACTIONS_CONTAINS;
  154. }
  155. }
  156. }
  157. break;
  158. }
  159. if (empty($sqlName)) {
  160. throw new Exception("Segment '$name' is not a supported segment.");
  161. }
  162. return array($sqlName, $matchType, $value);
  163. }
  164. /**
  165. * Returns the segment condition.
  166. *
  167. * @return string
  168. */
  169. public function getString()
  170. {
  171. return $this->string;
  172. }
  173. /**
  174. * Returns a hash of the segment condition, or the empty string if the segment
  175. * condition is empty.
  176. *
  177. * @return string
  178. */
  179. public function getHash()
  180. {
  181. if (empty($this->string)) {
  182. return '';
  183. }
  184. // normalize the string as browsers may send slightly different payloads for the same archive
  185. $normalizedSegmentString = urldecode($this->string);
  186. return md5($normalizedSegmentString);
  187. }
  188. /**
  189. * Extend an SQL query that aggregates data over one of the 'log_' tables with segment expressions.
  190. *
  191. * @param string $select The select clause. Should NOT include the **SELECT** just the columns, eg,
  192. * `'t1.col1 as col1, t2.col2 as col2'`.
  193. * @param array $from Array of table names (without prefix), eg, `array('log_visit', 'log_conversion')`.
  194. * @param false|string $where (optional) Where clause, eg, `'t1.col1 = ? AND t2.col2 = ?'`.
  195. * @param array|string $bind (optional) Bind parameters, eg, `array($col1Value, $col2Value)`.
  196. * @param false|string $orderBy (optional) Order by clause, eg, `"t1.col1 ASC"`.
  197. * @param false|string $groupBy (optional) Group by clause, eg, `"t2.col2"`.
  198. * @return string The entire select query.
  199. */
  200. public function getSelectQuery($select, $from, $where = false, $bind = array(), $orderBy = false, $groupBy = false)
  201. {
  202. if (!is_array($from)) {
  203. $from = array($from);
  204. }
  205. if (!$this->isEmpty()) {
  206. $this->segment->parseSubExpressionsIntoSqlExpressions($from);
  207. $joins = $this->generateJoins($from);
  208. $from = $joins['sql'];
  209. $joinWithSubSelect = $joins['joinWithSubSelect'];
  210. $segmentSql = $this->segment->getSql();
  211. $segmentWhere = $segmentSql['where'];
  212. if (!empty($segmentWhere)) {
  213. if (!empty($where)) {
  214. $where = "( $where )
  215. AND
  216. ($segmentWhere)";
  217. } else {
  218. $where = $segmentWhere;
  219. }
  220. }
  221. $bind = array_merge($bind, $segmentSql['bind']);
  222. } else {
  223. $joins = $this->generateJoins($from);
  224. $from = $joins['sql'];
  225. $joinWithSubSelect = $joins['joinWithSubSelect'];
  226. }
  227. if ($joinWithSubSelect) {
  228. $sql = $this->buildWrappedSelectQuery($select, $from, $where, $orderBy, $groupBy);
  229. } else {
  230. $sql = $this->buildSelectQuery($select, $from, $where, $orderBy, $groupBy);
  231. }
  232. return array(
  233. 'sql' => $sql,
  234. 'bind' => $bind
  235. );
  236. }
  237. /**
  238. * Generate the join sql based on the needed tables
  239. * @param array $tables tables to join
  240. * @throws Exception if tables can't be joined
  241. * @return array
  242. */
  243. private function generateJoins($tables)
  244. {
  245. $knownTables = array("log_visit", "log_link_visit_action", "log_conversion", "log_conversion_item");
  246. $visitsAvailable = $actionsAvailable = $conversionsAvailable = $conversionItemAvailable = false;
  247. $joinWithSubSelect = false;
  248. $sql = '';
  249. // make sure the tables are joined in the right order
  250. // base table first, then action before conversion
  251. // this way, conversions can be joined on idlink_va
  252. $actionIndex = array_search("log_link_visit_action", $tables);
  253. $conversionIndex = array_search("log_conversion", $tables);
  254. if ($actionIndex > 0 && $conversionIndex > 0 && $actionIndex > $conversionIndex) {
  255. $tables[$actionIndex] = "log_conversion";
  256. $tables[$conversionIndex] = "log_link_visit_action";
  257. }
  258. // same as above: action before visit
  259. $actionIndex = array_search("log_link_visit_action", $tables);
  260. $visitIndex = array_search("log_visit", $tables);
  261. if ($actionIndex > 0 && $visitIndex > 0 && $actionIndex > $visitIndex) {
  262. $tables[$actionIndex] = "log_visit";
  263. $tables[$visitIndex] = "log_link_visit_action";
  264. }
  265. foreach ($tables as $i => $table) {
  266. if (is_array($table)) {
  267. // join condition provided
  268. $alias = isset($table['tableAlias']) ? $table['tableAlias'] : $table['table'];
  269. $sql .= "
  270. LEFT JOIN " . Common::prefixTable($table['table']) . " AS " . $alias
  271. . " ON " . $table['joinOn'];
  272. continue;
  273. }
  274. if (!in_array($table, $knownTables)) {
  275. throw new Exception("Table '$table' can't be used for segmentation");
  276. }
  277. $tableSql = Common::prefixTable($table) . " AS $table";
  278. if ($i == 0) {
  279. // first table
  280. $sql .= $tableSql;
  281. } else {
  282. if ($actionsAvailable && $table == "log_conversion") {
  283. // have actions, need conversions => join on idlink_va
  284. $join = "log_conversion.idlink_va = log_link_visit_action.idlink_va "
  285. . "AND log_conversion.idsite = log_link_visit_action.idsite";
  286. } else if ($actionsAvailable && $table == "log_visit") {
  287. // have actions, need visits => join on idvisit
  288. $join = "log_visit.idvisit = log_link_visit_action.idvisit";
  289. } else if ($visitsAvailable && $table == "log_link_visit_action") {
  290. // have visits, need actions => we have to use a more complex join
  291. // we don't hande this here, we just return joinWithSubSelect=true in this case
  292. $joinWithSubSelect = true;
  293. $join = "log_link_visit_action.idvisit = log_visit.idvisit";
  294. } else if ($conversionsAvailable && $table == "log_link_visit_action") {
  295. // have conversions, need actions => join on idlink_va
  296. $join = "log_conversion.idlink_va = log_link_visit_action.idlink_va";
  297. } else if (($visitsAvailable && $table == "log_conversion")
  298. || ($conversionsAvailable && $table == "log_visit")
  299. ) {
  300. // have visits, need conversion (or vice versa) => join on idvisit
  301. // notice that joining conversions on visits has lower priority than joining it on actions
  302. $join = "log_conversion.idvisit = log_visit.idvisit";
  303. // if conversions are joined on visits, we need a complex join
  304. if ($table == "log_conversion") {
  305. $joinWithSubSelect = true;
  306. }
  307. } elseif ($conversionItemAvailable && $table === 'log_visit') {
  308. $join = "log_conversion_item.idvisit = log_visit.idvisit";
  309. } elseif ($conversionItemAvailable && $table === 'log_link_visit_action') {
  310. $join = "log_conversion_item.idvisit = log_link_visit_action.idvisit";
  311. } elseif ($conversionItemAvailable && $table === 'log_conversion') {
  312. $join = "log_conversion_item.idvisit = log_conversion.idvisit";
  313. } else {
  314. throw new Exception("Table '$table' can't be joined for segmentation");
  315. }
  316. // the join sql the default way
  317. $sql .= "
  318. LEFT JOIN $tableSql ON $join";
  319. }
  320. // remember which tables are available
  321. $visitsAvailable = ($visitsAvailable || $table == "log_visit");
  322. $actionsAvailable = ($actionsAvailable || $table == "log_link_visit_action");
  323. $conversionsAvailable = ($conversionsAvailable || $table == "log_conversion");
  324. $conversionItemAvailable = ($conversionItemAvailable || $table == "log_conversion_item");
  325. }
  326. $return = array(
  327. 'sql' => $sql,
  328. 'joinWithSubSelect' => $joinWithSubSelect
  329. );
  330. return $return;
  331. }
  332. /**
  333. * Build select query the normal way
  334. * @param string $select fieldlist to be selected
  335. * @param string $from tablelist to select from
  336. * @param string $where where clause
  337. * @param string $orderBy order by clause
  338. * @param string $groupBy group by clause
  339. * @return string
  340. */
  341. private function buildSelectQuery($select, $from, $where, $orderBy, $groupBy)
  342. {
  343. $sql = "
  344. SELECT
  345. $select
  346. FROM
  347. $from";
  348. if ($where) {
  349. $sql .= "
  350. WHERE
  351. $where";
  352. }
  353. if ($groupBy) {
  354. $sql .= "
  355. GROUP BY
  356. $groupBy";
  357. }
  358. if ($orderBy) {
  359. $sql .= "
  360. ORDER BY
  361. $orderBy";
  362. }
  363. return $sql;
  364. }
  365. /**
  366. * Build a select query where actions have to be joined on visits (or conversions)
  367. * In this case, the query gets wrapped in another query so that grouping by visit is possible
  368. * @param string $select
  369. * @param string $from
  370. * @param string $where
  371. * @param string $orderBy
  372. * @param string $groupBy
  373. * @throws Exception
  374. * @return string
  375. */
  376. private function buildWrappedSelectQuery($select, $from, $where, $orderBy, $groupBy)
  377. {
  378. $matchTables = "(log_visit|log_conversion_item|log_conversion|log_action)";
  379. preg_match_all("/". $matchTables ."\.[a-z0-9_\*]+/", $select, $matches);
  380. $neededFields = array_unique($matches[0]);
  381. if (count($neededFields) == 0) {
  382. throw new Exception("No needed fields found in select expression. "
  383. . "Please use a table prefix.");
  384. }
  385. $select = preg_replace('/'.$matchTables.'\./', 'log_inner.', $select);
  386. $orderBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $orderBy);
  387. $groupBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $groupBy);
  388. $from = "(
  389. SELECT
  390. " . implode(",
  391. ", $neededFields) . "
  392. FROM
  393. $from
  394. WHERE
  395. $where
  396. GROUP BY log_visit.idvisit
  397. ) AS log_inner";
  398. $where = false;
  399. $query = $this->buildSelectQuery($select, $from, $where, $orderBy, $groupBy);
  400. return $query;
  401. }
  402. }