PageRenderTime 56ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/core/DataAccess/LogAggregator.php

https://github.com/CodeYellowBV/piwik
PHP | 877 lines | 389 code | 67 blank | 421 comment | 42 complexity | f59ca70a23df381024c21148455a1457 MD5 | raw file
Possible License(s): LGPL-3.0, JSON, MIT, GPL-3.0, LGPL-2.1, GPL-2.0, AGPL-1.0, BSD-2-Clause, BSD-3-Clause
  1. <?php
  2. /**
  3. * Piwik - free/libre analytics platform
  4. *
  5. * @link http://piwik.org
  6. * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
  7. *
  8. */
  9. namespace Piwik\DataAccess;
  10. use Piwik\ArchiveProcessor\Parameters;
  11. use Piwik\DataArray;
  12. use Piwik\Db;
  13. use Piwik\Metrics;
  14. use Piwik\Tracker\GoalManager;
  15. /**
  16. * Contains methods that calculate metrics by aggregating log data (visits, actions, conversions,
  17. * ecommerce items).
  18. *
  19. * You can use the methods in this class within {@link Piwik\Plugin\Archiver Archiver} descendants
  20. * to aggregate log data without having to write SQL queries.
  21. *
  22. * ### Aggregation Dimension
  23. *
  24. * All aggregation methods accept a **dimension** parameter. These parameters are important as
  25. * they control how rows in a table are aggregated together.
  26. *
  27. * A **_dimension_** is just a table column. Rows that have the same values for these columns are
  28. * aggregated together. The result of these aggregations is a set of metrics for every recorded value
  29. * of a **dimension**.
  30. *
  31. * _Note: A dimension is essentially the same as a **GROUP BY** field._
  32. *
  33. * ### Examples
  34. *
  35. * **Aggregating visit data**
  36. *
  37. * $archiveProcessor = // ...
  38. * $logAggregator = $archiveProcessor->getLogAggregator();
  39. *
  40. * // get metrics for every used browser language of all visits by returning visitors
  41. * $query = $logAggregator->queryVisitsByDimension(
  42. * $dimensions = array('log_visit.location_browser_lang'),
  43. * $where = 'log_visit.visitor_returning = 1',
  44. *
  45. * // also count visits for each browser language that are not located in the US
  46. * $additionalSelects = array('sum(case when log_visit.location_country <> 'us' then 1 else 0 end) as nonus'),
  47. *
  48. * // we're only interested in visits, unique visitors & actions, so don't waste time calculating anything else
  49. * $metrics = array(Metrics::INDEX_NB_UNIQ_VISITORS, Metrics::INDEX_NB_VISITS, Metrics::INDEX_NB_ACTIONS),
  50. * );
  51. * if ($query === false) {
  52. * return;
  53. * }
  54. *
  55. * while ($row = $query->fetch()) {
  56. * $uniqueVisitors = $row[Metrics::INDEX_NB_UNIQ_VISITORS];
  57. * $visits = $row[Metrics::INDEX_NB_VISITS];
  58. * $actions = $row[Metrics::INDEX_NB_ACTIONS];
  59. *
  60. * // ... do something w/ calculated metrics ...
  61. * }
  62. *
  63. * **Aggregating conversion data**
  64. *
  65. * $archiveProcessor = // ...
  66. * $logAggregator = $archiveProcessor->getLogAggregator();
  67. *
  68. * // get metrics for ecommerce conversions for each country
  69. * $query = $logAggregator->queryConversionsByDimension(
  70. * $dimensions = array('log_conversion.location_country'),
  71. * $where = 'log_conversion.idgoal = 0', // 0 is the special ecommerceOrder idGoal value in the table
  72. *
  73. * // also calculate average tax and max shipping per country
  74. * $additionalSelects = array(
  75. * 'AVG(log_conversion.revenue_tax) as avg_tax',
  76. * 'MAX(log_conversion.revenue_shipping) as max_shipping'
  77. * )
  78. * );
  79. * if ($query === false) {
  80. * return;
  81. * }
  82. *
  83. * while ($row = $query->fetch()) {
  84. * $country = $row['location_country'];
  85. * $numEcommerceSales = $row[Metrics::INDEX_GOAL_NB_CONVERSIONS];
  86. * $numVisitsWithEcommerceSales = $row[Metrics::INDEX_GOAL_NB_VISITS_CONVERTED];
  87. * $avgTaxForCountry = $country['avg_tax'];
  88. * $maxShippingForCountry = $country['max_shipping'];
  89. *
  90. * // ... do something with aggregated data ...
  91. * }
  92. */
  93. class LogAggregator
  94. {
  95. const LOG_VISIT_TABLE = 'log_visit';
  96. const LOG_ACTIONS_TABLE = 'log_link_visit_action';
  97. const LOG_CONVERSION_TABLE = "log_conversion";
  98. const REVENUE_SUBTOTAL_FIELD = 'revenue_subtotal';
  99. const REVENUE_TAX_FIELD = 'revenue_tax';
  100. const REVENUE_SHIPPING_FIELD = 'revenue_shipping';
  101. const REVENUE_DISCOUNT_FIELD = 'revenue_discount';
  102. const TOTAL_REVENUE_FIELD = 'revenue';
  103. const ITEMS_COUNT_FIELD = "items";
  104. const CONVERSION_DATETIME_FIELD = "server_time";
  105. const ACTION_DATETIME_FIELD = "server_time";
  106. const VISIT_DATETIME_FIELD = 'visit_last_action_time';
  107. const IDGOAL_FIELD = 'idgoal';
  108. const FIELDS_SEPARATOR = ", \n\t\t\t";
  109. /** @var \Piwik\Date */
  110. protected $dateStart;
  111. /** @var \Piwik\Date */
  112. protected $dateEnd;
  113. /** @var \Piwik\Site */
  114. protected $site;
  115. /** @var \Piwik\Segment */
  116. protected $segment;
  117. /**
  118. * Constructor.
  119. *
  120. * @param \Piwik\ArchiveProcessor\Parameters $params
  121. */
  122. public function __construct(Parameters $params)
  123. {
  124. $this->dateStart = $params->getDateStart();
  125. $this->dateEnd = $params->getDateEnd();
  126. $this->segment = $params->getSegment();
  127. $this->site = $params->getSite();
  128. }
  129. public function generateQuery($select, $from, $where, $groupBy, $orderBy)
  130. {
  131. $bind = $this->getBindDatetimeSite();
  132. $query = $this->segment->getSelectQuery($select, $from, $where, $bind, $orderBy, $groupBy);
  133. return $query;
  134. }
  135. protected function getVisitsMetricFields()
  136. {
  137. return array(
  138. Metrics::INDEX_NB_UNIQ_VISITORS => "count(distinct " . self::LOG_VISIT_TABLE . ".idvisitor)",
  139. Metrics::INDEX_NB_VISITS => "count(*)",
  140. Metrics::INDEX_NB_ACTIONS => "sum(" . self::LOG_VISIT_TABLE . ".visit_total_actions)",
  141. Metrics::INDEX_MAX_ACTIONS => "max(" . self::LOG_VISIT_TABLE . ".visit_total_actions)",
  142. Metrics::INDEX_SUM_VISIT_LENGTH => "sum(" . self::LOG_VISIT_TABLE . ".visit_total_time)",
  143. Metrics::INDEX_BOUNCE_COUNT => "sum(case " . self::LOG_VISIT_TABLE . ".visit_total_actions when 1 then 1 when 0 then 1 else 0 end)",
  144. Metrics::INDEX_NB_VISITS_CONVERTED => "sum(case " . self::LOG_VISIT_TABLE . ".visit_goal_converted when 1 then 1 else 0 end)",
  145. );
  146. }
  147. static public function getConversionsMetricFields()
  148. {
  149. return array(
  150. Metrics::INDEX_GOAL_NB_CONVERSIONS => "count(*)",
  151. Metrics::INDEX_GOAL_NB_VISITS_CONVERTED => "count(distinct " . self::LOG_CONVERSION_TABLE . ".idvisit)",
  152. Metrics::INDEX_GOAL_REVENUE => self::getSqlConversionRevenueSum(self::TOTAL_REVENUE_FIELD),
  153. Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_SUBTOTAL => self::getSqlConversionRevenueSum(self::REVENUE_SUBTOTAL_FIELD),
  154. Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_TAX => self::getSqlConversionRevenueSum(self::REVENUE_TAX_FIELD),
  155. Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_SHIPPING => self::getSqlConversionRevenueSum(self::REVENUE_SHIPPING_FIELD),
  156. Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_DISCOUNT => self::getSqlConversionRevenueSum(self::REVENUE_DISCOUNT_FIELD),
  157. Metrics::INDEX_GOAL_ECOMMERCE_ITEMS => "SUM(" . self::LOG_CONVERSION_TABLE . "." . self::ITEMS_COUNT_FIELD . ")",
  158. );
  159. }
  160. static private function getSqlConversionRevenueSum($field)
  161. {
  162. return self::getSqlRevenue('SUM(' . self::LOG_CONVERSION_TABLE . '.' . $field . ')');
  163. }
  164. static public function getSqlRevenue($field)
  165. {
  166. return "ROUND(" . $field . "," . GoalManager::REVENUE_PRECISION . ")";
  167. }
  168. /**
  169. * Helper function that returns an array with common metrics for a given log_visit field distinct values.
  170. *
  171. * The statistics returned are:
  172. * - number of unique visitors
  173. * - number of visits
  174. * - number of actions
  175. * - maximum number of action for a visit
  176. * - sum of the visits' length in sec
  177. * - count of bouncing visits (visits with one page view)
  178. *
  179. * For example if $dimension = 'config_os' it will return the statistics for every distinct Operating systems
  180. * The returned array will have a row per distinct operating systems,
  181. * and a column per stat (nb of visits, max actions, etc)
  182. *
  183. * 'label' Metrics::INDEX_NB_UNIQ_VISITORS Metrics::INDEX_NB_VISITS etc.
  184. * Linux 27 66 ...
  185. * Windows XP 12 ...
  186. * Mac OS 15 36 ...
  187. *
  188. * @param string $dimension Table log_visit field name to be use to compute common stats
  189. * @return DataArray
  190. */
  191. public function getMetricsFromVisitByDimension($dimension)
  192. {
  193. if (!is_array($dimension)) {
  194. $dimension = array($dimension);
  195. }
  196. if (count($dimension) == 1) {
  197. $dimension = array("label" => reset($dimension));
  198. }
  199. $query = $this->queryVisitsByDimension($dimension);
  200. $metrics = new DataArray();
  201. while ($row = $query->fetch()) {
  202. $metrics->sumMetricsVisits($row["label"], $row);
  203. }
  204. return $metrics;
  205. }
  206. /**
  207. * Executes and returns a query aggregating visit logs, optionally grouping by some dimension. Returns
  208. * a DB statement that can be used to iterate over the result
  209. *
  210. * **Result Set**
  211. *
  212. * The following columns are in each row of the result set:
  213. *
  214. * - **{@link Piwik\Metrics::INDEX_NB_UNIQ_VISITORS}**: The total number of unique visitors in this group
  215. * of aggregated visits.
  216. * - **{@link Piwik\Metrics::INDEX_NB_VISITS}**: The total number of visits aggregated.
  217. * - **{@link Piwik\Metrics::INDEX_NB_ACTIONS}**: The total number of actions performed in this group of
  218. * aggregated visits.
  219. * - **{@link Piwik\Metrics::INDEX_MAX_ACTIONS}**: The maximum actions perfomred in one visit for this group of
  220. * visits.
  221. * - **{@link Piwik\Metrics::INDEX_SUM_VISIT_LENGTH}**: The total amount of time spent on the site for this
  222. * group of visits.
  223. * - **{@link Piwik\Metrics::INDEX_BOUNCE_COUNT}**: The total number of bounced visits in this group of
  224. * visits.
  225. * - **{@link Piwik\Metrics::INDEX_NB_VISITS_CONVERTED}**: The total number of visits for which at least one
  226. * conversion occurred, for this group of visits.
  227. *
  228. * Additional data can be selected by setting the `$additionalSelects` parameter.
  229. *
  230. * _Note: The metrics returned by this query can be customized by the `$metrics` parameter._
  231. *
  232. * @param array|string $dimensions `SELECT` fields (or just one field) that will be grouped by,
  233. * eg, `'referrer_name'` or `array('referrer_name', 'referrer_keyword')`.
  234. * The metrics retrieved from the query will be specific to combinations
  235. * of these fields. So if `array('referrer_name', 'referrer_keyword')`
  236. * is supplied, the query will aggregate visits for each referrer/keyword
  237. * combination.
  238. * @param bool|string $where Additional condition for the `WHERE` clause. Can be used to filter
  239. * the set of visits that are considered for aggregation.
  240. * @param array $additionalSelects Additional `SELECT` fields that are not included in the group by
  241. * clause. These can be aggregate expressions, eg, `SUM(somecol)`.
  242. * @param bool|array $metrics The set of metrics to calculate and return. If false, the query will select
  243. * all of them. The following values can be used:
  244. *
  245. * - {@link Piwik\Metrics::INDEX_NB_UNIQ_VISITORS}
  246. * - {@link Piwik\Metrics::INDEX_NB_VISITS}
  247. * - {@link Piwik\Metrics::INDEX_NB_ACTIONS}
  248. * - {@link Piwik\Metrics::INDEX_MAX_ACTIONS}
  249. * - {@link Piwik\Metrics::INDEX_SUM_VISIT_LENGTH}
  250. * - {@link Piwik\Metrics::INDEX_BOUNCE_COUNT}
  251. * - {@link Piwik\Metrics::INDEX_NB_VISITS_CONVERTED}
  252. * @param bool|\Piwik\RankingQuery $rankingQuery
  253. * A pre-configured ranking query instance that will be used to limit the result.
  254. * If set, the return value is the array returned by {@link Piwik\RankingQuery::execute()}.
  255. * @return mixed A Zend_Db_Statement if `$rankingQuery` isn't supplied, otherwise the result of
  256. * {@link Piwik\RankingQuery::execute()}. Read {@link queryVisitsByDimension() this}
  257. * to see what aggregate data is calculated by the query.
  258. * @api
  259. */
  260. public function queryVisitsByDimension(array $dimensions = array(), $where = false, array $additionalSelects = array(),
  261. $metrics = false, $rankingQuery = false)
  262. {
  263. $tableName = self::LOG_VISIT_TABLE;
  264. $availableMetrics = $this->getVisitsMetricFields();
  265. $select = $this->getSelectStatement($dimensions, $tableName, $additionalSelects, $availableMetrics, $metrics);
  266. $from = array($tableName);
  267. $where = $this->getWhereStatement($tableName, self::VISIT_DATETIME_FIELD, $where);
  268. $groupBy = $this->getGroupByStatement($dimensions, $tableName);
  269. $orderBy = false;
  270. if ($rankingQuery) {
  271. $orderBy = '`' . Metrics::INDEX_NB_VISITS . '` DESC';
  272. }
  273. $query = $this->generateQuery($select, $from, $where, $groupBy, $orderBy);
  274. if ($rankingQuery) {
  275. unset($availableMetrics[Metrics::INDEX_MAX_ACTIONS]);
  276. $sumColumns = array_keys($availableMetrics);
  277. if ($metrics) {
  278. $sumColumns = array_intersect($sumColumns, $metrics);
  279. }
  280. $rankingQuery->addColumn($sumColumns, 'sum');
  281. if ($this->isMetricRequested(Metrics::INDEX_MAX_ACTIONS, $metrics)) {
  282. $rankingQuery->addColumn(Metrics::INDEX_MAX_ACTIONS, 'max');
  283. }
  284. return $rankingQuery->execute($query['sql'], $query['bind']);
  285. }
  286. return $this->getDb()->query($query['sql'], $query['bind']);
  287. }
  288. protected function getSelectsMetrics($metricsAvailable, $metricsRequested = false)
  289. {
  290. $selects = array();
  291. foreach ($metricsAvailable as $metricId => $statement) {
  292. if ($this->isMetricRequested($metricId, $metricsRequested)) {
  293. $aliasAs = $this->getSelectAliasAs($metricId);
  294. $selects[] = $statement . $aliasAs;
  295. }
  296. }
  297. return $selects;
  298. }
  299. protected function getSelectStatement($dimensions, $tableName, $additionalSelects, array $availableMetrics, $requestedMetrics = false)
  300. {
  301. $dimensionsToSelect = $this->getDimensionsToSelect($dimensions, $additionalSelects);
  302. $selects = array_merge(
  303. $this->getSelectDimensions($dimensionsToSelect, $tableName),
  304. $this->getSelectsMetrics($availableMetrics, $requestedMetrics),
  305. !empty($additionalSelects) ? $additionalSelects : array()
  306. );
  307. $select = implode(self::FIELDS_SEPARATOR, $selects);
  308. return $select;
  309. }
  310. /**
  311. * Will return the subset of $dimensions that are not found in $additionalSelects
  312. *
  313. * @param $dimensions
  314. * @param array $additionalSelects
  315. * @return array
  316. */
  317. protected function getDimensionsToSelect($dimensions, $additionalSelects)
  318. {
  319. if (empty($additionalSelects)) {
  320. return $dimensions;
  321. }
  322. $dimensionsToSelect = array();
  323. foreach ($dimensions as $selectAs => $dimension) {
  324. $asAlias = $this->getSelectAliasAs($dimension);
  325. foreach ($additionalSelects as $additionalSelect) {
  326. if (strpos($additionalSelect, $asAlias) === false) {
  327. $dimensionsToSelect[$selectAs] = $dimension;
  328. }
  329. }
  330. }
  331. $dimensionsToSelect = array_unique($dimensionsToSelect);
  332. return $dimensionsToSelect;
  333. }
  334. /**
  335. * Returns the dimensions array, where
  336. * (1) the table name is prepended to the field
  337. * (2) the "AS `label` " is appended to the field
  338. *
  339. * @param $dimensions
  340. * @param $tableName
  341. * @param bool $appendSelectAs
  342. * @return mixed
  343. */
  344. protected function getSelectDimensions($dimensions, $tableName, $appendSelectAs = true)
  345. {
  346. foreach ($dimensions as $selectAs => &$field) {
  347. $selectAsString = $field;
  348. if (!is_numeric($selectAs)) {
  349. $selectAsString = $selectAs;
  350. } else {
  351. // if function, do not alias or prefix
  352. if ($this->isFieldFunctionOrComplexExpression($field)) {
  353. $selectAsString = $appendSelectAs = false;
  354. }
  355. }
  356. $isKnownField = !in_array($field, array('referrer_data'));
  357. if ($selectAsString == $field
  358. && $isKnownField
  359. ) {
  360. $field = $this->prefixColumn($field, $tableName);
  361. }
  362. if ($appendSelectAs && $selectAsString) {
  363. $field = $this->prefixColumn($field, $tableName) . $this->getSelectAliasAs($selectAsString);
  364. }
  365. }
  366. return $dimensions;
  367. }
  368. /**
  369. * Prefixes a column name with a table name if not already done.
  370. *
  371. * @param string $column eg, 'location_provider'
  372. * @param string $tableName eg, 'log_visit'
  373. * @return string eg, 'log_visit.location_provider'
  374. */
  375. private function prefixColumn($column, $tableName)
  376. {
  377. if (strpos($column, '.') === false) {
  378. return $tableName . '.' . $column;
  379. } else {
  380. return $column;
  381. }
  382. }
  383. protected function isFieldFunctionOrComplexExpression($field)
  384. {
  385. return strpos($field, "(") !== false
  386. || strpos($field, "CASE") !== false;
  387. }
  388. protected function getSelectAliasAs($metricId)
  389. {
  390. return " AS `" . $metricId . "`";
  391. }
  392. protected function isMetricRequested($metricId, $metricsRequested)
  393. {
  394. return $metricsRequested === false
  395. || in_array($metricId, $metricsRequested);
  396. }
  397. protected function getWhereStatement($tableName, $datetimeField, $extraWhere = false)
  398. {
  399. $where = "$tableName.$datetimeField >= ?
  400. AND $tableName.$datetimeField <= ?
  401. AND $tableName.idsite = ?";
  402. if (!empty($extraWhere)) {
  403. $extraWhere = sprintf($extraWhere, $tableName, $tableName);
  404. $where .= ' AND ' . $extraWhere;
  405. }
  406. return $where;
  407. }
  408. protected function getGroupByStatement($dimensions, $tableName)
  409. {
  410. $dimensions = $this->getSelectDimensions($dimensions, $tableName, $appendSelectAs = false);
  411. $groupBy = implode(", ", $dimensions);
  412. return $groupBy;
  413. }
  414. protected function getBindDatetimeSite()
  415. {
  416. return array($this->dateStart->getDateStartUTC(), $this->dateEnd->getDateEndUTC(), $this->site->getId());
  417. }
  418. /**
  419. * Executes and returns a query aggregating ecommerce item data (everything stored in the
  420. * **log\_conversion\_item** table) and returns a DB statement that can be used to iterate over the result
  421. *
  422. * <a name="queryEcommerceItems-result-set"></a>
  423. * **Result Set**
  424. *
  425. * Each row of the result set represents an aggregated group of ecommerce items. The following
  426. * columns are in each row of the result set:
  427. *
  428. * - **{@link Piwik\Metrics::INDEX_ECOMMERCE_ITEM_REVENUE}**: The total revenue for the group of items.
  429. * - **{@link Piwik\Metrics::INDEX_ECOMMERCE_ITEM_QUANTITY}**: The total number of items in this group.
  430. * - **{@link Piwik\Metrics::INDEX_ECOMMERCE_ITEM_PRICE}**: The total price for the group of items.
  431. * - **{@link Piwik\Metrics::INDEX_ECOMMERCE_ORDERS}**: The total number of orders this group of items
  432. * belongs to. This will be <= to the total number
  433. * of items in this group.
  434. * - **{@link Piwik\Metrics::INDEX_NB_VISITS}**: The total number of visits that caused these items to be logged.
  435. * - **ecommerceType**: Either {@link Piwik\Tracker\GoalManager::IDGOAL_CART} if the items in this group were
  436. * abandoned by a visitor, or {@link Piwik\Tracker\GoalManager::IDGOAL_ORDER} if they
  437. * were ordered by a visitor.
  438. *
  439. * **Limitations**
  440. *
  441. * Segmentation is not yet supported for this aggregation method.
  442. *
  443. * @param string $dimension One or more **log\_conversion\_item** columns to group aggregated data by.
  444. * Eg, `'idaction_sku'` or `'idaction_sku, idaction_category'`.
  445. * @return Zend_Db_Statement A statement object that can be used to iterate through the query's
  446. * result set. See [above](#queryEcommerceItems-result-set) to learn more
  447. * about what this query selects.
  448. * @api
  449. */
  450. public function queryEcommerceItems($dimension)
  451. {
  452. $query = $this->generateQuery(
  453. // SELECT ...
  454. implode(
  455. ', ',
  456. array(
  457. "log_action.name AS label",
  458. sprintf("log_conversion_item.%s AS labelIdAction", $dimension),
  459. sprintf(
  460. '%s AS `%d`',
  461. self::getSqlRevenue('SUM(log_conversion_item.quantity * log_conversion_item.price)'),
  462. Metrics::INDEX_ECOMMERCE_ITEM_REVENUE
  463. ),
  464. sprintf(
  465. '%s AS `%d`',
  466. self::getSqlRevenue('SUM(log_conversion_item.quantity)'),
  467. Metrics::INDEX_ECOMMERCE_ITEM_QUANTITY
  468. ),
  469. sprintf(
  470. '%s AS `%d`',
  471. self::getSqlRevenue('SUM(log_conversion_item.price)'),
  472. Metrics::INDEX_ECOMMERCE_ITEM_PRICE
  473. ),
  474. sprintf(
  475. 'COUNT(distinct log_conversion_item.idorder) AS `%d`',
  476. Metrics::INDEX_ECOMMERCE_ORDERS
  477. ),
  478. sprintf(
  479. 'COUNT(distinct log_conversion_item.idvisit) AS `%d`',
  480. Metrics::INDEX_NB_VISITS
  481. ),
  482. sprintf(
  483. 'CASE log_conversion_item.idorder WHEN \'0\' THEN %d ELSE %d END AS ecommerceType',
  484. GoalManager::IDGOAL_CART,
  485. GoalManager::IDGOAL_ORDER
  486. )
  487. )
  488. ),
  489. // FROM ...
  490. array(
  491. "log_conversion_item",
  492. array(
  493. "table" => "log_action",
  494. "joinOn" => sprintf("log_conversion_item.%s = log_action.idaction", $dimension)
  495. )
  496. ),
  497. // WHERE ... AND ...
  498. implode(
  499. ' AND ',
  500. array(
  501. 'log_conversion_item.server_time >= ?',
  502. 'log_conversion_item.server_time <= ?',
  503. 'log_conversion_item.idsite = ?',
  504. 'log_conversion_item.deleted = 0'
  505. )
  506. ),
  507. // GROUP BY ...
  508. sprintf(
  509. "ecommerceType, log_conversion_item.%s",
  510. $dimension
  511. ),
  512. // ORDER ...
  513. false
  514. );
  515. return $this->getDb()->query($query['sql'], $query['bind']);
  516. }
  517. /**
  518. * Executes and returns a query aggregating action data (everything in the log_action table) and returns
  519. * a DB statement that can be used to iterate over the result
  520. *
  521. * <a name="queryActionsByDimension-result-set"></a>
  522. * **Result Set**
  523. *
  524. * Each row of the result set represents an aggregated group of actions. The following columns
  525. * are in each aggregate row:
  526. *
  527. * - **{@link Piwik\Metrics::INDEX_NB_UNIQ_VISITORS}**: The total number of unique visitors that performed
  528. * the actions in this group.
  529. * - **{@link Piwik\Metrics::INDEX_NB_VISITS}**: The total number of visits these actions belong to.
  530. * - **{@link Piwik\Metrics::INDEX_NB_ACTIONS}**: The total number of actions in this aggregate group.
  531. *
  532. * Additional data can be selected through the `$additionalSelects` parameter.
  533. *
  534. * _Note: The metrics calculated by this query can be customized by the `$metrics` parameter._
  535. *
  536. * @param array|string $dimensions One or more SELECT fields that will be used to group the log_action
  537. * rows by. This parameter determines which log_action rows will be
  538. * aggregated together.
  539. * @param bool|string $where Additional condition for the WHERE clause. Can be used to filter
  540. * the set of visits that are considered for aggregation.
  541. * @param array $additionalSelects Additional SELECT fields that are not included in the group by
  542. * clause. These can be aggregate expressions, eg, `SUM(somecol)`.
  543. * @param bool|array $metrics The set of metrics to calculate and return. If `false`, the query will select
  544. * all of them. The following values can be used:
  545. *
  546. * - {@link Piwik\Metrics::INDEX_NB_UNIQ_VISITORS}
  547. * - {@link Piwik\Metrics::INDEX_NB_VISITS}
  548. * - {@link Piwik\Metrics::INDEX_NB_ACTIONS}
  549. * @param bool|\Piwik\RankingQuery $rankingQuery
  550. * A pre-configured ranking query instance that will be used to limit the result.
  551. * If set, the return value is the array returned by {@link Piwik\RankingQuery::execute()}.
  552. * @param bool|string $joinLogActionOnColumn One or more columns from the **log_link_visit_action** table that
  553. * log_action should be joined on. The table alias used for each join
  554. * is `"log_action$i"` where `$i` is the index of the column in this
  555. * array.
  556. *
  557. * If a string is used for this parameter, the table alias is not
  558. * suffixed (since there is only one column).
  559. * @return mixed A Zend_Db_Statement if `$rankingQuery` isn't supplied, otherwise the result of
  560. * {@link Piwik\RankingQuery::execute()}. Read [this](#queryEcommerceItems-result-set)
  561. * to see what aggregate data is calculated by the query.
  562. * @api
  563. */
  564. public function queryActionsByDimension($dimensions, $where = '', $additionalSelects = array(), $metrics = false, $rankingQuery = null, $joinLogActionOnColumn = false)
  565. {
  566. $tableName = self::LOG_ACTIONS_TABLE;
  567. $availableMetrics = $this->getActionsMetricFields();
  568. $select = $this->getSelectStatement($dimensions, $tableName, $additionalSelects, $availableMetrics, $metrics);
  569. $from = array($tableName);
  570. $where = $this->getWhereStatement($tableName, self::ACTION_DATETIME_FIELD, $where);
  571. $groupBy = $this->getGroupByStatement($dimensions, $tableName);
  572. $orderBy = false;
  573. if ($joinLogActionOnColumn !== false) {
  574. $multiJoin = is_array($joinLogActionOnColumn);
  575. if (!$multiJoin) {
  576. $joinLogActionOnColumn = array($joinLogActionOnColumn);
  577. }
  578. foreach ($joinLogActionOnColumn as $i => $joinColumn) {
  579. $tableAlias = 'log_action' . ($multiJoin ? $i + 1 : '');
  580. if (strpos($joinColumn, ' ') === false) {
  581. $joinOn = $tableAlias . '.idaction = ' . $tableName . '.' . $joinColumn;
  582. } else {
  583. // more complex join column like IF(...)
  584. $joinOn = $tableAlias . '.idaction = ' . $joinColumn;
  585. }
  586. $from[] = array(
  587. 'table' => 'log_action',
  588. 'tableAlias' => $tableAlias,
  589. 'joinOn' => $joinOn
  590. );
  591. }
  592. }
  593. if ($rankingQuery) {
  594. $orderBy = '`' . Metrics::INDEX_NB_ACTIONS . '` DESC';
  595. }
  596. $query = $this->generateQuery($select, $from, $where, $groupBy, $orderBy);
  597. if ($rankingQuery !== null) {
  598. $sumColumns = array_keys($availableMetrics);
  599. if ($metrics) {
  600. $sumColumns = array_intersect($sumColumns, $metrics);
  601. }
  602. $rankingQuery->addColumn($sumColumns, 'sum');
  603. return $rankingQuery->execute($query['sql'], $query['bind']);
  604. }
  605. return $this->getDb()->query($query['sql'], $query['bind']);
  606. }
  607. protected function getActionsMetricFields()
  608. {
  609. return $availableMetrics = array(
  610. Metrics::INDEX_NB_VISITS => "count(distinct " . self::LOG_ACTIONS_TABLE . ".idvisit)",
  611. Metrics::INDEX_NB_UNIQ_VISITORS => "count(distinct " . self::LOG_ACTIONS_TABLE . ".idvisitor)",
  612. Metrics::INDEX_NB_ACTIONS => "count(*)",
  613. );
  614. }
  615. /**
  616. * Executes a query aggregating conversion data (everything in the **log_conversion** table) and returns
  617. * a DB statement that can be used to iterate over the result.
  618. *
  619. * <a name="queryConversionsByDimension-result-set"></a>
  620. * **Result Set**
  621. *
  622. * Each row of the result set represents an aggregated group of conversions. The
  623. * following columns are in each aggregate row:
  624. *
  625. * - **{@link Piwik\Metrics::INDEX_GOAL_NB_CONVERSIONS}**: The total number of conversions in this aggregate
  626. * group.
  627. * - **{@link Piwik\Metrics::INDEX_GOAL_NB_VISITS_CONVERTED}**: The total number of visits during which these
  628. * conversions were converted.
  629. * - **{@link Piwik\Metrics::INDEX_GOAL_REVENUE}**: The total revenue generated by these conversions. This value
  630. * includes the revenue from individual ecommerce items.
  631. * - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_SUBTOTAL}**: The total cost of all ecommerce items sold
  632. * within these conversions. This value does not
  633. * include tax, shipping or any applied discount.
  634. *
  635. * _This metric is only applicable to the special
  636. * **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
  637. * - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_TAX}**: The total tax applied to every transaction in these
  638. * conversions.
  639. *
  640. * _This metric is only applicable to the special
  641. * **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
  642. * - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_SHIPPING}**: The total shipping cost for every transaction
  643. * in these conversions.
  644. *
  645. * _This metric is only applicable to the special
  646. * **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
  647. * - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_REVENUE_DISCOUNT}**: The total discount applied to every transaction
  648. * in these conversions.
  649. *
  650. * _This metric is only applicable to the special
  651. * **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
  652. * - **{@link Piwik\Metrics::INDEX_GOAL_ECOMMERCE_ITEMS}**: The total number of ecommerce items sold in each transaction
  653. * in these conversions.
  654. *
  655. * _This metric is only applicable to the special
  656. * **ecommerce** goal (where `idGoal == 'ecommerceOrder'`)._
  657. *
  658. * Additional data can be selected through the `$additionalSelects` parameter.
  659. *
  660. * _Note: This method will only query the **log_conversion** table. Other tables cannot be joined
  661. * using this method._
  662. *
  663. * @param array|string $dimensions One or more **SELECT** fields that will be used to group the log_conversion
  664. * rows by. This parameter determines which **log_conversion** rows will be
  665. * aggregated together.
  666. * @param bool|string $where An optional SQL expression used in the SQL's **WHERE** clause.
  667. * @param array $additionalSelects Additional SELECT fields that are not included in the group by
  668. * clause. These can be aggregate expressions, eg, `SUM(somecol)`.
  669. * @return Zend_Db_Statement
  670. */
  671. public function queryConversionsByDimension($dimensions = array(), $where = false, $additionalSelects = array())
  672. {
  673. $dimensions = array_merge(array(self::IDGOAL_FIELD), $dimensions);
  674. $availableMetrics = $this->getConversionsMetricFields();
  675. $tableName = self::LOG_CONVERSION_TABLE;
  676. $select = $this->getSelectStatement($dimensions, $tableName, $additionalSelects, $availableMetrics);
  677. $from = array($tableName);
  678. $where = $this->getWhereStatement($tableName, self::CONVERSION_DATETIME_FIELD, $where);
  679. $groupBy = $this->getGroupByStatement($dimensions, $tableName);
  680. $orderBy = false;
  681. $query = $this->generateQuery($select, $from, $where, $groupBy, $orderBy);
  682. return $this->getDb()->query($query['sql'], $query['bind']);
  683. }
  684. /**
  685. * Creates and returns an array of SQL `SELECT` expressions that will each count how
  686. * many rows have a column whose value is within a certain range.
  687. *
  688. * **Note:** The result of this function is meant for use in the `$additionalSelects` parameter
  689. * in one of the query... methods (for example {@link queryVisitsByDimension()}).
  690. *
  691. * **Example**
  692. *
  693. * // summarize one column
  694. * $visitTotalActionsRanges = array(
  695. * array(1, 1),
  696. * array(2, 10),
  697. * array(10)
  698. * );
  699. * $selects = LogAggregator::getSelectsFromRangedColumn('visit_total_actions', $visitTotalActionsRanges, 'log_visit', 'vta');
  700. *
  701. * // summarize another column in the same request
  702. * $visitCountVisitsRanges = array(
  703. * array(1, 1),
  704. * array(2, 20),
  705. * array(20)
  706. * );
  707. * $selects = array_merge(
  708. * $selects,
  709. * LogAggregator::getSelectsFromRangedColumn('visitor_count_visits', $visitCountVisitsRanges, 'log_visit', 'vcv')
  710. * );
  711. *
  712. * // perform the query
  713. * $logAggregator = // get the LogAggregator somehow
  714. * $query = $logAggregator->queryVisitsByDimension($dimensions = array(), $where = false, $selects);
  715. * $tableSummary = $query->fetch();
  716. *
  717. * $numberOfVisitsWithOneAction = $tableSummary['vta0'];
  718. * $numberOfVisitsBetweenTwoAnd10 = $tableSummary['vta1'];
  719. *
  720. * $numberOfVisitsWithVisitCountOfOne = $tableSummary['vcv0'];
  721. *
  722. * @param string $column The name of a column in `$table` that will be summarized.
  723. * @param array $ranges The array of ranges over which the data in the table
  724. * will be summarized. For example,
  725. * ```
  726. * array(
  727. * array(1, 1),
  728. * array(2, 2),
  729. * array(3, 8),
  730. * array(8) // everything over 8
  731. * )
  732. * ```
  733. * @param string $table The unprefixed name of the table whose rows will be summarized.
  734. * @param string $selectColumnPrefix The prefix to prepend to each SELECT expression. This
  735. * prefix is used to differentiate different sets of
  736. * range summarization SELECTs. You can supply different
  737. * values to this argument to summarize several columns
  738. * in one query (see above for an example).
  739. * @param bool $restrictToReturningVisitors Whether to only summarize rows that belong to
  740. * visits of returning visitors or not. If this
  741. * argument is true, then the SELECT expressions
  742. * returned can only be used with the
  743. * {@link queryVisitsByDimension()} method.
  744. * @return array An array of SQL SELECT expressions, for example,
  745. * ```
  746. * array(
  747. * 'sum(case when log_visit.visit_total_actions between 0 and 2 then 1 else 0 end) as vta0',
  748. * 'sum(case when log_visit.visit_total_actions > 2 then 1 else 0 end) as vta1'
  749. * )
  750. * ```
  751. * @api
  752. */
  753. public static function getSelectsFromRangedColumn($column, $ranges, $table, $selectColumnPrefix, $restrictToReturningVisitors = false)
  754. {
  755. $selects = array();
  756. $extraCondition = '';
  757. if ($restrictToReturningVisitors) {
  758. // extra condition for the SQL SELECT that makes sure only returning visits are counted
  759. // when creating the 'days since last visit' report
  760. $extraCondition = 'and log_visit.visitor_returning = 1';
  761. $extraSelect = "sum(case when log_visit.visitor_returning = 0 then 1 else 0 end) "
  762. . " as `" . $selectColumnPrefix . 'General_NewVisits' . "`";
  763. $selects[] = $extraSelect;
  764. }
  765. foreach ($ranges as $gap) {
  766. if (count($gap) == 2) {
  767. $lowerBound = $gap[0];
  768. $upperBound = $gap[1];
  769. $selectAs = "$selectColumnPrefix$lowerBound-$upperBound";
  770. $selects[] = "sum(case when $table.$column between $lowerBound and $upperBound $extraCondition" .
  771. " then 1 else 0 end) as `$selectAs`";
  772. } else {
  773. $lowerBound = $gap[0];
  774. $selectAs = $selectColumnPrefix . ($lowerBound + 1) . urlencode('+');
  775. $selects[] = "sum(case when $table.$column > $lowerBound $extraCondition then 1 else 0 end) as `$selectAs`";
  776. }
  777. }
  778. return $selects;
  779. }
  780. /**
  781. * Clean up the row data and return values.
  782. * $lookForThisPrefix can be used to make sure only SOME of the data in $row is used.
  783. *
  784. * The array will have one column $columnName
  785. *
  786. * @param $row
  787. * @param $columnName
  788. * @param bool $lookForThisPrefix A string that identifies which elements of $row to use
  789. * in the result. Every key of $row that starts with this
  790. * value is used.
  791. * @return array
  792. */
  793. static public function makeArrayOneColumn($row, $columnName, $lookForThisPrefix = false)
  794. {
  795. $cleanRow = array();
  796. foreach ($row as $label => $count) {
  797. if (empty($lookForThisPrefix)
  798. || strpos($label, $lookForThisPrefix) === 0
  799. ) {
  800. $cleanLabel = substr($label, strlen($lookForThisPrefix));
  801. $cleanRow[$cleanLabel] = array($columnName => $count);
  802. }
  803. }
  804. return $cleanRow;
  805. }
  806. public function getDb()
  807. {
  808. return Db::get();
  809. }
  810. }