 d34d7ad763
			
		
	
	d34d7ad763
	
	
	
		
			
			All of these dashboards are the same, and each has mostly copied the same issues from the others. This makes updating anything a massive pain. This implements a single dashboard template with a small script to create individual dashboards for each provider and its regions. I have included a range of fixes. The y-axis format has changed in later versions of Grafana. The API time tracking is no longer scaled; instead we just tell Grafana it is in ms and it displays it correctly. The test nodes history graph is moved to the top, as it is probably the most interesting graph (note this splits itself out per region, if multiple regions are selected). Values for "null as zero" are consistently set. Various formatting fixes for the labels are included. Change-Id: I5fbffaec3c82aa1fce0947f771de67edd15f7dfc
		
			
				
	
	
		
			198 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			198 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
| #
 | |
| # NOTE: EDIT THE TEMPLATE FILE AND RUN create-nodepool.sh
 | |
| #
 | |
| 
 | |
| dashboard:
 | |
|   title: 'Nodepool: Rackspace'
 | |
|   # Template variables; the panels in this file reference this one as $region.
 | |
|   templating:
 | |
|     - name: region
 | |
|       # includeAll/multi: the selector may hold "All" or several values at once.
 | |
|       includeAll: true
 | |
|       multi: true
 | |
|       # Metric path with a trailing wildcard: matches every provider node
 | |
|       # whose name starts with "rax-".
 | |
|       query: stats.gauges.nodepool.provider.rax-*
 | |
|       refresh: true
 | |
|       type: query
 | |
|   rows:
 | |
|     # Static row: one text panel explaining what this dashboard shows and
 | |
|     # where the template that generates it lives.
 | |
|     - title: Description
 | |
|       height: 150px
 | |
|       panels:
 | |
|         - title: Description
 | |
|           # Literal block scalar: everything indented below is panel text,
 | |
|           # so do not put YAML comments inside it.
 | |
|           content: |
 | |
|             Rackspace Nodepool Status
 | |
|             ==========================
 | |
| 
 | |
|             This dashboard monitors the status of the nodepool environment for Rackspace.
 | |
| 
 | |
|             **This dashboard is managed by [Grafyaml](http://docs.openstack.org/infra/system-config/grafyaml.html).**
 | |
|             If you would like to make changes to this dashboard, please see the template in the `grafana` directory in
 | |
|             [project-config](https://git.openstack.org/cgit/openstack-infra/project-config/tree/grafana/nodepool.template).
 | |
| 
 | |
|           type: text
 | |
| 
 | |
|     # Four single-stat panels, one per node state. Each sums the matching
 | |
|     # gauge across the selected $region values and shows the current value.
 | |
|     - title: Nodes
 | |
|       showTitle: true
 | |
|       height: 150px
 | |
|       panels:
 | |
|         - title: Building
 | |
|           span: 3
 | |
|           sparkline:
 | |
|             full: true
 | |
|             show: true
 | |
|           targets:
 | |
|             - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.building)
 | |
|           type: singlestat
 | |
|           valueName: current
 | |
|         - title: Ready
 | |
|           span: 3
 | |
|           sparkline:
 | |
|             full: true
 | |
|             show: true
 | |
|           targets:
 | |
|             - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready)
 | |
|           type: singlestat
 | |
|           valueName: current
 | |
|         - title: In Use
 | |
|           span: 3
 | |
|           sparkline:
 | |
|             full: true
 | |
|             show: true
 | |
|           targets:
 | |
|             - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use)
 | |
|           type: singlestat
 | |
|           valueName: current
 | |
|         - title: Deleting
 | |
|           span: 3
 | |
|           sparkline:
 | |
|             full: true
 | |
|             show: true
 | |
|           targets:
 | |
|             - target: sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting)
 | |
|           type: singlestat
 | |
|           valueName: current
 | |
| 
 | |
|     # Stacked history of node counts by state, with the provider's
 | |
|     # max_servers value drawn as a separate, unstacked series.
 | |
|     - title: Test Nodes
 | |
|       height: 400px
 | |
|       panels:
 | |
|         - title: Test Node History - $region
 | |
|           type: graph
 | |
|           span: 12
 | |
|           stack: true
 | |
|           # Repeat this panel once per selected value of the region variable.
 | |
|           repeat: region
 | |
|           minSpan: 4
 | |
|           tooltip:
 | |
|             value_type: individual
 | |
|           yaxes:
 | |
|             - label: "nodes"
 | |
|             - show: false
 | |
|           targets:
 | |
|             - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.building), 'Building')
 | |
|             - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.ready), 'Available')
 | |
|             - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.in-use), 'In Use')
 | |
|             - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.used), 'Used')
 | |
|             - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.nodes.deleting), 'Deleting')
 | |
|             - target: alias(sumSeries(stats.gauges.nodepool.provider.$region.max_servers), 'Max')
 | |
|           seriesOverrides:
 | |
|             # Keep the 'Max' series out of the stack so it is not added on
 | |
|             # top of the per-state counts.
 | |
|             - alias: Max
 | |
|               stack: false
 | |
| 
 | |
| 
 | |
|     # Launch statistics as bar graphs: successful launches per minute,
 | |
|     # mean time to ready, and failed launches per minute.
 | |
|     - title: Node Launches
 | |
|       showTitle: true
 | |
|       height: 250px
 | |
|       panels:
 | |
|         # Per-region count of 'ready' launch events in 1-minute buckets;
 | |
|         # aliasSub trims each series name down to the region component.
 | |
|         - title: Ready Node Launch Attempts
 | |
|           type: graph
 | |
|           span: 4
 | |
|           lines: false
 | |
|           bars: true
 | |
|           nullPointMode: 'null as zero'
 | |
|           yaxes:
 | |
|             - label: 'events / min'
 | |
|             - show: false
 | |
|           targets:
 | |
|             - target: aliasSub(summarize(stats_counts.nodepool.launch.provider.$region.ready, '1m'), '.*stats_counts.nodepool.launch.provider.(.*).ready.*', '\1')
 | |
|         # Mean of the 'ready' launch timer; the axis is declared as ms.
 | |
|         - title: Time to Ready
 | |
|           type: graph
 | |
|           span: 4
 | |
|           lines: false
 | |
|           bars: true
 | |
|           nullPointMode: 'null as zero'
 | |
|           yaxes:
 | |
|             - label: 'time'
 | |
|               format: ms
 | |
|             - show: false
 | |
|           targets:
 | |
|             - target: aliasByNode(stats.timers.nodepool.launch.provider.$region.ready.mean, 5)
 | |
|         # Every error.* counter summed into one 'All Errors' series,
 | |
|         # in 1-minute buckets.
 | |
|         - title: Error Node Launch Attempts
 | |
|           type: graph
 | |
|           span: 4
 | |
|           lines: false
 | |
|           bars: true
 | |
|           nullPointMode: 'null as zero'
 | |
|           yaxes:
 | |
|             - label: 'events / min'
 | |
|             - show: false
 | |
|           targets:
 | |
|             - target: alias(smartSummarize(sumSeries(stats_counts.nodepool.launch.provider.$region.error.*), '1m'), "All Errors")
 | |
| 
 | |
|     # Mean duration of each provider API task, one line graph per task.
 | |
|     # Values are plotted unscaled; the y-axis is declared as ms.
 | |
|     #
 | |
|     # NOTE(review): every panel here previously plotted ComputePostServers.
 | |
|     # The other task names below are assumed to follow the same
 | |
|     # Compute<Verb><Resource> pattern - confirm each exists under
 | |
|     # stats.timers.nodepool.task.<provider> in Graphite.
 | |
|     - title: API Operations
 | |
|       showTitle: true
 | |
|       height: 250px
 | |
|       panels:
 | |
|         - title: Create Server
 | |
|           type: graph
 | |
|           lines: true
 | |
|           nullPointMode: connected
 | |
|           span: 4
 | |
|           yaxes:
 | |
|             - format: ms
 | |
|               label: Time
 | |
|             - show: false
 | |
|           targets:
 | |
|             - target: aliasByNode(stats.timers.nodepool.task.$region.ComputePostServers.mean, 4)
 | |
|         - title: Get Server
 | |
|           type: graph
 | |
|           lines: true
 | |
|           nullPointMode: connected
 | |
|           span: 4
 | |
|           yaxes:
 | |
|             - format: ms
 | |
|               label: Time
 | |
|             - show: false
 | |
|           targets:
 | |
|             - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServers.mean, 4)
 | |
|         - title: Delete Server
 | |
|           type: graph
 | |
|           lines: true
 | |
|           nullPointMode: connected
 | |
|           span: 4
 | |
|           yaxes:
 | |
|             - format: ms
 | |
|               label: Time
 | |
|             - show: false
 | |
|           targets:
 | |
|             - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeDeleteServers.mean, 4)
 | |
|         - title: List Servers
 | |
|           type: graph
 | |
|           lines: true
 | |
|           nullPointMode: connected
 | |
|           span: 4
 | |
|           yaxes:
 | |
|             - format: ms
 | |
|               label: Time
 | |
|             - show: false
 | |
|           targets:
 | |
|             - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetServersDetail.mean, 4)
 | |
|         - title: Get Limits
 | |
|           type: graph
 | |
|           lines: true
 | |
|           nullPointMode: connected
 | |
|           span: 4
 | |
|           yaxes:
 | |
|             - format: ms
 | |
|               label: Time
 | |
|             - show: false
 | |
|           targets:
 | |
|             - target: aliasByNode(stats.timers.nodepool.task.$region.ComputeGetLimits.mean, 4)
 |