Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support alarm convergence to deal with duplicate alarm info #1033

Merged
merged 6 commits into from
Jun 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
import com.googlecode.aviator.exception.CompileExpressionErrorException;
import com.googlecode.aviator.exception.ExpressionRuntimeException;
import com.googlecode.aviator.exception.ExpressionSyntaxErrorException;
import org.dromara.hertzbeat.alert.AlerterProperties;
import org.dromara.hertzbeat.alert.AlerterWorkerPool;
import org.dromara.hertzbeat.alert.reduce.AlarmCommonReduce;
import org.dromara.hertzbeat.common.queue.CommonDataQueue;
import org.dromara.hertzbeat.alert.dao.AlertMonitorDao;
import org.dromara.hertzbeat.common.entity.alerter.Alert;
Expand Down Expand Up @@ -59,24 +59,20 @@ public class CalculateAlarm {
* key - monitorId 为监控状态可用性可达性告警 | Indicates the monitoring status availability reachability alarm
*/
public Map<String, Alert> triggeredAlertMap;

public Set<Long> unAvailableMonitors;

private final AlerterWorkerPool workerPool;
private final CommonDataQueue dataQueue;
private final AlertDefineService alertDefineService;
private final AlerterProperties alerterProperties;
private final SilenceAlarm silenceAlarm;
private final AlarmCommonReduce alarmCommonReduce;
private final ResourceBundle bundle;

public CalculateAlarm (AlerterWorkerPool workerPool, CommonDataQueue dataQueue, SilenceAlarm silenceAlarm,
public CalculateAlarm (AlerterWorkerPool workerPool, CommonDataQueue dataQueue,
AlertDefineService alertDefineService, AlertMonitorDao monitorDao,
AlerterProperties alerterProperties) {
AlarmCommonReduce alarmCommonReduce) {
this.workerPool = workerPool;
this.dataQueue = dataQueue;
this.silenceAlarm = silenceAlarm;
this.alarmCommonReduce = alarmCommonReduce;
this.alertDefineService = alertDefineService;
this.alerterProperties = alerterProperties;
this.bundle = ResourceBundleUtil.getBundle("alerter");
this.triggeredAlertMap = new ConcurrentHashMap<>(128);
this.unAvailableMonitors = Collections.synchronizedSet(new HashSet<>(16));
Expand Down Expand Up @@ -172,13 +168,14 @@ private void calculate(CollectRep.MetricsData metricsData) {
String monitorAlertKey = String.valueOf(monitorId) + define.getId();
Alert triggeredAlert = triggeredAlertMap.get(monitorAlertKey);
if (triggeredAlert != null) {
int times = triggeredAlert.getTimes() + 1;
triggeredAlert.setTimes(times);
triggeredAlert.setLastTriggerTime(currentTimeMilli);
int times = triggeredAlert.getTriggerTimes() + 1;
triggeredAlert.setTriggerTimes(times);
triggeredAlert.setFirstAlarmTime(currentTimeMilli);
triggeredAlert.setLastAlarmTime(currentTimeMilli);
int defineTimes = define.getTimes() == null ? 1 : define.getTimes();
if (times >= defineTimes) {
triggeredAlertMap.remove(monitorAlertKey);
silenceAlarm.filterSilenceAndSendData(triggeredAlert);
alarmCommonReduce.reduceAndSendAlarm(triggeredAlert);
}
} else {
fieldValueMap.put("app", app);
Expand All @@ -193,16 +190,16 @@ private void calculate(CollectRep.MetricsData metricsData) {
.priority(define.getPriority())
.status(CommonConstants.ALERT_STATUS_CODE_PENDING)
.target(app + "." + metrics + "." + define.getField())
.times(1)
.firstTriggerTime(currentTimeMilli)
.lastTriggerTime(currentTimeMilli)
.triggerTimes(1)
.firstAlarmTime(currentTimeMilli)
.lastAlarmTime(currentTimeMilli)
// Keyword matching and substitution in the template
// 模板中关键字匹配替换
.content(AlertTemplateUtil.render(define.getTemplate(), fieldValueMap))
.build();
int defineTimes = define.getTimes() == null ? 1 : define.getTimes();
if (1 >= defineTimes) {
silenceAlarm.filterSilenceAndSendData(alert);
alarmCommonReduce.reduceAndSendAlarm(alert);
} else {
triggeredAlertMap.put(monitorAlertKey, alert);
}
Expand Down Expand Up @@ -261,11 +258,11 @@ private void handlerAvailableMetrics(long monitorId, String app, String metrics,
.content(content)
.priority(CommonConstants.ALERT_PRIORITY_CODE_WARNING)
.status(CommonConstants.ALERT_STATUS_CODE_RESTORED)
.firstTriggerTime(currentTimeMilli)
.lastTriggerTime(currentTimeMilli)
.times(1)
.firstAlarmTime(currentTimeMilli)
.lastAlarmTime(currentTimeMilli)
.triggerTimes(1)
.build();
silenceAlarm.filterSilenceAndSendData(resumeAlert);
alarmCommonReduce.reduceAndSendAlarm(resumeAlert);
}
}
}
Expand All @@ -291,30 +288,30 @@ private void handlerMonitorAvailableAlert(long monitorId, String app, CollectRep
.status(CommonConstants.ALERT_STATUS_CODE_PENDING)
.target(CommonConstants.AVAILABILITY)
.content(AlertTemplateUtil.render(avaAlertDefine.getTemplate(), valueMap))
.firstTriggerTime(currentTimeMill)
.lastTriggerTime(currentTimeMill)
.nextEvalInterval(alerterProperties.getAlertEvalIntervalBase())
.times(1);
.firstAlarmTime(currentTimeMill)
.lastAlarmTime(currentTimeMill)
.triggerTimes(1);
if (avaAlertDefine.getTimes() == null || avaAlertDefine.getTimes() <= 1) {
silenceAlarm.filterSilenceAndSendData(alertBuilder.build().clone());
alarmCommonReduce.reduceAndSendAlarm(alertBuilder.build().clone());
unAvailableMonitors.add(monitorId);
} else {
alertBuilder.status(CommonConstants.ALERT_STATUS_CODE_NOT_REACH);
}
triggeredAlertMap.put(String.valueOf(monitorId), alertBuilder.build());
} else {
int times = preAlert.getTimes() + 1;
int times = preAlert.getTriggerTimes() + 1;
if (preAlert.getStatus() == CommonConstants.ALERT_STATUS_CODE_PENDING) {
times = 1;
preAlert.setContent(AlertTemplateUtil.render(avaAlertDefine.getTemplate(), valueMap));
preAlert.setTags(tags);
}
preAlert.setTimes(times);
preAlert.setLastTriggerTime(currentTimeMill);
preAlert.setTriggerTimes(times);
preAlert.setFirstAlarmTime(currentTimeMill);
preAlert.setLastAlarmTime(currentTimeMill);
int defineTimes = avaAlertDefine.getTimes() == null ? 1 : avaAlertDefine.getTimes();
if (times >= defineTimes) {
preAlert.setStatus(CommonConstants.ALERT_STATUS_CODE_PENDING);
silenceAlarm.filterSilenceAndSendData(preAlert);
alarmCommonReduce.reduceAndSendAlarm(preAlert);
unAvailableMonitors.add(monitorId);
} else {
preAlert.setStatus(CommonConstants.ALERT_STATUS_CODE_NOT_REACH);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.dromara.hertzbeat.alert.controller;

import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import org.dromara.hertzbeat.alert.service.AlertConvergeService;
import org.dromara.hertzbeat.common.entity.alerter.AlertConverge;
import org.dromara.hertzbeat.common.entity.dto.Message;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;

import javax.validation.Valid;

import static org.dromara.hertzbeat.common.constants.CommonConstants.MONITOR_NOT_EXIST_CODE;
import static org.springframework.http.MediaType.APPLICATION_JSON_VALUE;

/**
 * REST API for managing a single alarm converge rule.
 * Exposes create, modify and query-by-ID operations under {@code /api/alert/converge}.
 *
 * @author tom
 */
@Tag(name = "Alert Converge API | 告警收敛管理API")
@RestController
@RequestMapping(path = "/api/alert/converge", produces = {APPLICATION_JSON_VALUE})
public class AlertConvergeController {

    @Autowired
    private AlertConvergeService alertConvergeService;

    /**
     * Validates the submitted converge rule and then stores it as a new one.
     *
     * @param alertConverge converge rule taken from the request body
     * @return a response whose message is {@code "Add success"}
     */
    @PostMapping
    @Operation(summary = "New Alarm Converge | 新增告警收敛", description = "Added an alarm Converge | 新增一个告警收敛")
    public ResponseEntity<Message<Void>> addNewAlertConverge(@Valid @RequestBody AlertConverge alertConverge) {
        // NOTE(review): the boolean presumably flags "is modify" — confirm against AlertConvergeService#validate
        alertConvergeService.validate(alertConverge, false);
        alertConvergeService.addAlertConverge(alertConverge);
        Message<Void> added = new Message<>("Add success");
        return ResponseEntity.ok(added);
    }

    /**
     * Validates the submitted converge rule and then applies it as a modification.
     *
     * @param alertConverge converge rule taken from the request body
     * @return a response whose message is {@code "Modify success"}
     */
    @PutMapping
    @Operation(summary = "Modifying an Alarm Converge | 修改告警收敛", description = "Modify an existing alarm Converge | 修改一个已存在告警收敛")
    public ResponseEntity<Message<Void>> modifyAlertConverge(@Valid @RequestBody AlertConverge alertConverge) {
        alertConvergeService.validate(alertConverge, true);
        alertConvergeService.modifyAlertConverge(alertConverge);
        Message<Void> modified = new Message<>("Modify success");
        return ResponseEntity.ok(modified);
    }

    /**
     * Looks up one converge rule by its ID.
     *
     * @param id converge rule ID taken from the path
     * @return a response carrying the rule as data, or, when the service returns {@code null},
     *         a message with code {@code MONITOR_NOT_EXIST_CODE} and an explanatory text
     */
    @GetMapping(path = "/{id}")
    @Operation(summary = "Querying Alarm Converge | 查询告警收敛",
            description = "You can obtain alarm Converge information based on the alarm Converge ID | 根据告警收敛ID获取告警收敛信息")
    public ResponseEntity<Message<AlertConverge>> getAlertConverge(
            @Parameter(description = "Alarm Converge ID | 告警收敛ID", example = "6565463543") @PathVariable("id") long id) {
        AlertConverge found = alertConvergeService.getAlertConverge(id);
        Message.MessageBuilder<AlertConverge> reply = Message.builder();
        // The HTTP status is 200 in both cases; absence is signalled through the message code.
        Message<AlertConverge> body = (found != null)
                ? reply.data(found).build()
                : reply.code(MONITOR_NOT_EXIST_CODE).msg("AlertConverge not exist.").build();
        return ResponseEntity.ok(body);
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.dromara.hertzbeat.alert.controller;

import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import org.dromara.hertzbeat.alert.service.AlertConvergeService;
import org.dromara.hertzbeat.common.entity.alerter.AlertConverge;
import org.dromara.hertzbeat.common.entity.dto.Message;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Sort;
import org.springframework.data.jpa.domain.Specification;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;

import javax.persistence.criteria.CriteriaBuilder;
import javax.persistence.criteria.Predicate;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;

import static org.springframework.http.MediaType.APPLICATION_JSON_VALUE;

/**
 * Batch REST API for alarm converge rules.
 * Exposes paged listing and bulk deletion under {@code /api/alert/converges}.
 *
 * @author tom
 */
@Tag(name = "Alert Converge Batch API | 告警收敛管理API")
@RestController
@RequestMapping(path = "/api/alert/converges", produces = {APPLICATION_JSON_VALUE})
public class AlertConvergesController {

    @Autowired
    private AlertConvergeService alertConvergeService;

    /**
     * Returns one page of converge rules, optionally restricted to the given IDs.
     *
     * @param ids       optional ID filter; when null or empty no ID restriction is applied
     * @param sort      name of the property to sort by
     * @param order     sort direction, parsed with {@code Sort.Direction.fromString}
     * @param pageIndex index of the requested page
     * @param pageSize  number of entries per page
     * @return a response wrapping the page returned by the service
     */
    @GetMapping
    @Operation(summary = "Query the alarm converge list | 查询告警收敛列表",
            description = "You can obtain the list of alarm converge by querying filter items | 根据查询过滤项获取告警收敛信息列表")
    public ResponseEntity<Message<Page<AlertConverge>>> getAlertConverges(
            @Parameter(description = "Alarm Converge ID | 告警收敛ID", example = "6565463543") @RequestParam(required = false) List<Long> ids,
            @Parameter(description = "Sort field, default id | 排序字段,默认id", example = "id") @RequestParam(defaultValue = "id") String sort,
            @Parameter(description = "Sort mode: asc: ascending, desc: descending | 排序方式,asc:升序,desc:降序", example = "desc") @RequestParam(defaultValue = "desc") String order,
            @Parameter(description = "List current page | 列表当前分页", example = "0") @RequestParam(defaultValue = "0") int pageIndex,
            @Parameter(description = "Number of list pages | 列表分页数量", example = "8") @RequestParam(defaultValue = "8") int pageSize) {

        Specification<AlertConverge> idFilter = buildIdFilter(ids);
        Sort.Direction direction = Sort.Direction.fromString(order);
        Sort.Order sortOrder = new Sort.Order(direction, sort);
        PageRequest paging = PageRequest.of(pageIndex, pageSize, Sort.by(sortOrder));
        Page<AlertConverge> page = alertConvergeService.getAlertConverges(idFilter, paging);
        return ResponseEntity.ok(new Message<>(page));
    }

    /**
     * Deletes the converge rules with the given IDs.
     * The service is only called when at least one ID is supplied.
     *
     * @param ids IDs of the converge rules to delete; may be null or empty
     * @return a response wrapping a default-constructed message
     */
    @DeleteMapping
    @Operation(summary = "Delete alarm converge in batches | 批量删除告警收敛",
            description = "Delete alarm converge in batches based on the alarm converge ID list | 根据告警收敛ID列表批量删除告警收敛")
    public ResponseEntity<Message<Void>> deleteAlertDefines(
            @Parameter(description = "Alarm Converge IDs | 告警收敛IDs", example = "6565463543") @RequestParam(required = false) List<Long> ids
    ) {
        boolean nothingToDelete = ids == null || ids.isEmpty();
        if (!nothingToDelete) {
            // Duplicated IDs in the request are collapsed by the set.
            alertConvergeService.deleteAlertConverges(new HashSet<>(ids));
        }
        return ResponseEntity.ok(new Message<Void>());
    }

    /**
     * Builds the query specification used by the list endpoint: an AND over the collected
     * predicates, which is a single {@code id IN (...)} predicate when IDs are supplied and
     * an empty conjunction otherwise.
     */
    private static Specification<AlertConverge> buildIdFilter(List<Long> ids) {
        return (root, query, cb) -> {
            List<Predicate> conditions = new ArrayList<>();
            boolean hasIds = ids != null && !ids.isEmpty();
            if (hasIds) {
                CriteriaBuilder.In<Long> idIn = cb.in(root.get("id"));
                for (long id : ids) {
                    idIn.value(id);
                }
                conditions.add(idIn);
            }
            return cb.and(conditions.toArray(new Predicate[0]));
        };
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.dromara.hertzbeat.alert.dao;

import org.dromara.hertzbeat.common.entity.alerter.AlertConverge;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.JpaSpecificationExecutor;
import org.springframework.data.jpa.repository.Modifying;

import java.util.Set;

/**
 * Database access interface for {@link AlertConverge} entities.
 * Extends {@link JpaRepository} and {@link JpaSpecificationExecutor}, keyed by a {@code Long} ID.
 *
 * @author tom
 */
public interface AlertConvergeDao extends JpaRepository<AlertConverge, Long>, JpaSpecificationExecutor<AlertConverge> {

/**
 * Delete alarm converge entries based on the ID list.
 * NOTE(review): declared as a derived query with no explicit {@code @Query}; presumably callers
 * run it inside a transaction — confirm against the service layer.
 *
 * @param convergeIds alert converge id list
 */
@Modifying
void deleteAlertConvergesByIdIn(Set<Long> convergeIds);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package org.dromara.hertzbeat.alert.reduce;

import lombok.RequiredArgsConstructor;
import org.dromara.hertzbeat.common.entity.alerter.Alert;
import org.dromara.hertzbeat.common.queue.CommonDataQueue;
import org.springframework.stereotype.Service;

/**
 * Common entry point for alarm reduction: runs an alert through the converge
 * filter and then the silence filter, and puts it on the data queue only when
 * both filters return {@code true}.
 *
 * @author tom
 */
@Service
@RequiredArgsConstructor
public class AlarmCommonReduce {

    private final AlarmSilenceReduce alarmSilenceReduce;

    private final AlarmConvergeReduce alarmConvergeReduce;

    private final CommonDataQueue dataQueue;

    /**
     * Resets the alert's {@code times} to 1, applies the reduce filters and sends
     * the alert to the data queue if neither filter rejects it.
     *
     * @param alert the alert to reduce and, when it passes, to send
     */
    public void reduceAndSendAlarm(Alert alert) {
        alert.setTimes(1);
        // Order matters: converge first; silence is consulted only when converge passes.
        boolean passedConverge = alarmConvergeReduce.filterConverge(alert);
        if (!passedConverge) {
            return;
        }
        boolean passedSilence = alarmSilenceReduce.filterSilence(alert);
        if (!passedSilence) {
            return;
        }
        dataQueue.sendAlertsData(alert);
    }

}
Loading