github-actions[bot] committed on
Commit
5abc469
·
1 Parent(s): 8cd8047

Sync turing folder from GitHub

Browse files
turing/CLI_runner/verify_drift_detection.py CHANGED
@@ -57,7 +57,7 @@ def load_training_data(dataset_name: str, language: str):
57
  return X_train, y_train
58
 
59
 
60
- def print_drift_report(drift_results: dict, drift_type: str, report_lines: list = None):
61
  """
62
  Format and display drift detection results for a specific drift type.
63
 
@@ -218,6 +218,8 @@ def verify(
218
  """
219
  Verify drift detection on best model's training dataset.
220
  """
 
 
221
  logger.info("Starting drift detection verification...")
222
  logger.info("Configuration:")
223
  logger.info(f" Language: {language}")
@@ -225,7 +227,29 @@ def verify(
225
  logger.info(f" Alert threshold: {config.DRIFT_ALERT_THRESHOLD}")
226
  logger.info(f" Baseline cache: {config.BASELINE_CACHE_DIR}")
227
 
228
- dagshub.init(repo_owner=repo_owner, repo_name=repo_name, mlflow=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
 
230
  logger.info(f"\n[1/6] Searching for best model for {language}...")
231
  best_model_info = get_best_model_by_tag(language=language)
@@ -250,6 +274,7 @@ def verify(
250
  logger.info("\n[3/6] Loading training data...")
251
  try:
252
  X_train, y_train = load_training_data(dataset_name, language)
 
253
  except Exception as e:
254
  logger.error(f"Failed to load training data: {e}")
255
  return
 
57
  return X_train, y_train
58
 
59
 
60
+ def print_drift_report(drift_results: dict, drift_type: str, report_lines: list | None = None):
61
  """
62
  Format and display drift detection results for a specific drift type.
63
 
 
218
  """
219
  Verify drift detection on best model's training dataset.
220
  """
221
+ import os
222
+
223
  logger.info("Starting drift detection verification...")
224
  logger.info("Configuration:")
225
  logger.info(f" Language: {language}")
 
227
  logger.info(f" Alert threshold: {config.DRIFT_ALERT_THRESHOLD}")
228
  logger.info(f" Baseline cache: {config.BASELINE_CACHE_DIR}")
229
 
230
+ # Setup DagsHub credentials from environment variables
231
+ dagshub_username = os.getenv("DAGSHUB_USERNAME")
232
+ dagshub_token = os.getenv("DAGSHUB_TOKEN")
233
+ mlflow_uri = os.getenv("MLFLOW_TRACKING_URI")
234
+ is_ci_environment = os.getenv("CI") or os.getenv("GITHUB_ACTIONS")
235
+
236
+ if dagshub_username and dagshub_token and mlflow_uri:
237
+ # Use environment credentials for non-interactive mode (GitHub Actions)
238
+ logger.info("Using DagsHub credentials from environment variables")
239
+ os.environ["MLFLOW_TRACKING_USERNAME"] = dagshub_username
240
+ os.environ["MLFLOW_TRACKING_PASSWORD"] = dagshub_token
241
+ # Don't call dagshub.init() - credentials are already set via environment
242
+ logger.info("Skipping dagshub.init() - using environment credentials directly")
243
+ elif is_ci_environment:
244
+ # In CI without credentials, skip OAuth and log warning
245
+ logger.warning("CI environment detected but credentials not found. Proceeding without dagshub.init()")
246
+ else:
247
+ # Interactive mode - try to initialize with OAuth
248
+ logger.info("Initializing DagsHub interactively")
249
+ try:
250
+ dagshub.init(repo_owner=repo_owner, repo_name=repo_name, mlflow=True)
251
+ except Exception as e:
252
+ logger.warning(f"DagsHub initialization failed: {e}")
253
 
254
  logger.info(f"\n[1/6] Searching for best model for {language}...")
255
  best_model_info = get_best_model_by_tag(language=language)
 
274
  logger.info("\n[3/6] Loading training data...")
275
  try:
276
  X_train, y_train = load_training_data(dataset_name, language)
277
+ y_train = np.asarray(y_train) # Ensure y_train is np.ndarray
278
  except Exception as e:
279
  logger.error(f"Failed to load training data: {e}")
280
  return
turing/config.py CHANGED
@@ -4,9 +4,13 @@ from pathlib import Path
4
  import sys
5
 
6
  from dotenv import load_dotenv
7
- from logtail import LogtailHandler
8
  from loguru import logger
9
 
 
 
 
 
 
10
  # Load environment variables from .env file if it exists
11
  load_dotenv()
12
 
@@ -120,23 +124,24 @@ except (ModuleNotFoundError, ValueError):
120
 
121
  # setup logging for Better Stack using LogtailHandler
122
  try:
123
- better_stack_handler = LogtailHandler(
124
- source_token=os.getenv("BETTER_STACK_TOKEN"),
125
- host=os.getenv("BETTER_STACK_HOST"),
126
- )
 
127
 
128
- root_logger = logging.getLogger()
129
- root_logger.setLevel(logging.INFO)
130
 
131
- console_handler = logging.StreamHandler(sys.stdout)
132
- console_handler.setLevel(logging.DEBUG)
133
 
134
- better_stack_handler.setLevel(logging.WARNING)
135
 
136
- root_logger.addHandler(console_handler)
137
- root_logger.addHandler(better_stack_handler)
138
 
139
- logging.info("LogtailHandler for Better Stack configured successfully.")
140
 
141
  except Exception as e:
142
  logging.error(f"Failed to configure LogtailHandler: {e}")
 
4
  import sys
5
 
6
  from dotenv import load_dotenv
 
7
  from loguru import logger
8
 
9
+ try:
10
+ from logtail import LogtailHandler
11
+ except ImportError:
12
+ LogtailHandler = None # Logtail not available in this environment
13
+
14
  # Load environment variables from .env file if it exists
15
  load_dotenv()
16
 
 
124
 
125
  # setup logging for Better Stack using LogtailHandler
126
  try:
127
+ if LogtailHandler and os.getenv("BETTER_STACK_TOKEN") and os.getenv("BETTER_STACK_HOST"):
128
+ better_stack_handler = LogtailHandler(
129
+ source_token=os.getenv("BETTER_STACK_TOKEN", ""),
130
+ host=os.getenv("BETTER_STACK_HOST", ""),
131
+ )
132
 
133
+ root_logger = logging.getLogger()
134
+ root_logger.setLevel(logging.INFO)
135
 
136
+ console_handler = logging.StreamHandler(sys.stdout)
137
+ console_handler.setLevel(logging.DEBUG)
138
 
139
+ better_stack_handler.setLevel(logging.WARNING)
140
 
141
+ root_logger.addHandler(console_handler)
142
+ root_logger.addHandler(better_stack_handler)
143
 
144
+ logging.info("LogtailHandler for Better Stack configured successfully.")
145
 
146
  except Exception as e:
147
  logging.error(f"Failed to configure LogtailHandler: {e}")
turing/monitoring/drift_detector.py CHANGED
@@ -18,6 +18,7 @@ try:
18
  from deepchecks.nlp.checks import Drift, TextPropertyDrift
19
  except ImportError:
20
  logger.warning("Deepchecks not installed. Install with: pip install deepchecks[nlp]")
 
21
  Drift = None
22
  TextPropertyDrift = None
23
 
 
18
  from deepchecks.nlp.checks import Drift, TextPropertyDrift
19
  except ImportError:
20
  logger.warning("Deepchecks not installed. Install with: pip install deepchecks[nlp]")
21
+ SingleDataset = None
22
  Drift = None
23
  TextPropertyDrift = None
24
 
turing/monitoring/synthetic_data_generator.py CHANGED
@@ -75,7 +75,7 @@ class SyntheticDataGenerator:
75
  def generate_corrupted_vocabulary(
76
  self,
77
  reference_texts: List[str],
78
- corruption_rate: float = 0.2,
79
  n_samples: int = 100,
80
  ) -> List[str]:
81
  """
 
75
  def generate_corrupted_vocabulary(
76
  self,
77
  reference_texts: List[str],
78
+ corruption_rate: float = 0.5,
79
  n_samples: int = 100,
80
  ) -> List[str]:
81
  """